RetroBASIC
Basicprogramming(.org) => Code and examples => Topic started by: Galileo on December 05, 2017, 05:49:27 PM
-
Hello, everybody.
Solution in Yabasic to the problem "Anagrams" of the website "Rosetta code". On my computer it takes about 92 seconds (Windows 7 64 bit, AMD 8 cores 2.8 GHz).
REM Anagrams, by Galileo 2017/12
REM See the problem statement in https://rosettacode.org/wiki/Anagrams
filename$ = "unixdict.txt"
maxw = 0 : c = 0 : dimens(c)
i = 0
dim p(100)
if (not open(1,filename$)) error "Could not open '"+filename$+"' for reading"
print "Be patient, please ...\n"
while(not eof(1))
line input #1 a$
c = c + 1
p$(c) = a$
po$(c) = sort$(lower$(a$))
count = 0
head = 0
insert(1)
if not(mod(c, 10)) dimens(c)
wend
for n = 1 to i
nw = p(n)
repeat
print p$(nw)," ";
nw = d(nw,2)
until(not nw)
print "\n"
next n
print time$
sub sort$(a$)
local n, i, t$, c1$, c2$
for n = 1 to len(a$) - 1
for i = n + 1 to len(a$)
c1$ = mid$(a$, n, 1) : c2$ = mid$(a$, i, 1)
if c1$ > c2$ then
t$ = c1$
c1$ = c2$
c2$ = t$
mid$(a$, n, 1) = c1$ : mid$(a$, i, 1) = c2$
end if
next i
next n
return a$
end sub
sub dimens(c)
redim p$(c + 10)
redim po$(c + 10)
redim d(c + 10, 3)
end sub
sub insert(j)
local p
if po$(c) < po$(j) then
p = 1
elseif po$(c) = po$(j) then
p = 2
if count = 0 head = j
count = count + 1
if count > maxw then
i = 1
p(i) = head
maxw = count
elseif count = maxw then
i = i + 1
p(i) = head
end if
else
p = 3
end if
if d(j,p) then
insert(d(j,p))
else
d(j,p) = c
end if
end sub
-
Hmm...
Choosing the right tools for the right tasks:
Freestyle BASIC Script Language#APPTYPE CONSOLE
DIM gtc = GetTickCount()
Anagram()
PRINT "Done in ", (GetTickCount() - gtc) / 1000, " seconds"
PAUSE
DYNC Anagram()
#include <windows.h>
#include <stdio.h>
char* sortedWord(const char* word, char* wbuf)
{
char* p1, *p2, *endwrd;
char t;
int swaps;
strcpy(wbuf, word);
endwrd = wbuf + strlen(wbuf);
do {
swaps = 0;
p1 = wbuf; p2 = endwrd - 1;
while (p1 < p2) {
if (*p2 >* p1) {
t = *p2; *p2 = *p1; *p1 = t;
swaps = 1;
}
p1++; p2--;
}
p1 = wbuf; p2 = p1 + 1;
while (p2 < endwrd) {
if (*p2 >* p1) {
t = *p2; *p2 = *p1; *p1 = t;
swaps = 1;
}
p1++; p2++;
}
} while (swaps);
return wbuf;
}
static short cxmap[] = {
0x06, 0x1f, 0x4d, 0x0c, 0x5c, 0x28, 0x5d, 0x0e, 0x09, 0x33, 0x31, 0x56,
0x52, 0x19, 0x29, 0x53, 0x32, 0x48, 0x35, 0x55, 0x5e, 0x14, 0x27, 0x24,
0x02, 0x3e, 0x18, 0x4a, 0x3f, 0x4c, 0x45, 0x30, 0x08, 0x2c, 0x1a, 0x03,
0x0b, 0x0d, 0x4f, 0x07, 0x20, 0x1d, 0x51, 0x3b, 0x11, 0x58, 0x00, 0x49,
0x15, 0x2d, 0x41, 0x17, 0x5f, 0x39, 0x16, 0x42, 0x37, 0x22, 0x1c, 0x0f,
0x43, 0x5b, 0x46, 0x4b, 0x0a, 0x26, 0x2e, 0x40, 0x12, 0x21, 0x3c, 0x36,
0x38, 0x1e, 0x01, 0x1b, 0x05, 0x4e, 0x44, 0x3d, 0x04, 0x10, 0x5a, 0x2a,
0x23, 0x34, 0x25, 0x2f, 0x2b, 0x50, 0x3a, 0x54, 0x47, 0x59, 0x13, 0x57,
};
#define CXMAP_SIZE (sizeof(cxmap) / sizeof(short))
int Str_Hash(const char* key, int ix_max)
{
const char* cp;
short mash;
int hash = 33501551;
for (cp = key; *cp; cp++) {
mash = cxmap[*cp % CXMAP_SIZE];
hash = (hash >>4) ^ 0x5C5CF5C ^ ((hash << 1) + (mash << 5));
hash &= 0x3FFFFFFF;
}
return hash % ix_max;
}
typedef struct sDictWord* DictWord;
struct sDictWord {
const char* word;
DictWord next;
};
typedef struct sHashEntry* HashEntry;
struct sHashEntry {
const char* key;
HashEntry next;
DictWord words;
HashEntry link;
short wordCount;
};
#define HT_SIZE 8192
HashEntry hashTable[HT_SIZE];
HashEntry mostPerms = NULL;
int buildAnagrams(FILE* fin)
{
char buffer[40];
char bufr2[40];
char* hkey;
int hix;
HashEntry he, *hep;
DictWord we;
int maxPC = 2;
int numWords = 0;
while (fgets(buffer, 40, fin)) {
for (hkey = buffer; *hkey && (*hkey != '\n'); hkey++);
*hkey = 0;
hkey = sortedWord(buffer, bufr2);
hix = Str_Hash(hkey, HT_SIZE);
he = hashTable[hix]; hep = &hashTable[hix];
while (he && strcmp(he->key, hkey)) {
hep = &he->next;
he = he->next;
}
if (! he) {
he = (HashEntry)malloc(sizeof(struct sHashEntry));
he->next = NULL;
he->key = strdup(hkey);
he->wordCount = 0;
he->words = NULL;
he->link = NULL;
*hep = he;
}
we = (DictWord)malloc(sizeof(struct sDictWord));
we->word = strdup(buffer);
we->next = he->words;
he->words = we;
he->wordCount++;
if (maxPC < he->wordCount) {
maxPC = he->wordCount;
mostPerms = he;
he->link = NULL;
}
else if (maxPC == he->wordCount) {
he->link = mostPerms;
mostPerms = he;
}
numWords++;
}
printf("%d words in dictionary max ana=%d\n", numWords, maxPC);
return maxPC;
}
void main()
{
HashEntry he;
DictWord we;
FILE* f1;
f1 = fopen("unixdict.txt", "r");
buildAnagrams(f1);
fclose(f1);
f1 = fopen("anaout.txt", "w");
for (he = mostPerms; he; he = he->link) {
fprintf(f1, "%d: ", he->wordCount);
for (we = he->words; we; we = we->next) {
fprintf(f1, "%s, ", we->word);
}
fprintf(f1, "\n");
}
fclose(f1);
}
END DYNC
Anaout.txt:
5: vile, veil, live, levi, evil,
5: trace, crate, cater, carte, caret,
5: regal, large, lager, glare, alger,
5: neal, lena, lean, lane, elan,
5: lange, glean, galen, angle, angel,
5: elba, bela, bale, able, abel,
-
Anagrams intoxicate excitation. ;)
-
when looking at the codes in Rosetta code, the thing that impresses me the most is brevity and clarity, in this case I nominate Ruby as the winner http://rosettacode.org/wiki/Anagrams#Ruby
-
QB64
_TITLE "Searching for large sets of Anagrams with same characters, loading data..."
DIM w$(30000, 1): DIM SHARED er$: er$ = STR$(999999999)
index = 0: t = TIMER
OPEN "unixdict.txt" FOR INPUT AS #1
WHILE NOT (EOF(1))
INPUT #1, wd$
IF LEN(wd$) > 2 THEN index = index + 1: w$(index, 0) = wd$: w$(index, 1) = order$(wd$)
WEND
CLOSE #1
FOR i = 1 TO index - 1
b$ = w$(i, 0): anaFlag = 0
FOR j = i + 1 TO index
IF w$(i, 1) = w$(j, 1) THEN b$ = b$ + ", " + w$(j, 0): anaFlag = anaFlag + 1
NEXT
IF anaFlag > 3 THEN PRINT b$
NEXT
PRINT "Done !!! "; TIMER - t
FUNCTION order$ (word$)
DIM a(26)
FOR i = 1 TO LEN(word$)
ac = ASC(MID$(word$, i, 1)) - 96
IF 0 < ac AND ac < 27 THEN a(ac) = a(ac) + 1 ELSE flag = 1
NEXT
b$ = ""
FOR i = 1 TO 26: b$ = b$ + LTRIM$(STR$(a(i))): NEXT
IF flag THEN er$ = STR$(VAL(er$) - 1)
IF flag <> 1 THEN order$ = b$ ELSE order$ = er$
END FUNCTION
-
Oh I see where the sorting helps!
_TITLE "Searching for large sets of Anagrams with same characters, loading data..."
DIM SHARED w$(30000): DIM SHARED er$: er$ = "999"
index = 0: t = TIMER
OPEN "unixdict.txt" FOR INPUT AS #1
WHILE NOT (EOF(1))
INPUT #1, wd$
IF LEN(wd$) > 2 THEN index = index + 1: w$(index) = order$(wd$) + "!" + wd$
WEND
CLOSE #1
QSort 0, index
FOR i = 1 TO index - 1
IF first$(w$(i)) = first$(w$(i + 1)) AND flag THEN b$ = b$ + ", " + w2$(w$(i + 1)): cnt = cnt + 1
IF first$(w$(i)) = first$(w$(i + 1)) AND flag = 0 THEN b$ = w2$(w$(i)) + ", " + w2$(w$(i + 1)): cnt = 2: flag = -1
IF first$(w$(i)) <> first$(w$(i + 1)) THEN
IF cnt > 4 THEN PRINT b$
cnt = 0: b$ = "": flag = 0
END IF
NEXT
PRINT "Done !!! "; TIMER - t
FUNCTION order$ (word$)
DIM a(26)
FOR i = 1 TO LEN(word$)
ac = ASC(MID$(word$, i, 1)) - 96
IF 0 < ac AND ac < 27 THEN a(ac) = a(ac) + 1 ELSE flag = 1
NEXT
b$ = ""
FOR i = 1 TO 26: b$ = b$ + LTRIM$(STR$(a(i))): NEXT
IF flag THEN er$ = STR$(VAL(er$) - 1)
IF flag <> 1 THEN order$ = b$ ELSE order$ = er$
END FUNCTION
SUB QSort (Start, Finish)
i = Start: j = Finish: x$ = w$(INT((i + j) / 2))
WHILE i <= j
WHILE w$(i) < x$: i = i + 1: WEND
WHILE w$(j) > x$: j = j - 1: WEND
IF i <= j THEN
a$ = w$(i): w$(i) = w$(j): w$(j) = a$
i = i + 1: j = j - 1
END IF
WEND
IF j > Start THEN QSort Start, j
IF i < Finish THEN QSort i, Finish
END SUB
FUNCTION first$ (wd$)
first$ = MID$(wd$, 1, INSTR(wd$, "!") - 1)
END FUNCTION
FUNCTION w2$ (wd$)
w2 = MID$(wd$, INSTR(wd$, "!") + 1)
END FUNCTION
-
... I nominate Ruby as the winner
By common rules of the game, RosettaCode discourages the use of specialized libraries for solving the challenges and withdraws the offending code mercilessly from the race. The competing languages are supposed to be using those features and capabilities only that are integral to the respective language as provided originally in its engine by its developer(s).
On closer examination of submissions that claim "brevity and clarity", you'll notice that almost all of them (your "winner" language included) are outright cheats built around "uses", "using", "import", "imports", etc. of 3rd party modules such as regexp, hashing, dictionaries, sorting algos and the like. The "shorter" and "clearer" the code, the more severe the offense.
It's only a matter of time before someone notices a particular cheat and gets sufficiently excited with righteous anger to motivate the site moderators to ban the intoxicated cheater applicant. ;)
-
But what if the language has those extra features built-in? SpecBAS has USING$ as well as regexps and array sorting without importing any libraries at all.
-
It's only a matter of time before someone notices a particular cheat and gets sufficiently excited with righteous anger to motivate the site moderators to ban the intoxicated cheater applicant. ;)
::) Oh good lord!
Shaved some time of my last best:
_TITLE "Searching for large sets of Anagrams with same characters, loading data..."
'anagrams3 is starting to adapt to data,
'there are no 5 set anagrams of 3 letters nor of digits or apostrophes
'so they are not added to the word list to sort.
'The word coding has also been shortened.
'Multiple IF's reduced to IF THEN ELSEIF ELSE where ever possible.
'No calls to subs or functions except the recursive QSort routine.
DIM SHARED w$(24200)
index = 0: t = TIMER
OPEN "unixdict.txt" FOR INPUT AS #1
WHILE NOT (EOF(1))
INPUT #1, wd$
'don't bother with 3 letter words even though possibe to have 6 permutations there are none of 5 or more
IF LEN(wd$) > 3 THEN
REDIM a(26): flag = 0
FOR i = 1 TO LEN(wd$)
ac = ASC(MID$(wd$, i, 1)) - 96
IF 0 < ac AND ac < 27 THEN a(ac) = a(ac) + 1 ELSE flag = 1: EXIT FOR
NEXT
'don't code and store a word unless all letters, no digits or apostrophes
IF flag = 0 THEN
b$ = "": zc = 0
'zc zero counts replaces strings of 0's with a letter according to how many in string
'this shortens the strings considerably before the sort
FOR i = 1 TO 26
IF a(i) = 0 THEN
zc = zc + 1
ELSE
IF zc > 0 THEN b$ = b$ + CHR$(96 + zc): zc = 0
b$ = b$ + LTRIM$(STR$(a(i)))
END IF
NEXT
index = index + 1
w$(index) = b$ + "!" + wd$
END IF
END IF
WEND
CLOSE #1
QSort 0, index
flag = 0
FOR i = 1 TO index - 1
IF MID$(w$(i), 1, INSTR(w$(i), "!") - 1) <> MID$(w$(i + 1), 1, INSTR(w$(i + 1), "!") - 1) THEN
IF cnt > 4 THEN PRINT b$
cnt = 0: b$ = "": flag = 0
ELSEIF flag THEN
b$ = b$ + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = cnt + 1
ELSE
b$ = MID$(w$(i), INSTR(w$(i), "!") + 1) + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = 2: flag = -1
END IF
NEXT
PRINT "Done !!! "; TIMER - t
SUB QSort (Start, Finish)
i = Start: j = Finish: x$ = w$(INT((i + j) / 2))
WHILE i <= j
WHILE w$(i) < x$: i = i + 1: WEND
WHILE w$(j) > x$: j = j - 1: WEND
IF i <= j THEN
a$ = w$(i): w$(i) = w$(j): w$(j) = a$
i = i + 1: j = j - 1
END IF
WEND
IF j > Start THEN QSort Start, j
IF i < Finish THEN QSort i, Finish
END SUB
Oh hey! the new word coding has the list in alpha order again! 8)
-
But what if the language has those extra features built-in? SpecBAS has USING$ as well as regexps and array sorting without importing any libraries at all.
Then of course it's all right, competition-wise. Also, importing regular system wide general purpose libraries and frameworks that come originally under a clean installation of the operating system, like e.g. common WinAPI or msvcrt.dll (MS VC), is legal because they aren't language specific and everyone else could use them at will through a standard header file if available in their respective language distro.
But if "using" or "import" implies pulling in at least one standalone language specific module -- an include file with extra executable task specific code, or a non-standard header file that provides alternative and/or extra interfaces to the system, or a dynamically or statically linked library -- which, in its turn, might also transparently map an infinitely long chain of extraneous dependencies of all sorts into the process space and which is not part of the language standard installation package, then it's cheating. As a minimum, such an extra module should also be viewed as part of the solution. And this will certainly debunk the impression of alleged "brevity" and "clarity".
For example, using Bob Zale's standard set of PowerBASIC header files for a solution is legal but using José Roca's alternative headers isn't.
-
OK well under a sec! and pretending not to know there aren't any large 3 letter word sets (with 5 or 6 permutations that are words), ie taking the time to test all words that could have 4 or more anagrams.
_TITLE "Rosetta Code Anagrams: mod #4 by bplus 2017-12-07"
'anagrams4 the word coding has also been shortened again!
'GET file in one gulp = buf$ and then find words
'Multiple IF's reduced to IF THEN ELSEIF ELSE where ever possible.
'No calls to subs or functions except the recursive QSort routine.
'FOR... NEXT replaced with WHILE... WEND
DIM SHARED w$(25100)
index = 0: t = TIMER
OPEN "unixdict.txt" FOR BINARY AS #1
fl = LOF(1): buf$ = SPACE$(fl)
GET #1, , buf$
CLOSE #1
p = 1
WHILE p < fl
np = INSTR(p, buf$, CHR$(10))
wd$ = MID$(buf$, p, np - p)
IF LEN(wd$) > 2 THEN
REDIM a(26): flag = 0: i = 1
WHILE i <= LEN(wd$)
ac = ASC(MID$(wd$, i, 1)) - 96
IF 0 < ac AND ac < 27 THEN a(ac) = a(ac) + 1 ELSE flag = 1: EXIT WHILE
i = i + 1
WEND
'don't code and store a word unless all letters, no digits or apostrophes
IF flag = 0 THEN
b$ = "": i = 1
WHILE i < 27
IF a(i) <> 0 THEN b$ = b$ + STRING$(a(i), CHR$(96 + i))
i = i + 1
WEND
index = index + 1
w$(index) = b$ + "!" + wd$
END IF
END IF
IF np THEN p = np + 1 ELSE p = fl
WEND
QSort 0, index
flag = 0
WHILE i < index
IF MID$(w$(i), 1, INSTR(w$(i), "!") - 1) <> MID$(w$(i + 1), 1, INSTR(w$(i + 1), "!") - 1) THEN
IF cnt > 4 THEN PRINT b$
cnt = 0: b$ = "": flag = 0
ELSEIF flag THEN
b$ = b$ + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = cnt + 1
ELSE
b$ = MID$(w$(i), INSTR(w$(i), "!") + 1) + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = 2: flag = -1
END IF
i = i + 1
WEND
PRINT "Done !!! "; TIMER - t
SUB QSort (Start, Finish)
i = Start: j = Finish: x$ = w$(INT((i + j) / 2))
WHILE i <= j
WHILE w$(i) < x$: i = i + 1: WEND
WHILE w$(j) > x$: j = j - 1: WEND
IF i <= j THEN
a$ = w$(i): w$(i) = w$(j): w$(j) = a$
i = i + 1: j = j - 1
END IF
WEND
IF j > Start THEN QSort Start, j
IF i < Finish THEN QSort i, Finish
END SUB
-
Criminy!
_TITLE "Rosetta Code Anagrams: mod #4.1 by bplus 2017-12-08"
'anagrams4_1 oh hey integers and other exact types
'anagrams4 the word coding has also been shortened again!
'GET file in one gulp = buf$ and then find words
'Multiple IF's reduced to IF THEN ELSEIF ELSE where ever possible.
'No calls to subs or functions except the recursive QSort routine.
'FOR... NEXT replaced with WHILE... WEND
DEFINT A-Z
DIM SHARED w$(25100)
index = 0: t! = TIMER
OPEN "unixdict.txt" FOR BINARY AS #1
fl& = LOF(1): buf$ = SPACE$(fl&)
GET #1, , buf$
CLOSE #1
p& = 1
WHILE p& < fl&
np& = INSTR(p&, buf$, CHR$(10))
wd$ = MID$(buf$, p&, np& - p&)
IF LEN(wd$) > 2 THEN
REDIM a(26): flag = 0: i = 1
WHILE i <= LEN(wd$)
ac = ASC(MID$(wd$, i, 1)) - 96
IF 0 < ac AND ac < 27 THEN a(ac) = a(ac) + 1 ELSE flag = 1: EXIT WHILE
i = i + 1
WEND
'don't code and store a word unless all letters, no digits or apostrophes
IF flag = 0 THEN
b$ = "": i = 1
WHILE i < 27
IF a(i) <> 0 THEN b$ = b$ + STRING$(a(i), CHR$(96 + i))
i = i + 1
WEND
index = index + 1
w$(index) = b$ + "!" + wd$
END IF
END IF
IF np& THEN p& = np& + 1 ELSE p& = fl&
WEND
QSort 0, index
flag = 0
WHILE i < index
IF MID$(w$(i), 1, INSTR(w$(i), "!") - 1) <> MID$(w$(i + 1), 1, INSTR(w$(i + 1), "!") - 1) THEN
IF cnt > 4 THEN PRINT b$
cnt = 0: b$ = "": flag = 0
ELSEIF flag THEN
b$ = b$ + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = cnt + 1
ELSE
b$ = MID$(w$(i), INSTR(w$(i), "!") + 1) + ", " + MID$(w$(i + 1), INSTR(w$(i + 1), "!") + 1): cnt = 2: flag = -1
END IF
i = i + 1
WEND
PRINT "Done !!! "; TIMER - t!
SUB QSort (Start, Finish)
i = Start: j = Finish: x$ = w$(INT((i + j) / 2))
WHILE i <= j
WHILE w$(i) < x$: i = i + 1: WEND
WHILE w$(j) > x$: j = j - 1: WEND
IF i <= j THEN
a$ = w$(i): w$(i) = w$(j): w$(j) = a$
i = i + 1: j = j - 1
END IF
WEND
IF j > Start THEN QSort Start, j
IF i < Finish THEN QSort i, Finish
END SUB
-
Maybe not the fastest (approx 3.8 seconds) but the shortest on this page until now....
OPTION COLLAPSE TRUE
DECLARE idx$ ASSOC STRING
FOR w$ IN LOAD$("unixdict.txt") STEP NL$
SPLIT w$ BY 1 TO letter$ SIZE length
SORT letter$ SIZE length
JOIN letter$ TO set$ SIZE length
idx$(set$) = APPEND$(idx$(set$), 0, w$)
total = AMOUNT(idx$(set$))
IF MaxCount < total THEN MaxCount = total
NEXT
PRINT "Analyzing took ", TIMER, " msecs.", NL$
LOOKUP idx$ TO name$ SIZE x
FOR y = 0 TO x-1
IF MaxCount = AMOUNT(idx$(name$[y])) THEN PRINT name$[y], ": ", idx$(name$[y])
NEXT
-
Looks very elegant, Peter! Leaves no chance for Ruby, does it? :)
-
Thanks Mike - at least I'm not importing any particular language specific modules. However, the generated C code uses the standard libc system header files and links to libc... :-\
-
However, the generated C code uses the standard libc system header files and links to libc... :-\
But that's perfectly legal in the context of RosettaCode challenges! And so does my C code by the way; it's just that it compiles directly in memory on program load using standard windows.h and stdio.h system headers and links against msvcrt.dll that's a Windows-specific dynamic analog to your libc static library. :)
-
Steve McNeil has Anagrams running very fast with QB64.
3rd page @ http://www.qb64.net/forum/index.php?topic=14622.30
-
Pretty impressive! I mean, very impressive even if not particularly pretty. :)