COMMENT COMPILE WITH $ cim -m64 word-count.sim ; BEGIN COMMENT ----- CLASSES FOR GENERAL USE ; ! ABSTRACT HASH KEY TYPE ; CLASS HASHKEY; VIRTUAL: PROCEDURE HASH IS INTEGER PROCEDURE HASH;; PROCEDURE EQUALTO IS BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K;; BEGIN END HASHKEY; ! ABSTRACT HASH VALUE TYPE ; CLASS HASHVAL; BEGIN ! THERE IS NOTHING REQUIRED FOR THE VALUE TYPE ; END HASHVAL; CLASS HASHMAP; BEGIN CLASS INNERHASHMAP(N); INTEGER N; BEGIN INTEGER PROCEDURE INDEX(K); REF(HASHKEY) K; BEGIN INTEGER I; IF K == NONE THEN ERROR("HASHMAP.INDEX: NONE IS NOT A VALID KEY"); I := MOD(K.HASH,N); LOOP: IF KEYTABLE(I) == NONE OR ELSE KEYTABLE(I).EQUALTO(K) THEN INDEX := I ELSE BEGIN I := IF I+1 = N THEN 0 ELSE I+1; GO TO LOOP; END; END INDEX; ! PUT SOMETHING IN ; PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V; BEGIN INTEGER I; IF V == NONE THEN ERROR("HASHMAP.PUT: NONE IS NOT A VALID VALUE"); I := INDEX(K); IF KEYTABLE(I) == NONE THEN BEGIN IF SIZE = N THEN ERROR("HASHMAP.PUT: TABLE FILLED COMPLETELY"); KEYTABLE(I) :- K; VALTABLE(I) :- V; SIZE := SIZE+1; END ELSE VALTABLE(I) :- V; END PUT; ! GET SOMETHING OUT ; REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K; BEGIN INTEGER I; IF K == NONE THEN ERROR("HASHMAP.GET: NONE IS NOT A VALID KEY"); I := INDEX(K); IF KEYTABLE(I) == NONE THEN GET :- NONE ! ERROR("HASHMAP.GET: KEY NOT FOUND"); ELSE GET :- VALTABLE(I); END GET; PROCEDURE CLEAR; BEGIN INTEGER I; FOR I := 0 STEP 1 UNTIL N-1 DO BEGIN KEYTABLE(I) :- NONE; VALTABLE(I) :- NONE; END; SIZE := 0; END CLEAR; ! DATA MEMBERS OF CLASS HASHMAP ; REF(HASHKEY) ARRAY KEYTABLE(0:N-1); REF(HASHVAL) ARRAY VALTABLE(0:N-1); INTEGER SIZE; END INNERHASHMAP; PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V; BEGIN IF IMAP.SIZE >= 0.75 * IMAP.N THEN BEGIN COMMENT RESIZE HASHMAP ; REF(INNERHASHMAP) NEWIMAP; REF(ITERATOR) IT; NEWIMAP :- NEW INNERHASHMAP(2 * IMAP.N); IT :- NEW ITERATOR(THIS HASHMAP); WHILE IT.MORE DO BEGIN REF(HASHKEY) KEY; KEY :- IT.NEXT; NEWIMAP.PUT(KEY, IMAP.GET(KEY)); END; IMAP.CLEAR; IMAP :- NEWIMAP; END; IMAP.PUT(K, V); END; REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K; GET :- IMAP.GET(K); PROCEDURE CLEAR; IMAP.CLEAR; INTEGER PROCEDURE SIZE; SIZE := IMAP.SIZE; REF(INNERHASHMAP) IMAP; IMAP :- NEW INNERHASHMAP(16); END HASHMAP; CLASS ITERATOR(H); REF(HASHMAP) H; BEGIN INTEGER POS,KEYCOUNT; BOOLEAN PROCEDURE MORE; MORE := KEYCOUNT < H.SIZE; REF(HASHKEY) PROCEDURE NEXT; BEGIN INSPECT H DO INSPECT IMAP DO BEGIN WHILE KEYTABLE(POS) == NONE DO POS := POS+1; NEXT :- KEYTABLE(POS); KEYCOUNT := KEYCOUNT+1; POS := POS+1; END; END NEXT; END ITERATOR; COMMENT ----- PROBLEM SPECIFIC CLASSES ; HASHKEY CLASS TEXTHASHKEY(T); VALUE T; TEXT T; BEGIN INTEGER PROCEDURE HASH; BEGIN INTEGER I; T.SETPOS(1); WHILE T.MORE DO I := 31*I+RANK(T.GETCHAR); HASH := I; END HASH; BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K; EQUALTO := T = K QUA TEXTHASHKEY.T; END TEXTHASHKEY; HASHVAL CLASS COUNTER; BEGIN INTEGER COUNT; END COUNTER; REF(INFILE) INF; REF(HASHMAP) MAP; REF(TEXTHASHKEY) KEY; REF(COUNTER) VAL; REF(ITERATOR) IT; TEXT LINE, WORD; INTEGER I, J, MAXCOUNT, LINENO; INTEGER ARRAY MAXCOUNTS(1:10); REF(TEXTHASHKEY) ARRAY MAXWORDS(1:10); WORD :- BLANKS(1000); MAP :- NEW HASHMAP; COMMENT MAP WORDS TO COUNTERS ; INF :- NEW INFILE("135-0.txt"); INF.OPEN(BLANKS(4096)); WHILE NOT INF.LASTITEM DO BEGIN BOOLEAN INWORD; PROCEDURE SAVE; BEGIN IF WORD.POS > 1 THEN BEGIN KEY :- NEW TEXTHASHKEY(WORD.SUB(1, WORD.POS - 1)); VAL :- MAP.GET(KEY); IF VAL == NONE THEN BEGIN VAL :- NEW COUNTER; MAP.PUT(KEY, VAL); END; VAL.COUNT := VAL.COUNT + 1; WORD := " "; WORD.SETPOS(1); END; END SAVE; LINENO := LINENO + 1; LINE :- COPY(INF.IMAGE).STRIP; INF.INIMAGE; COMMENT SEARCH WORDS IN LINE ; COMMENT A WORD IS ANY SEQUENCE OF LETTERS ; INWORD := FALSE; LINE.SETPOS(1); WHILE LINE.MORE DO BEGIN CHARACTER CH; CH := LINE.GETCHAR; IF CH >= 'a' AND CH <= 'z' THEN CH := CHAR(RANK(CH) - RANK('a') + RANK('A')); IF CH >= 'A' AND CH <= 'Z' THEN BEGIN IF NOT INWORD THEN BEGIN SAVE; INWORD := TRUE; END; WORD.PUTCHAR(CH); END ELSE BEGIN IF INWORD THEN BEGIN SAVE; INWORD := FALSE; END; END; END; SAVE; COMMENT LAST WORD ; END; INF.CLOSE; COMMENT FIND 10 MOST COMMON WORDS ; IT :- NEW ITERATOR(MAP); WHILE IT.MORE DO BEGIN KEY :- IT.NEXT; VAL :- MAP.GET(KEY); FOR I := 1 STEP 1 UNTIL 10 DO BEGIN IF VAL.COUNT >= MAXCOUNTS(I) THEN BEGIN FOR J := 10 STEP -1 UNTIL I + 1 DO BEGIN MAXCOUNTS(J) := MAXCOUNTS(J - 1); MAXWORDS(J) :- MAXWORDS(J - 1); END; MAXCOUNTS(I) := VAL.COUNT; MAXWORDS(I) :- KEY; GO TO BREAK; END; END; BREAK: END; COMMENT OUTPUT 10 MOST COMMON WORDS ; FOR I := 1 STEP 1 UNTIL 10 DO BEGIN IF MAXWORDS(I) =/= NONE THEN BEGIN OUTINT(MAXCOUNTS(I), 10); OUTTEXT(" "); OUTTEXT(MAXWORDS(I) QUA TEXTHASHKEY.T); OUTIMAGE; END; END; END