280 lines
7.3 KiB
Plaintext
280 lines
7.3 KiB
Plaintext
COMMENT COMPILE WITH
|
|
$ cim -m64 word-count.sim
|
|
;
|
|
BEGIN
|
|
|
|
COMMENT ----- CLASSES FOR GENERAL USE ;
|
|
|
|
! ABSTRACT HASH KEY TYPE ;
|
|
CLASS HASHKEY;
|
|
VIRTUAL:
|
|
PROCEDURE HASH IS
|
|
INTEGER PROCEDURE HASH;;
|
|
PROCEDURE EQUALTO IS
|
|
BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K;;
|
|
BEGIN
|
|
END HASHKEY;
|
|
|
|
! ABSTRACT HASH VALUE TYPE ;
|
|
CLASS HASHVAL;
|
|
BEGIN
|
|
! THERE IS NOTHING REQUIRED FOR THE VALUE TYPE ;
|
|
END HASHVAL;
|
|
|
|
CLASS HASHMAP;
|
|
BEGIN
|
|
CLASS INNERHASHMAP(N); INTEGER N;
|
|
BEGIN
|
|
|
|
INTEGER PROCEDURE INDEX(K); REF(HASHKEY) K;
|
|
BEGIN
|
|
INTEGER I;
|
|
IF K == NONE THEN
|
|
ERROR("HASHMAP.INDEX: NONE IS NOT A VALID KEY");
|
|
I := MOD(K.HASH,N);
|
|
LOOP:
|
|
IF KEYTABLE(I) == NONE OR ELSE KEYTABLE(I).EQUALTO(K) THEN
|
|
INDEX := I
|
|
ELSE BEGIN
|
|
I := IF I+1 = N THEN 0 ELSE I+1;
|
|
GO TO LOOP;
|
|
END;
|
|
END INDEX;
|
|
|
|
! PUT SOMETHING IN ;
|
|
PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V;
|
|
BEGIN
|
|
INTEGER I;
|
|
IF V == NONE THEN
|
|
ERROR("HASHMAP.PUT: NONE IS NOT A VALID VALUE");
|
|
I := INDEX(K);
|
|
IF KEYTABLE(I) == NONE THEN BEGIN
|
|
IF SIZE = N THEN
|
|
ERROR("HASHMAP.PUT: TABLE FILLED COMPLETELY");
|
|
KEYTABLE(I) :- K;
|
|
VALTABLE(I) :- V;
|
|
SIZE := SIZE+1;
|
|
END ELSE
|
|
VALTABLE(I) :- V;
|
|
END PUT;
|
|
|
|
! GET SOMETHING OUT ;
|
|
REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K;
|
|
BEGIN
|
|
INTEGER I;
|
|
IF K == NONE THEN
|
|
ERROR("HASHMAP.GET: NONE IS NOT A VALID KEY");
|
|
I := INDEX(K);
|
|
IF KEYTABLE(I) == NONE THEN
|
|
GET :- NONE ! ERROR("HASHMAP.GET: KEY NOT FOUND");
|
|
ELSE
|
|
GET :- VALTABLE(I);
|
|
END GET;
|
|
|
|
PROCEDURE CLEAR;
|
|
BEGIN
|
|
INTEGER I;
|
|
FOR I := 0 STEP 1 UNTIL N-1 DO BEGIN
|
|
KEYTABLE(I) :- NONE;
|
|
VALTABLE(I) :- NONE;
|
|
END;
|
|
SIZE := 0;
|
|
END CLEAR;
|
|
|
|
! DATA MEMBERS OF CLASS HASHMAP ;
|
|
REF(HASHKEY) ARRAY KEYTABLE(0:N-1);
|
|
REF(HASHVAL) ARRAY VALTABLE(0:N-1);
|
|
INTEGER SIZE;
|
|
|
|
END INNERHASHMAP;
|
|
|
|
PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V;
|
|
BEGIN
|
|
IF IMAP.SIZE >= 0.75 * IMAP.N THEN
|
|
BEGIN
|
|
COMMENT RESIZE HASHMAP ;
|
|
REF(INNERHASHMAP) NEWIMAP;
|
|
REF(ITERATOR) IT;
|
|
NEWIMAP :- NEW INNERHASHMAP(2 * IMAP.N);
|
|
IT :- NEW ITERATOR(THIS HASHMAP);
|
|
WHILE IT.MORE DO
|
|
BEGIN
|
|
REF(HASHKEY) KEY;
|
|
KEY :- IT.NEXT;
|
|
NEWIMAP.PUT(KEY, IMAP.GET(KEY));
|
|
END;
|
|
IMAP.CLEAR;
|
|
IMAP :- NEWIMAP;
|
|
END;
|
|
IMAP.PUT(K, V);
|
|
END;
|
|
|
|
REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K;
|
|
GET :- IMAP.GET(K);
|
|
|
|
PROCEDURE CLEAR;
|
|
IMAP.CLEAR;
|
|
|
|
INTEGER PROCEDURE SIZE;
|
|
SIZE := IMAP.SIZE;
|
|
|
|
REF(INNERHASHMAP) IMAP;
|
|
|
|
IMAP :- NEW INNERHASHMAP(16);
|
|
END HASHMAP;
|
|
|
|
CLASS ITERATOR(H); REF(HASHMAP) H;
|
|
BEGIN
|
|
INTEGER POS,KEYCOUNT;
|
|
|
|
BOOLEAN PROCEDURE MORE;
|
|
MORE := KEYCOUNT < H.SIZE;
|
|
|
|
REF(HASHKEY) PROCEDURE NEXT;
|
|
BEGIN
|
|
INSPECT H DO
|
|
INSPECT IMAP DO
|
|
BEGIN
|
|
WHILE KEYTABLE(POS) == NONE DO
|
|
POS := POS+1;
|
|
NEXT :- KEYTABLE(POS);
|
|
KEYCOUNT := KEYCOUNT+1;
|
|
POS := POS+1;
|
|
END;
|
|
END NEXT;
|
|
|
|
END ITERATOR;
|
|
|
|
COMMENT ----- PROBLEM SPECIFIC CLASSES ;
|
|
|
|
HASHKEY CLASS TEXTHASHKEY(T); VALUE T; TEXT T;
|
|
BEGIN
|
|
INTEGER PROCEDURE HASH;
|
|
BEGIN
|
|
INTEGER I;
|
|
T.SETPOS(1);
|
|
WHILE T.MORE DO
|
|
I := 31*I+RANK(T.GETCHAR);
|
|
HASH := I;
|
|
END HASH;
|
|
BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K;
|
|
EQUALTO := T = K QUA TEXTHASHKEY.T;
|
|
END TEXTHASHKEY;
|
|
|
|
HASHVAL CLASS COUNTER;
|
|
BEGIN
|
|
INTEGER COUNT;
|
|
END COUNTER;
|
|
|
|
REF(INFILE) INF;
|
|
REF(HASHMAP) MAP;
|
|
REF(TEXTHASHKEY) KEY;
|
|
REF(COUNTER) VAL;
|
|
REF(ITERATOR) IT;
|
|
TEXT LINE, WORD;
|
|
INTEGER I, J, MAXCOUNT, LINENO;
|
|
INTEGER ARRAY MAXCOUNTS(1:10);
|
|
REF(TEXTHASHKEY) ARRAY MAXWORDS(1:10);
|
|
|
|
WORD :- BLANKS(1000);
|
|
MAP :- NEW HASHMAP;
|
|
|
|
COMMENT MAP WORDS TO COUNTERS ;
|
|
|
|
INF :- NEW INFILE("135-0.txt");
|
|
INF.OPEN(BLANKS(4096));
|
|
WHILE NOT INF.LASTITEM DO
|
|
BEGIN
|
|
BOOLEAN INWORD;
|
|
|
|
PROCEDURE SAVE;
|
|
BEGIN
|
|
IF WORD.POS > 1 THEN
|
|
BEGIN
|
|
KEY :- NEW TEXTHASHKEY(WORD.SUB(1, WORD.POS - 1));
|
|
VAL :- MAP.GET(KEY);
|
|
IF VAL == NONE THEN
|
|
BEGIN
|
|
VAL :- NEW COUNTER;
|
|
MAP.PUT(KEY, VAL);
|
|
END;
|
|
VAL.COUNT := VAL.COUNT + 1;
|
|
WORD := " ";
|
|
WORD.SETPOS(1);
|
|
END;
|
|
END SAVE;
|
|
|
|
LINENO := LINENO + 1;
|
|
LINE :- COPY(INF.IMAGE).STRIP; INF.INIMAGE;
|
|
|
|
COMMENT SEARCH WORDS IN LINE ;
|
|
COMMENT A WORD IS ANY SEQUENCE OF LETTERS ;
|
|
|
|
INWORD := FALSE;
|
|
LINE.SETPOS(1);
|
|
WHILE LINE.MORE DO
|
|
BEGIN
|
|
CHARACTER CH;
|
|
CH := LINE.GETCHAR;
|
|
IF CH >= 'a' AND CH <= 'z' THEN
|
|
CH := CHAR(RANK(CH) - RANK('a') + RANK('A'));
|
|
IF CH >= 'A' AND CH <= 'Z' THEN
|
|
BEGIN
|
|
IF NOT INWORD THEN
|
|
BEGIN
|
|
SAVE;
|
|
INWORD := TRUE;
|
|
END;
|
|
WORD.PUTCHAR(CH);
|
|
END ELSE
|
|
BEGIN
|
|
IF INWORD THEN
|
|
BEGIN
|
|
SAVE;
|
|
INWORD := FALSE;
|
|
END;
|
|
END;
|
|
END;
|
|
SAVE; COMMENT LAST WORD ;
|
|
END;
|
|
INF.CLOSE;
|
|
|
|
COMMENT FIND 10 MOST COMMON WORDS ;
|
|
|
|
IT :- NEW ITERATOR(MAP);
|
|
WHILE IT.MORE DO
|
|
BEGIN
|
|
KEY :- IT.NEXT;
|
|
VAL :- MAP.GET(KEY);
|
|
FOR I := 1 STEP 1 UNTIL 10 DO
|
|
BEGIN
|
|
IF VAL.COUNT >= MAXCOUNTS(I) THEN
|
|
BEGIN
|
|
FOR J := 10 STEP -1 UNTIL I + 1 DO
|
|
BEGIN
|
|
MAXCOUNTS(J) := MAXCOUNTS(J - 1);
|
|
MAXWORDS(J) :- MAXWORDS(J - 1);
|
|
END;
|
|
MAXCOUNTS(I) := VAL.COUNT;
|
|
MAXWORDS(I) :- KEY;
|
|
GO TO BREAK;
|
|
END;
|
|
END;
|
|
BREAK:
|
|
END;
|
|
|
|
COMMENT OUTPUT 10 MOST COMMON WORDS ;
|
|
|
|
FOR I := 1 STEP 1 UNTIL 10 DO
|
|
BEGIN
|
|
IF MAXWORDS(I) =/= NONE THEN
|
|
BEGIN
|
|
OUTINT(MAXCOUNTS(I), 10);
|
|
OUTTEXT(" ");
|
|
OUTTEXT(MAXWORDS(I) QUA TEXTHASHKEY.T);
|
|
OUTIMAGE;
|
|
END;
|
|
END;
|
|
|
|
END
|