RosettaCodeData/Task/Word-frequency/Liberty-BASIC/word-frequency.basic

67 lines
2.1 KiB
Plaintext

' Word frequency (Liberty BASIC)
' Reads "135-0.txt", counts how often each word occurs (case-insensitive,
' with punctuation and digits stripped) and prints the most frequent words.
'
' words$(n,1) = the word
' words$(n,2) = its count as a zero-padded string, so that the string SORT
'               on column 2 orders the rows numerically
maxWords = 100000       'capacity of the word table
maxLines = 150000       'maximum number of input lines that are read
topN = 10               'how many of the most frequent words to print
junk$ = ".,;!?-_~+0123456789/<>:"   'characters removed from every token

dim words$(maxWords,2)
dim lines$(maxLines)

' --- read the file; lines are stored in lines$(0) .. lines$(total-1) ---
open "135-0.txt" for input as #txt
total = 0
while EOF(#txt)=0 and total < maxLines
    'LINE INPUT keeps the whole line; plain INPUT would split it at commas
    line input #txt, textLine$
    lines$(total) = textLine$
    total = total + 1
wend
close #txt              'the file is no longer needed once it is in memory

' --- count the words ---
newwordcount = 0
tableFull = 0
for a = 0 to total - 1  'start at 0 so the first line is not skipped
    index = 1
    raw$ = word$(lines$(a), index)
    'word$() returns "" only when the line has no more words, so the loop is
    'driven by the RAW word; a token that merely cleans down to "" (for
    'example "1862" or "--") must not end the scan of the line
    while raw$ <> ""
        token$ = lower$(raw$)
        for c = 1 to len(junk$)
            token$ = replstr$(token$, mid$(junk$, c, 1), "")
        next c
        if token$ <> "" then    'never count the empty string as a word
            found = 0
            for b = 1 to newwordcount
                if words$(b,1) = token$ then
                    num$ = str$(val(words$(b,2)) + 1)
                    'left-pad to 5 digits so the string sort stays numeric
                    if len(num$) < 5 then num$ = right$("0000" + num$, 5)
                    words$(b,2) = num$
                    found = 1
                    exit for
                end if
            next b
            if found <> 1 then
                if newwordcount < maxWords then
                    newwordcount = newwordcount + 1
                    words$(newwordcount,1) = token$
                    words$(newwordcount,2) = "00001"
                    print newwordcount;" ";token$   'progress: each new word
                else
                    tableFull = 1   'table exhausted; further new words are dropped
                end if
            end if
        end if
        index = index + 1
        raw$ = word$(lines$(a), index)
    wend
next a

' --- report ---
print
if newwordcount > 1 then sort words$(), 1, newwordcount, 2
print "Count Word"
print "===== ================="
'the sort is ascending, so the most frequent words are at the end of the table
lowest = newwordcount - topN + 1
if lowest < 1 then lowest = 1   'fewer than topN distinct words were found
for a = newwordcount to lowest step -1
    print words$(a,2);" ";words$(a,1)
next a
print "-----------------------"
print newwordcount;" unique words found."
if tableFull = 1 then print "Warning: word table full, some words were not counted."
print "End of program"
end