17 lines
868 B
Python
17 lines
868 B
Python
def stretch_stats(url='http://ucrel.lancs.ac.uk/bncfreq/lists/1_2_all_freq.txt'):
    """Return frequency-weighted counts of ie/ei spellings from a word list.

    The resource at *url* is read line by line.  Only lines made of exactly
    three whitespace-separated fields (word, part of speech, frequency) are
    kept, and the first such line is skipped as the column header.  Words
    are lower-cased before matching.

    Each word adds its frequency once to every category it matches, no
    matter how many times the letter pattern occurs inside the word.

    Args:
        url: Location of the frequency list; anything accepted by
            ``urllib.request.urlopen``.

    Returns:
        A tuple ``(cei, cie, not_c_ie, not_c_ei)`` of summed frequencies for
        words containing "cei", "cie", "ie" not preceded by "c", and "ei"
        not preceded by "c" respectively.  Note that "cei" comes first.

    Raises:
        ValueError: If the frequency field of a selected word is not an
            integer.
        UnicodeDecodeError: If a selected word is not valid UTF-8.
        urllib.error.URLError: If the resource cannot be opened.
    """
    # Use the response as a context manager so the connection/file handle is
    # released deterministically, even if reading fails part-way through.
    with urllib.request.urlopen(url) as response:
        rows = [fields
                for fields in (line.strip().lower().split() for line in response)
                if len(fields) == 3]

    # rows[0] is the header row; the data is bytes, so filter on bytes and
    # decode only the words that are actually needed.
    wordfreq = [(word.decode(), int(frq))
                for word, _pos, frq in rows[1:]
                if b'ie' in word or b'ei' in word]

    ie_not_after_c = re.compile(r'(^ie|[^c]ie)')
    ei_not_after_c = re.compile(r'(^ei|[^c]ei)')

    cie = cei = not_c_ie = not_c_ei = 0
    for word, frq in wordfreq:
        if 'cie' in word:
            cie += frq
        if 'cei' in word:
            cei += frq
        if ie_not_after_c.search(word):
            not_c_ie += frq
        if ei_not_after_c.search(word):
            not_c_ei += frq

    return cei, cie, not_c_ie, not_c_ei
|
|
|
|
# Announce the frequency-weighted check, then fetch the statistics and hand
# them to print_result (defined elsewhere in this file).
print(
    '\n\nChecking plausibility of "I before E except after C"',
    'And taking account of word frequencies in British English:',
    sep='\n',
)
print_result(*stretch_stats())
|