# Added new articles.
# Download WordNet database one time:
# import nltk
# nltk.download()
# Third-party dependencies: PyEnchant (spell checking) and NLTK (lemmatisation).
import enchant
from nltk.stem import WordNetLemmatizer

# Single WordNet lemmatiser instance reused for every word.
lemmatiser = WordNetLemmatizer()
# US-English spelling dictionary.
# NOTE(review): this name shadows the builtin ``dict``; it is kept because
# code later in the script calls ``dict.check(...)``.
dict = enchant.Dict('en_US')
# Maps each word (lemma) to its accumulated occurrence count.
FREQ = {}


def add(word, cnt):
    """Add ``cnt`` to the running total stored for ``word`` in ``FREQ``.

    Args:
        word: Key to update; the entry is created on first use.
        cnt: Amount to add to the key's current total.
    """
    # ``.get`` supplies 0 for unseen words, so no separate insert branch
    # is needed.
    FREQ[word] = FREQ.get(word, 0) + cnt
# Read the tab-separated "<count>\t<word>" listing in oanc.txt and fold every
# word accepted by the spell checker into FREQ under its lemma.
accepted = 0  # words that passed the spelling check (printed at the end)
# ``with`` guarantees the input file is closed even if a line fails to parse.
with open('oanc.txt', 'r') as in_:
    for line in in_:
        if not line.strip():
            # A blank line (e.g. a trailing one at end of file) holds no
            # entry; skip it instead of failing on the unpack below.
            continue
        cnt, word = line.split('\t')
        cnt = int(cnt)
        # Keep only the text before the first apostrophe ("dog's" -> "dog").
        word = word.strip().partition("'")[0]
        if not word:
            continue
        if not dict.check(word):
            continue
        accepted += 1
        # Try the WordNet parts of speech in the order noun, verb, adjective,
        # adverb, and count the word under the first lemma that differs from
        # it. Remaining parts of speech are not consulted after a match.
        for pos in ('n', 'v', 'a', 'r'):
            lemma = lemmatiser.lemmatize(word, pos=pos)
            if lemma != word:
                add(lemma, cnt)
                break
        else:
            # No part of speech changed the word; count it unchanged.
            add(word, cnt)
print(accepted)
# Write one "<count> <word>" line per entry collected in FREQ.
# ``with`` closes the output file even if a write raises part-way through.
with open('oanc.new.txt', 'w') as out:
    for word, cnt in FREQ.items():
        out.write("{} {}\n".format(cnt, word))