input is a file containing text
Read each line of the text
break lines into words
make a dictionary
keys: words
values: frequencies
for each word:
check if it's in the keys. If it is, increment its value.
if not, enroll it with a value of 1
make a file filename.conc
write the results to the output file
from sys import argv
def sanitize_word(w):
    """Return *w* with every character removed except alphanumerics and "'".

    Apostrophes are kept so contractions such as ``don't`` stay intact.
    The result may be an empty string when *w* holds no such characters.

    Args:
        w: The raw token to clean.

    Returns:
        A new string made of the kept characters, in their original order.
    """
    # str.join builds the result in one pass instead of repeated "+=".
    return "".join(ch for ch in w if ch.isalnum() or ch == "'")
def read_words(filename):
    """Count how often each word occurs in a text file.

    Every line is lower-cased and split on whitespace, and each resulting
    token is passed through ``sanitize_word``.  Tokens that come back empty
    (for example a bare ``--`` or ``...``) are skipped so they are not
    tallied under the empty-string key.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each sanitized, lower-cased word to the number of
        times it appears in the file.
    """
    counts = {}
    with open(filename, "r") as fp:
        for line in fp:
            for token in line.lower().split():
                word = sanitize_word(token)
                if not word:
                    # Nothing was left after sanitizing; this is not a word.
                    continue
                counts[word] = counts.get(word, 0) + 1
    return counts
def write_dict(dictionary, filename):
    """Write the entries of *dictionary* to *filename*, one per line.

    Keys are emitted in sorted order.  Each line holds the key, three tab
    characters, the associated value, and a trailing newline.  The target
    file is opened in write mode, so existing content is replaced.

    Args:
        dictionary: Mapping whose keys and values are written out.
        filename: Path of the file to write.
    """
    # Sort before opening so a failed sort leaves the target file untouched.
    ordered = sorted(dictionary)
    with open(filename, "w") as out_file:
        out_file.writelines(
            f"{key}\t\t\t{dictionary[key]}\n" for key in ordered
        )
def main():
    """Build a concordance for the file named on the command line.

    Reads the input path from ``argv[1]``, counts its words with
    ``read_words`` and writes the result with ``write_dict`` to a file
    whose name is the input path with ``.conc`` appended.

    Raises:
        SystemExit: If no input file was given on the command line.
    """
    if len(argv) < 2:
        # Exit with a readable usage message rather than a bare IndexError.
        program = argv[0] if argv else "concordance"
        raise SystemExit(f"usage: {program} <input_file>")
    input_file = argv[1]
    output_file = input_file + ".conc"
    word_counts = read_words(input_file)
    write_dict(word_counts, output_file)
# Run only when executed as a script, so that importing this module does not
# trigger file processing as a side effect.
if __name__ == "__main__":
    main()