22 September 2021

input is a file containing text

Read each line of the text
break lines into words
make a dictionary
    keys: words
    values: frequencies

for each word:
    check if it's in the keys. If it is, increment its value.
    if not, enroll it with a value of 1

make a file filename.conc
write the results to the output file


from sys import argv
def sanitize_word(w):
    """Strip *w* down to its alphanumeric characters and apostrophes.

    The surviving characters keep their original order. A token that
    contains nothing but other characters yields the empty string.
    """
    kept = [ch for ch in w if ch == "'" or ch.isalnum()]
    return "".join(kept)
def read_words(filename):
    """Count how often each word occurs in the text file *filename*.

    Every line is lower-cased and split on whitespace, and each resulting
    token is passed through ``sanitize_word`` before it is counted.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each sanitized, lower-cased word to the number of
        times it appears in the file.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    out = {}
    with open(filename, "r") as fp:
        for line in fp:
            for token in line.lower().split():
                word = sanitize_word(token)
                # A token made only of punctuation (e.g. "--") sanitizes to
                # "", which is not a word and must not be counted.
                if not word:
                    continue
                out[word] = out.get(word, 0) + 1
    return out
def write_dict(dictionary, filename):
    """Write the entries of *dictionary* to *filename*, one per line.

    Lines are emitted in sorted key order; each consists of the key, three
    tab characters, the associated value, and a newline.
    """
    # Sort before opening so a failure to sort never truncates the target.
    ordered_words = sorted(dictionary)
    with open(filename, "w") as fp:
        fp.writelines(
            f"{word}\t\t\t{dictionary[word]}\n" for word in ordered_words
        )

def main():
    """Write a word-frequency concordance for the file named in ``argv[1]``.

    The words of the input file are counted with ``read_words`` and the
    result is written with ``write_dict`` to a file whose name is the input
    path with ``.conc`` appended.

    Raises:
        SystemExit: If no input file was given on the command line.
    """
    if len(argv) < 2:
        # Exit with a readable message instead of a bare IndexError.
        raise SystemExit(
            "usage: pass the path of the input text file as the first argument"
        )
    input_file = argv[1]
    output_file = input_file + ".conc"
    word_counts = read_words(input_file)
    write_dict(word_counts, output_file)


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()