# L-7 MCS 275 Wed 25 Jan 2017 : wordswardict.py

"""
The file "war_of_the_worlds.txt" was downloaded from
http://www.gutenberg.org/files/36/36.txt
"""

# Name of the text file analyzed by main(); the path is relative,
# so the file is looked up in the current working directory.
BOOK = "war_of_the_worlds.txt"

def word_count(name):
    """
    Opens the file with name and counts the
    number of words.  Anything that is separated
    by whitespace is considered a word.

    Lines are split on runs of whitespace, so blank
    lines, line endings, and repeated spaces do not
    add to the count.  Returns the count as an integer;
    an empty file gives zero.
    """
    count = 0
    # the with statement closes the file even when reading fails
    with open(name, 'r') as infile:
        for line in infile:
            # split() without argument discards the newline and
            # never yields empty strings for consecutive spaces
            count += len(line.split())
    return count

def word_frequencies(name):
    """
    Returns a dictionary with the frequencies
    of the words occurring on file with name.

    The keys are the words, the values count how often
    each word occurs.  Lines are split on runs of
    whitespace, so a word at the end of a line is the
    same key as that word elsewhere, and neither the
    empty string nor a newline appears as a key.
    """
    result = {}
    # the with statement closes the file even when reading fails
    with open(name, 'r') as infile:
        for line in infile:
            # split() without argument strips the trailing newline,
            # otherwise 'word\n' and 'word' would be counted apart
            for word in line.split():
                result[word] = result.get(word, 0) + 1
    return result

def frequencies_of_words(freq):
    """
    Inverts the dictionary freq, which maps
    words to their frequencies.  On return is
    a dictionary with the frequencies as keys.
    As more than one word can have the same
    frequency, each value is the list of all
    words with that frequency, in the order
    in which the words appear in freq.
    """
    inverted = {}
    for word, count in freq.items():
        # start a new list the first time a frequency is seen
        inverted.setdefault(count, []).append(word)
    return inverted

def main():
    """
    Analysis of words in a book.

    Prints the number of words counted in BOOK,
    the number of different words, and then every
    tuple (frequency, list of words) for which the
    frequency is strictly larger than 100, in order
    of decreasing frequency.
    """
    cnt = word_count(BOOK)
    print('words counted :', cnt)
    freq = word_frequencies(BOOK)
    print('number of different words :', len(freq))
    invfreq = frequencies_of_words(freq)
    print("words used more than 100 times :")
    # the keys of invfreq are the frequencies, visited from high to low
    for count in sorted(invfreq, reverse=True):
        # "more than 100" excludes 100 itself; because the keys are
        # sorted, no later key can qualify once this test succeeds
        if count <= 100:
            break
        print((count, invfreq[count]))

# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
