# L-7 MCS 275 Wed 27 Jan 2010 : wordswardict.py

# The file "war_of_the_worlds.txt" was downloaded from
# http://www.gutenberg.org/files/36/36.txt

book = "war_of_the_worlds.txt"


def word_count(name):
    """
    Opens the file with name and counts the number of words.
    Anything that is separated by whitespace is considered a word.

    Returns the total number of words as an integer.
    """
    cnt = 0
    # the with statement closes the file even if reading fails
    with open(name, 'r') as f:
        for s in f:
            # split() without argument splits on any run of whitespace,
            # so blank lines and repeated spaces contribute no words
            cnt = cnt + len(s.split())
    return cnt


def word_frequencies(name):
    """
    Returns a dictionary with the frequencies of the words
    occurring on file with name.

    The keys are the words (separated by whitespace, case sensitive,
    punctuation included), the values count their occurrences.
    """
    D = {}
    with open(name, 'r') as f:
        for s in f:
            # split() also strips the newline at the end of the line,
            # so "end" and "end\n" are counted as the same word
            for e in s.split():
                D[e] = D.get(e, 0) + 1
    return D


def frequencies_of_words(D):
    """
    Reverts the keys and values of the given dictionary D.
    Because several words may occur with the same frequency,
    the values are lists of words.

    Returns a dictionary mapping each frequency to the list of
    keys of D that have this frequency.
    """
    F = {}
    for (word, freq) in D.items():
        # setdefault creates the list the first time freq is seen
        F.setdefault(freq, []).append(word)
    return F


def main():
    """
    Analysis of words in a book.

    Prints the number of words in the file named by book,
    the number of different words, and then, in decreasing
    order of frequency, every (frequency, words) pair whose
    frequency is larger than 100.
    """
    cnt = word_count(book)
    # single argument print calls behave the same in Python 2 and 3
    print("words counted :  %d" % cnt)
    D = word_frequencies(book)
    print("number of different words : %d" % len(D))
    # print("frequency table of words : %s" % D)
    F = frequencies_of_words(D)
    # print("words of most frequencies : %s" % F)
    L = sorted(F.items(), key=lambda e: e[0], reverse=True)
    print("words used more than 100 times :")
    for e in L:
        # L is sorted from high to low, so we can stop at the first
        # frequency that is not strictly larger than 100
        if e[0] <= 100:
            break
        print(e)


# run the analysis only when executed as a script, not on import
if __name__ == "__main__":
    main()