# L-6 MCS 275 25 Jan 2010 : kmeans.py # Reads the file generated by makedata.py, shuffles the list, # and applies the k-means algorithm. import random, turtle from math import sqrt from makedata import Distance, Centroids, Radii, ShowData, ShowCentroids def ReadList(file): """ Attempts to read a list from file and will return this list. Returns the empty list if exceptions occur. """ try: s = file.readline() L = eval(s) if isinstance(L,list): return L else: print s print "no list on file..." return [] except: print "exception occurred during reading..." return [] def ReadData(): """ Prompts the user for a name of a file, tries to open the file and to read a list. If all goes well, the list is returned. """ L = [] while True: name = raw_input("give file name : ") try: f = open(name,'r') L = ReadList(f) f.close() return L except IOError: print 'cound not open file ' + name print 'please try again' return L def Flatten(L): """ Flattens the list of lists into one list. """ R = [] for each in L: for e in each: R.append(e) return R def ClassifyPoint(C,p): """ Returns the index of the point q in C, closest to C. """ d = [Distance(p,q) for q in C] m = min(d) return d.index(m) def ClassifyData(L,k,C): """ Returns a lists of k lists, using the centroids in C to classify all points in L. """ R = [ [] for i in range(0,k) ] for p in L: i = ClassifyPoint(C,p) R[i].append(p) return R def kmeans(L,k): """ Applies the K-means algorithm 3 times to divide the data in L into k clusters. """ C0 = random.sample(L,k) # k unique random elements from L K0 = ClassifyData(L,k,C0) C1 = Centroids(K0); R1 = Radii(K0,C1) turtle.color('red','red') ShowCentroids(C1,R1) ans = raw_input("hit enter to continue ...") K1 = ClassifyData(L,k,C1) C2 = Centroids(K1); R2 = Radii(K1,C2) turtle.color('green','green') ShowCentroids(C2,R2) ans = raw_input("hit enter to continue ...") K2 = ClassifyData(L,k,C2) C3 = Centroids(K2); R3 = Radii(K2,C3) turtle.color('blue','blue') ShowCentroids(C3,R3) def main(): """ Prompts the user for a file to read a list of lists of points of integer coordinates (generated by makedata). """ D = ReadData() ShowData(D) L = Flatten(D) print "the flattened list : ", L random.shuffle(L) k = input("Give number of clusters : "); kmeans(L,k) ans = raw_input("exit ? (y/n) ") main()