首页 > 代码库 > python学习笔记 python实现k-means聚类

python学习笔记 python实现k-means聚类

技术分享
  1 # -*- coding: utf-8 -*-
  2 """
  3 Created on Thu Mar 16 14:52:58 2017
  4 
  5 @author: Jarvis
  6 """
  7 import tensorflow as tf
  8 import numpy as np
  9 import pandas as pd
 10 import math
 11 import random
 12 from pandas import Series,DataFrame
 13 def cal_dis(a,b):
 14     sum = 0
 15     for x,y in zip(a,b):
 16         sum = sum+(x-y)*(x-y)
 17     return math.sqrt(sum)
 18 
 19 def is_same_series(a,b):
 20     
 21     for x,y in zip(a,b):
 22         if x != y:
 23             return False
 24     return True
 25 def is_constant_vec(a,b):
 26     if len(a) == 0 or len(b) == 0:
 27         return False
 28     
 29     for x,y in zip(a,b):
 30         if not is_same_series(x,y):
 31             return False 
 32     
 33     return True 
 34 
 35 def init_typeSet(init_set,type_num):
 36     for i in range(type_num):
 37         init_set.append([])
 38     return init_set
 39 
 40 def Mindis_type_no(x,vecs,typ_num = 4):
 41     mindis = cal_dis(x,vecs[0])
 42     ans = 0
 43     for i in range(1,type_num):
 44         tmp  = cal_dis(x,vecs[i])
 45         if mindis > tmp:
 46             ans= i
 47             mindis = cal_dis(x,vecs[i])
 48     return ans
 49 
 50 def reduce_mean(vecs):
 51     
 52     reduce_vec = vecs[0].copy()
 53     print(reduce_vec)
 54     for i in range(1,len(vecs)):
 55         print (vecs[i][5])
 56         print (reduce_vec[5])
 57         reduce_vec = reduce_vec+vecs[i]
 58     
 59         print (reduce_vec[5])
 60     
 61     reduce_vec = reduce_vec/len(vecs)
 62 #    print(reduce_vec)
 63     return reduce_vec
 64 def get_vecs(sets):
 65     vecs = []
 66     for i in sets:
 67         vecs.append(data.ix[i])
 68     return vecs
 69 
 70 raw_data_file = pd.read_csv(NDVI_NDWI_all.csv,header = None,encoding = gbk)
 71 data =http://www.mamicode.com/ (raw_data_file)
 72 del data[0]
 73 del data[1]
 74 del data[2]
 75 
 76 type_num = 4
 77 init_type_vec = []#类质心
 78 tmp_set = set([])
 79 data_size = len(data)
 80 
 81 while (len(tmp_set) < type_num):
 82      tmp_set.add(random.choice(range(data_size)))
 83 
 84 for i in tmp_set:
 85     tmp = data.ix[i]
 86     init_type_vec.append(tmp)
 87 
 88 pre_vec = []
 89 #print (is_constant_vec(pre_vec,init_type_vec))
 90 
 91 while( not is_constant_vec(pre_vec,init_type_vec)):
 92     type_set = []
 93     type_set = init_typeSet(type_set,type_num)
 94     for j in range(len(data)):
 95         tmp_type = Mindis_type_no(data.ix[j],init_type_vec)
 96         type_set[tmp_type].append(j)
 97         #type_set[tmp_type].append(data.ix[j])
 98     if(len(pre_vec) == 0):
 99         pre_vec = init_type_vec.copy()
100     else:
101         for i in range(type_num):
102             pre_vec[i] = init_type_vec[i]
103             need_cal_vecs = get_ves(type_set[i])
104             init_type_vec[i] = reduce_mean(need_cal_vecs).copy()
105 for i in range(type_num):
106     print(--------------------------------)
107     print(type_set[i])
108     
109     
110 with open(output.txt,"w"):
111     for i in range(type_num):
112         print ("type %d"%i)
113         for j in type_set[i]:
114             print(j)
115     
116 #print(reduce_mean(type_set))
117 #for i in range(type_num):
118      #   pre_vec[i] = tf.reduce_mean()
View Code

 

python学习笔记 python实现k-means聚类