首页 > 代码库 > 我的相似度一次迭代聚类
我的相似度一次迭代聚类
package MyCluster;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Vector;

/**
 * Single-pass clustering of feature vectors by cosine similarity.
 *
 * <p>Each input line is split on single spaces; the first token is skipped and the
 * following {@code Featurelenth} tokens are parsed as doubles. Every vector is compared
 * against the current cluster centers: it joins the most similar cluster when that
 * similarity exceeds a threshold, otherwise it founds a new cluster. The index of the
 * chosen cluster is written to the output file, one index per input line.
 */
public class SimpleCluster {

    /**
     * Runs the clustering with hard-coded paths: seeds come from
     * {@code ori/original.txt}, data from {@code raw/data.txt}, and cluster indices go
     * to {@code raw/clusterresult.txt}. The similarity threshold is fixed at 0.1.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception, IOException {
        int Featurelenth = GetLenth();
        double Theta = 0.1;
        String Addr = "raw/data.txt";
        String OutPutAddr = "raw/clusterresult.txt";
        ArrayList<ClusterObject> ResultSet = Ini_ClusterLIST(Featurelenth);
        Get_Cluster_Result(Theta, ResultSet, Addr, OutPutAddr, Featurelenth);
    }

    /**
     * Streams the data file line by line and assigns every line's vector to a cluster.
     *
     * <p>Original author's note (translated): intended for long feature dimensions with
     * more than k classes.
     *
     * @param Theta        similarity threshold a vector must exceed to join an existing cluster
     * @param ResultSet    clusters to start from; modified in place
     * @param Addr         path of the UTF-8 input file
     * @param OutPutAddr   path of the file receiving one cluster index per input line
     * @param Featurelenth number of features parsed from each line
     * @return the same {@code ResultSet} instance after all lines were distributed
     * @throws FileNotFoundException if either file cannot be opened
     */
    public static ArrayList<ClusterObject> Get_Cluster_Result(double Theta, ArrayList<ClusterObject> ResultSet, String Addr, String OutPutAddr, int Featurelenth) throws Exception, FileNotFoundException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(Addr), "utf-8"));
        try {
            // The writer is opened after the reader so a missing input file does not
            // truncate an existing output file.
            PrintWriter pw = new PrintWriter(OutPutAddr);
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    Vector<Double> lineVector = StringToVector(line, Featurelenth);
                    Distribute_OneVecter_to_Cluster(Theta, ResultSet, lineVector, Featurelenth, pw, line);
                }
            } finally {
                // Closed even when parsing fails so already written indices are flushed.
                pw.close();
            }
        } finally {
            reader.close();
        }
        return ResultSet;
    }

    /**
     * Assigns one vector to the most similar cluster or opens a new cluster for it, and
     * writes the chosen cluster index followed by {@code "\r\n"} to {@code pw}.
     *
     * <p>Only similarities greater than 0 are considered when searching for the best
     * cluster. The vector joins that cluster when its similarity is also greater than
     * {@code Theta}; the center is then replaced by the updated mean before the vector
     * is appended to the member list.
     *
     * @param Theta        similarity threshold
     * @param ResultSet    current clusters; modified in place
     * @param ToBeComfined vector to assign
     * @param Featurelenth number of features compared
     * @param pw           receiver of the cluster index
     * @param line         raw input line the vector was parsed from (not used here)
     * @return the same {@code ResultSet} instance
     */
    public static ArrayList<ClusterObject> Distribute_OneVecter_to_Cluster(double Theta, ArrayList<ClusterObject> ResultSet, Vector<Double> ToBeComfined, int Featurelenth, PrintWriter pw, String line) {
        double bestSimilarity = 0;
        int bestIndex = 0;
        for (int i = 0; i < ResultSet.size(); i++) {
            double current = Similarity(ToBeComfined, ResultSet.get(i).center, Featurelenth);
            if (current > bestSimilarity) {
                bestIndex = i;
                bestSimilarity = current;
            }
        }

        // The emptiness guard matters only for a negative Theta: without it an empty
        // cluster list would be indexed at position 0.
        if (!ResultSet.isEmpty() && bestSimilarity > Theta) {
            ClusterObject target = ResultSet.get(bestIndex);
            // The center must be updated first: GetNewCenter weights the old center
            // with the member count before the new vector is added.
            target.setcenter(GetNewCenter(target, Featurelenth, ToBeComfined));
            target.LIST.add(ToBeComfined);
            // NOTE(review): totalsum is not updated here; confirm whether it is meant
            // to track the member count.
            pw.write(bestIndex + "\r\n");
        } else {
            ClusterObject created = IniClusterOj(1, ResultSet.size(), ToBeComfined);
            ResultSet.add(created);
            pw.write(ResultSet.size() - 1 + "\r\n");
        }
        return ResultSet;
    }

    /**
     * Computes the center a cluster would have after accepting one more member, as the
     * running mean {@code (center * n + member) / (n + 1)} with {@code n} the current
     * size of the cluster's member list.
     *
     * @param Cluster      cluster whose center and member count are read (not modified)
     * @param Featurelenth number of components to compute
     * @param NewMenber    vector about to be added to the cluster
     * @return a new vector holding the updated center
     */
    public static Vector<Double> GetNewCenter(ClusterObject Cluster, int Featurelenth, Vector<Double> NewMenber) {
        Vector<Double> previous = Cluster.getcenter();
        int memberCount = Cluster.LIST.size();
        Vector<Double> updated = new Vector<Double>();
        for (int i = 0; i < Featurelenth; i++) {
            updated.add((previous.get(i) * memberCount + NewMenber.get(i)) / (memberCount + 1));
        }
        return updated;
    }

    /**
     * Cosine similarity of two vectors: the dot product over the first
     * {@code Featurelenth} components divided by the product of both Euclidean norms.
     *
     * @param A            first vector
     * @param B            second vector
     * @param Featurelenth number of components included in the dot product
     * @return the cosine similarity, or 0 when either vector has norm 0 (the quotient
     *         would otherwise be NaN)
     */
    public static double Similarity(Vector<Double> A, Vector<Double> B, int Featurelenth) {
        double dotProduct = 0;
        for (int i = 0; i < Featurelenth; i++) {
            dotProduct += A.get(i) * B.get(i);
        }
        double normA = VectorLength(A);
        double normB = VectorLength(B);
        if (normA == 0 || normB == 0) {
            return 0;
        }
        return dotProduct / (normA * normB);
    }

    /**
     * Euclidean norm of a vector, computed over all of its components.
     *
     * @param A vector to measure
     * @return square root of the sum of squared components
     */
    public static double VectorLength(Vector<Double> A) {
        double squaredSum = 0;
        for (int i = 0; i < A.size(); i++) {
            squaredSum += A.get(i) * A.get(i);
        }
        return Math.sqrt(squaredSum);
    }

    /**
     * Builds the initial cluster list from {@code ori/original.txt}: every line becomes
     * a cluster whose center and only member is the vector parsed from that line.
     * Clusters are numbered from 0 in file order.
     *
     * @param Featurelenth number of features parsed from each line
     * @return the list of seed clusters (empty when the file has no lines)
     * @throws FileNotFoundException if the seed file cannot be opened
     * @throws IOException           if reading fails
     */
    public static ArrayList<ClusterObject> Ini_ClusterLIST(int Featurelenth) throws IOException, FileNotFoundException {
        ArrayList<ClusterObject> seeds = new ArrayList<ClusterObject>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("ori/original.txt"), "utf-8"));
        try {
            String line;
            int number = 0;
            while ((line = reader.readLine()) != null) {
                Vector<Double> seedVector = StringToVector(line, Featurelenth);
                seeds.add(IniClusterOj(1, number++, seedVector));
            }
        } finally {
            reader.close();
        }
        return seeds;
    }

    /**
     * Creates a cluster whose member list contains exactly the given center vector.
     *
     * @param totalsum value stored in the cluster's {@code totalsum} field
     * @param NO       cluster number
     * @param center   initial center; also stored as the first member
     * @return the new cluster
     */
    public static ClusterObject IniClusterOj(int totalsum, int NO, Vector<Double> center) {
        ArrayList<Vector<Double>> members = new ArrayList<Vector<Double>>();
        members.add(center);
        return new ClusterObject(totalsum, NO, members, center);
    }

    /**
     * Determines the feature count from the first line of {@code raw/data.txt}: the
     * number of space-separated tokens minus one, since the first token is not a
     * feature (see {@link #StringToVector}).
     *
     * @return the feature count, or 0 when the file is empty
     * @throws IOException if the file cannot be opened or read
     */
    public static int GetLenth() throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("raw/data.txt"), "utf-8"));
        try {
            // Read exactly one line: reading again before splitting would skip the
            // first line and fail on a file that has only one.
            String firstLine = reader.readLine();
            if (firstLine == null) {
                return 0;
            }
            return firstLine.split(" ").length - 1;
        } finally {
            reader.close();
        }
    }

    /**
     * Parses one input line into a feature vector. The line is split on single spaces;
     * token 0 is skipped and tokens 1 to {@code Featurelenth} are parsed as doubles.
     *
     * @param line         raw input line
     * @param Featurelenth number of features to parse
     * @return vector with {@code Featurelenth} components
     * @throws ArrayIndexOutOfBoundsException if the line has too few tokens
     * @throws NumberFormatException          if a token is not a valid double
     */
    public static Vector<Double> StringToVector(String line, int Featurelenth) {
        Vector<Double> features = new Vector<Double>();
        String[] tokens = line.split(" ");
        for (int i = 1; i <= Featurelenth; i++) {
            features.add(Double.valueOf(tokens[i]));
        }
        return features;
    }
}
package MyCluster;

import java.util.ArrayList;
import java.util.Vector;

/**
 * One cluster: a number, a center vector and the list of member vectors.
 *
 * <p>The fields are public and are read and modified directly by callers; the
 * constructor stores the given list and vector by reference, without copying.
 */
public class ClusterObject {

    /** Value supplied at construction; this class never changes it afterwards. */
    public int totalsum;

    /** Cluster number. */
    public int NO;

    /** Member vectors of the cluster. */
    public ArrayList<Vector<Double>> LIST;

    /** Current center of the cluster. */
    public Vector<Double> center;

    /**
     * Creates a cluster from the given values. No defensive copies are made, so later
     * changes to {@code LIST} or {@code center} by the caller are visible here.
     *
     * @param totalsum value stored in {@link #totalsum}
     * @param NO       cluster number
     * @param LIST     member vectors
     * @param center   center vector
     */
    public ClusterObject(int totalsum, int NO, ArrayList<Vector<Double>> LIST, Vector<Double> center) {
        this.totalsum = totalsum;
        this.NO = NO;
        this.LIST = LIST;
        this.center = center;
    }

    /** @return the cluster number */
    public int getNO() {
        return NO;
    }

    /** @return the current center vector (the stored instance, not a copy) */
    public Vector<Double> getcenter() {
        return center;
    }

    /** @param X new cluster number */
    public void setNO(int X) {
        this.NO = X;
    }

    /** @param X new center vector; stored by reference */
    public void setcenter(Vector<Double> X) {
        this.center = X;
    }
}
我的相似度一次迭代聚类
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。若内容有误或涉及侵权,可进行投诉(投诉/举报)。工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。