首页 > 代码库 > 我的相似度一次迭代聚类

我的相似度一次迭代聚类

package MyCluster;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStreamReader;import java.io.PrintWriter;import java.io.UnsupportedEncodingException;import java.util.ArrayList;import java.util.Vector;public class SimpleCluster {	/**	 * @param args	 */		public static void main(String[] args) throws Exception, IOException {		// TODO Auto-generated method stub		int Featurelenth=GetLenth();						double Theta=0.1;		String Addr="raw/data.txt";		String OutPutAddr="raw/clusterresult.txt";				ArrayList<ClusterObject> ResultSet=Ini_ClusterLIST(Featurelenth);		ResultSet=Get_Cluster_Result(Theta,ResultSet,Addr,OutPutAddr,Featurelenth);	}//针对维度长,多于k个类	public static ArrayList<ClusterObject> Get_Cluster_Result(double Theta,ArrayList<ClusterObject> ResultSet,String Addr,String OutPutAddr,int Featurelenth) throws Exception, FileNotFoundException	{		BufferedReader reader=new BufferedReader(new InputStreamReader(new FileInputStream(Addr),"utf-8"));		String line="";		PrintWriter pw=new PrintWriter(OutPutAddr);		while((line=reader.readLine())!=null)		{			Vector<Double> Line_Vector=StringToVector(line,Featurelenth);			Distribute_OneVecter_to_Cluster(Theta,ResultSet,Line_Vector,Featurelenth,pw,line);		}		pw.close();		return ResultSet;	}	public static ArrayList<ClusterObject> Distribute_OneVecter_to_Cluster(double Theta,ArrayList<ClusterObject> ResultSet,Vector<Double> ToBeComfined,int Featurelenth,PrintWriter pw,String line) 	{			    double similaritymax=0;		    int NO_max=0;	    	for(int i=0;i<ResultSet.size();i++)	    	{	    		double Similarity_cur=Similarity(ToBeComfined,ResultSet.get(i).center,Featurelenth);	    		if(Similarity_cur>similaritymax)	    		{  NO_max=i;	    		   similaritymax= Similarity_cur;	    		}	    	}	    	//System.out.println(similaritymax);	    	if(similaritymax>Theta)	    	{	    	 ResultSet.get(NO_max).setcenter( GetNewCenter(ResultSet.get(NO_max),Featurelenth,ToBeComfined));	    	 ResultSet.get(NO_max).LIST.add(ToBeComfined);	    	 	    	 pw.write(NO_max+"\r\n");	    	}	    	else	    	{	    		ClusterObject NewClu=IniClusterOj(1,ResultSet.size(),ToBeComfined);		    		ResultSet.add(NewClu);	    			    		pw.write(ResultSet.size()-1+"\r\n");	    	}	    	return ResultSet;	}	public static Vector<Double> GetNewCenter(ClusterObject Cluster,int Featurelenth,Vector<Double> NewMenber)	{		Vector<Double> Pre=Cluster.getcenter();		Vector<Double> Fresh=new Vector<Double>();		for(int i=0;i<Featurelenth;i++)		{			Fresh.add((Pre.get(i)*Cluster.LIST.size()+NewMenber.get(i))/(Cluster.LIST.size()+1));		}			return Fresh;	}	public static double Similarity(Vector<Double> A,Vector<Double> B,int Featurelenth)	{		double res=0;		double Vec_Multi_Sum=0;		for(int i=0;i<Featurelenth;i++)			Vec_Multi_Sum=Vec_Multi_Sum+A.get(i)*B.get(i);		    res=Vec_Multi_Sum/(VectorLength(A)*VectorLength(B));		return res;	}	public static double VectorLength(Vector<Double> A)	{		double res=0;		double sum=0;		for(int i=0;i<A.size();i++)			sum=sum+A.get(i)*A.get(i);		    res=Math.sqrt(sum);		return res;	}	public static ArrayList<ClusterObject> Ini_ClusterLIST(int Featurelenth) throws IOException, FileNotFoundException	{		ArrayList<ClusterObject> ori=new ArrayList<ClusterObject>();		BufferedReader reader=new BufferedReader(new InputStreamReader(new FileInputStream("ori/original.txt"),"utf-8"));		String line="";		int i=0;		while((line=reader.readLine())!=null)		{		   Vector<Double> v=StringToVector(line,Featurelenth);		   ClusterObject ClusterOj=IniClusterOj(1,i++,v);		   ori.add(ClusterOj);		}				reader.close();		return ori;	}	public static ClusterObject IniClusterOj(int totalsum,int NO,Vector<Double> center)	{		ArrayList<Vector<Double>> LIST=new ArrayList<Vector<Double>>();		LIST.add(center);		ClusterObject ClusterOj=new ClusterObject(totalsum,NO,LIST,center);		return ClusterOj;	}	public static int GetLenth() throws IOException	{		BufferedReader reader=new BufferedReader(new InputStreamReader(new FileInputStream("raw/data.txt"),"utf-8"));		String line="";		while((line=reader.readLine())!=null)		{		 line=reader.readLine();		 String value[]=line.split(" ");		 return value.length-1;		}				reader.close();		return 0;	}	public static Vector<Double> StringToVector (String line,int Featurelenth)	{		Vector<Double> res=new Vector<Double>();		String value[]=line.split(" ");		for(int i=1;i<=Featurelenth;i++)			res.add(Double.valueOf(value[i]));					return res;	}}

package MyCluster;import java.util.ArrayList;import java.util.Vector;public class ClusterObject {	 public int totalsum;	 public int NO;	 public ArrayList<Vector<Double>> LIST=new ArrayList<Vector<Double>>();	 public Vector<Double> center=new<Double> Vector();	 	 public  ClusterObject(int totalsum,int NO,ArrayList<Vector<Double>> LIST,Vector<Double> center){		 this.totalsum=totalsum;		 this.NO=NO;		 this.LIST=LIST;		 this.center=center;		 }	 public int getNO()	 {  return NO;	 	 }	 public Vector<Double> getcenter()	 {  return center;	 	 }	 public void setNO(int X)	 {		 this.NO=X;	 }	 public void setcenter(Vector<Double> X)	 {		 this.center=X;	 }	 	 }

 

 

我的相似度一次迭代聚类