首页 > 代码库 > mahout推荐12-相似度方法汇总

mahout推荐12-相似度方法汇总

这里汇总了 Mahout 中各种计算用户相似度的方法,可供参考。下面给出的是可以实际运行的代码。

数据文件 intro.csv 的内容如下(每行格式为:用户ID,物品ID,偏好值),直接复制并保存为 data/intro.csv 即可:

1,101,5.0
1,102,3.0
1,103,2.5

2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0

3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0

4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0

5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0

 

代码如下,均带有注释;运行结果这里就不贴出了。

package mahout;import java.io.File;import org.apache.mahout.cf.taste.common.TasteException;import org.apache.mahout.cf.taste.common.Weighting;import org.apache.mahout.cf.taste.eval.DataModelBuilder;import org.apache.mahout.cf.taste.eval.RecommenderBuilder;import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;import org.apache.mahout.cf.taste.impl.common.FastByIDMap;import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;import org.apache.mahout.cf.taste.model.DataModel;import org.apache.mahout.cf.taste.model.PreferenceArray;import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;import org.apache.mahout.cf.taste.recommender.Recommender;import org.apache.mahout.cf.taste.similarity.UserSimilarity;import org.apache.mahout.common.RandomUtils;/** *  * @author Administrator * */public class TestRecommenderEvaluator2 {	public static void main(String[] args) throws Exception {		//强制每次生成相同的随机值,生成可重复的结果		RandomUtils.useTestSeed();		//数据装填,无偏好值的处理		//DataModel dataModel = new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File("data/ua.base"))));		DataModel dataModel = new FileDataModel(new File("data/intro.csv"));				//推荐评估,使用平均值		
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();		//推荐评估,使用均方差		//RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();		//用于生成推荐引擎的构建器,与上一例子实现相同		RecommenderBuilder builder = new RecommenderBuilder() {						public Recommender buildRecommender(DataModel model) throws TasteException {				// TODO Auto-generated method stub				//用户相似度,多种方法				//皮尔逊相关系数,未引入权重,同余弦相似度				//UserSimilarity similarity = new PearsonCorrelationSimilarity(model);				// 皮尔逊相关系数,引入了权重				//UserSimilarity similarity = new PearsonCorrelationSimilarity(model,Weighting.WEIGHTED);				// 欧式距离定义相似度				//UserSimilarity similarity = new EuclideanDistanceSimilarity(model);				// 斯皮尔曼相关系数				//UserSimilarity similarity = new SpearmanCorrelationSimilarity(model);				// 斯皮尔曼相关系数 缓存级别的				//UserSimilarity similarity = new CachingUserSimilarity(new SpearmanCorrelationSimilarity(model), model);				// 谷本系数(忽略偏好值的)				// UserSimilarity similarity = new TanimotoCoefficientSimilarity(model);				// 对数似然法				UserSimilarity similarity = new LogLikelihoodSimilarity(model);				//用户邻居				UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);				//一个推荐器				return new GenericUserBasedRecommender(model, neighborhood, similarity);			}		};		/*DataModelBuilder modelBuilder = new DataModelBuilder() {						public DataModel buildDataModel(FastByIDMap<PreferenceArray> arg0) {				// TODO Auto-generated method stub				return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(arg0));			}		};*/		//推荐程序评估值(平均差值)训练90%的数据,测试数据10%,《mahout in Action》使用的是0.7,但是出现结果为NaN		double score = evaluator.evaluate(builder, null, dataModel, 0.9, 1.0);		System.out.println(score);	}}