另一种相似度度量称为皮尔逊相关系数:以两位评价者分别作为坐标轴,将他们都评价过的电影标在坐标系内,再计算这些点的最佳拟合线;各点越接近这条直线,两人的偏好越相似。该系数的取值在 [-1, +1] 之间。




如上表所示,这是为一位名叫 Toby 的评论者推荐他尚未看过的电影的过程。首先用欧几里得距离或皮尔逊相关系数计算其他每位评论者与 Toby 的相似度(Similarity 列);然后用该相似度乘以这位评论者对每部电影的打分,得到加权后的“得分”;最后把每部电影的加权得分总和除以给该电影打过分的评论者的相似度之和,得到这部电影最终的“推荐值”。“推荐值”越高,这部电影越可能得到 Toby 的认可。



  # -*- coding: utf-8 -*-

# User-based collaborative filtering on a small set of movie ratings.
#
# Two similarity measures are provided (Euclidean-distance based and Pearson
# correlation); topMatches ranks other people by similarity and
# getRecommendations scores unseen items by a similarity-weighted average.

from math import sqrt

# A dictionary of movie critics and their ratings of a small
# set of movies
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0, 'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5, 'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5, 'Superman Returns': 5.0,
        'The Night Listener': 3.0, 'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5, 'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
        'The Night Listener': 4.5, 'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0, 'Superman Returns': 3.0,
        'The Night Listener': 3.0, 'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0, 'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


def _shared_items(prefs, person1, person2):
    """Return the items rated by both person1 and person2."""
    return [item for item in prefs[person1] if item in prefs[person2]]


def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    The score is 1 / (1 + d), where d is the Euclidean distance between the
    two people's ratings over the items both have rated. Identical ratings
    give 1; larger distances give values closer to 0.

    Args:
        prefs: mapping of person -> {item: rating}.
        person1: key of the first person in prefs.
        person2: key of the second person in prefs.

    Returns:
        The similarity score, or 0 when the two people share no rated items.
    """
    shared = _shared_items(prefs, person1, person2)
    if not shared:
        return 0

    sum_of_squares = sum(
        pow(prefs[person1][item] - prefs[person2][item], 2)
        for item in shared)
    return 1 / (1 + sqrt(sum_of_squares))


def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation coefficient for two people.

    The coefficient is computed over the items both people have rated and
    lies in [-1, +1].

    Args:
        prefs: mapping of person -> {item: rating}.
        person1: key of the first person in prefs.
        person2: key of the second person in prefs.

    Returns:
        The correlation coefficient, or 0 when it is undefined: the two
        people share no rated items, or either person's shared ratings have
        no variance.
    """
    shared = _shared_items(prefs, person1, person2)
    if not shared:
        # No overlap means no evidence of similarity; returning a positive
        # value here would let getRecommendations weight unrelated people.
        return 0

    # Float count so the divisions below never truncate integer ratings.
    n = float(len(shared))

    sum1 = sum(prefs[person1][it] for it in shared)
    sum2 = sum(prefs[person2][it] for it in shared)

    sum1_sq = sum(pow(prefs[person1][it], 2) for it in shared)
    sum2_sq = sum(pow(prefs[person2][it], 2) for it in shared)

    p_sum = sum(prefs[person1][it] * prefs[person2][it] for it in shared)

    num = p_sum - (sum1 * sum2 / n)
    variance_product = ((sum1_sq - pow(sum1, 2) / n)
                        * (sum2_sq - pow(sum2, 2) / n))
    # Rounding can push a zero variance slightly negative, which would make
    # sqrt() raise; treat any non-positive product as "no correlation".
    if variance_product <= 0:
        return 0

    return num / sqrt(variance_product)


# Returns the best matches for person from the prefs dictionary.
# Number of results and similarity function are optional params.
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the n people most similar to person.

    Args:
        prefs: mapping of person -> {item: rating}.
        person: key of the person to find matches for.
        n: maximum number of matches to return.
        similarity: function (prefs, person1, person2) -> score.

    Returns:
        A list of (score, other_person) tuples, highest score first.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    # Highest scores first.
    return sorted(scores, reverse=True)[0:n]


# Gets recommendations for a person by using a weighted average
# of every other user's rankings
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Return items person has not rated, ranked by predicted rating.

    Each other person's rating is weighted by that person's similarity to
    `person`; the weighted sum for an item is then divided by the sum of
    the similarities of the people who rated that item.

    Args:
        prefs: mapping of person -> {item: rating}.
        person: key of the person to recommend for.
        similarity: function (prefs, person1, person2) -> score.

    Returns:
        A list of (predicted_rating, item) tuples, highest rating first.
    """
    totals = {}
    sim_sums = {}
    for other in prefs:
        # don't compare me to myself
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # ignore scores of zero or lower
        if sim <= 0:
            continue
        for item in prefs[other]:
            # only score movies I haven't seen yet
            if item not in prefs[person] or prefs[person][item] == 0:
                # Similarity * Score
                totals[item] = totals.get(item, 0) + prefs[other][item] * sim
                # Sum of similarities
                sim_sums[item] = sim_sums.get(item, 0) + sim

    # Create the normalized list
    rankings = [(total / sim_sums[item], item)
                for item, total in totals.items()]
    # Return the list sorted with the best recommendation first
    return sorted(rankings, reverse=True)


if __name__ == '__main__':
    print(getRecommendations(critics, 'Toby', similarity=sim_pearson))
