转:TopN推荐系统——推荐的实现与推荐效果的评价指标

时间:2022-02-28 09:35:10

转自:用户推荐系统_python 代码-豆瓣
书籍:项亮的<推荐系统实践>

import random
import math

class UserBasedCF:
def __init__(self,train = None,test = None):
self.trainfile
= train
self.testfile
= test
self.readData()

def readData(self,train = None,test = None):
self.trainfile
= train or self.trainfile
self.testfile
= test or self.testfile
self.traindata
= {}
self.testdata
= {}
for line in open(self.trainfile):
userid,itemid,record,_
= line.split()
self.traindata.setdefault(userid,{})
self.traindata[userid][itemid]
=record
for line in open(self.testfile):
userid,itemid,record,_
= line.split()
self.testdata.setdefault(userid,{})
self.testdata[userid][itemid]
=record

def userSimilarityBest(self,train = None):
train
= train or self.traindata
self.userSimBest
= dict()
item_users
= dict()
for u,item in train.items():
for i in item.keys():
item_users.setdefault(i,set())
item_users[i].add(u)
user_item_count
= dict()
count
= dict()
for item,users in item_users.items():
for u in users:
user_item_count.setdefault(u,0)
user_item_count[u]
+= 1
for v in users:
if u == v:continue
count.setdefault(u,{})
count[u].setdefault(v,0)
count[u][v]
+= 1
for u ,related_users in count.items():
self.userSimBest.setdefault(u,dict())
for v, cuv in related_users.items():
self.userSimBest[u][v]
= cuv / math.sqrt(user_item_count[u] * user_item_count[v] * 1.0)

def recommend(self,user,train = None,k = 8,nitem = 40):
train
= train or self.traindata
rank
= dict()
interacted_items
= train.get(user,{})
for v ,wuv in sorted(self.userSimBest[user].items(),key = lambda x : x[1],reverse = True)[0:k]:#获取与user相似度最高的k个用户
for i , rvi in train[v].items():
if i in interacted_items:
continue #只选择user没有评分过的物品进行推荐
rank.setdefault(i,0)#设置初始值,以便做下面的累加运算
rank[i]
+= wuv #书中为rank[i] +=rvi*wuv
return dict(sorted(rank.items(),key = lambda x :x[1],reverse = True)[0:nitem])#用sorted方法对推荐的物品进行排序,预计评分高的排在前面,再取其中nitem个,nitem为每个用户推荐的物品数量

def recallAndPrecision(self,train = None,test = None,k = 8,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
hit
= 0
recall
= 0
precision
= 0
for user in train.keys():
tu
= test.get(user,{})#如果测试集中没有这个用户,则将tu初始化为空,避免test[user]报错
rank
= self.recommend(user, train = train,k = k,nitem = nitem)
for item,_ in rank.items():
if item in tu:
hit
+= 1
recall
+= len(tu)
precision
+= nitem
return (hit / (recall * 1.0),hit / (precision * 1.0))

def coverage(self,train = None,test = None,k = 8,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
recommend_items
= set()
all_items
= set()
for user in train.keys():
for item in train[user].keys():
all_items.add(item)
rank
= self.recommend(user, train, k = k, nitem = nitem)
for item,_ in rank.items():
recommend_items.add(item)
return len(recommend_items) / (len(all_items) * 1.0)

def popularity(self,train = None,test = None,k = 8,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
item_popularity
= dict()
for user ,items in train.items():
for item in items.keys():
item_popularity.setdefault(item,0)
item_popularity[item]
+= 1
ret
= 0
n
= 0
for user in train.keys():
rank
= self.recommend(user, train, k = k, nitem = nitem)
for item ,_ in rank.items():
ret
+= math.log(1+item_popularity[item])
n
+= 1
return ret / (n * 1.0)


def testUserBasedCF():
train
= 'u1.base'
test
= 'u1.test'
cf
= UserBasedCF(train,test)
cf.userSimilarityBest()
print("%3s%20s%20s%20s%20s" % ('K',"precision",'recall','coverage','popularity'))
for k in [5,10,20,40,80,160]:
recall,precision
= cf.recallAndPrecision( k = k)
coverage
= cf.coverage(k = k)
popularity
= cf.popularity(k = k)
print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k,precision * 100,recall * 100,coverage * 100,popularity))

if __name__ == "__main__":
testUserBasedCF()

基于项目的推荐系统,IBCF:

'''
Created on 2013-10-10

@author: Administrator
'''
import random
import math

class KNN:
def __init__(self,train = None,test = None):
self.trainfile
= train
self.testfile
= test
self.readData()

def readData(self,train = None,test = None):
self.trainfile
= train or self.trainfile
self.testfile
= test or self.testfile
self.traindata
= {}
self.testdata
= {}
for line in open(self.trainfile):
userid,itemid,record,_
= line.split()
self.traindata.setdefault(userid,{})
self.traindata[userid][itemid]
=record
for line in open(self.testfile):
userid,itemid,record,_
= line.split()
self.testdata.setdefault(userid,{})
self.testdata[userid][itemid]
=record


def ItemSim(self,train = None):
train
= train or self.traindata
ItemSimcount
= dict()
Item_count
= dict()
for _,items in train.items():
for itemidi in items.keys():
Item_count.setdefault(itemidi,0)
Item_count[itemidi]
+= 1
for itemidj in items.keys():
if itemidi == itemidj:
continue
ItemSimcount.setdefault(itemidi,{})
ItemSimcount[itemidi].setdefault(itemidj,0)
ItemSimcount[itemidi][itemidj]
+=1
self.ItemSimlist
= dict()
for itemidi, related_item in ItemSimcount.items():
self.ItemSimlist.setdefault(itemidi,{})
for itemidj,wij in related_item.items():
self.ItemSimlist[itemidi].setdefault(itemidj,0)
self.ItemSimlist[itemidi][itemidj]
= wij/math.sqrt(Item_count[itemidi]*Item_count[itemidj]*1.0)

def recommend(self,user,train = None,k = 5,nitem = 10):
train
= train or self.traindata
recommendlist
= dict()
User_Itemlist
= train.get(user,{})
for i,ri in User_Itemlist.items():
for j,wij in sorted(self.ItemSimlist[i].items(),key = lambda x:x[1],reverse = True)[0:k]:
if j in User_Itemlist:
continue
recommendlist.setdefault(j,0)
recommendlist[j]
+= float(ri)*wij
return dict(sorted(recommendlist.items(),key = lambda x :x[1],reverse = True)[0:nitem])

def recallAndPrecision(self,train = None,test = None,k = 5,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
hit
= 0
recall
= 0
precision
= 0
for user in train.keys():
tu
= test.get(user,{})
rank
= self.recommend(user, train = train,k = k,nitem = nitem)
for item,_ in rank.items():
if item in tu:
hit
+= 1
recall
+= len(tu)
precision
+= nitem
return (hit / (recall * 1.0),hit / (precision * 1.0))

def coverage(self,train = None,test = None,k = 5,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
recommend_items
= set()
all_items
= set()
for user in train.keys():
for item in train[user].keys():
all_items.add(item)
rank
= self.recommend(user, train, k = k, nitem = nitem)
for item,_ in rank.items():
recommend_items.add(item)
return len(recommend_items) / (len(all_items) * 1.0)

def popularity(self,train = None,test = None,k = 5,nitem = 10):
train
= train or self.traindata
test
= test or self.testdata
item_popularity
= dict()
for user ,items in train.items():
for item in items.keys():
item_popularity.setdefault(item,0)
item_popularity[item]
+= 1
ret
= 0
n
= 0
for user in train.keys():
rank
= self.recommend(user, train, k = k, nitem = nitem)
for item ,_ in rank.items():
if item in item_popularity:
ret
+= math.log(1+item_popularity[item])
n
+= 1
return ret / (n * 1.0)


def testKNNCF():
train
= 'u1.base'
test
= 'u1.test'
cf
= KNN(train,test)
cf.ItemSim()
print("%3s%20s%20s%20s%20s" % ('K',"precision",'recall','coverage','popularity'))
for k in [5,10,20,40,80,160]:
recall,precision
= cf.recallAndPrecision( k = k)
coverage
= cf.coverage(k = k)
popularity
= cf.popularity(k = k)
print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k,precision * 100,recall * 100,coverage * 100,popularity))

if __name__ == "__main__":
testKNNCF()