基于ItemCF算法
- #!/usr/sbin/env python
- # -*- coding:utf-8 -*-
- import math
# ItemCF: cosine-style item-to-item similarity
def ItemSimilarity(train):
    """Compute item-item similarity from co-occurrence in user histories.

    Args:
        train: mapping ``user -> {item: value}``. Only the item keys are
            read here; the values are ignored.

    Returns:
        Nested dict ``W`` where ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``
        for every ordered pair of distinct items that appear together in at
        least one user's history. Items that never co-occur with another
        item have no row in ``W``.
    """
    C = dict()  # C[i][j]: number of users whose history holds both i and j
    N = dict()  # N[i]: number of users whose history holds i
    for items in train.values():
        for i in items:
            # The counters start empty, so missing keys must default to 0.
            N[i] = N.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                row = C.setdefault(i, {})
                row[j] = row.get(j, 0) + 1
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
# ItemCF-IUF: item similarity with inverse user frequency weighting
def ItemSimilarity_v2(train):
    """Compute item-item similarity, damping the vote of very active users.

    Same as the plain co-occurrence similarity except that each user adds
    ``1 / log(1 + len(history))`` to a co-occurrence cell instead of 1, so a
    user with a long history contributes less per item pair.

    Args:
        train: mapping ``user -> {item: value}``. Only the item keys are
            read here.

    Returns:
        Nested dict ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])`` for every
        ordered pair of distinct items sharing at least one user.
    """
    C = dict()  # C[i][j]: IUF-weighted co-occurrence of items i and j
    N = dict()  # N[i]: number of users whose history holds i
    for items in train.values():
        # Loop-invariant per user. It is only consumed when the history has
        # at least two items, so the logarithm is never zero where used.
        weight = 1 / math.log(1 + len(items) * 1.0) if len(items) > 1 else 0.0
        for i in items:
            N[i] = N.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                row = C.setdefault(i, {})
                row[j] = row.get(j, 0) + weight
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
def Recommend(train, user_id, W, K):
    """Score unseen items for a user with item-based collaborative filtering.

    For every item ``i`` in the user's history, the ``K`` items most similar
    to ``i`` (by ``W[i]``) each receive ``pi * wj``, where ``pi`` is the
    value stored for ``i`` in the user's history and ``wj`` the similarity.

    Args:
        train: mapping ``user -> {item: value}``.
        user_id: key of the target user in ``train``.
        W: nested similarity dict ``W[i][j]``.
        K: number of nearest items considered per history item.

    Returns:
        Dict ``item -> accumulated score`` over items not in the history.

    Raises:
        KeyError: if ``user_id`` is not in ``train``.
    """
    # Local import: the file never imports itemgetter at module level.
    from operator import itemgetter

    rank = dict()
    ru = train[user_id]
    for i, pi in ru.items():
        # An item without any co-occurrence has no row in W; treat as empty.
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + pi * wj
    return rank
复制代码
基于UserCF算法
- #!/usr/sbin/env python
- # -*- coding:utf-8 -*-
- import math
- '''
- 基于UserCF的推荐算法
- '''
# UserCF: cosine-style user-to-user similarity
def UserSimilarity(train):
    """Compute user-user similarity from shared items.

    An inverted ``item -> users`` table is built first so that only user
    pairs that share at least one item are visited.

    Args:
        train: mapping ``user -> {item: value}``. Only the item keys are
            read here.

    Returns:
        Nested dict ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])`` for every
        ordered pair of distinct users sharing at least one item. Users who
        share nothing with anybody have no row in ``W``.
    """
    # Inverted table: item -> set of users who have it.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: number of items shared by u and v
    N = dict()  # N[u]: number of items in u's history
    for users in item_users.values():
        for u in users:
            # The counters start empty, so missing keys must default to 0.
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + 1

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
# User-IIF: user similarity with inverse item frequency weighting
def UserSimilarity_v2(train):
    """Compute user-user similarity, damping the vote of popular items.

    Same as the plain shared-item similarity except that each shared item
    adds ``1 / log(1 + number_of_users_of_that_item)`` instead of 1, so an
    item held by many users says less about two users being alike.

    Args:
        train: mapping ``user -> {item: value}``. Only the item keys are
            read here.

    Returns:
        Nested dict ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])`` for every
        ordered pair of distinct users sharing at least one item.
    """
    # Inverted table: item -> set of users who have it.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: IIF-weighted count of items shared by u and v
    N = dict()  # N[u]: number of items in u's history
    for users in item_users.values():
        # Loop-invariant per item. It is only consumed when at least two
        # users share the item, so the logarithm is never zero where used.
        weight = 1 / math.log(1 + len(users)) if len(users) > 1 else 0.0
        for u in users:
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + weight

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def Recommend(user, train, W, K=None):
    """Score unseen items for a user with user-based collaborative filtering.

    Each of the ``K`` users most similar to ``user`` adds ``wuv * rvi`` to
    every item ``i`` in their history, where ``wuv`` is the similarity and
    ``rvi`` the value that neighbour stores for ``i``.

    Args:
        user: key of the target user in ``train``.
        train: mapping ``user -> {item: value}``.
        W: nested similarity dict ``W[u][v]``.
        K: number of nearest neighbours to use. ``None`` (the default) uses
            every neighbour listed in ``W[user]``.

    Returns:
        Dict ``item -> accumulated score`` over items not in the history.

    Raises:
        KeyError: if ``user`` (or one of its neighbours) is not in ``train``.
    """
    # Local import: the file never imports itemgetter at module level.
    from operator import itemgetter

    rank = dict()
    interacted_items = train[user]
    # A user who shares nothing with anybody has no row in W.
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank
复制代码
基于时间上下文的个性化推荐
- #!/usr/sbin/env python
- # -*- coding:utf-8 -*-
- import math
def RecentPopularity(records, alpha, T):
    """Compute time-decayed item popularity as seen at time ``T``.

    Every record strictly earlier than ``T`` adds
    ``1 / (1 + alpha * (T - tm))`` to its item, so recent interactions weigh
    more. Records with ``tm >= T`` are skipped.

    Args:
        records: iterable of ``(user, item, tm)`` triples.
        alpha: decay rate; larger values discount old records faster.
        T: reference time, in the same unit as ``tm``.

    Returns:
        Dict ``item -> decayed popularity``.
    """
    ret = dict()
    for user, item, tm in records:
        if tm >= T:
            continue
        addToDict(ret, item, 1 / (1.0 + alpha * (T - tm)))
    return ret


def addToDict(dicts, item, value):
    """Add ``value`` to ``dicts[item]`` in place, starting from 0 if absent.

    Args:
        dicts: dict to update.
        item: key to accumulate under.
        value: numeric amount to add.
    """
    dicts[item] = dicts.get(item, 0) + value
def ItemSimilarity(train, alpha):
    """Compute time-aware item-item similarity.

    Two items in the same user's history add
    ``1 / (1 + alpha * |tui - tuj|)`` to their co-occurrence cell, so items
    consumed close together in time count as more related.

    Args:
        train: mapping ``user -> {item: t}`` where ``t`` is used as the
            numeric time of the interaction.
        alpha: time-decay rate; ``0`` reduces to plain co-occurrence counts.

    Returns:
        Nested dict ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])`` for every
        ordered pair of distinct items sharing at least one user.
    """
    C = dict()  # C[i][j]: time-decayed co-occurrence of items i and j
    N = dict()  # N[i]: number of users whose history holds i
    for items in train.values():
        for i, tui in items.items():
            # The counters start empty, so missing keys must default to 0.
            N[i] = N.get(i, 0) + 1
            for j, tuj in items.items():
                if i == j:
                    continue
                row = C.setdefault(i, {})
                # 1.0 keeps this a true division even for integer alpha and
                # integer timestamps under Python 2.
                row[j] = row.get(j, 0) + 1.0 / (1 + alpha * abs(tui - tuj))
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
def RecommendItemCF(train, user_id, W, K, t0, alpha=1.0):
    """Score unseen items with time-aware item-based collaborative filtering.

    For every item ``i`` in the user's history (interaction time ``tui``),
    the ``K`` items most similar to ``i`` each receive
    ``wj / (1 + alpha * (t0 - tui))``: neighbours of recently consumed items
    score higher.

    NOTE(review): the original line was not valid Python
    (``if j,tuj in ru.items():``) and decayed by an undefined ``tuj``. The
    decay is taken here from the history item's own timestamp, and the
    original ``pi`` factor is dropped because ``pi`` *is* that timestamp in
    this data layout. Confirm this matches the intended formula.

    Args:
        train: mapping ``user -> {item: t}`` with ``t`` the interaction time.
        user_id: key of the target user in ``train``.
        W: nested item similarity dict ``W[i][j]``.
        K: number of nearest items considered per history item.
        t0: current time; assumed not earlier than the history timestamps
            (``1 + alpha * (t0 - tui)`` must stay non-zero).
        alpha: time-decay rate. The original read an undefined global of
            this name; it is now an explicit, defaulted parameter.

    Returns:
        Dict ``item -> accumulated score`` over items not in the history.

    Raises:
        KeyError: if ``user_id`` is not in ``train``.
    """
    # Local import: the file never imports itemgetter at module level.
    from operator import itemgetter

    rank = dict()
    ru = train[user_id]
    for i, tui in ru.items():
        decay = 1.0 / (1 + alpha * (t0 - tui))
        # An item without any co-occurrence has no row in W; treat as empty.
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + wj * decay
    return rank
def UserSimilarity(train, alpha=1.0):
    """Compute time-aware user-user similarity.

    Two users who both have item ``i`` add
    ``1 / (1 + alpha * |tui - tvi|)`` to their shared-item cell, so users
    who consumed the same item at similar times count as more alike.

    Args:
        train: mapping ``user -> {item: t}`` where ``t`` is used as the
            numeric time of the interaction.
        alpha: time-decay rate. The original read an undefined global of
            this name; it is now an explicit, defaulted parameter.

    Returns:
        Nested dict ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])`` for every
        ordered pair of distinct users sharing at least one item.
    """
    # Inverted table: item -> {user: interaction time}.
    item_users = dict()
    for u, items in train.items():
        for i, tui in items.items():
            item_users.setdefault(i, {})[u] = tui

    C = dict()  # C[u][v]: time-decayed count of items shared by u and v
    N = dict()  # N[u]: number of items in u's history
    for users in item_users.values():
        for u, tui in users.items():
            # The counters start empty, so missing keys must default to 0.
            N[u] = N.get(u, 0) + 1
            for v, tvi in users.items():
                if u == v:
                    continue
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + 1.0 / (1 + alpha * abs(tui - tvi))

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def RecommendUserCF(user, T, train, W, K=None, alpha=1.0):
    """Score unseen items with time-aware user-based collaborative filtering.

    Each of the ``K`` users most similar to ``user`` adds
    ``wuv / (1 + alpha * (T - tvi))`` to every item ``i`` in their history,
    where ``tvi`` is when that neighbour interacted with ``i``: items the
    neighbours touched recently score higher.

    Args:
        user: key of the target user in ``train``.
        T: current time; assumed not earlier than the neighbours'
            timestamps (``1 + alpha * (T - tvi)`` must stay non-zero).
        train: mapping ``user -> {item: t}`` with ``t`` the interaction time.
        W: nested user similarity dict ``W[u][v]``.
        K: number of nearest neighbours to use. ``None`` (the default) uses
            every neighbour in ``W[user]``. The original read an undefined
            global of this name.
        alpha: time-decay rate. The original read an undefined global of
            this name.

    Returns:
        Dict ``item -> accumulated score`` over items not in the history.

    Raises:
        KeyError: if ``user`` (or one of its neighbours) is not in ``train``.
    """
    # Local import: the file never imports itemgetter at module level.
    from operator import itemgetter

    rank = dict()
    interacted_items = train[user]
    # A user who shares nothing with anybody has no row in W.
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, tvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv / (1.0 + alpha * (T - tvi))
    return rank
复制代码
基于LFM算法的个性化推荐
- #!/usr/bin/env python
- import random
- '''
- items => {'12':'PHP','1203':'Storm','123':'Ubuntu'}
- items_pool => [12,32,121,324,532,123,53,1203,429,2932]
- user_items => {'1010':[12,1203,123,429]}
- '''
def RandomSelectNagativeSample(items, pool=None):
    """Label a user's items as positives and draw random negatives.

    Every element of ``items`` is labelled 1. Then up to ``3 * len(items)``
    random draws are made from the candidate pool; draws that hit an already
    labelled item are skipped, the rest are labelled 0. Sampling stops as
    soon as there are as many negatives as positives.

    Args:
        items: the user's positive items. Either a list of item ids or a
            dict keyed by item id (iteration yields the ids in both cases).
        pool: sequence of candidate item ids to sample negatives from. When
            omitted, the module-level ``items_pool`` list is used, which is
            what the original code relied on.

    Returns:
        Dict ``item -> 1`` (positive) or ``0`` (sampled negative). It may
        hold fewer negatives than positives if too many draws collide.
    """
    if pool is None:
        pool = items_pool  # module-level candidate list
    ret = dict()
    for i in items:
        ret[i] = 1
    if not pool:
        # Nothing to sample from: only the positives can be returned.
        return ret
    n = 0
    for _ in range(0, len(items) * 3):
        item = random.choice(pool)
        if item in ret:
            continue
        ret[item] = 0
        n += 1
        # ``>=`` keeps positives and negatives balanced; the original ``>``
        # let one extra negative through.
        if n >= len(items):
            break
    return ret
def InitModel(user_items, F):
    """Create the initial latent-factor tables for users and items.

    Args:
        user_items: mapping ``user -> iterable of item ids``.
        F: number of latent factors.

    Returns:
        ``[P, Q]`` where ``P[user][f]`` and ``Q[item][f]`` are set to 1 for
        every ``f`` in ``range(F)``. ``Q`` covers the items of *every* user;
        the original only covered the first user's items and crashed on an
        empty ``user_items``.
    """
    # NOTE(review): every factor starts at the same constant, as in the
    # original. Identical starting factors receive identical gradient
    # updates, so a small random initialisation may be what is wanted.
    P = dict()
    Q = dict()
    for u, items in user_items.items():
        P[u] = dict.fromkeys(range(0, F), 1)
        for i in items:
            if i not in Q:
                Q[i] = dict.fromkeys(range(0, F), 1)
    return [P, Q]
def LatentFactorModel(user_items, F, N, alpha, lambda1):
    """Train a latent factor model by stochastic gradient descent.

    Each epoch, every user's items are turned into positive (1) and sampled
    negative (0) examples, and both factor vectors are nudged towards the
    label along the gradient of the L2-regularised squared error.

    Args:
        user_items: mapping ``user -> iterable of item ids``.
        F: number of latent factors.
        N: number of training epochs.
        alpha: initial learning rate; multiplied by 0.9 after each epoch.
        lambda1: L2 regularisation strength.

    Returns:
        ``[P, Q]`` with ``P[user][f]`` and ``Q[item][f]``.
    """
    [P, Q] = InitModel(user_items, F)
    for setup in range(0, N):
        for user, items in user_items.items():
            samples = RandomSelectNagativeSample(items)
            for item, rui in samples.items():
                # Sampled negatives may be items no user has, which the
                # initial model does not cover.
                if item not in Q:
                    Q[item] = dict.fromkeys(range(0, F), 1)
                pu = P[user]
                qi = Q[item]
                # Prediction is the dot product of the two factor vectors
                # (the original called an undefined ``Predict``).
                eui = rui - sum(pu[f] * qi[f] for f in range(0, F))
                for f in range(0, F):
                    # Read both old values first so the two updates use the
                    # same point; the original fed the updated P into Q.
                    puf = pu[f]
                    qif = qi[f]
                    pu[f] += alpha * (eui * qif - lambda1 * puf)
                    qi[f] += alpha * (eui * puf - lambda1 * qif)
        alpha *= 0.9
    return [P, Q]
def Recommend(user, P, Q):
    """Score every item for a user from the trained latent factors.

    Args:
        user: key of the target user in ``P``.
        P: user factors, ``P[user][f]``.
        Q: item factors, ``Q[item][f]`` (the layout ``InitModel`` builds).

    Returns:
        Dict ``item -> sum over f of P[user][f] * Q[item][f]`` for every
        item in ``Q``. A factor missing from an item's vector counts as 0.

    Raises:
        KeyError: if ``user`` is not in ``P``.
    """
    rank = dict()
    pu = P[user]
    for i, qi in Q.items():
        rank[i] = sum(puf * qi.get(f, 0) for f, puf in pu.items())
    return rank
def PersonalRank(G, alpha, root, maxsetup):
    """Run a random walk with restart on graph ``G`` starting from ``root``.

    Each iteration, every node splits ``alpha`` times its rank evenly among
    its neighbours, and ``root`` receives the restart mass ``1 - alpha``.
    The ranks after each iteration are printed on one tab-separated line.

    Args:
        G: adjacency mapping ``node -> {neighbour: weight}``. The weights
            are not used; only the neighbour sets and their sizes are.
        alpha: probability of continuing the walk instead of restarting.
        root: node the walk starts from and restarts at.
        maxsetup: number of iterations to run.

    Returns:
        Dict ``node -> rank`` after the last iteration.
    """
    rank = dict.fromkeys(G, 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G, 0)
        for i, ri in G.items():
            if not ri:
                continue
            share = alpha * rank[i] / (1.0 * len(ri))
            for j in ri:
                tmp[j] = tmp.get(j, 0) + share
        # Restart mass is added exactly once per iteration. The original
        # added it once per edge pointing at root, inflating root's rank.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp
        # Build the whole line first so one print call gives the same output
        # under both Python 2 and Python 3.
        line = 'iter:' + str(k) + "\t"
        line += "".join("%s:%.3f,\t" % (key, value)
                        for key, value in rank.items())
        print(line)
    return rank
if __name__ == '__main__':
    # Demo: small user/item graph (upper-case and lower-case node names),
    # ranked from node 'A' for 20 iterations.
    G = {'A': {'a': 1, 'c': 1},
         'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
         'C': {'c': 1, 'd': 1},
         'a': {'A': 1, 'B': 1},
         'b': {'B': 1},
         'c': {'A': 1, 'B': 1, 'C': 1},
         'd': {'B': 1, 'C': 1}}
    PersonalRank(G, 0.85, 'A', 20)
    # The string below is disabled sample data for the LFM helpers; it is a
    # no-op expression, kept as in the original.
    '''
    #items_pool = {'12':'PHP','32':'Nginx','121':'Apache','324':'Erlang','532':'Linux','123':'Ubuntu','53':'Java','1203':'Storm','429':'Kafka','2932':'Flume'}
    items_pool = [12,32,121,324,532,123,53,1203,429,2932]
    items = {'12':'PHP','1203':'Storm','123':'Ubuntu'}
    user_items = {'1010':[12,1203,123,429]}
    #print RandomSelectNagativeSample(items)
    print InitModel(user_items,4)
    '''
复制代码
基于图的推荐算法
- #!/usr/sbin/env python
- # -*- coding:utf-8 -*-
- '''
- 基于图的推荐算法,二分图
- '''
def PersonalRank(G, alpha, root, maxsetup):
    """Run a random walk with restart on graph ``G`` starting from ``root``.

    Each iteration, every node splits ``alpha`` times its rank evenly among
    its neighbours, and ``root`` receives the restart mass ``1 - alpha``.
    The ranks after each iteration are printed on one tab-separated line.

    Args:
        G: adjacency mapping ``node -> {neighbour: weight}``. The weights
            are not used; only the neighbour sets and their sizes are.
        alpha: probability of continuing the walk instead of restarting.
        root: node the walk starts from and restarts at.
        maxsetup: number of iterations to run.

    Returns:
        Dict ``node -> rank`` after the last iteration.
    """
    rank = dict.fromkeys(G, 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G, 0)
        for i, ri in G.items():
            if not ri:
                continue
            share = alpha * rank[i] / (1.0 * len(ri))
            for j in ri:
                tmp[j] = tmp.get(j, 0) + share
        # Restart mass is added exactly once per iteration. The original
        # added it once per edge pointing at root, inflating root's rank.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp
        # Build the whole line first so one print call gives the same output
        # under both Python 2 and Python 3.
        line = 'iter:' + str(k) + "\t"
        line += "".join("%s:%.3f,\t" % (key, value)
                        for key, value in rank.items())
        print(line)
    return rank
if __name__ == '__main__':
    # Demo: small user/item graph (upper-case and lower-case node names),
    # ranked from node 'C' for 20 iterations.
    G = {'A': {'a': 1, 'c': 1},
         'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
         'C': {'c': 1, 'd': 1},
         'a': {'A': 1, 'B': 1},
         'b': {'B': 1},
         'c': {'A': 1, 'B': 1, 'C': 1},
         'd': {'B': 1, 'C': 1}}
    PersonalRank(G, 0.85, 'C', 20)
复制代码
基于标签的推荐算法
- #!/usr/sbin/env python
- # -*- coding:utf-8 -*-
- import math
# Tag popularity
def TagPopularity(records):
    """Count how often each tag occurs in the tagging records.

    Args:
        records: iterable of ``(user, item, tag)`` triples.

    Returns:
        Dict ``tag -> number of records carrying that tag``.
    """
    tagfreq = dict()
    for user, item, tag in records:
        tagfreq[tag] = tagfreq.get(tag, 0) + 1
    return tagfreq
# Cosine similarity between two items' tag vectors
def CosineSim(item_tags, i, j):
    """Return the cosine similarity of items ``i`` and ``j`` by their tags.

    Args:
        item_tags: mapping ``item -> {tag: weight}``.
        i: first item key.
        j: second item key.

    Returns:
        ``dot(i, j) / sqrt(|i|^2 * |j|^2)`` over the tag weights, or ``0``
        when the dot product is zero (including when no tag is shared).

    Raises:
        KeyError: if ``i`` or ``j`` is not in ``item_tags``.
    """
    tags_i = item_tags[i]
    tags_j = item_tags[j]
    ret = sum(wib * tags_j[b] for b, wib in tags_i.items() if b in tags_j)
    if ret == 0:
        # Also guards the division below: a non-zero dot product implies
        # both vectors have a non-zero norm.
        return 0
    ni = sum(w * w for w in tags_i.values())
    nj = sum(w * w for w in tags_j.values())
    return ret / math.sqrt(ni * nj)
# Diversity measure of a recommendation list
def Diversity(item_tags, recommend_items):
    """Return the mean pairwise tag similarity of the recommended items.

    The value is the average of ``CosineSim`` over all ordered pairs of
    distinct items, so a lower value means a more varied list.

    Args:
        item_tags: mapping ``item -> {tag: weight}``.
        recommend_items: recommended items; a dict keyed by item (or any
            re-iterable collection of item keys).

    Returns:
        The mean similarity as a float, or ``0.0`` when there are fewer
        than two items (the original divided by zero in that case).
    """
    ret = 0
    n = 0
    for i in recommend_items:
        for j in recommend_items:
            if i == j:
                continue
            ret += CosineSim(item_tags, i, j)
            n += 1
    if n == 0:
        return 0.0
    return ret / (n * 1.0)
def addValueToMat(dicts, index, k, v):
    """Add ``v`` to the cell ``dicts[index][k]`` of a nested dict, in place.

    The row and the cell are created on first use; a new cell is set to
    ``v`` itself.

    Args:
        dicts: nested dict ``{index: {k: value}}`` to update.
        index: row key.
        k: column key.
        v: amount to add.
    """
    row = dicts.setdefault(index, dict())
    if k in row:
        row[k] += v
    else:
        row[k] = v
def InitStat(records):
    """Build the module-level tagging statistics from ``records``.

    Rebinds the three module-level tables that ``Recommend`` reads. The
    original filled local dicts of the same names and discarded them, so
    the module-level tables stayed empty.

    Args:
        records: iterable of ``(user, item, tag)`` triples. The original
            iterated ``records.items()``, which yields pairs and cannot be
            unpacked into three names.
    """
    global user_tags, tag_items, user_items
    user_tags = dict()   # user_tags[u][b] = n(u, b): times u used tag b
    tag_items = dict()   # tag_items[b][i] = n(b, i): times i got tag b
    user_items = dict()  # user_items[u][i] = n(u, i): times u tagged i
    for user, item, tag in records:
        addValueToMat(user_tags, user, tag, 1)
        addValueToMat(tag_items, tag, item, 1)
        addValueToMat(user_items, user, item, 1)
def Recommend(user):
    """Score items for ``user`` through the tags the user has applied.

    Reads the module-level tables ``user_items``, ``user_tags`` and
    ``tag_items``. Every item carrying one of the user's tags receives
    ``wut * wti`` (user-tag count times tag-item count), summed over tags.

    Args:
        user: key of the target user.

    Returns:
        Dict ``item -> score`` over items the user has not tagged yet;
        empty for a user with no tagging history.
    """
    recommend_items = dict()
    # Unknown users and tags are treated as empty rather than raising.
    tagged_items = user_items.get(user, {})
    for tag, wut in user_tags.get(user, {}).items():
        # wut = wut*1.0/math.log(1+len(tag_users[tag])) #TagBasedTFIDF and TagBasedTFIDF++
        for item, wti in tag_items.get(tag, {}).items():
            # wti = wti*1.0/math.log(1+len(user_items[user])) #TagBasedTFIDF++
            if item in tagged_items:
                continue
            recommend_items[item] = recommend_items.get(item, 0) + wut * wti
    return recommend_items
# The guard must compare against "__main__"; the original compared against
# "main", so this demo never ran.
if __name__ == "__main__":
    # Module-level tables read by Recommend().
    user_tags = dict()
    user_items = dict()
    tag_items = dict()
    records = dict()  # empty placeholder; no tagging records are loaded
    user = '1220'
    InitStat(records)
    rec_items = Recommend(user)
复制代码