应用实例

海量数据中的商品推荐

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
from collections import defaultdict
from operator import itemgetter

# Directory that holds the chapter's data files. A raw string keeps the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences such as "\P" and "\C" (the resulting path is unchanged).
dataset_filename = r"C:\Python数据挖掘入门与实践代码和数据集\Code_REWRITE\Chapter 1"
X = np.loadtxt(dataset_filename + r"\affinity_dataset.txt")

# Rule statistics. A rule is keyed by (permiss, c): the premise column index
# and the conclusion column index.
#   v[(permiss, c)] -- samples where the premise is non-zero and the
#                      conclusion equals 1
#   i[(permiss, c)] -- samples where the premise is non-zero and the
#                      conclusion does not equal 1
#   n[permiss]      -- samples where the premise column is non-zero
v = defaultdict(int)
i = defaultdict(int)
n = defaultdict(int)

# NOTE(review): only column indices 0-3 are examined; if the dataset has more
# columns they are ignored -- confirm that this is intended.
for sample in X:
    for permiss in range(4):
        if sample[permiss] == 0:
            continue
        n[permiss] += 1
        for c in range(4):
            if permiss == c:
                # A column is never used as its own conclusion.
                continue
            if sample[c] == 1:
                v[(permiss, c)] += 1  # premise and conclusion both present
            else:
                i[(permiss, c)] += 1  # premise present, conclusion absent

# Support of a rule is simply its count of valid occurrences.
support = v

# Confidence of a rule: valid occurrences divided by the number of samples in
# which the premise occurred. Previously this loop was wrapped in a string
# literal and never ran, leaving `confidence` empty. Every key in `v` has
# n[premise] >= 1, so the division cannot hit zero.
confidence = defaultdict(float)
for rule, valid_count in v.items():
    confidence[rule] = valid_count / n[rule[0]]
def print_rule(permiss, c, support, confidence, features):
    """Print one association rule with its support and confidence.

    Args:
        permiss: Index of the premise item in ``features``.
        c: Index of the conclusion item in ``features``.
        support: Mapping from ``(permiss, c)`` to the rule's support.
        confidence: Mapping from ``(permiss, c)`` to the rule's confidence.
        features: Sequence of item names, indexed by ``permiss`` and ``c``.
    """
    permiss_name = features[permiss]
    c_name = features[c]
    print("Rule if a people buy {0} they will also buy {1}".format(permiss_name, c_name))

    rule = (permiss, c)
    # Print the support, then the confidence.
    print(" -Support: {0}".format(support[rule]))
    # The format string previously had no placeholder, so the confidence
    # value was silently dropped from the output.
    print(" -Confidence: {0:.3f}".format(confidence[rule]))
# Sort the rules to find the best ones.

# Item names for the dataset columns; print_rule uses them to label rules.
# This name was previously never defined, so the calls below raised NameError.
# NOTE(review): names assumed from the standard affinity dataset that ships
# with the book's chapter 1 -- confirm against the actual data file.
features = ["bread", "milk", "cheese", "apples", "bananas"]

print(support)  # mapping of (permiss, c) rule -> support count
sorted_support = sorted(support.items(), key=itemgetter(1), reverse=True)
# Print the five rules with the highest support. Slicing (rather than
# indexing 0-4) avoids an IndexError when fewer than five rules exist.
for index, (rule, _) in enumerate(sorted_support[:5]):
    print(" Rule #{0}".format(index + 1))
    permiss, c = rule
    print_rule(permiss, c, support, confidence, features)

# Print the rules ordered by confidence, highest first. This previously
# sorted `support` again and then used the (rule, value) pairs as list
# indices, which raised TypeError.
sorted_confidence = sorted(confidence.items(), key=itemgetter(1), reverse=True)
for (permiss, c), _ in sorted_confidence:
    print_rule(permiss, c, support, confidence, features)