-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathMarket Basket Analysis.py
57 lines (41 loc) · 1.65 KB
/
Market Basket Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from mlxtend.frequent_patterns import apriori, association_rules
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
get_ipython().magic('matplotlib inline')
grocery_items = set()
with open("grocery_dataset.txt") as f:
reader = csv.reader(f, delimiter=",")
for i, line in enumerate(reader):
grocery_items.update(line)
output_list = list()
with open("grocery_dataset.txt") as f:
reader = csv.reader(f, delimiter=",")
for i, line in enumerate(reader):
row_val = {item:0 for item in grocery_items}
row_val.update({item:1 for item in line})
output_list.append(row_val)
grocery = pd.DataFrame(output_list)
grocery.head()
#View Top Sold items
total_item_count = sum(grocery.sum())
print(total_item_count)
item_summary = grocery.sum().sort_values(ascending = False).reset_index()
item_summary.rename(columns={item_summary.columns[0]:'item_name',item_summary.columns[1]:'item_count'}, inplace=True)
item_summary.head()
#Visualize Top Sold Items
objects = (list(item_summary['item_name'].head(n=20)))
y_pos = np.arange(len(objects))
performance = list(item_summary['item_count'].head(n=20))
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects, rotation='vertical')
plt.ylabel('Item count')
plt.title('Item sales distribution')
#Generatig frequent itemsets
frequent_itemsets = apriori(grocery, min_support=0.047, use_colnames=True)
#generating rules
my_rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
#viewing top 100 rules
my_rules.head(100)
print(my_rules[['antecedents','consequents']]