-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter-sales.py
44 lines (42 loc) · 1.63 KB
/
filter-sales.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
import time
import csv
import pandas as pd
# check no of sales for items
def no_of_sales(items, sales, filename):
with open(filename, mode='w', newline='') as file:
writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
# write header
writer.writerow(['item','matched-name','description','marketing-company','count', 'total-sales'])
total = 0
# iterate over items
for index, row in items.iterrows():
# find how many matches in the sales list
count = 0
total_sales = 0
total_qty = 0
# find match
match = sales.loc[sales['Product']==row['Original Name']]
if match.shape[0] > 0:
data = [row['Original Name'], row['Matched name'], row['Description']
, match['MarketingCo'].unique()[0], match.shape[0], match['Sale Amount'].sum()]
writer.writerow(data)
total += 1
print('total for', filename,total)
if __name__ == '__main__':
t1 = time.time()
# Read data
items = pd.read_csv('data/matched-with-desc.csv', error_bad_lines=False, engine='python')
print('total',items.shape[0])
sales = pd.read_csv('data/running.csv', error_bad_lines=False, engine='python')
# get only high confidence matches
items_match = items.loc[items['Match Confidence (lower is better)']<=0.95]
items_no_match = items.loc[items['Match Confidence (lower is better)']>0.95]
print('good match',items_match.shape[0])
print('bad match',items_no_match.shape[0])
# no of sales of low confidence matches
no_of_sales(items_no_match, sales, 'low.csv')
# no of sales for high confidence matches
no_of_sales(items_match, sales, 'high.csv')
t = time.time()-t1
print('Completed in:', round(t,2), 'secs')