-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori
48 lines (44 loc) · 1.71 KB
/
apriori
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# NOTE: This code has not yet been adapted to the dataset at hand; it will be updated in later commits.
def generate_candidates(itemset, k):
    """Join pairs of itemsets into size-``k`` candidate itemsets.

    Every pair of entries in ``itemset`` (taken in order, earlier entry
    first) whose first ``k - 2`` items are equal yields one candidate:
    the earlier entry extended with the last item of the later entry.

    Args:
        itemset: Sequence of itemsets, each a list of items.
        k: Size of the candidates to build; fixes the shared-prefix length.

    Returns:
        A new list of candidate itemsets (lists), in pair-visit order.
    """
    prefix_len = k - 2
    return [
        first + [second[-1]]
        for pos, first in enumerate(itemset)
        for second in itemset[pos + 1:]
        if first[:prefix_len] == second[:prefix_len]
    ]
def prune(itemset, candidates):
    """Keep only candidates whose every one-item-shorter subset is known.

    For each candidate, every sub-list obtained by dropping exactly one
    position must be an element of ``itemset``; candidates failing that
    check are discarded.

    Args:
        itemset: Collection of itemsets (lists) to test membership against.
        candidates: Candidate itemsets (lists) to filter.

    Returns:
        A new list with the surviving candidates, in their original order.
    """

    def _closed_under_removal(cand):
        # Drop each position in turn and look the remainder up in itemset.
        for drop in range(len(cand)):
            if cand[:drop] + cand[drop + 1:] not in itemset:
                return False
        return True

    return [cand for cand in candidates if _closed_under_removal(cand)]
def apriori(dataset, min_support):
    """Return every itemset that occurs in at least ``min_support`` transactions.

    Level-wise search: frequent single items are counted first; each
    further level builds size-``k`` candidates from the frequent itemsets
    of size ``k - 1`` with ``generate_candidates``, discards those with an
    infrequent subset via ``prune``, counts the remainder against the
    transactions, and keeps the ones meeting the threshold.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually orderable items (items are sorted so that the
            prefix join in ``generate_candidates`` is well defined).
        min_support: Absolute support threshold, i.e. the minimum number
            of transactions that must contain an itemset.

    Returns:
        A list of frequent itemsets, each a sorted list of items, ordered
        by itemset size and then lexicographically. For example,
        ``apriori([['a', 'b'], ['a', 'b'], ['a']], 2)`` gives
        ``[['a'], ['b'], ['a', 'b']]``.
    """
    # Convert each transaction to a set once: repeated items within one
    # transaction count a single time and containment tests are cheap.
    transactions = [set(transaction) for transaction in dataset]

    # Level 1: support of individual items (not of whole transactions).
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1

    # Sorted so the prefix join sees itemsets in a consistent order.
    itemset = sorted(
        [item] for item, count in item_counts.items() if count >= min_support
    )
    frequent_itemsets = list(itemset)

    k = 2
    while itemset:
        candidates = prune(itemset, generate_candidates(itemset, k))

        # Tuples are hashable, so they can serve as dictionary keys.
        counts = {tuple(candidate): 0 for candidate in candidates}
        for transaction in transactions:
            for candidate in counts:
                if transaction.issuperset(candidate):
                    counts[candidate] += 1

        # Only candidates that met the threshold seed the next level.
        itemset = [
            list(candidate)
            for candidate, count in counts.items()
            if count >= min_support
        ]
        frequent_itemsets.extend(itemset)
        k += 1

    return frequent_itemsets
# Example usage: five transactions, each given as a list of item names.
dataset = [
    ["bread", "milk"],
    ["bread", "diapers", "beer", "eggs"],
    ["milk", "diapers", "beer", "cola"],
    ["bread", "milk", "diapers", "beer"],
    ["bread", "milk", "diapers", "cola"],
]

# Threshold handed to apriori() as its min_support argument.
min_support = 3

# Mine the example transactions and show the result.
frequent_itemsets = apriori(dataset, min_support)
print(frequent_itemsets)