-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjottings.py
117 lines (95 loc) · 3.55 KB
/
jottings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Test
nutrients = [['A',1],['B',5],['E',3]]
# A loop to scrape the nutrients for every food in the list
name = ['n']
header = ''
foodDict = {}
for food in foods:
foodID = foods.index(food)
# searchFood(food)
# name = openpage()
# nutrients = getData(food,name)
if header == '':
header = [d[0] for d in nutrients]
if len(foodDict) == 0:
foodDict = {'ID':[], 'food':[], 'entry':[]}
for h in header:
foodDict[h] = []
foodDict['ID'].append(foodID)
foodDict['food'].append(food)
foodDict['entry'].append(name)
for h in header:
col = header.index(h)
if nutrients[col][0] in h:
foodDict[h].append(nutrients[col][1])
else:
foodDict[h].append('NaN')
nutrients = [['A', 1], ['D', 5], ['E', 3]]
print(foodDict)
# create a DataFrame
header = ['food'] + [d[0] for d in data]
amount = [name] + [d[1] for d in data]
# Empty DataFrame
df = pd.DataFrame(columns=header)
# As long as I only have one or a few lines of data, I'm using this command:
df.loc[len(df)] = amount
print(df)
#df = pd.DataFrame(data,columns=header)
# Vitamins
# Columns
for n in dfNutrientsTable.columns[15:31]:
e = n.split(' ')[1:]
print(e[0]+" ("+' '.join(e[1:])+")")
###############
food = 'A'
data = [['food', food]]
soup = BeautifulSoup(driver.page_source,'html.parser')
tables = soup.find_all('table', class_='alt nwdetails')
t = tables[8]
rows = t.find_all('tr')
f_rows = [r for r in rows if not 'Tagesbedarf' in r.text and not 'Richtwert' in r.text]
columns = [[c.text.strip() for c in r.find_all('td')[:2]] for r in f_rows]
f_columns = [e for e in columns if len(e) > 1]
print(f_columns)
# The data should get extended by nested tables (separated by nutritional category)
data.extend([f_columns])
# tables = getData(food)
tables = data
macros,vitamins,minerals,traceminerals,carbs,aminos,fibre,omegafats,fats = tables
comptable = [macros,vitamins,minerals+traceminerals,carbs,aminos,fibre,omegafats+fats]
tablenames = ['macros','vitamins','minerals','carbs','aminos','fibre','fats']
########################################################################################################################
# Data cleaning
# Notes
dfVitamins['calc']= dfVitamins['E Tocopherole (µg)'] + dfVitamins['K (µg)']
print(dfVitamins['calc'])
dft = pd.DataFrame({
'a':[0,0,2],
'b':[0,1,3],
'c':[5,2,0]})
dft.loc[dft['a'] < 0.1, 'a'] = np.nan
dft.loc[dft['b'] < 0.1, 'b'] = np.nan
dft['c'] = dft['c'] * 5
dft['f'] = dft['a'].combine_first(dft['b']).combine_first(dft['c'])
print(dft)
dft['f'] = dft['b'].where(dft['b'] < 1, dft['c'])
dft['a'] = dft['a'].where(dft['a'] >= 1, dft['b'])
# Earlier calculations of vitamin percentages
vitcol = dfVitamins.columns
for c in vitcol:
dfVitP[c] = (dfVitamins[c] / dfVitRec[c].loc[0] * 100).round(2)
dfVitP.columns = ['food'] + [n.split()[0] for n in dfVitamins.columns[1:]]
# Second version, same result
dfVit2 = dfVitamins.loc[:,['food']]
for c in vitcol:
colname = c.split()[0]
dfVit2 = dfVit2.assign(**{colname: lambda x : round((dfVitamins[c] / dfVitRec[c].loc[0]) * 100,2)})
print(dfVit2)
# df['mean'] = df.iloc[:, 0:4].mean(axis=1)
# df['mean'] = df.iloc[:, [0,1,2,3,4].mean(axis=1)
# df["mean_odd_year"] = df.loc[:, ["Y1961","Y1963","Y1965"]].mean(axis = 1)
dfVitamins = pd.concat([dfEmpty[['food']],dfVitamins], axis=1)
with pd.ExcelWriter(my_path + '/CleanedData.xlsx') as writer:
dfVitamins.to_excel(writer, sheet_name='Vitamins')
dfVitPV.to_excel(writer, sheet_name='VitaminCovVol')
dfVitPE.to_excel(writer, sheet_name='VitaminCovEnerg')