Skip to content

Commit a386b1a

Browse files
authored
[FSTORE-1129] Tutorials Update (#224)
* Minor updates to all tutorials - variable naming, etc
1 parent 7a2bcfb commit a386b1a

26 files changed

+277
-273
lines changed

advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb

-2
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,6 @@
576576
" description='Air Quality characteristics of each day',\n",
577577
" version=1,\n",
578578
" primary_key=['unix_time','city_name'],\n",
579-
" online_enabled=False,\n",
580579
" event_time=[\"unix_time\"],\n",
581580
") "
582581
]
@@ -613,7 +612,6 @@
613612
" description='Weather characteristics of each day',\n",
614613
" version=1,\n",
615614
" primary_key=['unix_time','city_name'],\n",
616-
" online_enabled=False,\n",
617615
" event_time=[\"unix_time\"],\n",
618616
") "
619617
]

advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb

+5-19
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@
128128
"metadata": {},
129129
"outputs": [],
130130
"source": [
131-
"# Build a query object with selected features for training dataset\n",
132-
"query = air_quality_fg.select_all().join(\n",
131+
"# Select features for training data.\n",
132+
"selected_features = air_quality_fg.select_all().join(\n",
133133
" weather_fg.select_except(['unix_time']), \n",
134134
" on=['city_name', 'date'],\n",
135135
")"
@@ -145,22 +145,8 @@
145145
},
146146
"outputs": [],
147147
"source": [
148-
"# here you can check out the merged dataframe\n",
149-
"\n",
150-
"# query_df = query.read()"
151-
]
152-
},
153-
{
154-
"cell_type": "code",
155-
"execution_count": null,
156-
"id": "0e582de6-09aa-4160-be66-0cdd831783d2",
157-
"metadata": {
158-
"scrolled": true,
159-
"tags": []
160-
},
161-
"outputs": [],
162-
"source": [
163-
"# query_df.city_name.value_counts()"
148+
"# Uncomment this if you would like to view your selected features\n",
149+
"# selected_features.show(5)"
164150
]
165151
},
166152
{
@@ -198,7 +184,7 @@
198184
"feature_view = fs.get_or_create_feature_view(\n",
199185
" name='air_quality_fv',\n",
200186
" version=1,\n",
201-
" query=query,\n",
187+
" query=selected_features,\n",
202188
")"
203189
]
204190
},

advanced_tutorials/citibike/3_citibike_training_pipeline.ipynb

+5-12
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,6 @@
9797
" version=1,\n",
9898
")\n",
9999
"\n",
100-
"citibike_stations_info_fg = fs.get_or_create_feature_group(\n",
101-
" name=\"citibike_stations_info\",\n",
102-
" version=1,\n",
103-
")\n",
104-
"\n",
105100
"us_holidays_fg = fs.get_or_create_feature_group(\n",
106101
" name=\"us_holidays\",\n",
107102
" version=1,\n",
@@ -138,8 +133,8 @@
138133
"metadata": {},
139134
"outputs": [],
140135
"source": [
141-
"# Select features for training data.\n",
142-
"query = meteorological_measurements_fg.select_except([\"timestamp\"])\\\n",
136+
"# Select features for training data\n",
137+
"selected_features = meteorological_measurements_fg.select_except([\"timestamp\"])\\\n",
143138
" .join(\n",
144139
" us_holidays_fg.select_except([\"timestamp\"]),\n",
145140
" on=\"date\", join_type=\"left\"\n",
@@ -159,10 +154,8 @@
159154
},
160155
"outputs": [],
161156
"source": [
162-
"# # uncomment and run cell below if you want to see some rows from this query\n",
163-
"# # but you will have to wait some time\n",
164-
"\n",
165-
"# query.read()"
157+
"# Uncomment this if you would like to view your selected features\n",
158+
"# selected_features.show(5)"
166159
]
167160
},
168161
{
@@ -198,7 +191,7 @@
198191
"source": [
199192
"feature_view = fs.get_or_create_feature_view(\n",
200193
" name='citibike_fv',\n",
201-
" query=query,\n",
194+
" query=selected_features,\n",
202195
" labels=[\"users_count\"],\n",
203196
" version=1, \n",
204197
")"

advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb

+8-8
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@
214214
"metadata": {},
215215
"outputs": [],
216216
"source": [
217-
"# Build a query object \n",
218-
"query = bureaus_fg.select_except(['sk_id_curr','sk_id_bureau','datetime'])\\\n",
217+
"# Select features for training data\n",
218+
"selected_features = bureaus_fg.select_except(['sk_id_curr','sk_id_bureau','datetime'])\\\n",
219219
" .join(applications_fg.select_except(['sk_id_curr',\n",
220220
" 'datetime',\n",
221221
" 'flag_mobil',\n",
@@ -234,8 +234,8 @@
234234
" .join(credit_card_balances_fg.select_except(['sk_id_prev', 'sk_id_curr']))\\\n",
235235
" .join(previous_loan_counts_fg.select_except('sk_id_curr'))\n",
236236
"\n",
237-
"query_show5 = query.show(5)\n",
238-
"query_show5"
237+
"selected_features_show5 = selected_features.show(5)\n",
238+
"selected_features_show5"
239239
]
240240
},
241241
{
@@ -284,8 +284,8 @@
284284
"metadata": {},
285285
"outputs": [],
286286
"source": [
287-
"# Extracting the names of categorical columns in the 'query_show5' DataFrame\n",
288-
"cat_cols = query_show5.dtypes[query_show5.dtypes == 'object'].index\n",
287+
"# Extracting the names of categorical columns in the 'selected_features_show5' DataFrame\n",
288+
"cat_cols = selected_features_show5.dtypes[selected_features_show5.dtypes == 'object'].index\n",
289289
"\n",
290290
"# Retrieving the Label Encoder transformation function from the Feature Store\n",
291291
"le = fs.get_transformation_function(name='label_encoder') \n",
@@ -338,7 +338,7 @@
338338
" version=1,\n",
339339
" labels=['target'],\n",
340340
" transformation_functions=transformation_functions,\n",
341-
" query=query,\n",
341+
" query=selected_features,\n",
342342
")"
343343
]
344344
},
@@ -619,7 +619,7 @@
619619
"name": "python",
620620
"nbconvert_exporter": "python",
621621
"pygments_lexer": "ipython3",
622-
"version": "3.9.12"
622+
"version": "3.9.18"
623623
}
624624
},
625625
"nbformat": 4,

advanced_tutorials/electricity/3_electricity_training_pipeline.ipynb

+5-4
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@
115115
"metadata": {},
116116
"outputs": [],
117117
"source": [
118-
"fg_query = electricity_prices_fg.select_all()\\\n",
118+
"# Select features for training data\n",
119+
"selected_features = electricity_prices_fg.select_all()\\\n",
119120
" .join(\n",
120121
" meteorological_measurements_fg\\\n",
121122
" .select_except([\"timestamp\"])\n",
@@ -136,8 +137,8 @@
136137
"metadata": {},
137138
"outputs": [],
138139
"source": [
139-
"# uncomment this if you would like to view query results\n",
140-
"fg_query.show(5)"
140+
"# Uncomment this if you would like to view your selected features\n",
141+
"# selected_features.show(5)"
141142
]
142143
},
143144
{
@@ -215,7 +216,7 @@
215216
" version=1,\n",
216217
" labels=[], # you will define our 'y' later manually\n",
217218
" transformation_functions=mapping_transformers,\n",
218-
" query=fg_query,\n",
219+
" query=selected_features,\n",
219220
")"
220221
]
221222
},

advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb

+1-3
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,6 @@
177177
" event_time=\"pickup_datetime\",\n",
178178
" description=\"Rides features\",\n",
179179
" time_travel_format=\"HUDI\", \n",
180-
" online_enabled=False, \n",
181180
" statistics_config=True,\n",
182181
")\n",
183182
"\n",
@@ -208,7 +207,6 @@
208207
" primary_key=[\"ride_id\"], \n",
209208
" description=\"Taxi fares features\",\n",
210209
" time_travel_format=\"HUDI\", \n",
211-
" online_enabled=False,\n",
212210
" statistics_config=True,\n",
213211
") \n",
214212
"\n",
@@ -246,7 +244,7 @@
246244
"name": "python",
247245
"nbconvert_exporter": "python",
248246
"pygments_lexer": "ipython3",
249-
"version": "3.9.12"
247+
"version": "3.9.18"
250248
}
251249
},
252250
"nbformat": 4,

advanced_tutorials/nyc_taxi_fares/3_nyc_taxi_fares_training_pipeline.ipynb

+5-5
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,15 @@
107107
"metadata": {},
108108
"outputs": [],
109109
"source": [
110-
"# Select features for training data.\n",
111-
"query = fares_fg.select(['total_fare', \"tolls\"])\\\n",
110+
"# Select features for training data\n",
111+
"selected_features = fares_fg.select(['total_fare', \"tolls\"])\\\n",
112112
" .join(rides_fg.select_except(['taxi_id', \"driver_id\", \"pickup_datetime\",\n",
113113
" \"pickup_longitude\", \"pickup_latitude\",\n",
114114
" \"dropoff_longitude\", \"dropoff_latitude\"]),\n",
115115
" on=['ride_id'])\n",
116116
"\n",
117-
"# Uncomment the line below if you want to display the first 2 rows of the resulting DataFrame\n",
118-
"# query.show(2)"
117+
"# Uncomment this if you would like to view your selected features\n",
118+
"# selected_features.show(5)"
119119
]
120120
},
121121
{
@@ -151,7 +151,7 @@
151151
"feature_view = fs.get_or_create_feature_view(\n",
152152
" name='nyc_taxi_fares_fv',\n",
153153
" version=1,\n",
154-
" query=query,\n",
154+
" query=selected_features,\n",
155155
" labels=[\"total_fare\"],\n",
156156
")"
157157
]

advanced_tutorials/recommender-system/1_feature_engineering.ipynb

+16-58
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
"source": [
77
"## <span style=\"color:#ff5f27\">👩🏻‍🔬 Feature Engineering </span>\n",
88
"\n",
9+
"**Note**: This tutorial does not support Google Colab.\n",
10+
"\n",
911
"**Your Python Jupyter notebook should be configured for >8GB of memory.**\n",
1012
"\n",
1113
"In this series of tutorials, we will build a recommender system for fashion items. It will consist of two models: a *retrieval model* and a *ranking model*. The idea is that the retrieval model should be able to quickly generate a small subset of candidate items from a large collection of items. This comes at the cost of granularity, which is why we also train a ranking model that can afford to use more features than the retrieval model.\n",
@@ -31,59 +33,6 @@
3133
"## <span style=\"color:#ff5f27\">📝 Imports </span>"
3234
]
3335
},
34-
{
35-
"cell_type": "code",
36-
"execution_count": null,
37-
"metadata": {},
38-
"outputs": [],
39-
"source": [
40-
"# Hosted notebook environments may not have the local features package\n",
41-
"import os\n",
42-
"\n",
43-
"def need_download_modules():\n",
44-
" if 'google.colab' in str(get_ipython()):\n",
45-
" return True\n",
46-
" if 'HOPSWORKS_PROJECT_ID' in os.environ:\n",
47-
" return True\n",
48-
" return False\n",
49-
"\n",
50-
"if need_download_modules():\n",
51-
" print(\"⚙️ Downloading modules...\")\n",
52-
" os.system('mkdir -p features')\n",
53-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/articles.py')\n",
54-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/customers.py')\n",
55-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/transactions.py')\n",
56-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/ranking.py') \n",
57-
" print('✅ Done!')\n",
58-
"else:\n",
59-
" print(\"Local environment\")"
60-
]
61-
},
62-
{
63-
"cell_type": "code",
64-
"execution_count": null,
65-
"metadata": {},
66-
"outputs": [],
67-
"source": [
68-
"try:\n",
69-
" from features.articles import prepare_articles\n",
70-
" from features.customers import prepare_customers\n",
71-
" from features.transactions import prepare_transactions\n",
72-
" from features.ranking import compute_ranking_dataset\n",
73-
"except ImportError:\n",
74-
" print(\"⚙️ Downloading modules...\")\n",
75-
" os.system('mkdir -p features')\n",
76-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/articles.py')\n",
77-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/customers.py')\n",
78-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/transactions.py')\n",
79-
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/ranking.py') \n",
80-
" print('✅ Done!')\n",
81-
" from features.articles import prepare_articles\n",
82-
" from features.customers import prepare_customers\n",
83-
" from features.transactions import prepare_transactions\n",
84-
" from features.ranking import compute_ranking_dataset "
85-
]
86-
},
8736
{
8837
"cell_type": "code",
8938
"execution_count": null,
@@ -94,7 +43,12 @@
9443
"import numpy as np\n",
9544
"\n",
9645
"import great_expectations as ge\n",
97-
"from great_expectations.core import ExpectationSuite, ExpectationConfiguration"
46+
"from great_expectations.core import ExpectationSuite, ExpectationConfiguration\n",
47+
"\n",
48+
"from features.articles import prepare_articles\n",
49+
"from features.customers import prepare_customers\n",
50+
"from features.transactions import prepare_transactions\n",
51+
"from features.ranking import compute_ranking_dataset "
9852
]
9953
},
10054
{
@@ -613,7 +567,11 @@
613567
"metadata": {},
614568
"outputs": [],
615569
"source": [
616-
"ranking_df = compute_ranking_dataset(trans_fg, articles_fg, customers_fg)"
570+
"ranking_df = compute_ranking_dataset(\n",
571+
" trans_fg, \n",
572+
" articles_fg, \n",
573+
" customers_fg,\n",
574+
")"
617575
]
618576
},
619577
{
@@ -687,7 +645,7 @@
687645
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
688646
},
689647
"kernelspec": {
690-
"display_name": "Python",
648+
"display_name": "Python 3 (ipykernel)",
691649
"language": "python",
692650
"name": "python3"
693651
},
@@ -701,9 +659,9 @@
701659
"name": "python",
702660
"nbconvert_exporter": "python",
703661
"pygments_lexer": "ipython3",
704-
"version": "3.10.11"
662+
"version": "3.9.18"
705663
}
706664
},
707665
"nbformat": 4,
708666
"nbformat_minor": 4
709-
}
667+
}

0 commit comments

Comments
 (0)