Commit 139f7be

Merge branch 'master' of github.com:logicalclocks/feature-store-api into the-merge

2 parents 59e03e8 + 19a45c4

31 files changed: +857 −327 lines
+29

@@ -0,0 +1,29 @@
+name: optional-dependency
+
+on: pull_request
+
+jobs:
+  unit_tests_no_great_expectations:
+    name: Unit Testing (No Great Expectations)
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Set Timezone
+        run: sudo timedatectl set-timezone UTC
+
+      - uses: actions/checkout@v4
+      - name: Copy README
+        run: cp README.md python/
+
+      - uses: actions/setup-python@v5
+        name: Setup Python
+        with:
+          python-version: "3.10"
+          cache: "pip"
+          cache-dependency-path: "python/setup.py"
+      - run: pip install -e python[python,dev-no-opt]
+
+      - name: Run Pytest suite
+        env:
+          ENABLE_HOPSWORKS_USAGE: "false"
+        run: pytest python/tests
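
For reviewers, the job above can be reproduced locally with roughly the following commands (a sketch, assuming a checkout of the repository root and Python 3.10; `dev-no-opt` is the extra this workflow installs):

```bash
# Reproduce the no-Great-Expectations CI job locally (sketch).
cp README.md python/                          # setup.py expects the README alongside it
pip install -e "python[python,dev-no-opt]"    # dev extras minus optional dependencies
ENABLE_HOPSWORKS_USAGE=false pytest python/tests
```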

CONTRIBUTING.md
+3 −3

@@ -80,9 +80,9 @@ We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimpor
 
 2. Install HOPSWORKS with `docs` extras:
 
-   ```bash
-   pip install -e .[python,dev,docs]
-   ```
+   ```bash
+   pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
+   ```
 
 3. To build the docs, first run the auto doc script:
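
The hunk is cut off here, but the end-to-end docs flow these steps imply looks roughly like this (a sketch: it assumes you run from the `python/` directory per the relative `../requirements-docs.txt` path above, that `auto_doc.py` sits at the repository root as elsewhere in this commit, and that `mkdocs serve` is used for preview; exact paths may differ):

```bash
# Sketch of the docs build flow (paths are assumptions, run from python/).
pip install -e ".[python,dev]"
pip install -r ../requirements-docs.txt
python ../auto_doc.py     # regenerate the API reference pages
cd .. && mkdocs serve     # preview the site locally
```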

auto_doc.py
+137

@@ -111,6 +111,22 @@
         "git_provider_properties": keras_autodoc.get_properties(
             "hopsworks.git_provider.GitProvider"
         ),
+    },
+    "api/spine_group_api.md": {
+        "fg": ["hsfs.feature_group.SpineGroup"],
+        "fg_create": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
+        "fg_get": ["hsfs.feature_store.FeatureStore.get_or_create_spine_group"],
+        "fg_properties": keras_autodoc.get_properties(
+            "hsfs.feature_group.SpineGroup",
+            exclude=[
+                "expectation_suite",
+                "location",
+                "online_enabled",
+                "statistics",
+                "statistics_config",
+                "subject",
+            ],
+        ),
         "git_provider_methods": keras_autodoc.get_methods(
             "hopsworks.git_provider.GitProvider", exclude=["from_response_json", "json"]
         ),
@@ -133,6 +149,127 @@
             "hopsworks.core.dataset_api.DatasetApi"
         ),
     },
+    "api/feature_view_api.md": {
+        "fv": ["hsfs.feature_view.FeatureView"],
+        "fv_create": ["hsfs.feature_store.FeatureStore.create_feature_view"],
+        "fv_get": ["hsfs.feature_store.FeatureStore.get_feature_view"],
+        "fvs_get": ["hsfs.feature_store.FeatureStore.get_feature_views"],
+        "fv_properties": keras_autodoc.get_properties("hsfs.feature_view.FeatureView"),
+        "fv_methods": keras_autodoc.get_methods("hsfs.feature_view.FeatureView"),
+    },
+    "api/feature_api.md": {
+        "feature": ["hsfs.feature.Feature"],
+        "feature_properties": keras_autodoc.get_properties("hsfs.feature.Feature"),
+        "feature_methods": keras_autodoc.get_methods("hsfs.feature.Feature"),
+    },
+    "api/expectation_suite_api.md": {
+        "expectation_suite": ["hsfs.expectation_suite.ExpectationSuite"],
+        "expectation_suite_attach": [
+            "hsfs.feature_group.FeatureGroup.save_expectation_suite"
+        ],
+        "single_expectation_api": [
+            "hsfs.expectation_suite.ExpectationSuite.add_expectation",
+            "hsfs.expectation_suite.ExpectationSuite.replace_expectation",
+            "hsfs.expectation_suite.ExpectationSuite.remove_expectation",
+        ],
+        "expectation_suite_properties": keras_autodoc.get_properties(
+            "hsfs.expectation_suite.ExpectationSuite"
+        ),
+        "expectation_suite_methods": keras_autodoc.get_methods(
+            "hsfs.expectation_suite.ExpectationSuite"
+        ),
+    },
+    "api/feature_store_api.md": {
+        "fs": ["hsfs.feature_store.FeatureStore"],
+        "fs_get": ["hsfs.connection.Connection.get_feature_store"],
+        "fs_properties": keras_autodoc.get_properties(
+            "hsfs.feature_store.FeatureStore"
+        ),
+        "fs_methods": keras_autodoc.get_methods("hsfs.feature_store.FeatureStore"),
+    },
+    "api/feature_group_api.md": {
+        "fg": ["hsfs.feature_group.FeatureGroup"],
+        "fg_create": [
+            "hsfs.feature_store.FeatureStore.create_feature_group",
+            "hsfs.feature_store.FeatureStore.get_or_create_feature_group",
+        ],
+        "fg_get": ["hsfs.feature_store.FeatureStore.get_feature_group"],
+        "fg_properties": keras_autodoc.get_properties(
+            "hsfs.feature_group.FeatureGroup"
+        ),
+        "fg_methods": keras_autodoc.get_methods("hsfs.feature_group.FeatureGroup"),
+    },
+    "api/external_feature_group_api.md": {
+        "fg": ["hsfs.feature_group.ExternalFeatureGroup"],
+        "fg_create": ["hsfs.feature_store.FeatureStore.create_external_feature_group"],
+        "fg_get": ["hsfs.feature_store.FeatureStore.get_external_feature_group"],
+        "fg_properties": keras_autodoc.get_properties(
+            "hsfs.feature_group.ExternalFeatureGroup"
+        ),
+        "fg_methods": keras_autodoc.get_methods(
+            "hsfs.feature_group.ExternalFeatureGroup"
+        ),
+    },
+    "api/storage_connector_api.md": {
+        "sc_get": [
+            "hsfs.feature_store.FeatureStore.get_storage_connector",
+            "hsfs.feature_store.FeatureStore.get_online_storage_connector",
+        ],
+        "hopsfs_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.HopsFSConnector", exclude=["from_response_json"]
+        ),
+        "hopsfs_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.HopsFSConnector"
+        ),
+        "s3_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.S3Connector", exclude=["from_response_json"]
+        ),
+        "s3_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.S3Connector"
+        ),
+        "redshift_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.RedshiftConnector", exclude=["from_response_json"]
+        ),
+        "redshift_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.RedshiftConnector"
+        ),
+        "adls_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.AdlsConnector", exclude=["from_response_json"]
+        ),
+        "adls_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.AdlsConnector"
+        ),
+        "snowflake_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.SnowflakeConnector", exclude=["from_response_json"]
+        ),
+        "snowflake_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.SnowflakeConnector"
+        ),
+        "jdbc_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.JdbcConnector", exclude=["from_response_json"]
+        ),
+        "jdbc_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.JdbcConnector"
+        ),
+        "gcs_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.GcsConnector", exclude=["from_response_json"]
+        ),
+        "gcs_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.GcsConnector"
+        ),
+        "bigquery_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.BigQueryConnector", exclude=["from_response_json"]
+        ),
+        "bigquery_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.BigQueryConnector"
+        ),
+        "kafka_methods": keras_autodoc.get_methods(
+            "hsfs.storage_connector.KafkaConnector", exclude=["from_response_json"]
+        ),
+        "kafka_properties": keras_autodoc.get_properties(
+            "hsfs.storage_connector.KafkaConnector"
+        ),
+    },
     "api/kafka_topic.md": {
         "kafka_api_handle": ["hopsworks.project.Project.get_kafka_api"],
         "kafka_config": ["hopsworks.core.kafka_api.KafkaApi.get_default_config"],

docs/CONTRIBUTING.md
+3 −3

@@ -80,9 +80,9 @@ We use `mkdocs` together with `mike` ([for versioning](https://github.com/jimpor
 
 2. Install HOPSWORKS with `docs` extras:
 
-   ```bash
-   pip install -e .[python,dev,docs]
-   ```
+   ```bash
+   pip install -e ".[python,dev]" && pip install -r ../requirements-docs.txt
+   ```
 
 3. To build the docs, first run the auto doc script:

docs/index.md
+44 −8

@@ -9,6 +9,10 @@
         src="https://img.shields.io/badge/docs-HSFS-orange"
         alt="Hopsworks Feature Store Documentation"
     /></a>
+    <a><img
+        src="https://img.shields.io/badge/python-3.8+-blue"
+        alt="python"
+    /></a>
     <a href="https://pypi.org/project/hsfs/"><img
         src="https://img.shields.io/pypi/v/hsfs?color=blue"
         alt="PyPiStatus"
@@ -21,9 +25,9 @@
         src="https://pepy.tech/badge/hsfs/month"
         alt="Downloads"
     /></a>
-    <a href="https://github.com/psf/black"><img
-        src="https://img.shields.io/badge/code%20style-black-000000.svg"
-        alt="CodeStyle"
+    <a href="https://github.com/astral-sh/ruff"><img
+        src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json"
+        alt="Ruff"
     /></a>
     <a><img
         src="https://img.shields.io/pypi/l/hsfs?color=green"
@@ -41,19 +45,44 @@ The library is environment independent and can be used in two modes:
 
 The library automatically configures itself based on the environment it is run.
 However, to connect from an external environment such as Databricks or AWS Sagemaker,
-additional connection information, such as host and port, is required. For more information about the setup from external environments, see the setup section.
+additional connection information, such as host and port, is required. For more information, check out the [Hopsworks documentation](https://docs.hopsworks.ai/latest/).
 
 ## Getting Started On Hopsworks
 
-Instantiate a connection and get the project feature store handler
+Get started easily by registering an account on [Hopsworks Serverless](https://app.hopsworks.ai/). Create your project and a [new API key](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/). In a new Python environment with Python 3.8 or higher, install the [client library](https://docs.hopsworks.ai/latest/user_guides/client_installation/) using pip:
+
+```bash
+# Get all Hopsworks SDKs: Feature Store, Model Serving and Platform SDK
+pip install hopsworks
+# or minimum install with the Feature Store SDK
+pip install hsfs[python]
+# if using zsh don't forget the quotes
+pip install 'hsfs[python]'
+```
+
+You can start a notebook, instantiate a connection, and get the project feature store handler.
+
+```python
+import hopsworks
+
+project = hopsworks.login()  # you will be prompted for your api key
+fs = project.get_feature_store()
+```
+
+or using `hsfs` directly:
+
 ```python
 import hsfs
 
-connection = hsfs.connection()
+connection = hsfs.connection(
+    host="c.app.hopsworks.ai",
+    project="your-project",
+    api_key_value="your-api-key",
+)
 fs = connection.get_feature_store()
 ```
 
-Create a new feature group
+Create a new feature group to start inserting feature values.
 ```python
 fg = fs.create_feature_group("rain",
                              version=1,
@@ -135,7 +164,7 @@ You can find more examples on how to use the library in our [hops-examples](http
 
 ## Usage
 
-Usage data is collected for improving quality of the library. It is turned on by default if the backend
+Usage data is collected for improving the quality of the library. It is turned on by default if the backend
 is "c.app.hopsworks.ai". To turn it off, use one of the following ways:
 ```python
 # use environment variable
@@ -159,6 +188,13 @@ For general questions about the usage of Hopsworks and the Feature Store please
 
 Please report any issue using [Github issue tracking](https://github.com/logicalclocks/feature-store-api/issues).
 
+Please attach the client environment from the output below in the issue:
+```python
+import hopsworks
+import hsfs
+hopsworks.login().get_feature_store()
+print(hsfs.get_env())
+```
 
 ## Contributing
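
The Usage hunk above is truncated by the diff context, but the CI workflow added earlier in this commit shows the environment variable involved. A sketch of the environment-variable route (variable name taken from that workflow; setting it before the import is an assumption):

```python
# Sketch: opt out of usage-data collection via the environment variable
# the CI workflow sets (ENABLE_HOPSWORKS_USAGE), before importing the library.
import os

os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"

import hsfs  # noqa: E402  # imported after setting the variable on purpose
```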

python/hsfs/constructor/external_feature_group_alias.py
+2 −2

@@ -26,7 +26,7 @@ def __init__(
         self, on_demand_feature_group: Dict[str, Any], alias: str, **kwargs
     ) -> None:
         self._on_demand_feature_group: Union[
-            "feature_group.ExternalFeatureGroup", "feature_group.SpineGroup"
+            feature_group.ExternalFeatureGroup, "feature_group.SpineGroup"
         ]
         if not on_demand_feature_group["spine"]:
             self._on_demand_feature_group = (
@@ -48,7 +48,7 @@ def from_response_json(cls, json_dict: Dict[str, Any]) -> ExternalFeatureGroupAlias:
     @property
     def on_demand_feature_group(
         self,
-    ) -> Union["feature_group.ExternalFeatureGroup", "feature_group.SpineGroup"]:
+    ) -> Union[feature_group.ExternalFeatureGroup, "feature_group.SpineGroup"]:
         return self._on_demand_feature_group
 
     @property

python/hsfs/core/constants.py
+15

@@ -0,0 +1,15 @@
+import importlib.util
+
+
+# Data Validation / Great Expectations
+HAS_GREAT_EXPECTATIONS: bool = (
+    importlib.util.find_spec("great_expectations") is not None
+)
+great_expectations_not_installed_message = (
+    "Great Expectations package not found. "
+    "If you want to use data validation with Hopsworks you can install the corresponding extras "
+    """`pip install hopsworks[great_expectations]` or `pip install "hopsworks[great_expectations]"` if using zsh. """
+    "You can also install great-expectations directly in your environment e.g `pip install great-expectations`. "
+    "You will need to restart your kernel if applicable."
+)
+initialise_expectation_suite_for_single_expectation_api_message = "Initialize Expectation Suite by attaching to a Feature Group to enable single expectation API"
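
These constants make Great Expectations an import-time optional dependency: callers check the flag and raise the prepared message rather than hitting a bare `ImportError`. A sketch of the intended pattern (the guarded function below is illustrative, not code from this commit):

```python
from hsfs.core.constants import (
    HAS_GREAT_EXPECTATIONS,
    great_expectations_not_installed_message,
)

if HAS_GREAT_EXPECTATIONS:
    import great_expectations  # only imported when the extra is present


def save_expectation_suite(suite):
    """Illustrative guard: fail with a helpful message, not an ImportError."""
    if not HAS_GREAT_EXPECTATIONS:
        raise ModuleNotFoundError(great_expectations_not_installed_message)
    ...  # safe to use great_expectations from here on
```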

python/hsfs/core/feature_monitoring_config_engine.py
+2 −2

@@ -344,13 +344,13 @@ def get_monitoring_job(
 
     def run_feature_monitoring(
         self,
-        entity: Union["feature_group.FeatureGroup", "feature_view.FeatureView"],
+        entity: Union[feature_group.FeatureGroup, "feature_view.FeatureView"],
        config_name: str,
     ) -> List[FeatureMonitoringResult]:
         """Main function used by the job to actually perform the monitoring.
 
         Args:
-            entity: Union["feature_group.FeatureGroup", "feature_view.FeatureView"]
+            entity: Union[feature_group.FeatureGroup, "feature_view.FeatureView"]
                 Featuregroup or Featureview object containing the feature to monitor.
             config_name: str: name of the monitoring config.
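
Both this hunk and the `external_feature_group_alias.py` one above make the same change: the `feature_group` reference loses its quotes, turning a lazily evaluated forward reference into a name that must resolve when the annotation is evaluated at definition time. A self-contained sketch of the distinction (class names are stand-ins, not the hsfs classes):

```python
from typing import Union


class FeatureGroup:
    """Stand-in for hsfs.feature_group.FeatureGroup."""


# Unquoted FeatureGroup is looked up right here, at definition time, so it
# must already be defined/imported. Quoted "FeatureView" stays a string and
# is only resolved if typing.get_type_hints() is called on this function.
def run_feature_monitoring(entity: Union[FeatureGroup, "FeatureView"]) -> None:
    print(type(entity).__name__)


class FeatureView:
    """Defined after the function, which is why it must stay quoted above."""


run_feature_monitoring(FeatureGroup())  # prints: FeatureGroup
```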
