Skip to content

Commit dc8367d

Browse files
authored
[FSTORE-1464] Unify tags_api, inode and dataset_api (logicalclocks#240)
* Move tag, util, dataset_api and inode * Separate part of util independent from hsfs * Merge hopsworks/util into hopsworks_common * Create alias for git_file_status * Fix aliases * Fix mistype in git_file_status * Move tags_api * Adapt the moved files * Add alias for inode * Rename FeatureStoreEncoder to Encoder * Fix mocking * Remove dataset_api from hopsworks_common * Move dataset_api from hopsworks * Adapt dataset_api * Fix dataset_api * Fix DatasetApi.__init__ * Fix test_python
1 parent 7de0e4f commit dc8367d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+1554
-1308
lines changed

python/hopsworks/core/dataset_api.py

+9-451
Large diffs are not rendered by default.

python/hopsworks/git_file_status.py

+7-58
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright 2022 Logical Clocks AB
2+
# Copyright 2024 Hopsworks AB
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -14,62 +14,11 @@
1414
# limitations under the License.
1515
#
1616

17-
import json
17+
from hopsworks_common.git_file_status import (
18+
GitFileStatus,
19+
)
1820

19-
import humps
20-
from hopsworks import util
2121

22-
23-
class GitFileStatus:
24-
def __init__(
25-
self,
26-
file=None,
27-
status=None,
28-
extra=None,
29-
**kwargs,
30-
):
31-
self._file = file
32-
self._status = status
33-
self._extra = extra
34-
35-
@classmethod
36-
def from_response_json(cls, json_dict):
37-
json_decamelized = humps.decamelize(json_dict)
38-
if "count" in json_decamelized:
39-
if json_decamelized["count"] == 0:
40-
return []
41-
return [cls(**file_status) for file_status in json_decamelized["items"]]
42-
else:
43-
return cls(**json_decamelized)
44-
45-
@property
46-
def file(self):
47-
"""Path to the file"""
48-
return self._file
49-
50-
@property
51-
def status(self):
52-
"""Status of the file
53-
Unmodified StatusCode = ' '
54-
Untracked StatusCode = '?'
55-
Modified StatusCode = 'M'
56-
Added StatusCode = 'A'
57-
Deleted StatusCode = 'D'
58-
Renamed StatusCode = 'R'
59-
Copied StatusCode = 'C'
60-
UpdatedButUnmerged StatusCode = 'U'"""
61-
return self._status
62-
63-
@property
64-
def extra(self):
65-
"""Extra contains additional information, such as the previous name in a rename"""
66-
return self._extra
67-
68-
def json(self):
69-
return json.dumps(self, cls=util.Encoder)
70-
71-
def __str__(self):
72-
return self.json()
73-
74-
def __repr__(self):
75-
return f"GitFileStatus({self._file!r}, {self._status!r}, {self._extra!r})"
22+
__all__ = [
23+
GitFileStatus,
24+
]

python/hopsworks/util.py

+73-73
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright 2022 Logical Clocks AB
2+
# Copyright 2024 Hopsworks AB
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -14,75 +14,75 @@
1414
# limitations under the License.
1515
#
1616

17-
from json import JSONEncoder
18-
from urllib.parse import urljoin, urlparse
19-
20-
from hopsworks import client
21-
from hopsworks.client.exceptions import JobException
22-
from hopsworks.git_file_status import GitFileStatus
23-
24-
25-
class Encoder(JSONEncoder):
26-
def default(self, obj):
27-
try:
28-
return obj.to_dict()
29-
except AttributeError:
30-
return super().default(obj)
31-
32-
33-
def convert_to_abs(path, current_proj_name):
34-
abs_project_prefix = "/Projects/{}".format(current_proj_name)
35-
if not path.startswith(abs_project_prefix):
36-
return abs_project_prefix + "/" + path
37-
else:
38-
return path
39-
40-
41-
def validate_job_conf(config, project_name):
42-
# User is required to set the appPath programmatically after getting the configuration
43-
if (
44-
config["type"] != "dockerJobConfiguration"
45-
and config["type"] != "flinkJobConfiguration"
46-
and "appPath" not in config
47-
):
48-
raise JobException("'appPath' not set in job configuration")
49-
elif "appPath" in config and not config["appPath"].startswith("hdfs://"):
50-
config["appPath"] = "hdfs://" + convert_to_abs(config["appPath"], project_name)
51-
52-
# If PYSPARK application set the mainClass, if SPARK validate there is a mainClass set
53-
if config["type"] == "sparkJobConfiguration":
54-
if config["appPath"].endswith(".py"):
55-
config["mainClass"] = "org.apache.spark.deploy.PythonRunner"
56-
elif "mainClass" not in config:
57-
raise JobException("'mainClass' not set in job configuration")
58-
59-
return config
60-
61-
62-
def convert_git_status_to_files(files):
63-
# Convert GitFileStatus to list of file paths
64-
if isinstance(files[0], GitFileStatus):
65-
tmp_files = []
66-
for file_status in files:
67-
tmp_files.append(file_status.file)
68-
files = tmp_files
69-
70-
return files
71-
72-
73-
def get_hostname_replaced_url(sub_path: str):
74-
"""
75-
construct and return an url with public hopsworks hostname and sub path
76-
:param self:
77-
:param sub_path: url sub-path after base url
78-
:return: href url
79-
"""
80-
href = urljoin(client.get_instance()._base_url, sub_path)
81-
url_parsed = client.get_instance().replace_public_host(urlparse(href))
82-
return url_parsed.geturl()
83-
84-
85-
def is_interactive():
86-
import __main__ as main
87-
88-
return not hasattr(main, "__file__")
17+
from hopsworks_common.util import (
18+
FEATURE_STORE_NAME_SUFFIX,
19+
VALID_EMBEDDING_TYPE,
20+
Encoder,
21+
FeatureGroupWarning,
22+
JobWarning,
23+
StatisticsWarning,
24+
StorageWarning,
25+
ValidationWarning,
26+
VersionWarning,
27+
_loading_animation,
28+
append_feature_store_suffix,
29+
autofix_feature_name,
30+
check_timestamp_format_from_date_string,
31+
convert_event_time_to_timestamp,
32+
convert_git_status_to_files,
33+
convert_to_abs,
34+
feature_group_name,
35+
get_dataset_type,
36+
get_delta_datestr_from_timestamp,
37+
get_feature_group_url,
38+
get_host_name,
39+
get_hostname_replaced_url,
40+
get_hudi_datestr_from_timestamp,
41+
get_job_url,
42+
get_timestamp_from_date_string,
43+
is_interactive,
44+
is_runtime_notebook,
45+
run_with_loading_animation,
46+
setup_pydoop,
47+
strip_feature_store_suffix,
48+
validate_embedding_feature_type,
49+
validate_job_conf,
50+
verify_attribute_key_names,
51+
)
52+
53+
54+
__all__ = [
55+
FEATURE_STORE_NAME_SUFFIX,
56+
VALID_EMBEDDING_TYPE,
57+
Encoder,
58+
FeatureGroupWarning,
59+
JobWarning,
60+
StatisticsWarning,
61+
StorageWarning,
62+
ValidationWarning,
63+
VersionWarning,
64+
_loading_animation,
65+
append_feature_store_suffix,
66+
autofix_feature_name,
67+
check_timestamp_format_from_date_string,
68+
convert_event_time_to_timestamp,
69+
convert_git_status_to_files,
70+
convert_to_abs,
71+
feature_group_name,
72+
get_dataset_type,
73+
get_delta_datestr_from_timestamp,
74+
get_feature_group_url,
75+
get_host_name,
76+
get_hostname_replaced_url,
77+
get_hudi_datestr_from_timestamp,
78+
get_job_url,
79+
get_timestamp_from_date_string,
80+
is_interactive,
81+
is_runtime_notebook,
82+
run_with_loading_animation,
83+
setup_pydoop,
84+
strip_feature_store_suffix,
85+
validate_embedding_feature_type,
86+
validate_job_conf,
87+
verify_attribute_key_names,
88+
]

0 commit comments

Comments (0)