|
1 | 1 | #
|
2 |
| -# Copyright 2022 Logical Clocks AB |
| 2 | +# Copyright 2024 Hopsworks AB |
3 | 3 | #
|
4 | 4 | # Licensed under the Apache License, Version 2.0 (the "License");
|
5 | 5 | # you may not use this file except in compliance with the License.
|
|
14 | 14 | # limitations under the License.
|
15 | 15 | #
|
16 | 16 |
|
17 |
| -from json import JSONEncoder |
18 |
| -from urllib.parse import urljoin, urlparse |
19 |
| - |
20 |
| -from hopsworks import client |
21 |
| -from hopsworks.client.exceptions import JobException |
22 |
| -from hopsworks.git_file_status import GitFileStatus |
23 |
| - |
24 |
| - |
25 |
| -class Encoder(JSONEncoder): |
26 |
| - def default(self, obj): |
27 |
| - try: |
28 |
| - return obj.to_dict() |
29 |
| - except AttributeError: |
30 |
| - return super().default(obj) |
31 |
| - |
32 |
| - |
33 |
| -def convert_to_abs(path, current_proj_name): |
34 |
| - abs_project_prefix = "/Projects/{}".format(current_proj_name) |
35 |
| - if not path.startswith(abs_project_prefix): |
36 |
| - return abs_project_prefix + "/" + path |
37 |
| - else: |
38 |
| - return path |
39 |
| - |
40 |
| - |
41 |
| -def validate_job_conf(config, project_name): |
42 |
| - # User is required to set the appPath programmatically after getting the configuration |
43 |
| - if ( |
44 |
| - config["type"] != "dockerJobConfiguration" |
45 |
| - and config["type"] != "flinkJobConfiguration" |
46 |
| - and "appPath" not in config |
47 |
| - ): |
48 |
| - raise JobException("'appPath' not set in job configuration") |
49 |
| - elif "appPath" in config and not config["appPath"].startswith("hdfs://"): |
50 |
| - config["appPath"] = "hdfs://" + convert_to_abs(config["appPath"], project_name) |
51 |
| - |
52 |
| - # If PYSPARK application set the mainClass, if SPARK validate there is a mainClass set |
53 |
| - if config["type"] == "sparkJobConfiguration": |
54 |
| - if config["appPath"].endswith(".py"): |
55 |
| - config["mainClass"] = "org.apache.spark.deploy.PythonRunner" |
56 |
| - elif "mainClass" not in config: |
57 |
| - raise JobException("'mainClass' not set in job configuration") |
58 |
| - |
59 |
| - return config |
60 |
| - |
61 |
| - |
62 |
| -def convert_git_status_to_files(files): |
63 |
| - # Convert GitFileStatus to list of file paths |
64 |
| - if isinstance(files[0], GitFileStatus): |
65 |
| - tmp_files = [] |
66 |
| - for file_status in files: |
67 |
| - tmp_files.append(file_status.file) |
68 |
| - files = tmp_files |
69 |
| - |
70 |
| - return files |
71 |
| - |
72 |
| - |
73 |
| -def get_hostname_replaced_url(sub_path: str): |
74 |
| - """ |
75 |
| - construct and return an url with public hopsworks hostname and sub path |
76 |
| - :param self: |
77 |
| - :param sub_path: url sub-path after base url |
78 |
| - :return: href url |
79 |
| - """ |
80 |
| - href = urljoin(client.get_instance()._base_url, sub_path) |
81 |
| - url_parsed = client.get_instance().replace_public_host(urlparse(href)) |
82 |
| - return url_parsed.geturl() |
83 |
| - |
84 |
| - |
85 |
| -def is_interactive(): |
86 |
| - import __main__ as main |
87 |
| - |
88 |
| - return not hasattr(main, "__file__") |
| 17 | +from hopsworks_common.util import ( |
| 18 | + FEATURE_STORE_NAME_SUFFIX, |
| 19 | + VALID_EMBEDDING_TYPE, |
| 20 | + Encoder, |
| 21 | + FeatureGroupWarning, |
| 22 | + JobWarning, |
| 23 | + StatisticsWarning, |
| 24 | + StorageWarning, |
| 25 | + ValidationWarning, |
| 26 | + VersionWarning, |
| 27 | + _loading_animation, |
| 28 | + append_feature_store_suffix, |
| 29 | + autofix_feature_name, |
| 30 | + check_timestamp_format_from_date_string, |
| 31 | + convert_event_time_to_timestamp, |
| 32 | + convert_git_status_to_files, |
| 33 | + convert_to_abs, |
| 34 | + feature_group_name, |
| 35 | + get_dataset_type, |
| 36 | + get_delta_datestr_from_timestamp, |
| 37 | + get_feature_group_url, |
| 38 | + get_host_name, |
| 39 | + get_hostname_replaced_url, |
| 40 | + get_hudi_datestr_from_timestamp, |
| 41 | + get_job_url, |
| 42 | + get_timestamp_from_date_string, |
| 43 | + is_interactive, |
| 44 | + is_runtime_notebook, |
| 45 | + run_with_loading_animation, |
| 46 | + setup_pydoop, |
| 47 | + strip_feature_store_suffix, |
| 48 | + validate_embedding_feature_type, |
| 49 | + validate_job_conf, |
| 50 | + verify_attribute_key_names, |
| 51 | +) |
| 52 | + |
| 53 | + |
# Public API of this compatibility shim.
# NOTE: __all__ entries must be *strings* (PEP 8 / Python import system);
# listing bare identifiers makes `from <module> import *` raise
# `TypeError: Item in __all__ must be str`. Quote every name.
__all__ = [
    "FEATURE_STORE_NAME_SUFFIX",
    "VALID_EMBEDDING_TYPE",
    "Encoder",
    "FeatureGroupWarning",
    "JobWarning",
    "StatisticsWarning",
    "StorageWarning",
    "ValidationWarning",
    "VersionWarning",
    "_loading_animation",
    "append_feature_store_suffix",
    "autofix_feature_name",
    "check_timestamp_format_from_date_string",
    "convert_event_time_to_timestamp",
    "convert_git_status_to_files",
    "convert_to_abs",
    "feature_group_name",
    "get_dataset_type",
    "get_delta_datestr_from_timestamp",
    "get_feature_group_url",
    "get_host_name",
    "get_hostname_replaced_url",
    "get_hudi_datestr_from_timestamp",
    "get_job_url",
    "get_timestamp_from_date_string",
    "is_interactive",
    "is_runtime_notebook",
    "run_with_loading_animation",
    "setup_pydoop",
    "strip_feature_store_suffix",
    "validate_embedding_feature_type",
    "validate_job_conf",
    "verify_attribute_key_names",
]
0 commit comments