Skip to content

Commit 2a4a1ad

Browse files
committed
[HWORKS-1037] Add support for downloading artifact files from deployment object
1 parent 00092ce commit 2a4a1ad

File tree

6 files changed

+120
-30
lines changed

6 files changed

+120
-30
lines changed

python/hopsworks_common/constants.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,12 +163,12 @@ class MODEL:
163163

164164
class MODEL_REGISTRY:
165165
HOPSFS_MOUNT_PREFIX = "/hopsfs/"
166-
ARTIFACTS_DIR_NAME = "Artifacts"
167166
MODEL_FILES_DIR_NAME = "Files"
168167

169168

170169
class MODEL_SERVING:
171170
MODELS_DATASET = "Models"
171+
ARTIFACTS_DIR_NAME = "Artifacts"
172172

173173

174174
class ARTIFACT_VERSION:

python/hsml/deployment.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,10 @@ def get_model(self):
219219
)
220220

221221
@usage.method_logger
222-
def download_artifact(self):
223-
"""Download the model artifact served by the deployment"""
222+
def download_artifact_files(self):
223+
"""Download the artifact files served by the deployment"""
224224

225-
return self._serving_engine.download_artifact(self)
225+
return self._serving_engine.download_artifact_files(self)
226226

227227
def get_logs(self, component="predictor", tail=10):
228228
"""Prints the deployment logs of the predictor or transformer.
@@ -373,9 +373,15 @@ def artifact_version(self):
373373
def artifact_version(self, artifact_version: Union[int, str]):
374374
self._predictor.artifact_version = artifact_version
375375

376+
@property
def artifact_files_path(self):
    """Path of the artifact files deployed by the predictor.

    The value is not stored on the deployment; it is read from the
    underlying predictor on every access.
    """
    predictor = self._predictor
    return predictor.artifact_files_path
380+
376381
@property
377382
def artifact_path(self):
378383
"""Path of the model artifact deployed by the predictor."""
384+
# TODO: deprecated
379385
return self._predictor.artifact_path
380386

381387
@property

python/hsml/engine/model_engine.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,9 @@ def _download_model_from_hopsfs_recursive(
156156

157157
if path_attr.get("dir", False):
158158
# otherwise, make a recursive call for the folder
159-
if basename == constants.MODEL_REGISTRY.ARTIFACTS_DIR_NAME:
159+
if (
160+
basename == constants.MODEL_SERVING.ARTIFACTS_DIR_NAME
161+
): # TODO: Not needed anymore
160162
continue # skip Artifacts subfolder
161163
local_folder_path = os.path.join(to_local_path, basename)
162164
os.mkdir(local_folder_path)
@@ -447,7 +449,7 @@ def update_download_progress(n_dirs, n_files, done=False):
447449
)
448450

449451
try:
450-
from_hdfs_model_path = model_instance.files_path
452+
from_hdfs_model_path = model_instance.model_files_path
451453
if from_hdfs_model_path.startswith("hdfs:/"):
452454
projects_index = from_hdfs_model_path.find("/Projects", 0)
453455
from_hdfs_model_path = from_hdfs_model_path[projects_index:]

python/hsml/engine/serving_engine.py

+88-19
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
#
1616

1717
import os
18+
import tempfile
1819
import time
1920
import uuid
2021
from typing import Dict, List, Union
2122

22-
from hsml import util, constants
23+
from hsml import constants
2324
from hsml.client.exceptions import ModelServingException, RestAPIError
2425
from hsml.client.istio.utils.infer_type import InferInput
2526
from hsml.constants import (
@@ -31,6 +32,7 @@
3132
INFERENCE_ENDPOINTS as IE,
3233
)
3334
from hsml.core import dataset_api, serving_api
35+
from hsml.engine import local_engine
3436
from tqdm.auto import tqdm
3537

3638

@@ -51,6 +53,8 @@ def __init__(self):
5153
self._serving_api = serving_api.ServingApi()
5254
self._dataset_api = dataset_api.DatasetApi()
5355

56+
self._engine = local_engine.LocalEngine()
57+
5458
def _poll_deployment_status(
5559
self, deployment_instance, status: str, await_status: int, update_progress=None
5660
):
@@ -304,7 +308,64 @@ def _get_stopped_instances(self, available_instances, requested_instances):
304308
num_instances = requested_instances - available_instances
305309
return num_instances if num_instances >= 0 else 0
306310

307-
def download_artifact(self, deployment_instance):
311+
def _download_files_from_hopsfs_recursive(
    self,
    from_hdfs_path: str,
    to_local_path: str,
    update_download_progress,
    n_dirs,
    n_files,
):
    """Recursively download the content of a HopsFS directory into a local directory.

    The directory is listed through the dataset API. Every file entry is
    downloaded into `to_local_path`; every sub-directory is created locally
    and traversed with a recursive call. Sub-directories named like the
    artifacts folder are skipped.

    Args:
        from_hdfs_path: Remote directory to list and download.
        to_local_path: Existing local directory that receives the content.
        update_download_progress: Callable invoked as
            `update_download_progress(n_dirs=..., n_files=...)` after each
            downloaded file and each fully processed sub-directory.
        n_dirs: Number of directories processed so far.
        n_files: Number of files downloaded so far.

    Returns:
        The updated `(n_dirs, n_files)` counters.
    """

    listing = self._dataset_api.list(from_hdfs_path, sort_by="NAME:desc")

    # NOTE(review): the listing of an empty directory presumably comes back
    # without an "items" entry; treat a missing or empty value as "nothing to
    # download" instead of failing with a KeyError.
    for entry in listing.get("items") or []:
        path_attr = entry["attributes"]
        path = path_attr["path"]
        basename = os.path.basename(path)

        if path_attr.get("dir", False):
            # it's a folder: mirror it locally and recurse into it
            if (
                basename == constants.MODEL_SERVING.ARTIFACTS_DIR_NAME
            ):  # TODO: Not needed anymore
                continue  # skip Artifacts subfolder
            local_folder_path = os.path.join(to_local_path, basename)
            os.mkdir(local_folder_path)
            n_dirs, n_files = self._download_files_from_hopsfs_recursive(
                from_hdfs_path=path,
                to_local_path=local_folder_path,
                update_download_progress=update_download_progress,
                n_dirs=n_dirs,
                n_files=n_files,
            )
            # the folder is only counted once all of its content is downloaded
            n_dirs += 1
            update_download_progress(n_dirs=n_dirs, n_files=n_files)
        else:
            # it's a file: download it next to its siblings
            local_file_path = os.path.join(to_local_path, basename)
            self._engine.download(path, local_file_path)
            n_files += 1
            update_download_progress(n_dirs=n_dirs, n_files=n_files)

    return n_dirs, n_files
353+
354+
def _download_files_from_hopsfs(
    self, from_hdfs_path: str, to_local_path: str, update_download_progress
):
    """Download a HopsFS directory tree and report the final progress.

    Starts the recursive download with both counters at zero and, once it
    returns, calls `update_download_progress` one last time with `done=True`
    and the final directory and file counts.

    Args:
        from_hdfs_path: Remote directory to download.
        to_local_path: Local directory that receives the content.
        update_download_progress: Progress callback accepting `n_dirs`,
            `n_files` and `done` keyword arguments.
    """

    totals = self._download_files_from_hopsfs_recursive(
        from_hdfs_path=from_hdfs_path,
        to_local_path=to_local_path,
        update_download_progress=update_download_progress,
        n_dirs=0,
        n_files=0,
    )
    dir_count, file_count = totals

    # final notification, flagged as done
    update_download_progress(n_dirs=dir_count, n_files=file_count, done=True)
367+
368+
def download_artifact_files(self, deployment_instance):
308369
if deployment_instance.id is None:
309370
raise ModelServingException(
310371
"Deployment is not created yet. To create the deployment use `.save()`"
@@ -316,30 +377,38 @@ def download_artifact(self, deployment_instance):
316377
Download the model files by using `model.download()`"
317378
)
318379

319-
from_artifact_zip_path = deployment_instance.artifact_path
320-
to_artifacts_path = os.path.join(
321-
os.getcwd(),
380+
artifact_files_path = os.path.join(
381+
tempfile.gettempdir(),
322382
str(uuid.uuid4()),
323383
deployment_instance.model_name,
324384
str(deployment_instance.model_version),
325-
constants.MODEL_REGISTRY.ARTIFACTS_DIR_NAME,
326-
)
327-
to_artifact_version_path = (
328-
to_artifacts_path + "/" + str(deployment_instance.artifact_version)
385+
constants.MODEL_SERVING.ARTIFACTS_DIR_NAME,
386+
str(deployment_instance.artifact_version),
329387
)
330-
to_artifact_zip_path = to_artifact_version_path + ".zip"
388+
os.makedirs(artifact_files_path)
331389

332-
os.makedirs(to_artifacts_path)
390+
def update_download_progress(n_dirs, n_files, done=False):
391+
print(
392+
"Downloading artifact files (%s dirs, %s files)... %s"
393+
% (n_dirs, n_files, "DONE" if done else ""),
394+
end="\r",
395+
)
333396

334397
try:
335-
self._dataset_api.download(from_artifact_zip_path, to_artifact_zip_path)
336-
util.decompress(to_artifact_zip_path, extract_dir=to_artifacts_path)
337-
os.remove(to_artifact_zip_path)
338-
finally:
339-
if os.path.exists(to_artifact_zip_path):
340-
os.remove(to_artifact_zip_path)
341-
342-
return to_artifact_version_path
398+
from_hdfs_path = deployment_instance.artifact_files_path
399+
if from_hdfs_path.startswith("hdfs:/"):
400+
projects_index = from_hdfs_path.find("/Projects", 0)
401+
from_hdfs_path = from_hdfs_path[projects_index:]
402+
403+
self._download_files_from_hopsfs(
404+
from_hdfs_path=from_hdfs_path,
405+
to_local_path=artifact_files_path,
406+
update_download_progress=update_download_progress,
407+
)
408+
except BaseException as be:
409+
raise be
410+
411+
return artifact_files_path
343412

344413
def create(self, deployment_instance):
345414
try:

python/hsml/model.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import humps
2424
from hopsworks_common import usage
25-
from hsml import client, util, constants
25+
from hsml import client, constants, util
2626
from hsml.constants import ARTIFACT_VERSION
2727
from hsml.constants import INFERENCE_ENDPOINTS as IE
2828
from hsml.core import explicit_provenance
@@ -544,11 +544,14 @@ def model_path(self):
544544
def version_path(self):
545545
"""path of the model including version folder. Resolves to /Projects/{project_name}/Models/{name}/{version}"""
546546
return "{}/{}".format(self.model_path, str(self.version))
547-
547+
548548
@property
549-
def files_path(self):
549+
def model_files_path(self):
550550
"""path of the model files including version and files folder. Resolves to /Projects/{project_name}/Models/{name}/{version}/Files"""
551-
return "{}/{}/{}".format(self.model_path, str(self.version), constants.MODEL_REGISTRY.MODEL_FILES_DIR_NAME)
551+
return "{}/{}".format(
552+
self.version_path,
553+
constants.MODEL_REGISTRY.MODEL_FILES_DIR_NAME,
554+
)
552555

553556
@property
554557
def shared_registry_project_name(self):

python/hsml/predictor.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import Optional, Union
1818

1919
import humps
20-
from hsml import client, deployment, util
20+
from hsml import client, constants, deployment, util
2121
from hsml.constants import (
2222
ARTIFACT_VERSION,
2323
INFERENCE_ENDPOINTS,
@@ -395,9 +395,19 @@ def artifact_version(self):
395395
def artifact_version(self, artifact_version: Union[int, str]):
396396
self._artifact_version = artifact_version
397397

398+
@property
def artifact_files_path(self):
    """Path of the artifact files deployed by the predictor. Resolves to {model_path}/{model_version}/Artifacts/{artifact_version}"""
    artifacts_dir = constants.MODEL_SERVING.ARTIFACTS_DIR_NAME
    return (
        f"{self._model_path}/{self._model_version!s}"
        f"/{artifacts_dir}/{self._artifact_version!s}"
    )
406+
398407
@property
399408
def artifact_path(self):
400409
"""Path of the model artifact deployed by the predictor. Resolves to /Projects/{project_name}/Models/{name}/{version}/Artifacts/{artifact_version}/{name}_{version}_{artifact_version}.zip"""
410+
# TODO: Deprecated
401411
artifact_name = "{}_{}_{}.zip".format(
402412
self._model_name, str(self._model_version), str(self._artifact_version)
403413
)

0 commit comments

Comments
 (0)