Skip to content

Commit

Permalink
transformer: replace deprecated etl_meta with etl_args (fix aisto…
Browse files Browse the repository at this point in the history
…re long test)

Signed-off-by: Tony Chen <a122774007@gmail.com>
  • Loading branch information
Nahemah1022 committed Feb 14, 2025
1 parent 914de20 commit db0c5e4
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 29 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/transformer-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ on:
required: true
type: boolean
default: false
build_hash_with_args_image:
description: 'Build hash_with_args transformer image'
required: true
type: boolean
default: false
build_tar2tf_image:
description: 'Build tar2tf transformer image'
required: true
Expand Down Expand Up @@ -61,6 +66,7 @@ env:
ECHO_ENABLE: ${{ github.event.inputs.build_echo_image }}
HELLO_WORLD_ENABLE: ${{ github.event.inputs.build_hello_world_image }}
MD5_ENABLE: ${{ github.event.inputs.build_md5_image }}
HASH_WITH_ARGS_ENABLE: ${{ github.event.inputs.build_hash_with_args_image }}
TAR2TF_ENABLE: ${{ github.event.inputs.build_tar2tf_image }}
COMPRESS_ENABLE: ${{ github.event.inputs.build_compress_image }}
FFMPEG_ENABLE: ${{ github.event.inputs.build_ffmpeg_image }}
Expand Down Expand Up @@ -114,6 +120,9 @@ jobs:
md5:
- transformers/md5/**/*
- transformers/tests/test_md5.py
hash_with_args:
- transformers/hash_with_args/**/*
- transformers/tests/test_hash_with_args.py
tar2tf:
- transformers/tar2tf/**/*
- transformers/tests/test_tar2tf.py
Expand Down Expand Up @@ -141,6 +150,9 @@ jobs:
- name: Update var MD5_ENABLE
if: steps.paths_filter.outputs.md5 == 'true'
run: echo "MD5_ENABLE=true" >> $GITHUB_ENV
- name: Update var HASH_WITH_ARGS_ENABLE
if: steps.paths_filter.outputs.hash_with_args == 'true'
run: echo "HASH_WITH_ARGS_ENABLE=true" >> $GITHUB_ENV
- name: Update var TAR2TF_ENABLE
if: steps.paths_filter.outputs.tar2tf == 'true'
run: echo "TAR2TF_ENABLE=true" >> $GITHUB_ENV
Expand Down Expand Up @@ -202,6 +214,18 @@ jobs:
make -B -C $GITHUB_WORKSPACE/transformers/md5 all
pytest -v test_md5.py
popd
- name: Build and Test HASH_WITH_ARGS
if: ${{ env.HASH_WITH_ARGS_ENABLE == 'true' }}
run: |
docker system prune -a -f --volumes
pushd $GITHUB_WORKSPACE/transformers/tests
export GIT_TEST="true"
make -B -C $GITHUB_WORKSPACE/transformers/hash_with_args all
pytest -v test_hash_with_args.py
export GIT_TEST="false"
make -B -C $GITHUB_WORKSPACE/transformers/hash_with_args all
pytest -v test_hash_with_args.py
popd
- name: Build and Test TAR2TF
if: ${{ env.TAR2TF_ENABLE == 'true' }}
run: |
Expand Down
14 changes: 7 additions & 7 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@ build:md5:
when: always
- when: manual

build:hash_with_metadata:
build:hash_with_args:
extends: .build_template
script:
- cd transformers
- cd hash_with_metadata && make -B all
- cd hash_with_args && make -B all
rules:
- changes:
- transformers/hash_with_metadata/**/*
- transformers/tests/test_hash_with_metadata.py
- transformers/hash_with_args/**/*
- transformers/tests/test_hash_with_args.py
when: always
- when: manual

Expand Down Expand Up @@ -182,11 +182,11 @@ test:md5:
script:
- cd transformers/tests && pytest -v test_md5.py

test:hash_with_metadata:
test:hash_with_args:
extends: .test_template
needs: ["build:hash_with_metadata"]
needs: ["build:hash_with_args"]
script:
- cd transformers/tests && pytest -v test_hash_with_metadata.py
- cd transformers/tests && pytest -v test_hash_with_args.py

test:tar2tf:
extends: .test_template
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ REGISTRY_URL ?= docker.io/aistorage
all: build push

build:
docker build -t $(REGISTRY_URL)/transformer_hash_with_metadata:$(TAG) .
docker build -t $(REGISTRY_URL)/transformer_hash_with_args:$(TAG) .

push:
docker push $(REGISTRY_URL)/transformer_hash_with_metadata:$(TAG)
docker push $(REGISTRY_URL)/transformer_hash_with_args:$(TAG)
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Hash with Metadata Transformer
# Hash with Args Transformer

A simple hash transformer that processes objects (bytes) by extracting ETL metadata from an inline transform request and using it as a seed value to compute a seeded hash. This example demonstrates how to pass custom metadata for each individual object through an ETL inline transform and utilize it within your pod.
A simple hash transformer that processes objects (bytes) by extracting the ETL arguments from an inline transform request and using them as a seed value to compute a seeded hash. This example demonstrates how to pass custom arguments for each individual object through an ETL inline transform and use them within your pod.

### Initializing ETL with AIStore CLI

The following steps demonstrate how to initialize the `transformer-hash-with-metadata` with using the [AIStore CLI](https://github.com/NVIDIA/aistore/blob/master/docs/cli.md):
The following steps demonstrate how to initialize the `transformer-hash-with-args` ETL using the [AIStore CLI](https://github.com/NVIDIA/aistore/blob/master/docs/cli.md):

```!bash
$ cd transformers/hash_with_metadata
$ cd transformers/hash_with_args
$ # Specify the communication type between the target and the container
$ export COMMUNICATION_TYPE='hpull://'
Expand All @@ -21,6 +21,6 @@ $ ais etl init spec --from-file init_spec.yaml --name <etl-name> --comm-type "hp
$ # Put an object
$ ais object put <your-file> ais://<bck-name>
$ # Transform and retrieve objects from the bucket using this ETL with metadata
$ # Transform and retrieve objects from the bucket using this ETL with arguments
$ curl -L -X GET "${AIS_ENDPOINT}/v1/objects/<bck-name>/<your-file>?etl_name=<etl-name>&etl_args=100000"
```
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
apiVersion: v1
kind: Pod
metadata:
name: transformer-hash-with-metadata
name: transformer-hash-with-args
annotations:
# Values it can take ["hpull://","hrev://","hpush://"]
communication_type: ${COMMUNICATION_TYPE:-"\"hpull://\""}
wait_timeout: 5m
spec:
containers:
- name: server
image: aistorage/transformer_hash_with_metadata:latest
imagePullPolicy: IfNotPresent
image: aistorage/transformer_hash_with_args:latest
imagePullPolicy: Always
ports:
- name: default
containerPort: 80
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ def do_PUT(self):
post_data = self.rfile.read(content_length)
parsed_url = urlparse(self.path)
seed = seed_default
logging.info("PUT request received")
params = parse_qs(parsed_url.query)
if "etl_meta" in params:
seed = int(params["etl_meta"][0])
if "etl_args" in params:
seed = int(params["etl_args"][0])
logging.info("PUT request with seed %d", seed)

hash_result = self.calculate_xxhash(post_data, seed)
self._set_headers()
Expand All @@ -56,9 +58,11 @@ def do_GET(self):
x = requests.get(host_target + self.path)

seed = seed_default
logging.info("GET request received")
params = parse_qs(parsed_url.query)
if "etl_meta" in params:
seed = int(params["etl_meta"][0])
if "etl_args" in params:
seed = int(params["etl_args"][0])
logging.info("GET request with seed %d", seed)

hash_result = self.calculate_xxhash(x.content, seed)
self._set_headers()
Expand Down
2 changes: 1 addition & 1 deletion transformers/tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
aistore
aistore==1.12.1
filetype
keras
numpy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,24 @@
import random

from aistore.sdk.etl.etl_const import ETL_COMM_HPULL, ETL_COMM_HPUSH, ETL_COMM_HREV
from aistore.sdk.etl import ETLConfig

from tests.utils import git_test_mode_format_image_tag_test
from tests.base import TestBase

HASH_WITH_METADATA_SPEC_TEMPLATE = """
HASH_WITH_ARGS_SPEC_TEMPLATE = """
apiVersion: v1
kind: Pod
metadata:
name: transformer-hash-with-metadata
name: transformer-hash-with-args
annotations:
# Values it can take ["hpull://","hrev://","hpush://"]
communication_type: "{communication_type}://"
wait_timeout: 5m
spec:
containers:
- name: server
image: aistorage/transformer_hash_with_metadata:latest
image: aistorage/transformer_hash_with_args:latest
imagePullPolicy: Always
ports:
- name: default
Expand All @@ -38,7 +39,7 @@
value: "{seed_default}"
"""

class TestHashWithMetadataTransformer(TestBase):
class TestHashWithArgsTransformer(TestBase):
def setUp(self):
super().setUp()
self.test_image_filename = "test-image.jpg"
Expand All @@ -57,17 +58,17 @@ def seeded_hash_file(self, filepath, seed):

def compare_transformed_data_with_seeded_hash(self, filename, original_filepath, seed):
transformed_data_bytes = (
self.test_bck.object(filename).get_reader(etl_name=self.test_etl.name).read_all()
self.test_bck.object(filename).get_reader(etl=ETLConfig(name=self.test_etl.name, args=str(seed))).read_all()
)
original_file_hash = self.seeded_hash_file(original_filepath, seed)
self.assertEqual(transformed_data_bytes.decode("utf-8"), original_file_hash)

def run_seeded_hash_test(self, communication_type):
seed_default=random.randint(0, 1000)
template = HASH_WITH_METADATA_SPEC_TEMPLATE.format(communication_type=communication_type, seed_default=seed_default)
template = HASH_WITH_ARGS_SPEC_TEMPLATE.format(communication_type=communication_type, seed_default=seed_default)

if self.git_test_mode == "true":
template = git_test_mode_format_image_tag_test(template, "hash-with-metadata")
template = git_test_mode_format_image_tag_test(template, "hash_with_args")

self.test_etl.init_spec(
template=template, communication_type=communication_type
Expand Down

0 comments on commit db0c5e4

Please sign in to comment.