diff --git a/.github/workflows/transformer-docker.yml b/.github/workflows/transformer-docker.yml index ac7ae7c..3a9e456 100644 --- a/.github/workflows/transformer-docker.yml +++ b/.github/workflows/transformer-docker.yml @@ -18,6 +18,11 @@ on: required: true type: boolean default: false + build_hash_with_args_image: + description: 'Build hash_with_args transformer image' + required: true + type: boolean + default: false build_tar2tf_image: description: 'Build tar2tf transformer image' required: true @@ -61,6 +66,7 @@ env: ECHO_ENABLE: ${{ github.event.inputs.build_echo_image }} HELLO_WORLD_ENABLE: ${{ github.event.inputs.build_hello_world_image }} MD5_ENABLE: ${{ github.event.inputs.build_md5_image }} + HASH_WITH_ARGS_ENABLE: ${{ github.event.inputs.build_hash_with_args_image }} TAR2TF_ENABLE: ${{ github.event.inputs.build_tar2tf_image }} COMPRESS_ENABLE: ${{ github.event.inputs.build_compress_image }} FFMPEG_ENABLE: ${{ github.event.inputs.build_ffmpeg_image }} @@ -114,6 +120,9 @@ jobs: md5: - transformers/md5/**/* - transformers/tests/test_md5.py + hash_with_args: + - transformers/hash_with_args/**/* + - transformers/tests/test_hash_with_args.py tar2tf: - transformers/tar2tf/**/* - transformers/tests/test_tar2tf.py @@ -141,6 +150,9 @@ jobs: - name: Update var MD5_ENABLE if: steps.paths_filter.outputs.md5 == 'true' run: echo "MD5_ENABLE=true" >> $GITHUB_ENV + - name: Update var HASH_WITH_ARGS_ENABLE + if: steps.paths_filter.outputs.hash_with_args == 'true' + run: echo "HASH_WITH_ARGS_ENABLE=true" >> $GITHUB_ENV - name: Update var TAR2TF_ENABLE if: steps.paths_filter.outputs.tar2tf == 'true' run: echo "TAR2TF_ENABLE=true" >> $GITHUB_ENV @@ -202,6 +214,18 @@ jobs: make -B -C $GITHUB_WORKSPACE/transformers/md5 all pytest -v test_md5.py popd + - name: Build and Test HASH_WITH_ARGS + if: ${{ env.HASH_WITH_ARGS_ENABLE == 'true' }} + run: | + docker system prune -a -f --volumes + pushd $GITHUB_WORKSPACE/transformers/tests + export GIT_TEST="true" + make -B -C 
$GITHUB_WORKSPACE/transformers/hash_with_args all + pytest -v test_hash_with_args.py + export GIT_TEST="false" + make -B -C $GITHUB_WORKSPACE/transformers/hash_with_args all + pytest -v test_hash_with_args.py + popd - name: Build and Test TAR2TF if: ${{ env.TAR2TF_ENABLE == 'true' }} run: | diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b047435..b6950d2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -91,15 +91,15 @@ build:md5: when: always - when: manual -build:hash_with_metadata: +build:hash_with_args: extends: .build_template script: - cd transformers - - cd hash_with_metadata && make -B all + - cd hash_with_args && make -B all rules: - changes: - - transformers/hash_with_metadata/**/* - - transformers/tests/test_hash_with_metadata.py + - transformers/hash_with_args/**/* + - transformers/tests/test_hash_with_args.py when: always - when: manual @@ -182,11 +182,11 @@ test:md5: script: - cd transformers/tests && pytest -v test_md5.py -test:hash_with_metadata: +test:hash_with_args: extends: .test_template - needs: ["build:hash_with_metadata"] + needs: ["build:hash_with_args"] script: - - cd transformers/tests && pytest -v test_hash_with_metadata.py + - cd transformers/tests && pytest -v test_hash_with_args.py test:tar2tf: extends: .test_template diff --git a/transformers/hash_with_metadata/Dockerfile b/transformers/hash_with_args/Dockerfile similarity index 100% rename from transformers/hash_with_metadata/Dockerfile rename to transformers/hash_with_args/Dockerfile diff --git a/transformers/hash_with_metadata/Makefile b/transformers/hash_with_args/Makefile similarity index 53% rename from transformers/hash_with_metadata/Makefile rename to transformers/hash_with_args/Makefile index 2187613..a921601 100644 --- a/transformers/hash_with_metadata/Makefile +++ b/transformers/hash_with_args/Makefile @@ -9,7 +9,7 @@ REGISTRY_URL ?= docker.io/aistorage all: build push build: - docker build -t $(REGISTRY_URL)/transformer_hash_with_metadata:$(TAG) . 
+ docker build -t $(REGISTRY_URL)/transformer_hash_with_args:$(TAG) . push: - docker push $(REGISTRY_URL)/transformer_hash_with_metadata:$(TAG) + docker push $(REGISTRY_URL)/transformer_hash_with_args:$(TAG) diff --git a/transformers/hash_with_metadata/README.md b/transformers/hash_with_args/README.md similarity index 60% rename from transformers/hash_with_metadata/README.md rename to transformers/hash_with_args/README.md index 0a8641e..b51006b 100644 --- a/transformers/hash_with_metadata/README.md +++ b/transformers/hash_with_args/README.md @@ -1,13 +1,13 @@ -# Hash with Metadata Transformer +# Hash with Args Transformer -A simple hash transformer that processes objects (bytes) by extracting ETL metadata from an inline transform request and using it as a seed value to compute a seeded hash. This example demonstrates how to pass custom metadata for each individual object through an ETL inline transform and utilize it within your pod. +A simple hash transformer that processes objects (bytes) by extracting the ETL argument from an inline transform request and using it as a seed value to compute a seeded hash. This example demonstrates how to pass a custom argument for each individual object through an ETL inline transform and utilize it within your pod. 
### Initializing ETL with AIStore CLI -The following steps demonstrate how to initialize the `transformer-hash-with-metadata` with using the [AIStore CLI](https://github.com/NVIDIA/aistore/blob/master/docs/cli.md): +The following steps demonstrate how to initialize the `transformer-hash-with-args` using the [AIStore CLI](https://github.com/NVIDIA/aistore/blob/master/docs/cli.md): ```!bash -$ cd transformers/hash_with_metadata +$ cd transformers/hash_with_args $ # Mention communication type b/w target and container $ export COMMUNICATION_TYPE='hpull://' @@ -21,6 +21,6 @@ $ ais etl init spec --from-file init_spec.yaml --name --comm-type "hp $ # Put an object $ ais object put ais:// -$ # Transform and retrieve objects from the bucket using this ETL with metadata +$ # Transform and retrieve objects from the bucket using this ETL with arguments -$ curl -L -X GET "${AIS_ENDPOINT}/v1/objects//?etl_name=&etl_meta=100000" +$ curl -L -X GET "${AIS_ENDPOINT}/v1/objects//?etl_name=&etl_args=100000" ``` \ No newline at end of file diff --git a/transformers/hash_with_metadata/pod.yaml b/transformers/hash_with_args/pod.yaml similarity index 79% rename from transformers/hash_with_metadata/pod.yaml rename to transformers/hash_with_args/pod.yaml index 0aa6b73..29070ba 100644 --- a/transformers/hash_with_metadata/pod.yaml +++ b/transformers/hash_with_args/pod.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - name: transformer-hash-with-metadata + name: transformer-hash-with-args annotations: # Values it can take ["hpull://","hrev://","hpush://"] communication_type: ${COMMUNICATION_TYPE:-"\"hpull://\""} @@ -9,8 +9,8 @@ metadata: spec: containers: - name: server - image: aistorage/transformer_hash_with_metadata:latest - imagePullPolicy: IfNotPresent + image: aistorage/transformer_hash_with_args:latest + imagePullPolicy: Always ports: - name: default containerPort: 80 diff --git a/transformers/hash_with_metadata/requirements.txt b/transformers/hash_with_args/requirements.txt similarity index 100% rename from 
transformers/hash_with_metadata/requirements.txt rename to transformers/hash_with_args/requirements.txt diff --git a/transformers/hash_with_metadata/server.py b/transformers/hash_with_args/server.py similarity index 89% rename from transformers/hash_with_metadata/server.py rename to transformers/hash_with_args/server.py index 957363f..1b4cdfb 100755 --- a/transformers/hash_with_metadata/server.py +++ b/transformers/hash_with_args/server.py @@ -34,9 +34,11 @@ def do_PUT(self): post_data = self.rfile.read(content_length) parsed_url = urlparse(self.path) seed = seed_default + logging.info("PUT request received") params = parse_qs(parsed_url.query) - if "etl_meta" in params: - seed = int(params["etl_meta"][0]) + if "etl_args" in params: + seed = int(params["etl_args"][0]) + logging.info("PUT request with seed %d", seed) hash_result = self.calculate_xxhash(post_data, seed) self._set_headers() @@ -56,9 +58,11 @@ def do_GET(self): x = requests.get(host_target + self.path) seed = seed_default + logging.info("GET request received") params = parse_qs(parsed_url.query) - if "etl_meta" in params: - seed = int(params["etl_meta"][0]) + if "etl_args" in params: + seed = int(params["etl_args"][0]) + logging.info("GET request with seed %d", seed) hash_result = self.calculate_xxhash(x.content, seed) self._set_headers() diff --git a/transformers/tests/requirements.txt b/transformers/tests/requirements.txt index cc953b6..0917b32 100644 --- a/transformers/tests/requirements.txt +++ b/transformers/tests/requirements.txt @@ -1,4 +1,4 @@ -aistore +aistore==1.12.1 filetype keras numpy diff --git a/transformers/tests/test_hash_with_metadata.py b/transformers/tests/test_hash_with_args.py similarity index 85% rename from transformers/tests/test_hash_with_metadata.py rename to transformers/tests/test_hash_with_args.py index 10dfc43..6b5bff2 100644 --- a/transformers/tests/test_hash_with_metadata.py +++ b/transformers/tests/test_hash_with_args.py @@ -7,15 +7,16 @@ import random from 
aistore.sdk.etl.etl_const import ETL_COMM_HPULL, ETL_COMM_HPUSH, ETL_COMM_HREV +from aistore.sdk.etl import ETLConfig from tests.utils import git_test_mode_format_image_tag_test from tests.base import TestBase -HASH_WITH_METADATA_SPEC_TEMPLATE = """ +HASH_WITH_ARGS_SPEC_TEMPLATE = """ apiVersion: v1 kind: Pod metadata: - name: transformer-hash-with-metadata + name: transformer-hash-with-args annotations: # Values it can take ["hpull://","hrev://","hpush://"] communication_type: "{communication_type}://" @@ -23,7 +24,7 @@ spec: containers: - name: server - image: aistorage/transformer_hash_with_metadata:latest + image: aistorage/transformer_hash_with_args:latest imagePullPolicy: Always ports: - name: default @@ -38,7 +39,7 @@ value: "{seed_default}" """ -class TestHashWithMetadataTransformer(TestBase): +class TestHashWithArgsTransformer(TestBase): def setUp(self): super().setUp() self.test_image_filename = "test-image.jpg" @@ -57,17 +58,17 @@ def seeded_hash_file(self, filepath, seed): def compare_transformed_data_with_seeded_hash(self, filename, original_filepath, seed): transformed_data_bytes = ( - self.test_bck.object(filename).get_reader(etl_name=self.test_etl.name).read_all() + self.test_bck.object(filename).get_reader(etl=ETLConfig(name=self.test_etl.name, args=str(seed))).read_all() ) original_file_hash = self.seeded_hash_file(original_filepath, seed) self.assertEqual(transformed_data_bytes.decode("utf-8"), original_file_hash) def run_seeded_hash_test(self, communication_type): seed_default=random.randint(0, 1000) - template = HASH_WITH_METADATA_SPEC_TEMPLATE.format(communication_type=communication_type, seed_default=seed_default) + template = HASH_WITH_ARGS_SPEC_TEMPLATE.format(communication_type=communication_type, seed_default=seed_default) if self.git_test_mode == "true": - template = git_test_mode_format_image_tag_test(template, "hash-with-metadata") + template = git_test_mode_format_image_tag_test(template, "hash_with_args") self.test_etl.init_spec( 
template=template, communication_type=communication_type