Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into develop
Browse files Browse the repository at this point in the history
# Conflicts:
#	pyproject.toml
  • Loading branch information
frankinspace committed Feb 4, 2025
2 parents 3c02342 + bd00490 commit 37ffe73
Show file tree
Hide file tree
Showing 19 changed files with 540 additions and 760 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
directly in s3 bucket instead of requiring data to be copied.
- [issues/41](https://github.com/podaac/bignbit/issues/41): Module no longer depends on deprecated hashicorp/template provider
- [issues/42](https://github.com/podaac/bignbit/issues/42): Terraform version upgraded to v1.5.3
- Default values for `config_dir` and `bignbit_audit_path` have changed to `big-config` and `bignbit-cnm-output` respectively
### Deprecated
### Removed
- [issues/7](https://github.com/podaac/bignbit/issues/15): Remove the wait for GITC response
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ This module uses the following input variables:
| prefix | string | Prefix used for resource naming (project name, env name, etc...) | |
| data_buckets | list(string) | List of buckets where data is stored. Lambdas will be given read/write access to these buckets. | [] |
| config_bucket | string | Bucket where dataset configuration is stored | |
| config_dir | string | Path relative to `config_bucket` where dataset configuration is stored | "datset-config" |
| pobit_audit_bucket | string | S3 bucket where messages exchanged with GITC will be saved. Typically the cumulus internal bucket | |
| pobit_audit_path | string | Path relative to `pobit_audit_bucket` where messages exchanged with GITC will be saved. | "pobit-cma-output" |
| config_dir | string | Path relative to `config_bucket` where dataset configuration is stored | "big-config" |
| bignbit_audit_bucket | string | S3 bucket where messages exchanged with GITC will be saved. Typically the cumulus internal bucket | |
| bignbit_audit_path | string | Path relative to `bignbit_audit_bucket` where messages exchanged with GITC will be saved. | "bignbit-cnm-output" |
| bignbit_staging_bucket | string | S3 bucket where generated images will be saved. Leave blank to use bucket managed by this module. | _create new bucket named svc-${var.app_name}-${var.prefix}-staging_ |
| harmony_staging_path | string | Path relative to `bignbit_staging_bucket` where harmony results will be saved. | "bignbit-harmony-output" |
| gibs_region | string | Region where GIBS resources are deployed | |
Expand All @@ -157,8 +157,8 @@ This module supplies the following outputs:
| pobit_handle_gitc_response_arn | ARN of the lambda function | aws_lambda_function.handle_gitc_response.arn |
| pobit_gibs_topic | ARN of SNS topic GIBS replies to | aws_sns_topic.gibs_response_topic.arn |
| pobit_gibs_queue | ARN of SQS queue GIBS replies are published to | aws_sqs_queue.gibs_response_queue.arn |
| pobit_audit_bucket | Name of bucket where messages exchanged with GIBS are stored | var.pobit_audit_bucket |
| pobit_audit_path | Path relative to audit bucket where messages with GIBS are stored | var.pobit_audit_path |
| bignbit_audit_bucket | Name of bucket where messages exchanged with GIBS are stored | var.bignbit_audit_bucket |
| bignbit_audit_path | Path relative to audit bucket where messages with GIBS are stored | var.bignbit_audit_path |
| get_dataset_configuration_arn | ARN of the lambda function | aws_lambda_function.get_dataset_configuration.arn |
| get_granule_umm_json_arn | ARN of the lambda function | aws_lambda_function.get_granule_umm_json.arn |
| get_collection_concept_id_arn | ARN of the lambda function | aws_lambda_function.get_collection_concept_id.arn |
Expand Down
4 changes: 3 additions & 1 deletion bignbit/apply_opera_hls_treatment.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ def create_file_metadata(transformed_images_filepaths: List[pathlib.Path], stagi
"fileName": transformed_image.name,
"bucket": staging_bucket,
"key": f'opera_hls_processing/{datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d")}/{transformed_image.name}',
"local_filepath": str(transformed_image.resolve())
"local_filepath": str(transformed_image.resolve()),
"checksum": utils.sha512sum(transformed_image),
"checksumType": "SHA512"
}

new_cma_file_meta_list.append(file_dict)
Expand Down
13 changes: 12 additions & 1 deletion bignbit/get_dataset_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
CUMULUS_LOGGER = CumulusLogger('get_dataset_configuration')


class MissingDatasetConfiguration(Exception):
    """
    Raised when the dataset configuration object cannot be found in S3.
    """


class CMA(Process):
"""
A cumulus message adapter
Expand Down Expand Up @@ -58,7 +64,12 @@ def get_collection_config(config_bucket_name: str, config_key_name: str) -> dict
"""
s3_client = boto3.client('s3')

object_result = s3_client.get_object(Bucket=config_bucket_name, Key=config_key_name)
try:
object_result = s3_client.get_object(Bucket=config_bucket_name, Key=config_key_name)
except s3_client.exceptions.NoSuchKey as ex:
raise MissingDatasetConfiguration(
f"Dataset configuration not found s3://{config_bucket_name}/{config_key_name}") from ex

return json.load(object_result['Body'])


Expand Down
6 changes: 3 additions & 3 deletions bignbit/handle_gitc_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def handler(event, _):
umm_json = utils.get_umm_json(granule_concept_id, cmr_env)
granule_ur = umm_json['GranuleUR']

cnm_key_name = os.environ['POBIT_AUDIT_PATH_NAME'] + "/" + collection_name + "/" + granule_ur + "." + message_body['submissionTime'] + "." + "cnm-r.json"
cnm_key_name = os.environ['BIGNBIT_AUDIT_PATH_NAME'] + "/" + collection_name + "/" + granule_ur + "." + message_body['submissionTime'] + "." + "cnm-r.json"

utils.upload_cnm(os.environ['POBIT_AUDIT_BUCKET_NAME'], cnm_key_name, json.dumps(message_body))
s3_path = utils.upload_cnm(os.environ['BIGNBIT_AUDIT_BUCKET_NAME'], cnm_key_name, json.dumps(message_body))

logging.debug('CNM-R uploaded to s3 audit bucket for id %s', gitc_id)
logging.info('CNM-R uploaded to %s for id %s', s3_path, gitc_id)

return {"statusCode": 200, "body": "All good"}
13 changes: 12 additions & 1 deletion bignbit/process_harmony_results.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Cumulus lambda class to extract details about the results from the Harmony job"""
import hashlib
import logging
import os
from urllib.parse import urlparse

import boto3
from cumulus_logger import CumulusLogger
from cumulus_process import Process
from harmony import LinkType
Expand Down Expand Up @@ -55,6 +57,7 @@ def process_results(harmony_job_id: str, cmr_env: str, variable: str):
dict
A list of CMA file dictionaries pointing to the transformed image(s)
"""
s3_client = boto3.client('s3')
harmony_client = utils.get_harmony_client(cmr_env)
result_urls = list(harmony_client.result_urls(harmony_job_id, link_type=LinkType.s3))

Expand All @@ -64,11 +67,19 @@ def process_results(harmony_job_id: str, cmr_env: str, variable: str):
file_dicts = []
for url in result_urls:
bucket, key = urlparse(url).netloc, urlparse(url).path.lstrip("/")

response = s3_client.get_object(Bucket=bucket, Key=key)
md5_hash = hashlib.new('md5')
for chunk in response['Body'].iter_chunks(chunk_size=100 * 1024 * 1024): # 100 MB chunk size
md5_hash.update(chunk)

filename = key.split("/")[-1]
file_dict = {
"fileName": filename,
"bucket": bucket,
"key": key
"key": key,
"checksum": md5_hash.hexdigest(),
"checksumType": 'md5'
}
# Weird quirk where if we are working with a collection that doesn't define variables, the Harmony request
# should specify 'all' as the variable value but the GIBS message should be sent with the variable set to 'none'
Expand Down
14 changes: 7 additions & 7 deletions bignbit/save_cnm_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,28 @@ def process(self):
Same input sent to this function
"""
pobit_audit_bucket = self.config['pobit_audit_bucket']
pobit_audit_path = self.config['pobit_audit_path']
bignbit_audit_bucket = self.config['bignbit_audit_bucket']
bignbit_audit_path = self.config['bignbit_audit_path']

granule_ur = self.config['granule_ur']

cnm_content = self.config['cnm']
collection_name = cnm_content['collection']

cnm_key_name = pobit_audit_path + "/" + collection_name + "/" + granule_ur + "." + cnm_content['submissionTime'] + "." + "cnm.json"
cnm_key_name = bignbit_audit_path + "/" + collection_name + "/" + granule_ur + "." + cnm_content['submissionTime'] + "." + "cnm.json"

upload_cnm(pobit_audit_bucket, cnm_key_name, cnm_content)
upload_cnm(bignbit_audit_bucket, cnm_key_name, cnm_content)

return self.input


def upload_cnm(pobit_audit_bucket: str, cnm_key_name: str, cnm_content: dict):
def upload_cnm(bignbit_audit_bucket: str, cnm_key_name: str, cnm_content: dict):
"""
Upload CNM message into an S3 bucket
Parameters
----------
pobit_audit_bucket: str
bignbit_audit_bucket: str
Name of the bucket where the CNM should be uploaded
cnm_key_name: str
Expand All @@ -66,7 +66,7 @@ def upload_cnm(pobit_audit_bucket: str, cnm_key_name: str, cnm_content: dict):
s3_client = boto3.client('s3')
s3_client.put_object(
Body=json.dumps(cnm_content, default=str).encode("utf-8"),
Bucket=pobit_audit_bucket,
Bucket=bignbit_audit_bucket,
Key=cnm_key_name
)

Expand Down
6 changes: 3 additions & 3 deletions bignbit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def sha512sum(filepath: pathlib.Path):
return hash512.hexdigest()


def upload_cnm(bucket_name: str, key_name: str, cnm_content: dict):
def upload_cnm(bucket_name: str, key_name: str, cnm_content: str) -> str:
"""
Upload CNM message into an S3 bucket
Expand All @@ -172,12 +172,12 @@ def upload_cnm(bucket_name: str, key_name: str, cnm_content: dict):
key_name: str
Key to object location in bucket
cnm_content: dict
cnm_content: str
The CNM message to upload
Returns
-------
None
S3 URI of new object
"""
s3_client = boto3.client('s3')
s3_client.put_object(
Expand Down
4 changes: 2 additions & 2 deletions examples/cumulus-tf/browse_image_workflow.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ module "bignbit_module" {
data_buckets = [aws_s3_bucket.protected.id, aws_s3_bucket.public.id, aws_s3_bucket.private.id]

config_bucket = aws_s3_bucket.internal.bucket
config_dir = "dataset-config"
config_dir = "big-config"

pobit_audit_bucket = aws_s3_bucket.internal.bucket
bignbit_audit_bucket = aws_s3_bucket.internal.bucket

gibs_region = var.gibs_region == "mocked" ? "us-west-2" : var.gibs_region
gibs_queue_name = var.gibs_queue_name == "mocked" ? aws_sqs_queue.gitc_input_queue[0].name : var.gibs_queue_name
Expand Down
4 changes: 2 additions & 2 deletions examples/cumulus-tf/tfvars/uat.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
bignbit_stage = "uat"
prefix = "podaac-uat-svc"

gibs_region="us-west-2"
gibs_queue_name="gitc-uat-PODAAC_IN.fifo"
gibs_region="us-east-1"
gibs_queue_name="gitc-uat-PODAAC-IN.fifo"
Loading

0 comments on commit 37ffe73

Please sign in to comment.