diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index d9ea3dd0..48d9abd4 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -4,12 +4,14 @@ # Parameter cwl_workflow: the URL of the CWL workflow to execute. # Parameter args_as_json: JSON string contained the specific values for the workflow specific inputs. import json +import uuid from datetime import datetime -from airflow import DAG -from kubernetes.client import models as k8s -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator + from airflow.models.param import Param -import uuid +from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator +from kubernetes.client import models as k8s + +from airflow import DAG # The Kubernetes Pod that executes the CWL-Docker container # Must use elevated privileges to start/stop the Docker engine @@ -19,35 +21,37 @@ POD_NAMESPACE = "airflow" # Example arguments -default_cwl_workflow = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" -default_args_as_json_dict = {"input_processing_labels": ["label1", "label2"], - "input_cmr_stac": "https://cmr.earthdata.nasa.gov/search/granules.stac?collection_concept_id=C2408009906-LPCLOUD&temporal[]=2023-08-10T03:41:03.000Z,2023-08-10T03:41:03.000Z", - "input_unity_dapa_client": "40c2s0ulbhp9i0fmaph3su9jch", - "input_unity_dapa_api": "https://d3vc8w9zcq658.cloudfront.net", - "input_crid": "001", - "output_collection_id": "urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", - "output_data_bucket": "sps-dev-ds-storage"} +default_cwl_workflow = ( + "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" +) +default_args_as_json_dict = { + "input_processing_labels": ["label1", "label2"], + "input_cmr_stac": "https://cmr.earthdata.nasa.gov/search/granules.stac?collection_concept_id=C2408009906-LPCLOUD&temporal[]=2023-08-10T03:41:03.000Z,2023-08-10T03:41:03.000Z", + "input_unity_dapa_client": "40c2s0ulbhp9i0fmaph3su9jch", + "input_unity_dapa_api": "https://d3vc8w9zcq658.cloudfront.net", + "input_crid": "001", + "output_collection_id": "urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", + "output_data_bucket": "sps-dev-ds-storage", +} # Default DAG configuration -dag_default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'start_date': datetime(2024, 1, 1, 0, 0) -} +dag_default_args = {"owner": "unity-sps", "depends_on_past": False, "start_date": datetime(2024, 1, 1, 0, 0)} # The DAG -dag = DAG(dag_id='cwl-dag', - description='DAG to execute a generic CWL workflow', - tags=['cwl', 'unity-sps', "docker"], - is_paused_upon_creation=True, - catchup=False, - schedule_interval=None, - max_active_runs=1, - default_args=dag_default_args, - params={ - "cwl_workflow": Param(default_cwl_workflow, type="string"), - "args_as_json": Param(json.dumps(default_args_as_json_dict), type="string"), - }) +dag = DAG( + dag_id="cwl-dag", + description="DAG to execute a generic CWL workflow", + tags=["cwl", "unity-sps", "docker"], + is_paused_upon_creation=True, + catchup=False, + schedule_interval=None, + max_active_runs=1, + default_args=dag_default_args, + params={ + "cwl_workflow": Param(default_cwl_workflow, type="string"), + "args_as_json": Param(json.dumps(default_args_as_json_dict), type="string"), + }, +) # Environment variables default_env_vars = {} @@ -62,9 +66,9 @@ get_logs=True, task_id="docker-cwl-task", full_pod_spec=k8s.V1Pod( - metadata=k8s.V1ObjectMeta( - name='docker-cwl-pod-' + uuid.uuid4().hex), + metadata=k8s.V1ObjectMeta(name="docker-cwl-pod-" + uuid.uuid4().hex), ), pod_template_file=POD_TEMPLATE_FILE, arguments=["{{ params.cwl_workflow }}", "{{ params.args_as_json }}"], - dag=dag) + dag=dag, +) diff --git a/airflow/dags/sbg_preprocess_cwl_dag.py b/airflow/dags/sbg_preprocess_cwl_dag.py index e2b44a78..b9012954 100644 --- a/airflow/dags/sbg_preprocess_cwl_dag.py +++ b/airflow/dags/sbg_preprocess_cwl_dag.py @@ -42,10 +42,10 @@ params={ "cwl_workflow": Param(CWL_WORKFLOW, type="string"), "input_cmr_stac": Param(CMR_STAC, type="string"), - #"input_processing_labels": Param(["label1", "label2"], type="string[]"), - #"input_cmr_collection_name": Param("C2408009906-LPCLOUD", type="string"), - #"input_cmr_search_start_time": Param("2024-01-03T13:19:36.000Z", type="string"), - #"input_cmr_search_stop_time": Param("2024-01-03T13:19:36.000Z", type="string"), + # "input_processing_labels": Param(["label1", "label2"], type="string[]"), + # "input_cmr_collection_name": Param("C2408009906-LPCLOUD", type="string"), + # "input_cmr_search_start_time": Param("2024-01-03T13:19:36.000Z", type="string"), + # "input_cmr_search_stop_time": Param("2024-01-03T13:19:36.000Z", type="string"), "input_unity_dapa_api": Param("https://d3vc8w9zcq658.cloudfront.net", type="string"), "input_unity_dapa_client": Param("40c2s0ulbhp9i0fmaph3su9jch", type="string"), "input_crid": Param("001", type="string"), @@ -54,6 +54,7 @@ }, ) + # Task that serializes the job arguments into a JSON string def setup(ti=None, **context): task_dict = { @@ -90,4 +91,4 @@ def setup(ti=None, **context): dag=dag, ) -setup_task >> cwl_task \ No newline at end of file +setup_task >> cwl_task diff --git a/airflow/dags/sbg_preprocess_no_cwl.py b/airflow/dags/sbg_preprocess_no_cwl.py index 3d21d9a4..5c4159ff 100644 --- a/airflow/dags/sbg_preprocess_no_cwl.py +++ b/airflow/dags/sbg_preprocess_no_cwl.py @@ -1,31 +1,31 @@ # DAG for SBG Workflow #1 # See https://github.com/unity-sds/sbg-workflows/blob/main/preprocess/sbg-preprocess-workflow.cwl -import os +import json +import uuid from datetime import datetime -from airflow import DAG -from kubernetes.client import models as k8s -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator -from airflow.operators.python import PythonOperator + from airflow.models.param import Param -import uuid -import json +from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator +from kubernetes.client import models as k8s + +from airflow import DAG # Fixed Parameters UNITY_DS_IMAGE = "ghcr.io/unity-sds/unity-data-services:6.4.3" SBG_PREPROCESS_IMAGE = "gangl/sbg-unity-preprocess:266e40d8" -COGNITO_URL = 'https://cognito-idp.us-west-2.amazonaws.com' -UNITY_USERNAME = '' -UNITY_PASSWORD = '' -UNITY_PASSWORD_TYPE = '' -DOWNLOAD_DIR = '/scratch/granules' -DOWNLOADING_KEYS = 'data, data1' -GRANULES_DOWNLOAD_TYPE = 'DAAC' +COGNITO_URL = "https://cognito-idp.us-west-2.amazonaws.com" +UNITY_USERNAME = "" +UNITY_PASSWORD = "" +UNITY_PASSWORD_TYPE = "" +DOWNLOAD_DIR = "/scratch/granules" +DOWNLOADING_KEYS = "data, data1" +GRANULES_DOWNLOAD_TYPE = "DAAC" -EDL_USERNAME = '/sps/processing/workflows/edl_username' -EDL_PASSWORD = '/sps/processing/workflows/edl_password' -EDL_PASSWORD_TYPE = 'PARAM_STORE' -EDL_BASE_URL = 'https://urs.earthdata.nasa.gov/' -LOG_LEVEL = '20' +EDL_USERNAME = "/sps/processing/workflows/edl_username" +EDL_PASSWORD = "/sps/processing/workflows/edl_password" +EDL_PASSWORD_TYPE = "PARAM_STORE" +EDL_BASE_URL = "https://urs.earthdata.nasa.gov/" +LOG_LEVEL = "20" # This path must be inside the shared Persistent Volume @@ -33,56 +33,46 @@ STAGE_IN_RESULTS = "/scratch/granules/stage-in-results.json" # Venue dependent parameters -CLIENT_ID = '40c2s0ulbhp9i0fmaph3su9jch' -DAPA_API = 'https://d3vc8w9zcq658.cloudfront.net' -STAGING_BUCKET = 'sps-dev-ds-storage' +CLIENT_ID = "40c2s0ulbhp9i0fmaph3su9jch" +DAPA_API = "https://d3vc8w9zcq658.cloudfront.net" +STAGING_BUCKET = "sps-dev-ds-storage" -#CWL_URL = "http://awslbdockstorestack-lb-1429770210.us-west-2.elb.amazonaws.com:9998/api/ga4gh/trs/v2/tools/%23workflow%2Fdockstore.org%2Fmike-gangl%2FSBG-unity-preprocess/versions/16/PLAIN-CWL/descriptor/%2Fprocess.cwl" +# CWL_URL = "http://awslbdockstorestack-lb-1429770210.us-west-2.elb.amazonaws.com:9998/api/ga4gh/trs/v2/tools/%23workflow%2Fdockstore.org%2Fmike-gangl%2FSBG-unity-preprocess/versions/16/PLAIN-CWL/descriptor/%2Fprocess.cwl" CWL_URL = "https://raw.githubusercontent.com/unity-sds/unity-sps-workflows/sbg/sbg/process.cwl" # YAML_FILE = "/scratch/process.yaml" -ARGS = { - "download_dir": { - "class": "Directory", - "path": "/scratch/granules" - } -} +ARGS = {"download_dir": {"class": "Directory", "path": "/scratch/granules"}} # Default DAG configuration -dag_default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'start_date': datetime(2024, 1, 1, 0, 0) -} +dag_default_args = {"owner": "unity-sps", "depends_on_past": False, "start_date": datetime(2024, 1, 1, 0, 0)} volume = k8s.V1Volume( - name='unity-sps-airflow-pv', - persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='unity-sps-airflow-pvc') + name="unity-sps-airflow-pv", + persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name="unity-sps-airflow-pvc"), ) volume_mount = k8s.V1VolumeMount( - name='unity-sps-airflow-pv', - mount_path='/scratch', - sub_path=None, - read_only=False + name="unity-sps-airflow-pv", mount_path="/scratch", sub_path=None, read_only=False ) -dag = DAG(dag_id='sbg-preprocess-no-cwl', - description='SBG Preprocess Workflow', - tags=["SBG", "Unity", "SPS", "NASA", "JPL"], - is_paused_upon_creation=True, - catchup=False, - schedule=None, - max_active_runs=1, - default_args=dag_default_args, - params={ - "input_cmr_collection_name": Param("C2408009906-LPCLOUD", type="string"), - "input_cmr_search_start_time": Param("2023-08-10T03:41:03.000Z", type="string"), - "input_cmr_search_stop_time": Param("2023-08-10T03:41:03.000Z", type="string"), - "input_crid": Param("001", type="string"), - "output_collection_id": Param("urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", type="string"), - "output_data_bucket": Param("sps-dev-ds-storage", type="string") - }, ) +dag = DAG( + dag_id="sbg-preprocess-no-cwl", + description="SBG Preprocess Workflow", + tags=["SBG", "Unity", "SPS", "NASA", "JPL"], + is_paused_upon_creation=True, + catchup=False, + schedule=None, + max_active_runs=1, + default_args=dag_default_args, + params={ + "input_cmr_collection_name": Param("C2408009906-LPCLOUD", type="string"), + "input_cmr_search_start_time": Param("2023-08-10T03:41:03.000Z", type="string"), + "input_cmr_search_stop_time": Param("2023-08-10T03:41:03.000Z", type="string"), + "input_crid": Param("001", type="string"), + "output_collection_id": Param("urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", type="string"), + "output_data_bucket": Param("sps-dev-ds-storage", type="string"), + }, +) cmr_query_env_vars = [ # k8s.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=""), @@ -120,18 +110,18 @@ startup_timeout_seconds=1000, get_logs=True, task_id="CMR_Query", - full_pod_spec=k8s.V1Pod( - k8s.V1ObjectMeta(name=('cmr-query-pod-' + uuid.uuid4().hex))), + full_pod_spec=k8s.V1Pod(k8s.V1ObjectMeta(name=("cmr-query-pod-" + uuid.uuid4().hex))), do_xcom_push=True, volumes=[volume], volume_mounts=[volume_mount], - dag=dag) + dag=dag, +) stage_in_env_vars = [ - #k8s.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=AWS_ACCESS_KEY_ID), - #k8s.V1EnvVar(name="AWS_SECRET_ACCESS_KEY", value=AWS_SECRET_ACCESS_KEY), - #k8s.V1EnvVar(name="AWS_SESSION_TOKEN", value=AWS_SESSION_TOKEN), - #k8s.V1EnvVar(name="AWS_REGION", value=AWS_REGION), + # k8s.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=AWS_ACCESS_KEY_ID), + # k8s.V1EnvVar(name="AWS_SECRET_ACCESS_KEY", value=AWS_SECRET_ACCESS_KEY), + # k8s.V1EnvVar(name="AWS_SESSION_TOKEN", value=AWS_SESSION_TOKEN), + # k8s.V1EnvVar(name="AWS_REGION", value=AWS_REGION), k8s.V1EnvVar(name="PYTHONUNBUFFERED", value="1"), # k8s.V1EnvVar(name="USERNAME", value=UNITY_USERNAME), # k8s.V1EnvVar(name="PASSWORD", value=UNITY_PASSWORD), @@ -166,12 +156,12 @@ startup_timeout_seconds=1000, get_logs=True, task_id="Stage_In", - full_pod_spec=k8s.V1Pod( - k8s.V1ObjectMeta(name=('stage-in-pod-' + uuid.uuid4().hex))), + full_pod_spec=k8s.V1Pod(k8s.V1ObjectMeta(name=("stage-in-pod-" + uuid.uuid4().hex))), # do_xcom_push=True, volumes=[volume], volume_mounts=[volume_mount], - dag=dag) + dag=dag, +) # ref:http://awslbdockstorestack-lb-1429770210.us-west-2.elb.amazonaws.com:9998/api/ga4gh/trs/v2/tools/%23workflow%2Fdockstore.org%2Fmike-gangl%2FSBG-unity-preprocess/versions/16/PLAIN-CWL/descriptor/%2Fprocess.cwl @@ -191,12 +181,12 @@ startup_timeout_seconds=1000, get_logs=True, task_id="Process", - full_pod_spec=k8s.V1Pod( - k8s.V1ObjectMeta(name=('process-pod-' + uuid.uuid4().hex))), + full_pod_spec=k8s.V1Pod(k8s.V1ObjectMeta(name=("process-pod-" + uuid.uuid4().hex))), # do_xcom_push=True, volumes=[volume], volume_mounts=[volume_mount], - dag=dag) + dag=dag, +) stage_out_env_vars = [ # k8s.V1EnvVar(name="AWS_ACCESS_KEY_ID", value=AWS_ACCESS_KEY_ID), @@ -232,15 +222,15 @@ startup_timeout_seconds=1000, get_logs=True, task_id="Stage_Out", - full_pod_spec=k8s.V1Pod( - k8s.V1ObjectMeta(name=('stage-out-pod-' + uuid.uuid4().hex))), + full_pod_spec=k8s.V1Pod(k8s.V1ObjectMeta(name=("stage-out-pod-" + uuid.uuid4().hex))), # do_xcom_push=True, volumes=[volume], volume_mounts=[volume_mount], - dag=dag) + dag=dag, +) -''' +""" def preprocess(ti=None, **context): # cmr_query = ti.xcom_pull(task_ids=['CMR_Query'])[0] #print(cmr_query) @@ -250,6 +240,6 @@ def preprocess(ti=None, **context): preprocess_task = PythonOperator(task_id="Preprocess", python_callable=preprocess, dag=dag) -''' +""" cmr_query_task >> stage_in_task >> process_task >> stage_out_task diff --git a/terraform-unity/modules/terraform-eks-cluster/README.md b/terraform-unity/modules/terraform-eks-cluster/README.md index 421c19f7..92728946 100644 --- a/terraform-unity/modules/terraform-eks-cluster/README.md +++ b/terraform-unity/modules/terraform-eks-cluster/README.md @@ -19,7 +19,7 @@ | Name | Source | Version | |------|--------|---------| -| [unity-eks](#module\_unity-eks) | git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module | 0.1.3 | +| [unity-eks](#module\_unity-eks) | git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module | main | ## Resources @@ -35,6 +35,7 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [cluster\_name](#input\_cluster\_name) | n/a | `string` | n/a | yes | +| [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({|
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
}))
{| no | ## Outputs diff --git a/terraform-unity/modules/terraform-eks-cluster/main.tf b/terraform-unity/modules/terraform-eks-cluster/main.tf index 3b96d9d8..09df65c6 100644 --- a/terraform-unity/modules/terraform-eks-cluster/main.tf +++ b/terraform-unity/modules/terraform-eks-cluster/main.tf @@ -1,15 +1,9 @@ module "unity-eks" { - source = "git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=0.1.3" + source = "git@github.com:unity-sds/unity-cs-infra.git//terraform-unity-eks_module?ref=main" deployment_name = var.cluster_name - nodegroups = { - defaultGroup = { - instance_types = ["m5.xlarge"] - min_size = 1 - max_size = 1 - desired_size = 1 - } - } + nodegroups = var.nodegroups + aws_auth_roles = [{ rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/mcp-tenantOperator" username = "admin" diff --git a/terraform-unity/modules/terraform-eks-cluster/variables.tf b/terraform-unity/modules/terraform-eks-cluster/variables.tf index abbf86f7..a47859f6 100644 --- a/terraform-unity/modules/terraform-eks-cluster/variables.tf +++ b/terraform-unity/modules/terraform-eks-cluster/variables.tf @@ -1,3 +1,33 @@ variable "cluster_name" { type = string } + +variable "nodegroups" { + description = "A map of node group configurations" + + type = map(object({ + create_iam_role = optional(bool) + iam_role_arn = optional(string) + ami_id = optional(string) + min_size = optional(number) + max_size = optional(number) + desired_size = optional(number) + instance_types = optional(list(string)) + capacity_type = optional(string) + enable_bootstrap_user_data = optional(bool) + metadata_options = optional(map(any)) + })) + + default = { + defaultGroup = { + instance_types = ["m5.xlarge"] + min_size = 1 + max_size = 1 + desired_size = 1 + metadata_options = { + "http_endpoint" : "enabled", + "http_put_response_hop_limit" : 3, + } + } + } +} diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/.terraform.lock.hcl b/terraform-unity/modules/terraform-unity-sps-airflow/.terraform.lock.hcl index c9c721dc..0adb6929 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/.terraform.lock.hcl +++ b/terraform-unity/modules/terraform-unity-sps-airflow/.terraform.lock.hcl @@ -2,24 +2,25 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/aws" { - version = "5.36.0" + version = "5.35.0" + constraints = "5.35.0" hashes = [ - "h1:54QgAU2vY65WZsiZ9FligQfIf7hQUvwse4ezMwVMwgg=", - "zh:0da8409db879b2c400a7d9ed1311ba6d9eb1374ea08779eaf0c5ad0af00ac558", - "zh:1b7521567e1602bfff029f88ccd2a182cdf97861c9671478660866472c3333fa", - "zh:1cab4e6f3a1d008d01df44a52132a90141389e77dbb4ec4f6ac1119333242ecf", - "zh:1df9f73595594ce8293fb21287bcacf5583ae82b9f3a8e5d704109b8cf691646", - "zh:2b5909268db44b6be95ff6f9dc80d5f87ca8f63ba530fe66723c5fdeb17695fc", - "zh:37dd731eeb0bc1b20e3ec3a0cb5eb7a730edab425058ff40f2243438acc82830", - "zh:3e94c76a2b607a1174d10f5712aed16cb32216ac1c91bd6f21749d61a14045ac", - "zh:40e6ba3184d2d3bf283a07feed8b79c1bbc537a91215cac7b3521b9ccb3e503e", - "zh:67e52353fea47eb97825f6eb6fddd1935e0ff3b53a8861d23a70c2babf83ae51", - "zh:6d2e2f390e0c7b2cd2344b1d5d6eec8a1c11cf35d19f1d6f341286f2449e9e10", - "zh:7005483c43926800fad5bb18e27be883dac4339edb83a8f18ccdc7edf86fafc2", - "zh:7073fa7ccaa9b07c2cf7b24550a90e11f4880afd5c53afd51278eff0154692a0", + "h1:MKNFmhsOIirK7Qzr6TWkVaBcVGN81lCU0BPiaPOeQ8s=", + "zh:3a2a6f40db82d30ea8c5e3e251ca5e16b08e520570336e7e342be823df67e945", + "zh:420a23b69b412438a15b8b2e2c9aac2cf2e4976f990f117e4bf8f630692d3949", + "zh:4d8b887f6a71b38cff77ad14af9279528433e279eed702d96b81ea48e16e779c", + "zh:4edd41f8e1c7d29931608a7b01a7ae3d89d6f95ef5502cf8200f228a27917c40", + "zh:6337544e2ded5cf37b55a70aa6ce81c07fd444a2644ff3c5aad1d34680051bdc", + "zh:668faa3faaf2e0758bf319ea40d2304340f4a2dc2cd24460ddfa6ab66f71b802", + "zh:79ddc6d7c90e59fdf4a51e6ea822ba9495b1873d6a9d70daf2eeaf6fc4eb6ff3", + "zh:885822027faf1aa57787f980ead7c26e7d0e55b4040d926b65709b764f804513", + "zh:8c50a8f397b871388ff2e048f5eb280af107faa2e8926694f1ffd9f32a7a7cdf", "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:a6d48620e526c766faec9aeb20c40a98c1810c69b6699168d725f721dfe44846", - "zh:e29b651b5f39324656f466cd24a54861795cc423a1b58372f4e1d2d2112d10a0", + "zh:a2f5d2553df5573a060641f18ee7585587047c25ba73fd80617f59b5893d22b4", + "zh:c43833ae2a152213ee92eb5be7653f9493779eddbe0ce403ea49b5f1d87fd766", + "zh:dab01527a3a55b4f0f958af6f46313d775e27f9ad9d10bedbbfea4a35a06dc5f", + "zh:ed49c65620ec42718d681a7fc00c166c295ff2795db6cede2c690b83f9fb3e65", + "zh:f0a358c0ae1087c466d0fbcc3b4da886f33f881a145c3836ec43149878b86a1a", ] } diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/README.md b/terraform-unity/modules/terraform-unity-sps-airflow/README.md index d9bb68a3..38a59098 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/README.md +++ b/terraform-unity/modules/terraform-unity-sps-airflow/README.md @@ -16,7 +16,7 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.36.0 | +| [aws](#provider\_aws) | 5.35.0 | | [helm](#provider\_helm) | 2.12.1 | | [kubernetes](#provider\_kubernetes) | 2.25.2 | | [null](#provider\_null) | 3.2.2 | @@ -32,6 +32,9 @@ No modules. |------|------| | [aws_db_instance.airflow_db](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/db_instance) | resource | | [aws_db_subnet_group.airflow_db](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/db_subnet_group) | resource | +| [aws_iam_policy.airflow_worker_policy](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/iam_policy) | resource | +| [aws_iam_role.airflow_worker_role](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.airflow_worker_policy_attachment](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/iam_role_policy_attachment) | resource | | [aws_s3_bucket.airflow_logs](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/s3_bucket) | resource | | [aws_secretsmanager_secret.airflow_db](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/secretsmanager_secret) | resource | | [aws_secretsmanager_secret_version.airflow_db](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/resources/secretsmanager_secret_version) | resource | @@ -54,9 +57,9 @@ No modules. | [random_id.airflow_webserver_secret](https://registry.terraform.io/providers/hashicorp/random/3.6.0/docs/resources/id) | resource | | [random_id.counter](https://registry.terraform.io/providers/hashicorp/random/3.6.0/docs/resources/id) | resource | | [random_password.airflow_db](https://registry.terraform.io/providers/hashicorp/random/3.6.0/docs/resources/password) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/caller_identity) | data source | | [aws_eks_cluster.cluster](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/eks_cluster) | data source | | [aws_eks_cluster_auth.cluster](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/eks_cluster_auth) | data source | -| [aws_eks_node_group.default](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/eks_node_group) | data source | | [aws_security_group.default](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/security_group) | data source | | [aws_ssm_parameter.subnet_ids](https://registry.terraform.io/providers/hashicorp/aws/5.35.0/docs/data-sources/ssm_parameter) | data source | | [kubernetes_ingress_v1.airflow_ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/2.25.2/docs/data-sources/ingress_v1) | data source | diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf index 668217fd..7fd5c83f 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/data.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/data.tf @@ -1,3 +1,5 @@ +data "aws_caller_identity" "current" {} + data "aws_eks_cluster" "cluster" { name = var.eks_cluster_name } @@ -23,8 +25,3 @@ data "kubernetes_ingress_v1" "ogc_processes_api_ingress" { namespace = kubernetes_namespace.airflow.metadata[0].name } } - -data "aws_eks_node_group" "default" { - cluster_name = var.eks_cluster_name - node_group_name = "defaultGroup" -} diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf b/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf index 9ea2d0f3..b586cef3 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/locals.tf @@ -13,4 +13,5 @@ locals { mission = var.project Stack = "" } + oidc_provider_url = replace(data.aws_eks_cluster.cluster.identity[0].oidc[0].issuer, "https://", "") } diff --git a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf index d9022fba..4ed52485 100644 --- a/terraform-unity/modules/terraform-unity-sps-airflow/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-airflow/main.tf @@ -214,6 +214,66 @@ resource "aws_s3_bucket" "airflow_logs" { }) } +resource "aws_iam_policy" "airflow_worker_policy" { + name = "${var.project}-${var.venue}-${var.service_area}-AirflowWorkerPolicy-${local.counter}" + description = "Policy for Airflow Workers to access AWS services" + policy = jsonencode( + { + "Version" : "2012-10-17", + "Statement" : [ + { + "Effect" : "Allow", + "Action" : [ + "logs:CreateLogStream", + "logs:PutLogEvents", + "logs:CreateLogGroup", + "s3:ListBucket", + "s3:GetObject", + "s3:PutObject", + "sqs:SendMessage", + "sqs:ReceiveMessage", + "sns:Publish", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "secretsmanager:GetSecretValue", + "ssm:GetParameters" + ], + "Resource" : "*" + } + ] + } + ) +} + +resource "aws_iam_role" "airflow_worker_role" { + name = "${var.project}-${var.venue}-${var.service_area}-AirflowWorker-${local.counter}" + assume_role_policy = jsonencode( + { + "Version" : "2012-10-17", + "Statement" : [ + { + "Effect" : "Allow", + "Principal" : { + "Federated" : "arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.oidc_provider_url}" + }, + "Action" : "sts:AssumeRoleWithWebIdentity", + "Condition" : { + "StringEquals" : { + "${local.oidc_provider_url}:sub" : "system:serviceaccount:${kubernetes_namespace.airflow.metadata[0].name}:airflow-worker" + } + } + } + ] + } + ) + permissions_boundary = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/mcp-tenantOperator-AMI-APIG" +} + +resource "aws_iam_role_policy_attachment" "airflow_worker_policy_attachment" { + role = aws_iam_role.airflow_worker_role.name + policy_arn = aws_iam_policy.airflow_worker_policy.arn +} + resource "helm_release" "airflow" { name = "airflow" repository = var.helm_charts.airflow.repository @@ -228,7 +288,7 @@ resource "helm_release" "airflow" { metadata_secret_name = "airflow-metadata-secret" webserver_secret_name = "airflow-webserver-secret" airflow_logs_s3_location = "s3://${aws_s3_bucket.airflow_logs.id}" - airflow_worker_role_arn = data.aws_eks_node_group.default.node_role_arn + airflow_worker_role_arn = aws_iam_role.airflow_worker_role.arn }) ] set_sensitive {
"defaultGroup": {
"desired_size": 1,
"instance_types": [
"m5.xlarge"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}