From 24e64dbf55b582dcc274ecca99a027141170b31d Mon Sep 17 00:00:00 2001
From: Jesus Camacho Rodriguez
Date: Tue, 30 Jan 2024 15:11:29 -0800
Subject: [PATCH] Update Azure Pipeline config file for Spark

---
Notes (this section is ignored when the patch is applied):

What the diff below does:
  * Adds the pipeline variables EXP_SCALE_FACTOR, EXP_MACHINE and
    EXP_CLUSTER_SIZE.
  * Turns each entry of the `lsts` parameter from a single string into an
    object with `table_format`, `version` and `mode` keys.
  * Renames the published artifact from `drop` to `lst-bench-0.1-SNAPSHOT`
    and updates every path that reads from it.
  * Points all deployment jobs at the `lst-bench-github` environment and adds
    `workspace: clean: all` to the two deploy-stage jobs.
  * Replaces the single `test` stage with one stage per `lsts` entry, chains
    the three jobs with `dependsOn`, and adds `download: none` to their steps.
  * Has StartEngine publish `spark_master_host` as an output variable, which
    RunExperiment maps to SPARK_MASTER_HOST.
  * Exports EXP_NAME before each launcher run and switches the time-travel
    workload file from `wp3_time_travel-*` to `wp4_time_travel-*`.

Layout: line breaks and indentation were restored from a whitespace-collapsed
copy. Hunk line counts match the `@@` headers and the diffstat; the exact
leading whitespace is reconstructed from standard Azure Pipelines nesting, so
verify it against the target file before applying (or apply with
`git apply --ignore-whitespace`).

NOTE(review): StartEngine is a deployment job that targets an environment
resource. The Azure Pipelines documentation describes the output-variable key
for that case as '<job-name>_<resource-name>.<step-name>.<variable-name>' -
confirm that 'engine_start_step.spark_master_host' actually resolves.

NOTE(review): the stage name is built from `mode` and `table_format` only, so
two `lsts` entries that differ only in `version` would produce the same stage
name - confirm this is acceptable.

 .../azure-pipelines/run-lst-bench.yml | 127 +++++++++++-------
 1 file changed, 81 insertions(+), 46 deletions(-)

diff --git a/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml b/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml
index 7d3b3da0..cf443b06 100644
--- a/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml
+++ b/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml
@@ -15,14 +15,29 @@ trigger: none
 variables:
   MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
   MAVEN_OPTS: '-ntp -B -Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
+  EXP_SCALE_FACTOR: 100
+  EXP_MACHINE: 'Standard_E8s_v5'
+  EXP_CLUSTER_SIZE: 4
 
 parameters:
 - name: lsts
   type: object
   default:
-    - "delta-2.2.0"
-    - "iceberg-1.1.0"
-    - "hudi-0.12.2"
+    - table_format: "delta"
+      version: "2.2.0"
+      mode: "cow"
+    - table_format: "iceberg"
+      version: "1.1.0"
+      mode: "cow"
+    - table_format: "iceberg"
+      version: "1.1.0"
+      mode: "mor"
+    - table_format: "hudi"
+      version: "0.12.2"
+      mode: "cow"
+    - table_format: "hudi"
+      version: "0.12.2"
+      mode: "mor"
 
 stages:
 # Build LST-Bench and create artifact to deploy to target VM
@@ -56,15 +71,17 @@ stages:
     - task: PublishPipelineArtifact@1
       inputs:
         targetPath: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/'
-        artifact: drop
+        artifact: lst-bench-0.1-SNAPSHOT
 
 # Set up engine and deploy LST-Bench
 - stage: deploy
   jobs:
   - deployment: EngineDeploy
     displayName: 'Deploying engine'
+    workspace:
+      clean: all
     environment:
-      name: 'lst-bench-periodic-reporting'
+      name: 'lst-bench-github'
       resourceType: VirtualMachine
       tags: 'head'
     strategy:
@@ -74,7 +91,7 @@ stages:
           - bash: |
               echo 'Deploy engine'
               mkdir -p ~/spark-3.3.1
-              cp $(Pipeline.Workspace)/drop/.azure-pipelines/scripts/spark-3.3.1/* ~/spark-3.3.1/
+              cp $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/run/spark-3.3.1/azure-pipelines/sh/* ~/spark-3.3.1/
               cd ~/spark-3.3.1
               chmod +x ./*
               my_ip=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
@@ -84,8 +101,10 @@ stages:
               ./dist-exec.sh spark-3.3.1 init.sh ${my_ip} "$(lstbenchdatasets_shared_key)"
   - deployment: ClientDeploy
     displayName: 'Deploying LST-Bench client'
+    workspace:
+      clean: all
     environment:
-      name: 'lst-bench-periodic-reporting'
+      name: 'lst-bench-github'
       resourceType: VirtualMachine
       tags: 'client'
     strategy:
@@ -94,77 +113,93 @@ stages:
           steps:
           - bash: |
               echo 'Deploy LST-Bench client'
-              mkdir -p ~/lst-bench-0.1
-              cp -rf $(Pipeline.Workspace)/drop/* ~/lst-bench-0.1/
-              chmod +x ~/lst-bench-0.1/launcher.sh
+              sudo apt install -y openjdk-11-jdk
+              mkdir -p ~/lst-bench-0.1-SNAPSHOT
+              cp -rf $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/* ~/lst-bench-0.1-SNAPSHOT/
+              chmod +x ~/lst-bench-0.1-SNAPSHOT/launcher.sh
 
 # Run LST-Bench
-- stage: test
-  jobs:
-  - ${{ each lst in parameters.lsts }}:
+- ${{ each lst in parameters.lsts }}:
+  - stage: test_${{ lst.mode }}_${{ lst.table_format }}
+    jobs:
     - deployment: StartEngine
-      displayName: 'Additional Commands'
+      displayName: "Starting Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
       environment:
-        name: 'lst-bench-periodic-reporting'
+        name: 'lst-bench-github'
         resourceType: VirtualMachine
         tags: 'head'
       strategy:
         runOnce:
           deploy:
             steps:
+            - download: none
             - bash: |
                 cd ~/spark-3.3.1
-                ./$(lst).sh
-                ./dist-exec.sh spark-3.3.1 $(lst).sh
-                ./start-cluster.sh $(lst)
+                ./${{ lst.table_format }}-${{ lst.version }}.sh
+                ./dist-exec.sh spark-3.3.1 ${{ lst.table_format }}-${{ lst.version }}.sh
+                ./stop-cluster.sh && ./start-cluster.sh ${{ lst.table_format }}
+                my_ip=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+                echo spark master host - ${my_ip}
+                echo "##vso[task.setvariable variable=spark_master_host;isoutput=true]${my_ip}"
+              name: engine_start_step
     - deployment: RunExperiment
-      displayName: 'Additional Commands'
+      dependsOn: StartEngine
+      displayName: "Running Experiment (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
       environment:
-        name: 'lst-bench-periodic-reporting'
+        name: 'lst-bench-github'
         resourceType: VirtualMachine
         tags: 'client'
+      variables:
+        SPARK_MASTER_HOST: $[ dependencies.StartEngine.outputs['engine_start_step.spark_master_host'] ]
       strategy:
         runOnce:
           deploy:
             steps:
+            - download: none
             - bash: |
-                cd ~/lst-bench-0.1
+                cd ~/lst-bench-0.1-SNAPSHOT
                 echo "WP1 - Longevity"
-                ./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-                              -e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-                              -t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-                              -l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-                              -w .azure-pipelines/scripts/spark-3.3.1/wp1_longevity-$(lst).yaml
+                export EXP_NAME="wp1_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
+                ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
+                              -e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
+                              -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
+                              -l run/spark-3.3.1/config/tpcds/library.yaml \
+                              -w run/spark-3.3.1/config/tpcds/wp1_longevity-${{ lst.table_format }}-${{ lst.version }}.yaml
                 echo "WP2 - Resilience"
-                ./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-                              -e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-                              -t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-                              -l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-                              -w .azure-pipelines/scripts/spark-3.3.1/wp2_resilience-$(lst).yaml
+                export EXP_NAME="wp2_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
+                ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
+                              -e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
+                              -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
+                              -l run/spark-3.3.1/config/tpcds/library.yaml \
+                              -w run/spark-3.3.1/config/tpcds/wp2_resilience-${{ lst.table_format }}-${{ lst.version }}.yaml
                 echo "WP3 - Read/Write Concurrency"
-                ./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-                              -e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-                              -t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-                              -l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-                              -w .azure-pipelines/scripts/spark-3.3.1/wp3_rw_concurrency-$(lst).yaml
+                export EXP_NAME="wp3_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
+                ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
+                              -e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
+                              -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
+                              -l run/spark-3.3.1/config/tpcds/library.yaml \
+                              -w run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-${{ lst.table_format }}-${{ lst.version }}.yaml
                 echo "WP4 - Time Travel"
-                ./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-                              -e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-                              -t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-                              -l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-                              -w .azure-pipelines/scripts/spark-3.3.1/wp3_time_travel-$(lst).yaml
+                export EXP_NAME="wp4_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
+                ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
+                              -e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
+                              -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
+                              -l run/spark-3.3.1/config/tpcds/library.yaml \
+                              -w run/spark-3.3.1/config/tpcds/wp4_time_travel-${{ lst.table_format }}-${{ lst.version }}.yaml
     - deployment: StopEngine
-      displayName: 'Additional Commands'
+      dependsOn: RunExperiment
+      displayName: "Stopping Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
       environment:
-        name: 'lst-bench-periodic-reporting'
+        name: 'lst-bench-github'
         resourceType: VirtualMachine
         tags: 'head'
       strategy:
         runOnce:
           deploy:
             steps:
+            - download: none
             - bash: |
                 cd ~/spark-3.3.1
-                ./stop-cluster.sh $(lst)
-                ./cleanup-$(lst).sh
-                ./dist-exec.sh spark-3.3.1 cleanup-$(lst).sh
+                ./stop-cluster.sh
+                ./cleanup-${{ lst.table_format }}-${{ lst.version }}.sh
+                ./dist-exec.sh spark-3.3.1 cleanup-${{ lst.table_format }}-${{ lst.version }}.sh