Skip to content

Commit

Permalink
Update Azure Pipeline config file for Spark
Browse files Browse the repository at this point in the history
  • Loading branch information
jcamachor committed Jan 31, 2024
1 parent 703146b commit 3a37eb8
Showing 1 changed file with 88 additions and 51 deletions.
139 changes: 88 additions & 51 deletions run/spark-3.3.1/azure-pipelines/run-lst-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,29 @@ trigger: none
variables:
MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
MAVEN_OPTS: '-ntp -B -Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
EXP_SCALE_FACTOR: 100
EXP_MACHINE: 'Standard_E8s_v5'
EXP_CLUSTER_SIZE: 4

parameters:
- name: lsts
type: object
default:
- "delta-2.2.0"
- "iceberg-1.1.0"
- "hudi-0.12.2"
- table_format: "delta"
version: "2.2.0"
mode: "cow"
- table_format: "iceberg"
version: "1.1.0"
mode: "cow"
- table_format: "iceberg"
version: "1.1.0"
mode: "mor"
- table_format: "hudi"
version: "0.12.2"
mode: "cow"
- table_format: "hudi"
version: "0.12.2"
mode: "mor"

stages:
# Build LST-Bench and create artifact to deploy to target VM
Expand Down Expand Up @@ -56,25 +71,27 @@ stages:
- task: PublishPipelineArtifact@1
inputs:
targetPath: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/'
artifact: drop
artifact: lst-bench-0.1-SNAPSHOT

# Set up engine and deploy LST-Bench
- stage: deploy
jobs:
- deployment: EngineDeploy
displayName: 'Deploying engine'
workspace:
clean: all
environment:
name: 'lst-bench-periodic-reporting'
name: 'lst-bench-github'
resourceType: VirtualMachine
tags: 'head'
resourceName: 'lst-bench-head'
strategy:
runOnce:
deploy:
steps:
- bash: |
echo 'Deploy engine'
mkdir -p ~/spark-3.3.1
cp $(Pipeline.Workspace)/drop/.azure-pipelines/scripts/spark-3.3.1/* ~/spark-3.3.1/
cp $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/run/spark-3.3.1/azure-pipelines/sh/* ~/spark-3.3.1/
cd ~/spark-3.3.1
chmod +x ./*
my_ip=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
Expand All @@ -84,87 +101,107 @@ stages:
./dist-exec.sh spark-3.3.1 init.sh ${my_ip} "$(lstbenchdatasets_shared_key)"
- deployment: ClientDeploy
displayName: 'Deploying LST-Bench client'
workspace:
clean: all
environment:
name: 'lst-bench-periodic-reporting'
name: 'lst-bench-github'
resourceType: VirtualMachine
tags: 'client'
resourceName: 'lst-bench-client'
strategy:
runOnce:
deploy:
steps:
- bash: |
echo 'Deploy LST-Bench client'
mkdir -p ~/lst-bench-0.1
cp -rf $(Pipeline.Workspace)/drop/* ~/lst-bench-0.1/
chmod +x ~/lst-bench-0.1/launcher.sh
sudo apt install -y openjdk-11-jdk
mkdir -p ~/lst-bench-0.1-SNAPSHOT
cp -rf $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/* ~/lst-bench-0.1-SNAPSHOT/
chmod +x ~/lst-bench-0.1-SNAPSHOT/launcher.sh
# Run LST-Bench
- stage: test
jobs:
- ${{ each lst in parameters.lsts }}:
- ${{ each lst in parameters.lsts }}:
- stage: test_${{ lst.mode }}_${{ lst.table_format }}
jobs:
- deployment: StartEngine
displayName: 'Additional Commands'
displayName: "Starting Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
environment:
name: 'lst-bench-periodic-reporting'
name: 'lst-bench-github'
resourceType: VirtualMachine
tags: 'head'
resourceName: 'lst-bench-head'
variables:
process.clean: false
strategy:
runOnce:
deploy:
steps:
- download: none
- bash: |
cd ~/spark-3.3.1
./$(lst).sh
./dist-exec.sh spark-3.3.1 $(lst).sh
./start-cluster.sh $(lst)
./${{ lst.table_format }}-${{ lst.version }}.sh
./dist-exec.sh spark-3.3.1 ${{ lst.table_format }}-${{ lst.version }}.sh
./stop-cluster.sh && ./start-cluster.sh ${{ lst.table_format }}
my_ip=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
echo "##vso[task.setvariable variable=spark_head_node;isOutput=true]${my_ip}"
name: engine_start_step
- deployment: RunExperiment
displayName: 'Additional Commands'
dependsOn: StartEngine
displayName: "Running Experiment (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
environment:
name: 'lst-bench-periodic-reporting'
name: 'lst-bench-github'
resourceType: VirtualMachine
tags: 'client'
resourceName: 'lst-bench-client'
variables:
spark_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.spark_head_node'] ]
timeoutInMinutes: 0
strategy:
runOnce:
deploy:
steps:
- download: none
- bash: |
cd ~/lst-bench-0.1
cd ~/lst-bench-0.1-SNAPSHOT
echo "WP1 - Longevity"
./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-w .azure-pipelines/scripts/spark-3.3.1/wp1_longevity-$(lst).yaml
export EXP_NAME="wp1_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
-e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
-t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
-l run/spark-3.3.1/config/tpcds/library.yaml \
-w run/spark-3.3.1/config/tpcds/wp1_longevity-${{ lst.table_format }}-${{ lst.version }}.yaml
echo "WP2 - Resilience"
./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-w .azure-pipelines/scripts/spark-3.3.1/wp2_resilience-$(lst).yaml
export EXP_NAME="wp2_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
-e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
-t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
-l run/spark-3.3.1/config/tpcds/library.yaml \
-w run/spark-3.3.1/config/tpcds/wp2_resilience-${{ lst.table_format }}-${{ lst.version }}.yaml
echo "WP3 - Read/Write Concurrency"
./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-w .azure-pipelines/scripts/spark-3.3.1/wp3_rw_concurrency-$(lst).yaml
export EXP_NAME="wp3_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
-e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
-t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
-l run/spark-3.3.1/config/tpcds/library.yaml \
-w run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-${{ lst.table_format }}-${{ lst.version }}.yaml
echo "WP4 - Time Travel"
./launcher.sh -c .azure-pipelines/scripts/spark-3.3.1/connections_config.yaml \
-e .azure-pipelines/scripts/spark-3.3.1/experiment_config_spark_ib_cow_w3m_sf_1000.yaml \
-t .azure-pipelines/scripts/spark-3.3.1/telemetry_config.yaml \
-l .azure-pipelines/scripts/spark-3.3.1/library.yaml \
-w .azure-pipelines/scripts/spark-3.3.1/wp3_time_travel-$(lst).yaml
export EXP_NAME="wp4_${{ lst.mode }}_${{ lst.table_format }}_sf_${EXP_SCALE_FACTOR}"
./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \
-e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \
-t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \
-l run/spark-3.3.1/config/tpcds/library.yaml \
-w run/spark-3.3.1/config/tpcds/wp4_time_travel-${{ lst.table_format }}-${{ lst.version }}.yaml
- deployment: StopEngine
displayName: 'Additional Commands'
dependsOn: RunExperiment
displayName: "Stopping Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})"
environment:
name: 'lst-bench-periodic-reporting'
name: 'lst-bench-github'
resourceType: VirtualMachine
tags: 'head'
resourceName: 'lst-bench-head'
strategy:
runOnce:
deploy:
steps:
- download: none
- bash: |
cd ~/spark-3.3.1
./stop-cluster.sh $(lst)
./cleanup-$(lst).sh
./dist-exec.sh spark-3.3.1 cleanup-$(lst).sh
./stop-cluster.sh
./cleanup-${{ lst.table_format }}-${{ lst.version }}.sh
./dist-exec.sh spark-3.3.1 cleanup-${{ lst.table_format }}-${{ lst.version }}.sh

0 comments on commit 3a37eb8

Please sign in to comment.