Other Azure Pipelines configuration files
jcamachor committed Jan 31, 2024
1 parent ea58e24 commit eda36aa
Showing 7 changed files with 165 additions and 0 deletions.
@@ -0,0 +1,7 @@
# Description: Connections Configuration
---
version: 1
connections:
- id: spark_0
  driver: org.apache.hive.jdbc.HiveDriver
  url: jdbc:hive2://${SPARK_MASTER_HOST}:10000
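This file defines the JDBC connection the benchmark client uses to reach the Spark Thrift Server on the default HiveServer2 port 10000, with the host injected from the SPARK_MASTER_HOST environment variable; workload tasks reference it by its id, spark_0. Purely as an illustration of what travels over this connection (not part of the commit), the harness issues plain Spark SQL statements such as:

-- Illustrative smoke test over the spark_0 connection; not from the workload scripts
SHOW DATABASES;
SELECT 1;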
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: delta
  table_format_version: 2.2.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: delta
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/delta/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ''
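The external_* values describe the source TPC-DS data as CSV tables, while catalog, database, table_format, and data_path describe the Delta tables the benchmark creates from them; the *_suffix values are appended verbatim to the OPTIONS and TBLPROPERTIES clauses of the generated statements. A hypothetical pair of templates and one expansion (the actual workload scripts are not part of this commit; table and property names are placeholders) could look like:

-- Illustrative template for registering a source CSV table:
CREATE TABLE ${external_catalog}.${external_database}.store_sales
USING ${external_table_format}
OPTIONS (path '${external_data_path}store_sales'${external_options_suffix});

-- Illustrative template for the table under test:
CREATE TABLE ${catalog}.${database}.store_sales
USING ${table_format}
LOCATION '${data_path}store_sales'
TBLPROPERTIES ('created-by'='lst-bench'${tblproperties_suffix})
AS SELECT * FROM ${external_catalog}.${external_database}.store_sales;

-- Second template expanded with the Delta values above
-- (EXP_SCALE_FACTOR=100 assumed; EXP_NAME left symbolic):
CREATE TABLE spark_catalog.${EXP_NAME}.store_sales
USING delta
LOCATION 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/delta/sf_100/store_sales'
TBLPROPERTIES ('created-by'='lst-bench')
AS SELECT * FROM spark_catalog.external_tpcds_sf_100.store_sales;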
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: hudi
  table_format_version: 0.12.2
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: hudi
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "type"="cow"'
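Relative to the Delta configuration, only table_format, table_format_version, data_path, and tblproperties_suffix change. The suffix pins the Hudi table type to copy-on-write, where updates rewrite whole data files so reads need no merging. Appended to a template's TBLPROPERTIES clause, it would yield something like (the leading primaryKey property is illustrative, not from the workload scripts):

-- Resulting clause once the Hudi copy-on-write suffix is appended:
TBLPROPERTIES ('primaryKey'='ss_item_sk', "type"="cow")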
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: iceberg
  table_format_version: 1.1.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: iceberg
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="copy-on-write", "write.update.mode"="copy-on-write", "write.merge.mode"="copy-on-write"'
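For Iceberg, the suffix both forces format-version 2 (the spec revision that adds row-level deletes) and sets all three row-level operations to copy-on-write, matching the mode: cow metadata above. Once appended, the clause would read (the leading property is a placeholder):

-- Resulting clause once the Iceberg copy-on-write suffix is appended:
TBLPROPERTIES ('created-by'='lst-bench', "format-version"="2",
  "write.delete.mode"="copy-on-write", "write.update.mode"="copy-on-write",
  "write.merge.mode"="copy-on-write")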
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: hudi
  table_format_version: 0.12.2
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: mor
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: hudi
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "type"="mor"'
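This is the merge-on-read counterpart of the Hudi configuration above, identical except for mode: mor and the suffix: updates land in log files that are merged with base files at query time, trading read latency for cheaper writes. The appended clause becomes (placeholder leading property; compare "type"="cow" earlier):

-- Resulting clause once the Hudi merge-on-read suffix is appended:
TBLPROPERTIES ('primaryKey'='ss_item_sk', "type"="mor")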
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: iceberg
  table_format_version: 1.1.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: mor
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: iceberg
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="merge-on-read", "write.update.mode"="merge-on-read", "write.merge.mode"="merge-on-read"'
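Likewise the merge-on-read counterpart of the Iceberg configuration: deletes and updates are written as v2 delete files and reconciled at scan time rather than by rewriting data files. The appended clause becomes (placeholder leading property):

-- Resulting clause once the Iceberg merge-on-read suffix is appended:
TBLPROPERTIES ('created-by'='lst-bench', "format-version"="2",
  "write.delete.mode"="merge-on-read", "write.update.mode"="merge-on-read",
  "write.merge.mode"="merge-on-read")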
13 changes: 13 additions & 0 deletions run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml
@@ -0,0 +1,13 @@
# Description: Telemetry Configuration
---
version: 1
connection:
  id: duckdb_0
  driver: org.duckdb.DuckDBDriver
  url: jdbc:duckdb:./telemetry-spark-3.3.1
execute_ddl: true
ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql'
insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql'
# The following parameter values will be used to replace the variables in the logging statements.
parameter_values:
  data_path: ''
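Telemetry is written to a local DuckDB database file (./telemetry-spark-3.3.1): with execute_ddl: true the referenced ddl.sql runs first to create the schema, and insert.sql appends events, with data_path substituted into those logging statements. As a sketch of how one might inspect a finished run (table and column names are assumptions for illustration, not the contents of the referenced scripts):

-- Hypothetical query against the DuckDB telemetry file; schema names assumed
SELECT event_id, event_type, event_status, event_start_time, event_end_time
FROM experiment_telemetry
ORDER BY event_start_time;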
