Other Azure Pipelines configuration files
jcamachor committed Jan 31, 2024
1 parent ea58e24 commit eda36aa
Showing 7 changed files with 165 additions and 0 deletions.
@@ -0,0 +1,7 @@
# Description: Connections Configuration
---
version: 1
connections:
- id: spark_0
  driver: org.apache.hive.jdbc.HiveDriver
  url: jdbc:hive2://${SPARK_MASTER_HOST}:10000
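This file defines the JDBC connection the benchmark client uses to reach the Spark Thrift Server on the default HiveServer2 port 10000, with the host injected from the SPARK_MASTER_HOST environment variable; workload tasks reference it by its id, spark_0. Purely as an illustration of what travels over this connection (not part of the commit), the harness issues plain Spark SQL statements such as:

-- Illustrative smoke test over the spark_0 connection; not from the workload scripts
SHOW DATABASES;
SELECT 1;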
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: delta
  table_format_version: 2.2.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: delta
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/delta/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ''
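The external_* values describe the source TPC-DS data as CSV tables, while catalog, database, table_format, and data_path describe the Delta tables the benchmark creates from them; the *_suffix values are appended verbatim to the OPTIONS and TBLPROPERTIES clauses of the generated statements. A hypothetical pair of templates and one expansion (the actual workload scripts are not part of this commit; table and property names are placeholders) could look like:

-- Illustrative template for registering a source CSV table:
CREATE TABLE ${external_catalog}.${external_database}.store_sales
USING ${external_table_format}
OPTIONS (path '${external_data_path}store_sales'${external_options_suffix});

-- Illustrative template for the table under test:
CREATE TABLE ${catalog}.${database}.store_sales
USING ${table_format}
LOCATION '${data_path}store_sales'
TBLPROPERTIES ('created-by'='lst-bench'${tblproperties_suffix})
AS SELECT * FROM ${external_catalog}.${external_database}.store_sales;

-- Second template expanded with the Delta values above
-- (EXP_SCALE_FACTOR=100 assumed; EXP_NAME left symbolic):
CREATE TABLE spark_catalog.${EXP_NAME}.store_sales
USING delta
LOCATION 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/delta/sf_100/store_sales'
TBLPROPERTIES ('created-by'='lst-bench')
AS SELECT * FROM spark_catalog.external_tpcds_sf_100.store_sales;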
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: hudi
  table_format_version: 0.12.2
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: hudi
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "type"="cow"'
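Relative to the Delta configuration, only table_format, table_format_version, data_path, and tblproperties_suffix change. The suffix pins the Hudi table type to copy-on-write, where updates rewrite whole data files so reads need no merging. Appended to a template's TBLPROPERTIES clause, it would yield something like (the leading primaryKey property is illustrative, not from the workload scripts):

-- Resulting clause once the Hudi copy-on-write suffix is appended:
TBLPROPERTIES ('primaryKey'='ss_item_sk', "type"="cow")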
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: iceberg
  table_format_version: 1.1.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: cow
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: iceberg
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="copy-on-write", "write.update.mode"="copy-on-write", "write.merge.mode"="copy-on-write"'
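For Iceberg, the suffix both forces format-version 2 (the spec revision that adds row-level deletes) and sets all three row-level operations to copy-on-write, matching the mode: cow metadata above. Once appended, the clause would read (the leading property is a placeholder):

-- Resulting clause once the Iceberg copy-on-write suffix is appended:
TBLPROPERTIES ('created-by'='lst-bench', "format-version"="2",
  "write.delete.mode"="copy-on-write", "write.update.mode"="copy-on-write",
  "write.merge.mode"="copy-on-write")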
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: hudi
  table_format_version: 0.12.2
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: mor
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: hudi
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "type"="mor"'
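This is the merge-on-read counterpart of the Hudi configuration above, identical except for mode: mor and the suffix: updates land in log files that are merged with base files at query time, trading read latency for cheaper writes. The appended clause becomes (placeholder leading property; compare "type"="cow" earlier):

-- Resulting clause once the Hudi merge-on-read suffix is appended:
TBLPROPERTIES ('primaryKey'='ss_item_sk', "type"="mor")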
@@ -0,0 +1,29 @@
# Description: Experiment Configuration
---
version: 1
id: "${EXP_NAME}"
repetitions: 1
# Metadata accepts any key-value pairs that we want to register together with the experiment run.
metadata:
  system: spark
  system_version: 3.3.1
  table_format: iceberg
  table_format_version: 1.1.0
  scale_factor: "${EXP_SCALE_FACTOR}"
  mode: mor
  machine: "${EXP_MACHINE}"
  cluster_size: "${EXP_CLUSTER_SIZE}"
# The following parameter values will be used to replace the variables in the workload statements.
parameter_values:
  external_catalog: spark_catalog
  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
  external_table_format: csv
  external_data_path: "abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
  external_options_suffix: ',header="true"'
  external_tblproperties_suffix: ''
  catalog: spark_catalog
  database: "${EXP_NAME}"
  table_format: iceberg
  data_path: 'abfss://azure-pipelines@lstbenchdatasets.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/'
  options_suffix: ''
  tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="merge-on-read", "write.update.mode"="merge-on-read", "write.merge.mode"="merge-on-read"'
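Likewise the merge-on-read counterpart of the Iceberg configuration: deletes and updates are written as v2 delete files and reconciled at scan time rather than by rewriting data files. The appended clause becomes (placeholder leading property):

-- Resulting clause once the Iceberg merge-on-read suffix is appended:
TBLPROPERTIES ('created-by'='lst-bench', "format-version"="2",
  "write.delete.mode"="merge-on-read", "write.update.mode"="merge-on-read",
  "write.merge.mode"="merge-on-read")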
13 changes: 13 additions & 0 deletions run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml
@@ -0,0 +1,13 @@
# Description: Telemetry Configuration
---
version: 1
connection:
  id: duckdb_0
  driver: org.duckdb.DuckDBDriver
  url: jdbc:duckdb:./telemetry-spark-3.3.1
execute_ddl: true
ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql'
insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql'
# The following parameter values will be used to replace the variables in the logging statements.
parameter_values:
  data_path: ''
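Telemetry is written to a local DuckDB database file (./telemetry-spark-3.3.1): with execute_ddl: true the referenced ddl.sql runs first to create the schema, and insert.sql appends events, with data_path substituted into those logging statements. As a sketch of how one might inspect a finished run (table and column names are assumptions for illustration, not the contents of the referenced scripts):

-- Hypothetical query against the DuckDB telemetry file; schema names assumed
SELECT event_id, event_type, event_status, event_start_time, event_end_time
FROM experiment_telemetry
ORDER BY event_start_time;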
