diff --git a/run/README.md b/run/README.md
index 14e74f6c..aa45d28c 100644
--- a/run/README.md
+++ b/run/README.md
@@ -24,9 +24,9 @@ This folder contains configurations for running LST-Bench on various systems as
- [x] Delta Lake 2.2.0
- [x] Apache Hudi 0.12.2
- [x] Apache Iceberg 1.1.0
-- [ ] Trino 420
- - [ ] Delta Lake
- - [ ] Apache Iceberg
+- [x] Trino 420
+ - [x] Delta Lake
+ - [x] Apache Iceberg
## Folder Structure
While the folder for each engine may have a slightly different structure, they generally contain the following:
diff --git a/run/spark-3.3.1/azure-pipelines/README.md b/run/spark-3.3.1/azure-pipelines/README.md
index 4488a12e..6e5e17de 100644
--- a/run/spark-3.3.1/azure-pipelines/README.md
+++ b/run/spark-3.3.1/azure-pipelines/README.md
@@ -32,10 +32,11 @@ This directory comprises the necessary tooling for executing LST-Bench on Apache
- A VMSS cluster, that will serve as the Spark worker nodes, within the same VNet as the head node.
- An Azure Storage Account accessible by both the VMSS and head node.
- An Azure SQL Database (or SQL Server flavored RDBMS) that will be running Hive Metastore.
- The Hive Metastore schema for version 2.3.0 should already be installed in the instance.
+ The Hive Metastore schema for version 2.3.9 should already be installed in the instance.
- Prior to running the pipeline, several variables need definition in your Azure Pipeline:
- `data_storage_account`: Name of the Azure Blob Storage account where the source data for the experiment is stored.
- `data_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the source data for the experiment is stored.
+ - `data_storage_account_container`: Name of the container in the Azure Blob Storage account where the source data for the experiment is stored.
- `hms_jdbc_driver`: JDBC driver for the Hive Metastore.
- `hms_jdbc_url`: JDBC URL for the Hive Metastore.
- `hms_jdbc_user`: Username for the Hive Metastore.
diff --git a/run/spark-3.3.1/azure-pipelines/sh/hms.sh b/run/spark-3.3.1/azure-pipelines/sh/hms.sh
index 4d78cbff..531b57a0 100755
--- a/run/spark-3.3.1/azure-pipelines/sh/hms.sh
+++ b/run/spark-3.3.1/azure-pipelines/sh/hms.sh
@@ -5,6 +5,10 @@ if [ "$#" -ne 7 ]; then
fi
source env.sh
+if [ -z "${USER}" ]; then
+ echo "ERROR: USER is not defined."
+ exit 1
+fi
if [ -z "${HADOOP_HOME}" ]; then
echo "ERROR: HADOOP_HOME is not defined."
exit 1
diff --git a/run/trino-420/azure-pipelines/README.md b/run/trino-420/azure-pipelines/README.md
new file mode 100644
index 00000000..17330460
--- /dev/null
+++ b/run/trino-420/azure-pipelines/README.md
@@ -0,0 +1,56 @@
+
+
+# Azure Pipelines Deployment for LST-Bench on Trino 420
+This directory comprises the necessary tooling for executing LST-Bench on Trino 420 with different LSTs using Azure Pipelines. The included tooling consists of:
+- `run-lst-bench.yml`:
+ An Azure Pipelines script designed to deploy Trino and execute LST-Bench.
+- `sh/`:
+ A directory containing shell scripts and engine configuration files supporting the deployment of Trino and the execution of experiments.
+- `config/`:
+ A directory with the LST-Bench configuration files needed to execute the experiments that are part of the results.
+
+## Prerequisites
+- Automation for deploying the infrastructure in Azure to run LST-Bench is not implemented. As a result, the Azure Pipeline script expects the following setup:
+ - A VM named 'lst-bench-client' connected to the pipeline environment to run the LST-Bench client.
+ - A VM named 'lst-bench-head' to run the coordinator node of the Trino cluster, also connected to the pipeline environment.
+ - A VMSS cluster that will serve as the Trino worker nodes, located within the same VNet as the coordinator node.
+ - An Azure Storage Account accessible by both the VMSS and coordinator node.
+ - An Azure SQL Database (or another SQL Server-flavored RDBMS) that will run the Hive Metastore.
+ The Hive Metastore schema for version 2.3.9 should already be installed in the instance.
+- Prior to running the pipeline, several variables need to be defined in your Azure Pipeline:
+ - `data_storage_account`: Name of the Azure Blob Storage account where the source data for the experiment is stored.
+ - `data_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the source data for the experiment is stored.
+ - `data_storage_account_container`: Name of the container in the Azure Blob Storage account where the source data for the experiment is stored.
+ - `hms_jdbc_driver`: JDBC driver for the Hive Metastore.
+ - `hms_jdbc_url`: JDBC URL for the Hive Metastore.
+ - `hms_jdbc_user`: Username for the Hive Metastore.
+ - `hms_jdbc_password` (secret): Password for the Hive Metastore.
+ - `hms_storage_account`: Name of the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog (can be the same as the data_storage_account).
+ - `hms_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog.
+ - `hms_storage_account_container`: Name of the container in the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog.
+- The LSTs to run experiments on can be selected via input parameters for the pipeline, either in the Azure Pipelines YAML file or from the Web UI.
+ Default values are assigned to these parameters (see the excerpt below).
+ Parameters also include the experiment scale factor, machine type, and cluster size.
+ Note that these parameters are not used to deploy the data or the infrastructure, as that process is not automated in the pipeline.
+ Instead, they are recorded in the experiment telemetry so that results can be properly categorized and visualized later on.
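+
+A condensed excerpt of the parameter block in `run-lst-bench.yml` is shown below for reference; it restates the defaults already defined in the pipeline file and illustrates the values that can be overridden when queuing a run:
+
+```yaml
+parameters:
+- name: lsts                # LSTs (and write modes) to benchmark
+  type: object
+  default:
+  - table_format: "delta"
+    mode: "cow"
+  - table_format: "iceberg"
+    mode: "mor"
+- name: workloads           # LST-Bench workload packages to execute
+  type: object
+  default: ["wp1_longevity", "wp2_resilience", "wp3_rw_concurrency"]
+- name: exp_scale_factor    # recorded in telemetry, not used for deployment
+  type: number
+  default: 100
+- name: exp_machine
+  type: string
+  default: "Standard_E8s_v5"
+- name: exp_cluster_size
+  type: number
+  default: 8
+```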
+
+## Additional Notes
+For workloads within LST-Bench that include an `optimize` step, particularly those involving partitioned tables, a [custom task](/docs/workloads.md#custom-tasks) is used to execute this step.
+The task divides the `optimize` operation into batches, each containing up to 100 partitions (this batch size is configurable).
+This approach was implemented to address issues where Trino would crash if the optimization step were applied to the entire table.
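+
+Concretely, the batched task is wired up as follows (a condensed excerpt of `run/trino-420/config/tpcds/library.yaml` and of the workload files under `run/trino-420/config/tpcds/`); the batch size is passed to the custom executor through `task_executor_arguments`:
+
+```yaml
+# Excerpt of run/trino-420/config/tpcds/library.yaml:
+# optimize_split delegates execution to a dependent task executor.
+task_templates:
+- id: optimize_split
+  custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
+  files:
+  - run/trino-420/scripts/tpcds/optimize/o_call_center.sql
+  # ... one optimize script per benchmark table ...
+---
+# Excerpt of a workload file (e.g., wp2_resilience.yaml):
+# each optimize phase invokes the template in batches of up to 100 partitions.
+phases:
+- id: optimize_1
+  sessions:
+  - tasks:
+    - template_id: optimize_split
+      task_executor_arguments:
+        dependent_task_batch_size: 100
+```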
diff --git a/run/trino-420/azure-pipelines/config/connections_config.yaml b/run/trino-420/azure-pipelines/config/connections_config.yaml
new file mode 100644
index 00000000..e73d8451
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/connections_config.yaml
@@ -0,0 +1,9 @@
+# Description: Connections Configuration
+---
+version: 1
+connections:
+- id: trino_0
+ driver: io.trino.jdbc.TrinoDriver
+ url: jdbc:trino://${TRINO_MASTER_HOST}:8080
+ username: admin
+ password: ''
diff --git a/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml b/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml
new file mode 100644
index 00000000..ef8dbd07
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml
@@ -0,0 +1,30 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: "${EXP_NAME}"
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+metadata:
+ system: trino
+ system_version: 420
+ table_format: delta
+ table_format_version: undefined
+ scale_factor: "${EXP_SCALE_FACTOR}"
+ mode: cow
+ machine: "${EXP_MACHINE}"
+ cluster_size: "${EXP_CLUSTER_SIZE}"
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values:
+ external_catalog: hive
+ external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
+ external_table_format: textfile
+ external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
+ external_options_suffix: ''
+ external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1"
+ catalog: delta
+ database: "delta_${EXP_NAME}"
+ table_format: delta
+ data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/delta/sf_${EXP_SCALE_FACTOR}/'
+ options_suffix: ''
+ tblproperties_suffix: ''
+ partition_spec_keyword: 'partitioned_by'
diff --git a/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml b/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml
new file mode 100644
index 00000000..502f7fa8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml
@@ -0,0 +1,30 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: "${EXP_NAME}"
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+metadata:
+ system: trino
+ system_version: 420
+ table_format: iceberg
+ table_format_version: undefined
+ scale_factor: "${EXP_SCALE_FACTOR}"
+ mode: mor
+ machine: "${EXP_MACHINE}"
+ cluster_size: "${EXP_CLUSTER_SIZE}"
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values:
+ external_catalog: hive
+ external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
+ external_table_format: textfile
+ external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
+ external_options_suffix: ''
+ external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1"
+ catalog: iceberg
+ database: "iceberg_${EXP_NAME}"
+ table_format: iceberg
+ data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/'
+ options_suffix: ''
+ tblproperties_suffix: ''
+ partition_spec_keyword: 'partitioning'
diff --git a/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml b/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml
new file mode 100644
index 00000000..b164151b
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml
@@ -0,0 +1,20 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: setup_experiment
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+metadata:
+ system: trino
+ system_version: 420
+ scale_factor: "${EXP_SCALE_FACTOR}"
+ machine: "${EXP_MACHINE}"
+ cluster_size: "${EXP_CLUSTER_SIZE}"
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values:
+ external_catalog: hive
+ external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
+ external_table_format: textfile
+ external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
+ external_options_suffix: ''
+ external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1"
diff --git a/run/trino-420/azure-pipelines/config/telemetry_config.yaml b/run/trino-420/azure-pipelines/config/telemetry_config.yaml
new file mode 100644
index 00000000..baa9e63c
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/telemetry_config.yaml
@@ -0,0 +1,13 @@
+# Description: Telemetry Configuration
+---
+version: 1
+connection:
+ id: duckdb_0
+ driver: org.duckdb.DuckDBDriver
+ url: jdbc:duckdb:./telemetry-trino-420
+execute_ddl: true
+ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql'
+insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql'
+# The following parameter values will be used to replace the variables in the logging statements.
+parameter_values:
+ data_path: ''
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/run-lst-bench.yml b/run/trino-420/azure-pipelines/run-lst-bench.yml
new file mode 100644
index 00000000..6e5e7871
--- /dev/null
+++ b/run/trino-420/azure-pipelines/run-lst-bench.yml
@@ -0,0 +1,249 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+trigger: none
+
+parameters:
+- name: lsts
+ type: object
+ default:
+ - table_format: "delta"
+ mode: "cow"
+ - table_format: "iceberg"
+ mode: "mor"
+- name: workloads
+ type: object
+ default:
+ - "wp1_longevity"
+ - "wp2_resilience"
+ - "wp3_rw_concurrency"
+- name: exp_scale_factor
+ type: number
+ default: 100
+- name: exp_machine
+ type: string
+ default: "Standard_E8s_v5"
+- name: exp_cluster_size
+ type: number
+ default: 8
+
+variables:
+ MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
+ MAVEN_OPTS: '-ntp -B -Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
+ EXP_SCALE_FACTOR: ${{ parameters.exp_scale_factor }}
+ EXP_MACHINE: ${{ parameters.exp_machine }}
+ EXP_CLUSTER_SIZE: ${{ parameters.exp_cluster_size }}
+
+stages:
+# Build LST-Bench and create artifact to deploy to target VM
+- stage: build
+ jobs:
+ - job: Build
+ pool:
+ vmImage: 'ubuntu-latest'
+ steps:
+ - task: Cache@2
+ displayName: Cache Maven local repo
+ inputs:
+ key: 'maven | "$(Agent.OS)" | **/pom.xml'
+ restoreKeys: |
+ maven | "$(Agent.OS)"
+ maven
+ path: $(MAVEN_CACHE_FOLDER)
+ - task: Maven@4
+ inputs:
+ mavenPomFile: 'pom.xml'
+ options: $(MAVEN_OPTS)
+ javaHomeOption: 'JDKVersion'
+ jdkVersionOption: '1.11'
+ publishJUnitResults: false
+ goals: 'package -DskipTests -Ptrino-jdbc'
+ - task: CopyFiles@2
+ displayName: 'Copy Artifacts to: $(TargetFolder)'
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)'
+ TargetFolder: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/'
+ - task: PublishPipelineArtifact@1
+ inputs:
+ targetPath: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/'
+ artifact: lst-bench-0.1-SNAPSHOT
+
+# Set up engine and deploy LST-Bench
+- stage: deploy
+ jobs:
+ - deployment: EngineDeploy
+ displayName: 'Deploying engine'
+ workspace:
+ clean: all
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-head'
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - bash: |
+ echo 'Deploy engine'
+ mkdir -p ~/trino-420
+ cp $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/run/trino-420/azure-pipelines/sh/* ~/trino-420/
+ cd ~/trino-420
+ chmod +x ./*
+ trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+ ./init.sh 'true' "${trino_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)"
+ ./hms.sh "$(hms_jdbc_driver)" "$(hms_jdbc_url)" "$(hms_jdbc_user)" "$(hms_jdbc_password)" "$(hms_storage_account)" "$(hms_storage_account_shared_key)" "$(hms_storage_account_container)"
+ ./dist-setup.sh
+ ./dist-exec.sh trino-420 init.sh 'false' "${trino_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)"
+ - deployment: ClientDeploy
+ displayName: 'Deploying LST-Bench client'
+ workspace:
+ clean: all
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-client'
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - bash: |
+ echo 'Deploy LST-Bench client'
+ sudo apt install -y openjdk-11-jdk
+ mkdir -p ~/lst-bench-0.1-SNAPSHOT
+ cp -rf $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/* ~/lst-bench-0.1-SNAPSHOT/
+ chmod +x ~/lst-bench-0.1-SNAPSHOT/launcher.sh
+
+# Run LST-Bench (setup external tables)
+- stage: setup_experiment
+ jobs:
+ - deployment: StartEngine
+ displayName: "Starting Engine"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-head'
+ variables:
+ process.clean: false
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/trino-420
+ ./stop-cluster.sh && ./start-cluster.sh
+ sleep 20
+ trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+ echo "##vso[task.setvariable variable=trino_head_node;isOutput=true]${trino_head_node}"
+ name: engine_start_step
+ - deployment: RunSetupExperiment
+ dependsOn: StartEngine
+ displayName: "Setup Experiment"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-client'
+ variables:
+ trino_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.trino_head_node'] ]
+ timeoutInMinutes: 0
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/lst-bench-0.1-SNAPSHOT
+ ./launcher.sh -c run/trino-420/azure-pipelines/config/connections_config.yaml \
+ -e run/trino-420/azure-pipelines/config/setup_experiment_config.yaml \
+ -t run/trino-420/azure-pipelines/config/telemetry_config.yaml \
+ -l run/trino-420/config/tpcds/library.yaml \
+ -w run/trino-420/config/tpcds/setup_experiment.yaml
+ - deployment: StopEngine
+ dependsOn: RunSetupExperiment
+ displayName: "Stopping Engine"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-head'
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/trino-420
+ ./stop-cluster.sh
+
+# Run LST-Bench
+- ${{ each lst in parameters.lsts }}:
+ - ${{ each workload in parameters.workloads }}:
+ - stage: test_${{ lst.mode }}_${{ lst.table_format }}_${{ workload }}
+ jobs:
+ - deployment: StartEngine
+ displayName: "Starting Engine (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-head'
+ variables:
+ process.clean: false
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/trino-420
+ ./stop-cluster.sh && ./start-cluster.sh ${{ lst.table_format }}
+ sleep 20
+ trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+ echo "##vso[task.setvariable variable=trino_head_node;isOutput=true]${trino_head_node}"
+ name: engine_start_step
+ - deployment: RunExperiment
+ dependsOn: StartEngine
+ displayName: "Running Experiment (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-client'
+ variables:
+ trino_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.trino_head_node'] ]
+ timeoutInMinutes: 0
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/lst-bench-0.1-SNAPSHOT
+ echo "${{ workload }}"
+ export EXP_NAME="${{ workload }}"
+ ./launcher.sh -c run/trino-420/azure-pipelines/config/connections_config.yaml \
+ -e run/trino-420/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}.yaml \
+ -t run/trino-420/azure-pipelines/config/telemetry_config.yaml \
+ -l run/trino-420/config/tpcds/library.yaml \
+ -w run/trino-420/config/tpcds/${{ workload }}.yaml
+ - deployment: StopEngine
+ dependsOn: RunExperiment
+ displayName: "Stopping Engine (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})"
+ environment:
+ name: 'lst-bench-github'
+ resourceType: VirtualMachine
+ resourceName: 'lst-bench-head'
+ strategy:
+ runOnce:
+ deploy:
+ steps:
+ - download: none
+ - bash: |
+ cd ~/trino-420
+ ./stop-cluster.sh
diff --git a/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template b/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template
new file mode 100644
index 00000000..a09f60f8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template
@@ -0,0 +1,5 @@
+coordinator=true
+node-scheduler.include-coordinator=false
+http-server.http.port=8080
+discovery.uri=http://$TRINO_MASTER_HOST:8080
+query.max-memory=378GB
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/delta.properties.template b/run/trino-420/azure-pipelines/sh/delta.properties.template
new file mode 100644
index 00000000..efd7cef8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/delta.properties.template
@@ -0,0 +1,6 @@
+connector.name=delta_lake
+hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083
+hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT}
+hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY}
+delta.max-partitions-per-writer=2500
+delta.compression-codec=GZIP
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/dist-exec.sh b/run/trino-420/azure-pipelines/sh/dist-exec.sh
new file mode 100755
index 00000000..bd7c3ca6
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/dist-exec.sh
@@ -0,0 +1,18 @@
+#!/bin/bash -e
+source env.sh
+if [ -z "${HOSTS}" ]; then
+ echo "ERROR: HOSTS is not defined."
+ exit 1
+fi
+
+if [ "$#" -lt 2 ]; then
+ echo "Error: Please provide at least two input parameters."
+ exit 1
+fi
+deploy_dir=$1
+script_file=$2
+
+# Create the deployment directory on every node, copy the configuration templates
+# and the target script over, then run the script remotely with the remaining arguments.
+for node in $HOSTS ; do ssh -t $node "mkdir -p ~/$deploy_dir" ; done
+for node in $HOSTS ; do scp *.template $node:~/$deploy_dir ; done
+for node in $HOSTS ; do scp $script_file $node:~/$deploy_dir ; done
+for node in $HOSTS ; do ssh -t $node "cd ~/$deploy_dir && chmod +x ./$script_file && ./$script_file ${@:3}" ; done
diff --git a/run/trino-420/azure-pipelines/sh/dist-setup.sh b/run/trino-420/azure-pipelines/sh/dist-setup.sh
new file mode 100755
index 00000000..99edc490
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/dist-setup.sh
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+if [ -z "${HOME}" ]; then
+ echo "ERROR: HOME is not defined."
+ exit 1
+fi
+
+# Install packages
+sudo apt install -y net-tools nmap
+
+# Configure hosts
+my_ip=$(/sbin/ifconfig eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+ip_range=${my_ip%.*}.*
+nmap -sn $ip_range | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | grep -v "^$my_ip$" > $HOME/hostiplist
+
+export HOSTS=$(<$HOME/hostiplist)
+
+for node in $HOSTS ; do scp ~/.ssh/id_rsa* $node:~/.ssh/ ; done
+
+# Push to environment
+echo "export HOSTS=\"${HOSTS}\"" >> env.sh
+echo "source $(pwd)/env.sh" >> ~/.bashrc
diff --git a/run/trino-420/azure-pipelines/sh/hive-site.xml.template b/run/trino-420/azure-pipelines/sh/hive-site.xml.template
new file mode 100644
index 00000000..0e79ed7b
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hive-site.xml.template
@@ -0,0 +1,36 @@
+<configuration>
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>${HMS_JDBC_URL}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionDriverName</name>
+    <value>${HMS_JDBC_DRIVER}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionUserName</name>
+    <value>${HMS_JDBC_USER}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionPassword</name>
+    <value>${HMS_JDBC_PASSWORD}</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>abfss://${HMS_STORAGE_ACCOUNT_CONTAINER}@${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net/hive/warehouse</value>
+  </property>
+
+  <property>
+    <name>fs.azure.account.auth.type.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name>
+    <value>SharedKey</value>
+  </property>
+
+  <property>
+    <name>fs.azure.account.key.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name>
+    <value>${HMS_STORAGE_ACCOUNT_SHARED_KEY}</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/hive.properties.template b/run/trino-420/azure-pipelines/sh/hive.properties.template
new file mode 100644
index 00000000..c052a1c8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hive.properties.template
@@ -0,0 +1,5 @@
+connector.name=hive
+hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083
+hive.allow-drop-table=true
+hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT}
+hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY}
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/hms.sh b/run/trino-420/azure-pipelines/sh/hms.sh
new file mode 100755
index 00000000..907c2bad
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hms.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -e
+if [ "$#" -ne 7 ]; then
+ echo "Usage: $0 HMS_JDBC_DRIVER HMS_JDBC_URL HMS_JDBC_USER HMS_JDBC_PASSWORD HMS_STORAGE_ACCOUNT HMS_STORAGE_ACCOUNT_SHARED_KEY HMS_STORAGE_ACCOUNT_CONTAINER"
+ exit 1
+fi
+
+if [ -z "${USER}" ]; then
+ echo "ERROR: USER is not defined."
+ exit 1
+fi
+
+export HMS_JDBC_DRIVER=$1
+export HMS_JDBC_URL=$2
+export HMS_JDBC_USER=$3
+export HMS_JDBC_PASSWORD=$4
+export HMS_STORAGE_ACCOUNT=$5
+export HMS_STORAGE_ACCOUNT_SHARED_KEY=$6
+export HMS_STORAGE_ACCOUNT_CONTAINER=$7
+export HADOOP_HOME=/home/$USER/hadoop
+export HIVE_HOME=/home/$USER/hive
+
+# Install Hadoop
+rm -rf hadoop-3.3.1
+wget -nv -N https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
+tar -xzf hadoop-3.3.1.tar.gz
+ln -sf $(pwd)/hadoop-3.3.1 $HADOOP_HOME
+
+# Install Hive (needed for HMS)
+rm -rf apache-hive-2.3.9-bin
+wget -nv -N https://downloads.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz
+tar -xzf apache-hive-2.3.9-bin.tar.gz
+ln -sf $(pwd)/apache-hive-2.3.9-bin $HIVE_HOME
+
+# Configure HMS
+envsubst < "hive-site.xml.template" > "$HIVE_HOME/conf/hive-site.xml"
+
+# Copy Azure dependencies to Hive classpath
+cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-azure* $HIVE_HOME/lib/
+
+# Install MSSQL driver
+wget -nv -N https://repo1.maven.org/maven2/com/microsoft/sqlserver/mssql-jdbc/6.2.1.jre8/mssql-jdbc-6.2.1.jre8.jar
+ln -sf $(pwd)/mssql-jdbc-6.2.1.jre8.jar $HIVE_HOME/lib/mssql-jdbc.jar
+
+# Push to environment
+echo "export HADOOP_HOME=${HADOOP_HOME}
+export HIVE_HOME=${HIVE_HOME}" >> env.sh
+echo "source $(pwd)/env.sh" >> ~/.bashrc
diff --git a/run/trino-420/azure-pipelines/sh/iceberg.properties.template b/run/trino-420/azure-pipelines/sh/iceberg.properties.template
new file mode 100644
index 00000000..d29aa613
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/iceberg.properties.template
@@ -0,0 +1,7 @@
+connector.name=iceberg
+hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083
+hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT}
+hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY}
+iceberg.max-partitions-per-writer=2500
+iceberg.file-format=PARQUET
+iceberg.compression-codec=GZIP
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/init.sh b/run/trino-420/azure-pipelines/sh/init.sh
new file mode 100755
index 00000000..711cf653
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/init.sh
@@ -0,0 +1,66 @@
+#!/bin/bash -e
+if [ "$#" -ne 4 ]; then
+ echo "Usage: $0 IS_COORDINATOR TRINO_MASTER_HOST DATA_STORAGE_ACCOUNT DATA_STORAGE_ACCOUNT_SHARED_KEY"
+ exit 1
+fi
+
+if [ -z "${USER}" ]; then
+ echo "ERROR: USER is not defined."
+ exit 1
+fi
+
+export HOSTNAME=$(hostname)
+export IS_COORDINATOR=$1
+export TRINO_MASTER_HOST=$2
+export TRINO_HOME=/home/$USER/trino
+export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
+export DATA_STORAGE_ACCOUNT=$3
+export DATA_STORAGE_ACCOUNT_SHARED_KEY=$4
+
+# Update dependencies and install packages
+sudo apt update -y
+sudo apt install -y openjdk-17-jdk python wget
+
+# Install Trino
+rm -rf trino-server-420
+wget -nv -N https://repo1.maven.org/maven2/io/trino/trino-server/420/trino-server-420.tar.gz
+tar -xzf trino-server-420.tar.gz
+ln -sf $(pwd)/trino-server-420 $TRINO_HOME
+
+# Configure Trino
+sudo mkdir -p /mnt/local_resource/
+sudo mkdir -p /mnt/local_resource/trino_data/
+sudo chown $USER:$USER /mnt/local_resource/trino_data
+sudo mkdir -p /mnt/local_resource/trino_tmp/
+sudo chown $USER:$USER /mnt/local_resource/trino_tmp
+
+sudo mkdir ${TRINO_HOME}/etc
+sudo chown $USER:$USER ${TRINO_HOME}/etc/
+envsubst < "node.properties.template" > "$TRINO_HOME/etc/node.properties"
+envsubst < "jvm.config.template" > "$TRINO_HOME/etc/jvm.config"
+if [ "$IS_COORDINATOR" = true ]; then
+ envsubst < "coordinator-config.properties.template" > "$TRINO_HOME/etc/config.properties"
+elif [ "$IS_COORDINATOR" = false ]; then
+ envsubst < "worker-config.properties.template" > "$TRINO_HOME/etc/config.properties"
+else
+ echo "IS_COORDINATOR must be either 'true' or 'false'"
+ exit 1
+fi
+envsubst < "log.properties.template" > "$TRINO_HOME/etc/log.properties"
+
+# Configure Trino connectors
+sudo mkdir ${TRINO_HOME}/etc/catalog
+sudo chown $USER:$USER ${TRINO_HOME}/etc/catalog/
+envsubst < "hive.properties.template" > "$TRINO_HOME/etc/catalog/hive.properties"
+envsubst < "delta.properties.template" > "$TRINO_HOME/etc/catalog/delta.properties"
+envsubst < "iceberg.properties.template" > "$TRINO_HOME/etc/catalog/iceberg.properties"
+
+# Set Linux OS limits required for Trino
+echo "trino soft nofile 131072
+trino hard nofile 131072" | sudo tee -a /etc/security/limits.conf
+
+# Push to environment
+echo "export TRINO_HOME=${TRINO_HOME}
+export JAVA_HOME=${JAVA_HOME}
+export PATH=${PATH}:${TRINO_HOME}/bin" >> env.sh
+echo "source $(pwd)/env.sh" >> ~/.bashrc
diff --git a/run/trino-420/azure-pipelines/sh/jvm.config.template b/run/trino-420/azure-pipelines/sh/jvm.config.template
new file mode 100644
index 00000000..4a852a53
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/jvm.config.template
@@ -0,0 +1,18 @@
+-server
+-Xmx54G
+-XX:InitialRAMPercentage=80
+-XX:MaxRAMPercentage=80
+-XX:G1HeapRegionSize=32M
+-XX:+ExplicitGCInvokesConcurrent
+-XX:+ExitOnOutOfMemoryError
+-XX:+HeapDumpOnOutOfMemoryError
+-XX:-OmitStackTraceInFastThrow
+-XX:ReservedCodeCacheSize=512M
+-XX:PerMethodRecompilationCutoff=10000
+-XX:PerBytecodeRecompilationCutoff=10000
+-Djdk.attach.allowAttachSelf=true
+-Djdk.nio.maxCachedBufferSize=2000000
+-XX:+UnlockDiagnosticVMOptions
+-XX:+UseAESCTRIntrinsics
+# Disable Preventive GC for performance reasons (JDK-8293861)
+-XX:-G1UsePreventiveGC
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/log.properties.template b/run/trino-420/azure-pipelines/sh/log.properties.template
new file mode 100644
index 00000000..d253499a
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/log.properties.template
@@ -0,0 +1 @@
+io.trino=INFO
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/node.properties.template b/run/trino-420/azure-pipelines/sh/node.properties.template
new file mode 100644
index 00000000..a2a65764
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/node.properties.template
@@ -0,0 +1,3 @@
+node.environment=production
+node.id=$HOSTNAME
+node.data-dir=/mnt/local_resource/trino_data
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/start-cluster.sh b/run/trino-420/azure-pipelines/sh/start-cluster.sh
new file mode 100755
index 00000000..0ccbf698
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/start-cluster.sh
@@ -0,0 +1,25 @@
+#!/bin/bash -e
+source env.sh
+if [ -z "${HIVE_HOME}" ]; then
+ echo "ERROR: HIVE_HOME is not defined."
+ exit 1
+fi
+if [ -z "${TRINO_HOME}" ]; then
+ echo "ERROR: TRINO_HOME is not defined."
+ exit 1
+fi
+if [ -z "${HOSTS}" ]; then
+ echo "ERROR: HOSTS is not defined."
+ exit 1
+fi
+
+echo "Starting HMS"
+cd $HIVE_HOME
+./bin/hive --service metastore &
+
+echo "Starting Trino cluster"
+echo "Starting Trino coordinator"
+cd $TRINO_HOME
+./bin/launcher start
+echo "Starting Trino workers"
+for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher start" ; done
diff --git a/run/trino-420/azure-pipelines/sh/stop-cluster.sh b/run/trino-420/azure-pipelines/sh/stop-cluster.sh
new file mode 100755
index 00000000..000acd27
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/stop-cluster.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+source env.sh
+if [ -z "${HOSTS}" ]; then
+ echo "ERROR: HOSTS is not defined."
+ exit 1
+fi
+if [ -z "${TRINO_HOME}" ]; then
+ echo "ERROR: TRINO_HOME is not defined."
+ exit 1
+fi
+
+echo "Stopping Trino cluster"
+echo "Stopping Trino workers"
+for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher stop" ; done
+echo "Stopping Trino coordinator"
+cd $TRINO_HOME
+./bin/launcher stop
+
+echo "Stopping HMS"
+pkill -f "metastore" || true
diff --git a/run/trino-420/azure-pipelines/sh/worker-config.properties.template b/run/trino-420/azure-pipelines/sh/worker-config.properties.template
new file mode 100644
index 00000000..96a4c6fd
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/worker-config.properties.template
@@ -0,0 +1,3 @@
+coordinator=false
+http-server.http.port=8080
+discovery.uri=http://$TRINO_MASTER_HOST:8080
\ No newline at end of file
diff --git a/run/trino-420/config/tpcds/library.yaml b/run/trino-420/config/tpcds/library.yaml
index 38804842..5fd99c01 100644
--- a/run/trino-420/config/tpcds/library.yaml
+++ b/run/trino-420/config/tpcds/library.yaml
@@ -241,6 +241,7 @@ task_templates:
# Execution of optimize on all benchmark tables but splitting optimization
# of partitioned tables into batches by relying on dependent task executor
- id: optimize_split
+ custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
files:
- run/trino-420/scripts/tpcds/optimize/o_call_center.sql
- run/trino-420/scripts/tpcds/optimize/o_catalog_page.sql
diff --git a/run/trino-420/config/tpcds/setup_experiment.yaml b/run/trino-420/config/tpcds/setup_experiment.yaml
new file mode 100644
index 00000000..d122811f
--- /dev/null
+++ b/run/trino-420/config/tpcds/setup_experiment.yaml
@@ -0,0 +1,32 @@
+# Description: Setup experiment
+---
+version: 1
+id: setup_experiment
+phases:
+- id: setup
+ sessions:
+ - tasks:
+ - template_id: setup
+- id: setup_data_maintenance
+ sessions:
+ - tasks:
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
+ - template_id: setup_data_maintenance
diff --git a/run/trino-420/config/tpcds/wp1_longevity.yaml b/run/trino-420/config/tpcds/wp1_longevity.yaml
index 936169fd..1a200455 100644
--- a/run/trino-420/config/tpcds/wp1_longevity.yaml
+++ b/run/trino-420/config/tpcds/wp1_longevity.yaml
@@ -3,23 +3,6 @@
version: 1
id: wp1_longevity
phases:
-- id: setup
- sessions:
- - tasks:
- - template_id: setup
-- id: setup_data_maintenance
- sessions:
- - tasks:
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- id: init
sessions:
- tasks:
diff --git a/run/trino-420/config/tpcds/wp2_resilience.yaml b/run/trino-420/config/tpcds/wp2_resilience.yaml
index d95edafe..58b0bd7a 100644
--- a/run/trino-420/config/tpcds/wp2_resilience.yaml
+++ b/run/trino-420/config/tpcds/wp2_resilience.yaml
@@ -3,25 +3,6 @@
version: 1
id: wp2_resilience
phases:
-- id: setup
- sessions:
- - tasks:
- - template_id: setup
-- id: setup_data_maintenance
- sessions:
- - tasks:
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- id: init
sessions:
- tasks:
@@ -46,7 +27,9 @@ phases:
- id: optimize_1
sessions:
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
- id: single_user_2o
sessions:
- tasks:
@@ -65,7 +48,9 @@ phases:
- id: optimize_2
sessions:
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
- id: single_user_3o
sessions:
- tasks:
@@ -86,7 +71,9 @@ phases:
- id: optimize_3
sessions:
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
- id: single_user_4o
sessions:
- tasks:
diff --git a/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml b/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml
index a3ff60db..26afcbb5 100644
--- a/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml
+++ b/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml
@@ -3,25 +3,6 @@
version: 1
id: wp3_rw_concurrency
phases:
-- id: setup
- sessions:
- - tasks:
- - template_id: setup
-- id: setup_data_maintenance
- sessions:
- - tasks:
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- - template_id: setup_data_maintenance
- id: init
sessions:
- tasks:
@@ -42,7 +23,9 @@ phases:
- tasks:
- template_id: single_user
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
- id: single_user_2o_data_maintenance_2
sessions:
- tasks:
@@ -57,7 +40,9 @@ phases:
- tasks:
- template_id: single_user
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
- id: single_user_3o_data_maintenance_3
sessions:
- tasks:
@@ -74,4 +59,6 @@ phases:
- tasks:
- template_id: single_user
- tasks:
- - template_id: optimize
+ - template_id: optimize_split
+ task_executor_arguments:
+ dependent_task_batch_size: 100
diff --git a/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb b/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb
new file mode 100644
index 00000000..a23b7a21
Binary files /dev/null and b/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb differ
diff --git a/src/main/java/com/microsoft/lst_bench/client/QueryResult.java b/src/main/java/com/microsoft/lst_bench/client/QueryResult.java
index 5c49f935..540642b0 100644
--- a/src/main/java/com/microsoft/lst_bench/client/QueryResult.java
+++ b/src/main/java/com/microsoft/lst_bench/client/QueryResult.java
@@ -31,11 +31,13 @@
*/
public class QueryResult {
+ private final Map<String, Integer> columnTypes;
private final Map<String, List<Object>> valueList;
private static final String RESULT = "Result";
public QueryResult() {
+ this.columnTypes = new HashMap<>();
this.valueList = new HashMap<>();
}
@@ -45,6 +47,7 @@ public void populate(ResultSet rs) throws SQLException {
ResultSetMetaData rsmd = rs.getMetaData();
for (int j = 1; j <= rsmd.getColumnCount(); j++) {
+ columnTypes.put(rsmd.getColumnName(j), rsmd.getColumnType(j));
valueList.put(rsmd.getColumnName(j), new ArrayList<>());
}
@@ -65,24 +68,30 @@ public Integer getValueListSize() {
}
public boolean containsEmptyResultColumnOnly() {
- if (valueList.keySet().size() == 1
+ return valueList.keySet().size() == 1
&& valueList.containsKey(RESULT)
- && valueList.get(RESULT).size() == 0) {
- return true;
- }
- return false;
+ && valueList.get(RESULT).isEmpty();
}
public Map<String, String> getStringMappings(int listMin, int listMax) {
Map<String, String> result = new HashMap<>();
- for (String key : this.valueList.keySet()) {
+ for (String key : valueList.keySet()) {
List<String> localList =
- this.valueList.get(key).subList(listMin, listMax).stream()
- .map(s -> s.toString())
+ valueList.get(key).subList(listMin, listMax).stream()
+ .map(Object::toString)
.collect(Collectors.toUnmodifiableList());
- // TODO: This assumes a VARCHAR type (or implicit casting by the engine),
- // we should probably handle it more generically using data types.
- result.put(key, "'" + String.join("','", localList) + "'");
+ switch (columnTypes.get(key)) {
+ case java.sql.Types.BIGINT:
+ case java.sql.Types.INTEGER:
+ case java.sql.Types.SMALLINT:
+ case java.sql.Types.TINYINT:
+ result.put(key, String.join(",", localList));
+ break;
+ default:
+ // Currently assumes String for all other types.
+ // TODO: Better handling and testing of data types across engines.
+ result.put(key, "'" + String.join("','", localList) + "'");
+ }
}
return result;
}