diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml index 449bd621..27cc5f59 100644 --- a/.github/dependabot.yaml +++ b/.github/dependabot.yaml @@ -26,3 +26,7 @@ updates: interval: "daily" assignees: - "jcamachor" + groups: + log4j: + patterns: + - "org.apache.logging.log4j*" diff --git a/.github/workflows/maven.yaml b/.github/workflows/maven.yaml index edeb8938..1d388aa1 100644 --- a/.github/workflows/maven.yaml +++ b/.github/workflows/maven.yaml @@ -33,7 +33,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: ${{ env.JAVA_VERSION }} distribution: 'temurin' @@ -55,7 +55,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: ${{ env.JAVA_VERSION }} distribution: 'temurin' @@ -77,7 +77,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: ${{ env.JAVA_VERSION }} distribution: 'temurin' diff --git a/.github/workflows/webapp-deploy.yaml b/.github/workflows/webapp-deploy.yaml new file mode 100644 index 00000000..d9c2601a --- /dev/null +++ b/.github/workflows/webapp-deploy.yaml @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# More GitHub Actions for Azure: https://github.com/Azure/actions +# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions + +name: Build and deploy Web App - lst-bench + +on: + push: + paths: + - metrics/** + - run/** + branches: + - main + workflow_dispatch: + +permissions: + contents: read + +env: + AZURE_WEBAPP_NAME: lst-bench + WORKING_DIRECTORY: './metrics/app' + STARTUP_COMMAND: 'python -m streamlit run main.py --server.port 8000 --server.address 0.0.0.0 --client.toolbarMode minimal' + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: 'Set up Python version' + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: 'Create and start virtual environment' + working-directory: ${{ env.WORKING_DIRECTORY }} + run: | + python -m venv venv + source venv/bin/activate + + - name: 'Install dependencies' + working-directory: ${{ env.WORKING_DIRECTORY }} + run: | + pip install setuptools + pip install -r requirements.txt + + - name: 'Copy .duckdb files from ./run/' + run: | + find ./run -type f -name "*.duckdb" -exec cp {} ${{ env.WORKING_DIRECTORY }} \; + + - name: Zip artifact for deployment + working-directory: ${{ env.WORKING_DIRECTORY }} + run: zip release.zip ./* -r + + - name: Upload artifact for deployment jobs + uses: actions/upload-artifact@v4 + with: + name: python-app + path: | + ${{ env.WORKING_DIRECTORY }}/release.zip + + deploy: + runs-on: ubuntu-latest + needs: build + environment: + name: 'webapp-deploy' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + permissions: + id-token: write #This is required for requesting the JWT + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: python-app + path: . + + - name: Unzip artifact for deployment + run: unzip release.zip + + - name: Login to Azure + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_33D9610570044F3DA4CC10BFC44E822C }} + tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_B6D8A47890014FE18CA30533FD44F9A3 }} + subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_16D6B2652AF543ADA2A0CBFD17A3F482 }} + + - name: 'Deploy to Azure Web App' + uses: azure/webapps-deploy@v3 + id: deploy-to-webapp + with: + app-name: ${{ env.AZURE_WEBAPP_NAME }} + startup-command: ${{ env.STARTUP_COMMAND }} diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index bee554f5..d7e1e172 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,9 @@ bin/ # Local configuration file (sdk path, etc) local.properties +# Python +*.pyc + # Others *~ .DS_Store diff --git a/CITATION.bib b/CITATION.bib new file mode 100644 index 00000000..045fab5d --- /dev/null +++ b/CITATION.bib @@ -0,0 +1,11 @@ +@article{2024lstbench, + author = {Jes\'{u}s Camacho-Rodr\'{\i}guez and Ashvin Agrawal and Anja Gruenheid and + Ashit Gosalia and Cristian Petculescu and Josep Aguilar-Saborit and + Avrilia Floratou and Carlo Curino and Raghu Ramakrishnan}, + title = {LST-Bench: Benchmarking Log-Structured Tables in the Cloud}, + journal = {Proc. ACM Manag. 
Data},
+  volume  = {2},
+  number  = {1},
+  year    = {2024},
+  url     = {https://doi.org/10.1145/3639314}
+}
diff --git a/README.md b/README.md
index d053376f..06353391 100644
--- a/README.md
+++ b/README.md
@@ -69,8 +69,8 @@ usage: ./launcher.sh -c <arg> -e <arg> -l <arg> -t <arg> -w <arg>
                                  connections config details
  -e,--experiment-config <arg>    [required] Path to input file containing
                                  the experiment config details
- -l,--task-library <arg>         [required] Path to input file containing
-                                 the library with task templates
+ -l,--library <arg>              [required] Path to input file containing
+                                 the library with templates
  -t,--input-log-config <arg>     [required] Path to input file containing
                                  the telemetry gathering config details
  -w,--workload <arg>             [required] Path to input file containing
@@ -111,22 +111,7 @@ The LST-Bench code is organized into two modules:
 The Python module performs data processing, analysis, and visualization to facilitate a deeper understanding of the experimental results.
 
 ### LST-Bench Concepts
-In LST-Bench, the following concepts are used to define and organize SQL workloads:
-
-- **Task**: A task is a collection of SQL statements grouped together in a sequence of files. Each file represents a step or subtask within the overall task.
-
-- **Session**: A session refers to a sequence of tasks. It represents a logical unit of work or a user session.
-
-- **Phase**: A phase consists of multiple concurrent sessions that need to be completed before proceeding to the next phase. Phases help simulate concurrent workload scenarios.
-
-- **Workload**: A workload is a sequence of phases, defining the complete set of tasks, sessions, and phases to be executed during the evaluation.
-
-In LST-Bench, tasks are generated using task templates predefined in the task library.
-LST-Bench includes a default task library that encompasses tasks derived from the TPC-DS benchmark, along with workload definitions representing the original TPC-DS and multiple workload patterns. These resources can be located [here](src/main/resources/config/tpcds).
-
-Although LST-Bench provides this set of tasks and workload patterns,
-users have the flexibility to incorporate additional task templates or even create a completely new task library to model specific scenarios.
-This flexible model allows for the easy creation of diverse SQL workloads for evaluation purposes without the need to modify the application itself.
+In LST-Bench, we use a small set of concepts to define and organize SQL workloads, designed to maximize flexibility and reusability across workloads. For detailed information, refer to our [documentation](docs/workloads.md).
 
 ### Telemetry and Metrics Processor
 LST-Bench captures execution telemetry during workload execution at multiple levels, including per experiment, phase, session, task, file, and statement.
@@ -142,17 +127,21 @@ Alternatively, if the LST-Bench [Metrics Processor](metrics) is used, you can si
 The processor will then analyze and visualize the results, providing a streamlined solution for result analysis and visualization.
 
 ## Documentation
-For more details about LST-Bench, please refer to the accompanying [technical report](https://arxiv.org/pdf/2305.01120):
+For more details about LST-Bench, please refer to the accompanying [technical report](https://arxiv.org/pdf/2305.01120).
+
+If you are writing an academic paper, you can cite this work as:
 
 ```bibtex
-@article{2023lstbench,
-  title={LST-Bench: Benchmarking Log-Structured Tables in the Cloud},
-  author={Jesús Camacho-Rodríguez and Ashvin Agrawal and Anja Gruenheid and
+@article{2024lstbench,
+  author  = {Jes\'{u}s Camacho-Rodr\'{\i}guez and Ashvin Agrawal and Anja Gruenheid and
              Ashit Gosalia and Cristian Petculescu and Josep Aguilar-Saborit and
              Avrilia Floratou and Carlo Curino and Raghu Ramakrishnan},
-  year={2023},
-  journal={arXiv preprint arXiv:2305.01120},
-  url={https://arxiv.org/abs/2305.01120},
+  title   = {LST-Bench: Benchmarking Log-Structured Tables in the Cloud},
+  journal = {Proc. ACM Manag. Data},
+  volume  = {2},
+  number  = {1},
+  year    = {2024},
+  url     = {https://doi.org/10.1145/3639314}
 }
 ```
diff --git a/docs/workloads.md b/docs/workloads.md
new file mode 100644
index 00000000..7e5b98fd
--- /dev/null
+++ b/docs/workloads.md
@@ -0,0 +1,198 @@
+# Definition of Workloads in LST-Bench
+
+In LST-Bench, workloads are defined using a YAML configuration file.
+The schema for this configuration file can be accessed [here](/src/main/resources/schemas/workload.json).
+To facilitate the reusability of various workload components, LST-Bench enables the definition of a [library](/src/main/resources/schemas/library.json).
+This library should be supplied during benchmark execution, allowing workloads to reference entities predefined within it.
+
+LST-Bench already includes libraries encompassing tasks derived from the TPC-DS and TPC-H benchmarks, along with workload definitions that represent the original workloads specified by these standards.
+Additionally, multiple other workload patterns that are especially relevant for evaluating LSTs are also included.
+These resources can be found [here](/src/main/resources/config).
+
+While LST-Bench provides predefined libraries and workload definitions, users have the flexibility to incorporate additional task templates or even create an entirely new task library to model specific scenarios.
+This flexible model allows for the easy creation of diverse SQL workloads for evaluation purposes without necessitating modifications to the LST-Bench application itself.
+
+Next, we discuss the concepts used to define and organize SQL workloads in LST-Bench.
+
+## Task
+
+A _task_ in LST-Bench is a collection of SQL statements grouped together in a sequence of files.
+Each file represents a step or subtask within the overall task.
+
+A task consists of two parts: a _template_ that defines the key elements of the task and an _instance_ that specifies arguments to instantiate the aforementioned template.
+
+### Task Template
+
+A task template is typically defined in the library and referenced by its identifier. For example, the following snippet shows a sample template defined as part of the `task_templates` block in a library:
+
+```yaml
+task_templates:
+# Execution of a few TPC-DS queries (possibly at a previous point in time)
+- id: single_user_simple
+  files:
+  - src/main/resources/scripts/tpcds/single_user/spark/query1.sql
+  - src/main/resources/scripts/tpcds/single_user/spark/query2.sql
+  - src/main/resources/scripts/tpcds/single_user/spark/query3.sql
+  permutation_orders_path: src/main/resources/auxiliary/tpcds/single_user/permutation_orders/
+  supports_time_travel: true
+```
+
+This template, with identifier `single_user_simple`, comprises three SQL files, each containing a single query.
+Note that there are a couple of additional optional properties defined, namely `permutation_orders_path` and `supports_time_travel`.
+Further information about these and other optional properties, including their descriptions, can be found [here](/src/main/resources/schemas/template.json).
+
+### Task Instance
+
+If we want to instantiate a task based on the `single_user_simple` template defined above as part of an input library, we can do so as follows:
+
+```yaml
+- template_id: single_user_simple
+  permute_order: true
+```
+
+A task template can also be inlined within the task instantiation if we do not want to rely on a library:
+
+```yaml
+- files:
+  - src/main/resources/scripts/tpcds/single_user/spark/query1.sql
+  - src/main/resources/scripts/tpcds/single_user/spark/query2.sql
+  - src/main/resources/scripts/tpcds/single_user/spark/query3.sql
+  permutation_orders_path: src/main/resources/auxiliary/tpcds/single_user/permutation_orders/
+  permute_order: true
+```
+
+Note that a task instance can also declare parameters that modify its behavior, e.g., `permute_order`.
+These optional task parameters and their descriptions can be found [here](/src/main/resources/schemas/instance.json).
+
+### Custom Tasks
+
+### Prepared Tasks
+
+A _prepared task_ is a [task instantiation](#task-instance) defined as part of the input library. For example, we can define a prepared task in the `prepared_tasks` block in the library as follows:
+
+```yaml
+prepared_tasks:
+- id: prepared_single_user_simple
+  template_id: single_user_simple
+  permute_order: true
+```
+
+Then, from the workload file, we can reference the _prepared task_ declared in the library, facilitating reuse and readability of the workload file:
+
+```yaml
+- prepared_task_id: prepared_single_user_simple
+```
+
+## Tasks Sequences
+
+A _tasks sequence_ refers to a sequence of tasks that can be combined as part of a [session](#session) definition. We can define a sequence in the `prepared_tasks_sequences` block in the library as follows:
+
+```yaml
+prepared_tasks_sequences:
+- id: seq_two_single_user_simple
+  tasks:
+  - prepared_task_id: prepared_single_user_simple
+  - prepared_task_id: prepared_single_user_simple
+```
+
+Once that is done, we can reference the sequence in the workload file:
+
+```yaml
+- prepared_tasks_sequence_id: seq_two_single_user_simple
+```
+
+## Session
+
+A _session_ refers to a sequence of tasks representing a logical unit of work or a user session, aligning with the concept of sessions in JDBC.
+
+For instance, the following snippet illustrates a sample session executing the `single_user_simple` task template declared earlier:
+
+```yaml
+  - tasks:
+    - template_id: single_user_simple
+      permute_order: true
+```
+
+If no endpoint is specified, the session is associated with a default target endpoint, i.e., the first connection declared in the connections YAML config file.
+
+Moreover, a session can also be defined using tasks sequences. For instance, the following snippet demonstrates a sample session that combines two sequences: one previously defined in the library and another inlined sequence using a `tasks` block. This session will execute a total of four `single_user_simple` tasks.
+ +```yaml + - tasks_sequences: + - prepared_tasks_sequence_id: seq_two_single_user_simple + - tasks: + - template_id: single_user_simple + permute_order: true + - template_id: single_user_simple + permute_order: true +``` + +## Phase + +A _phase_ consists of multiple concurrent sessions that need to be completed before proceeding to the next phase. Phases help simulate concurrent workload scenarios. + +Consider the following snippet demonstrating a phase executing four sessions concurrently across two different target endpoints: + +```yaml +- id: throughput_simple + sessions: + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 0 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 0 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 1 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 1 +``` + +Note that users are required to provide a unique identifier for each phase in their workload. + +## Workload + +A _workload_ is a sequence of phases, defining the complete set of tasks, sessions, and phases to be executed during the evaluation. + +To illustrate, here is the definition of a workload that executes warm-up phases in two different engines and subsequently executes a throughput phase: + +```yaml +id: my_first_workload +phases: +- id: warm_up_0 + sessions: + - tasks: + - template_id: single_user_simple + target_endpoint: 0 +- id: warm_up_1 + sessions: + - tasks: + - template_id: single_user_simple + target_endpoint: 1 +- id: throughput_simple + sessions: + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 0 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 0 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 1 + - tasks: + - template_id: single_user_simple + permute_order: true + target_endpoint: 1 +``` diff --git a/launcher.sh b/launcher.sh index c6f26a00..128d742b 100755 --- a/launcher.sh +++ b/launcher.sh @@ -1,7 +1,8 @@ #!/bin/bash -e # Constants -LST_BENCH_HOME="$PWD" +# Directory of the script +export LST_BENCH_HOME="$(dirname "$(readlink -f "$0")")" LST_BENCH_CLASSPATH="$LST_BENCH_HOME/target/*:$LST_BENCH_HOME/target/lib/*:$LST_BENCH_HOME/target/classes/*" -java -cp $LST_BENCH_CLASSPATH com.microsoft.lst_bench.Driver "$@" +java -cp ${LST_BENCH_CLASSPATH} com.microsoft.lst_bench.Driver "$@" diff --git a/metrics/app/README.md b/metrics/app/README.md new file mode 100755 index 00000000..86884783 --- /dev/null +++ b/metrics/app/README.md @@ -0,0 +1,75 @@ + + +# LST-Bench: Dashboard + +**Dashboard:** [https://lst-bench.azurewebsites.net/](https://lst-bench.azurewebsites.net/) + +The LST-Bench dashboard is powered by [Streamlit](https://github.com/streamlit/streamlit) and deployed to Azure App Service through GitHub actions. +You can find the deployment workflow [here](/.github/workflows/webapp-deploy.yaml). +The dashboard provides insights derived from metrics collected from LST-Bench, including execution time and degradation rate. + +## Evaluation +The results displayed in the dashboard are specific to the versions and configurations we tested. +Their performance is subject to change and improvement through further tuning and future developments. +Thus, the primary aim of sharing them is not to assert that one LST or engine is superior (in terms of speed, cost, etc.) to another. 
+Instead, it is to showcase LST-Bench's capability in quantifying significant trade-offs across various combinations of engines and LSTs.
+Further details about the runs and setups are available [here](/run).
+
+## Adding a New Result
+To include data from a new system, duplicate one of the directories in the [run folder](/run) and modify the necessary files within.
+For a deeper understanding of the directory structure, consult the [README file](/run/README.md).
+The LST-Bench dashboard web app automatically retrieves results from the .duckdb files within those folders and displays them on the dashboard.
+
+Alternatively, you can provide your own paths to search for results via command-line arguments; see below.
+
+## Dashboard Development
+To run the LST-Bench dashboard locally and test your changes, follow these steps:
+
+### 1. Set up Python Version
+Ensure you have Python version 3.11 installed on your system. If not, you can download and install it from the official Python website.
+
+### 2. Create and Start Virtual Environment
+To isolate the dependencies of the LST-Bench dashboard, it's recommended to use a virtual environment. You can create one by running the following command in your terminal:
+
+```bash
+python -m venv venv
+```
+
+Once the virtual environment is created, activate it by executing:
+
+```bash
+source venv/bin/activate
+```
+
+### 3. Install Dependencies
+Install the necessary packages specified in requirements.txt using pip:
+
+```bash
+pip install -r requirements.txt
+```
+
+### 4. Execute Streamlit App
+With the dependencies installed, you can now start the Streamlit app by running the following command:
+
+```bash
+python -m streamlit run main.py
+# Optionally, pass specific directories to search for result files:
+python -m streamlit run main.py -- --result_dirs DIR1 DIR2 ...
+```
+
+This command will launch the LST-Bench dashboard locally in your browser.
diff --git a/metrics/app/main.py b/metrics/app/main.py
new file mode 100755
index 00000000..c215511f
--- /dev/null
+++ b/metrics/app/main.py
@@ -0,0 +1,423 @@
+# Copyright (c) Microsoft Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from typing import List
+import altair as alt
+import collections
+import duckdb
+import logging
+import pandas as pd
+import os
+import streamlit as st
+import utils
+
+
+@st.cache_resource
+def get_connection(*, result_dirs: List[str] = None):
+    # Either search for results in the provided directories,
+    # or use defaults assuming that the CWD is the location of this script.
+    result_dirs = result_dirs or ["./", "../../run/"]
+
+    connection = duckdb.connect()
+    # Get databases and attach them
+    databases_list = []
+
+    # Function to recursively find DuckDB files in a directory
+    def find_duckdb_files(directory: str) -> collections.abc.Iterator[str]:
+        # Warn if the directory does not exist
+        if not os.path.exists(directory):
+            st.warning(f"Directory '{directory}' does not exist.")
+            return
+
+        if os.path.isfile(directory) and directory.endswith('.duckdb'):
+            yield directory
+            return
+
+        for root, dirs, files in os.walk(directory):
+            for file in files:
+                if file.endswith('.duckdb'):
+                    yield os.path.join(root, file)
+
+    # Combine the results from all directories
+    for result_dir in result_dirs:
+        for database_path in find_duckdb_files(result_dir):
+            # Strip the '.duckdb' extension to obtain the database name.
+            database = os.path.basename(database_path)[:-len('.duckdb')]
+            connection.execute(f"ATTACH DATABASE '{database_path}' AS \"{database}\" (READ_ONLY)")
+            databases_list.append(database)
+
+    if not databases_list:
+        st.error("No DuckDB files found in the provided directories.")
+        st.stop()
+    # Create view encompassing all experiments
+    union_sql = " UNION ".join([f"SELECT * FROM \"{database}\".experiment_telemetry" for database in databases_list])
+    connection.execute(f"CREATE VIEW combined_experiment_telemetry AS {union_sql}")
+    return connection
+
+
+@st.cache_data
+def get_systems(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT concat_ws('-', json(event_data)->>'system', json(event_data)->>'system_version') AS system
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY system ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['system']
+
+
+@st.cache_data
+def get_table_formats(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT concat_ws('-', json(event_data)->>'table_format', json(event_data)->>'table_format_version') AS table_format
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY table_format ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['table_format']
+
+
+@st.cache_data
+def get_modes(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT json(event_data)->>'mode' AS mode
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY mode ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['mode']
+
+
+@st.cache_data
+def get_cluster_sizes(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT json(event_data)->>'cluster_size' AS cluster_size
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY cluster_size ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['cluster_size']
+
+
+@st.cache_data
+def get_machines(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT json(event_data)->>'machine' AS machine
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY machine ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['machine']
+
+
+@st.cache_data
+def get_workloads(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT event_id AS workload
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY workload ASC;
+        """
+    ).df()
+    # Replace the string 'None' with 'N/A'
+    df['workload'] = df['workload'].replace('None', "N/A")
+    return df['workload']
+
+
+@st.cache_data
+def get_scale_factors(*, result_dirs: List[str] = None):
+    connection = get_connection(result_dirs=result_dirs)
+    df = connection.execute(
+        f"""
+        SELECT DISTINCT json(event_data)->>'scale_factor' AS scale_factor
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND NOT(event_id LIKE 'setup%')
+        ORDER BY scale_factor ASC;
+        """
+    ).df()
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    return df['scale_factor']
+
+
+def get_experiments_selected(
+        _workload_selected: str,
+        _systems_selected: list[str],
+        _table_formats_selected: list[str],
+        _modes_selected: list[str],
+        _cluster_sizes_selected: list[str],
+        _machines_selected: list[str],
+        _scale_factors_selected: list[str],
+        *, result_dirs: List[str] = None) -> pd.DataFrame:
+    connection = get_connection(result_dirs=result_dirs)
+
+    df = connection.execute(
+        f"""
+        SELECT run_id, event_start_time, event_end_time, event_id,
+            concat_ws('-', json(event_data)->>'system', json(event_data)->>'system_version') AS system,
+            concat_ws('-', json(event_data)->>'table_format', json(event_data)->>'table_format_version') AS table_format,
+            cast(json(event_data)->>'mode' AS VARCHAR) AS mode,
+            cast(json(event_data)->>'cluster_size' AS VARCHAR) AS cluster_size,
+            cast(json(event_data)->>'machine' AS VARCHAR) AS machine,
+            cast(json(event_data)->>'scale_factor' AS VARCHAR) AS scale_factor
+        FROM combined_experiment_telemetry
+        WHERE event_type = 'EXEC_EXPERIMENT' AND event_status='SUCCESS' AND event_id = '{_workload_selected}'
+            AND {utils.generate_sql_in_with_null('system', _systems_selected)}
+            AND {utils.generate_sql_in_with_null('table_format', _table_formats_selected)}
+            AND {utils.generate_sql_in_with_null('mode', _modes_selected)}
+            AND {utils.generate_sql_in_with_null('cluster_size', _cluster_sizes_selected)}
+            AND {utils.generate_sql_in_with_null('machine', _machines_selected)}
+            AND {utils.generate_sql_in_with_null('scale_factor', _scale_factors_selected)}
+        ORDER BY cast(event_start_time AS TIMESTAMP) ASC;
+        """
+    ).df()
+    df.fillna("N/A", inplace=True)
+    logging.debug(df)
+    if len(df) == 0:
+        st.error("No data found for the selected dimensions.")
+        st.stop()
+    return df
+
+
+@st.cache_data
+def get_experiments_data(experiments_df: pd.DataFrame,
target_granularity: str,
+                         *, result_dirs: List[str] = None) -> pd.DataFrame:
+    connection = get_connection(result_dirs=result_dirs)
+    df = experiments_df
+    if len(df) == 0:
+        st.error("Empty experiments data.")
+        st.stop()
+
+    granularities = {
+        'phase': 'EXEC_PHASE',
+        'session': 'EXEC_SESSION',
+        'task': 'EXEC_TASK',
+        'file': 'EXEC_FILE'
+    }
+    # Drill down level by level until the target granularity is reached.
+    for granularity in granularities:
+        new_experiments_data_df = pd.DataFrame()
+        for run_id, event_start_time, event_end_time, event_id, system, table_format, mode, cluster_size, machine, \
+                scale_factor in df.itertuples(index=False):
+            new_experiment_data_df = connection.execute(
+                f"""
+                SELECT run_id, event_start_time, event_end_time,
+                    concat_ws('/', CASE WHEN event_type = 'EXEC_PHASE' THEN NULL ELSE '{event_id}' END, regexp_replace(event_id, '(_delta|_iceberg|_hudi)', '')) AS event_id
+                FROM combined_experiment_telemetry
+                WHERE run_id = ? AND event_type = ? AND event_status='SUCCESS'
+                    AND cast(event_start_time AS TIMESTAMP) >= ? AND cast(event_end_time AS TIMESTAMP) <= ?
+                ORDER BY cast(event_start_time AS TIMESTAMP) ASC;
+                """,
+                [run_id, granularities.get(granularity), event_start_time, event_end_time]).df()
+            new_experiment_data_df["system"] = system
+            new_experiment_data_df["table_format"] = table_format
+            new_experiment_data_df["mode"] = mode
+            new_experiment_data_df["cluster_size"] = cluster_size
+            new_experiment_data_df["machine"] = machine
+            new_experiment_data_df["scale_factor"] = scale_factor
+            new_experiments_data_df = pd.concat([new_experiments_data_df, new_experiment_data_df])
+        df = new_experiments_data_df
+        if granularity == target_granularity:
+            break
+    # Replace missing values with 'N/A'
+    df.fillna("N/A", inplace=True)
+    logging.debug(df)
+    df['configuration'] = df.apply(
+        lambda row: (row['system'] + ", " +
+                     row['table_format'] + ", " +
+                     row['mode'] + ", " +
+                     row['cluster_size'] + "x" + row['machine']),
+        axis=1)
+    # Calculate the latency (in minutes) of each element.
+    df['time_diff_in_mins'] = df.apply(
+        lambda row: utils.time_diff_in_minutes(row['event_start_time'], row['event_end_time']),
+        axis=1)
+    return df
+
+
+def run(*, result_dirs: List[str] = None):
+    st.set_page_config(
+        page_title="LST-Bench - Dashboard",
+        page_icon=":bar_chart:",
+        layout="wide")
+    st.title('LST-Bench - Dashboard')
+    st.write("[Project Page](https://github.com/microsoft/lst-bench/) | "
+             "[Technical Report](https://arxiv.org/abs/2305.01120) | "
+             "[Evaluation](https://github.com/microsoft/lst-bench/tree/main/metrics/app#evaluation) | "
+             "[Adding a New Result](https://github.com/microsoft/lst-bench/tree/main/metrics/app#adding-a-new-result)")
+
+    workloads = get_workloads(result_dirs=result_dirs)
+    workload_selected = st.sidebar.selectbox('Workload', workloads, index=0)
+
+    systems = get_systems(result_dirs=result_dirs)
+    systems_selected = st.sidebar.multiselect('System', systems, default=systems)
+
+    table_formats = get_table_formats(result_dirs=result_dirs)
+    table_formats_selected = st.sidebar.multiselect('Table Format', table_formats, default=table_formats)
+
+    modes = get_modes(result_dirs=result_dirs)
+    modes_selected = st.sidebar.multiselect('Mode', modes, default=modes)
+
+    cluster_sizes = get_cluster_sizes(result_dirs=result_dirs)
+    cluster_sizes_selected = st.sidebar.multiselect('Cluster Size', cluster_sizes, default=cluster_sizes)
+
+    machines = get_machines(result_dirs=result_dirs)
+    machines_selected = st.sidebar.multiselect('Machine', machines, default=machines)
+
+    scale_factors = get_scale_factors(result_dirs=result_dirs)
+    scale_factors_selected = st.sidebar.multiselect('Scale Factor', scale_factors, default=scale_factors)
+
+    # Bail out if any of the dimensions is empty
+    if any(len(arr) == 0 for arr in [systems_selected, table_formats_selected,
+                                     modes_selected, cluster_sizes_selected,
+                                     machines_selected, scale_factors_selected]):
+        st.error("Please ensure you have selected at least one option for each dimension.")
+        st.stop()
+
+    # Create tabs for current selection
+    exec_time_tab = None  # This tab shows execution time.
+    performance_degradation_tab = None  # This tab shows degradation rate.
+    # TODO
+    io_tab = None  # This tab will show I/O metrics, such as bytes read/written.
+    io_api_calls_tab = None  # This tab will show I/O API call metrics.
+    cpu_utilization_tab = None  # This tab will show CPU utilization metrics.
+
+    if workload_selected == 'wp1_longevity':
+        exec_time_tab, performance_degradation_tab = st.tabs(['Execution Time', 'Performance Degradation'])
+    else:
+        exec_time_tab = st.tabs(['Execution Time'])[0]
+
+    if exec_time_tab is not None:
+        granularity_selected = exec_time_tab.radio(
+            'Granularity:',
+            ['phase', 'session', 'task', 'file'],
+            horizontal=True)
+        regex = exec_time_tab.text_input('Filter Results:', placeholder='Regular Expression (Regex)')
+
+        # --- Data manipulations --- #
+        experiments_selected_df = get_experiments_selected(workload_selected, systems_selected, table_formats_selected,
+                                                           modes_selected, cluster_sizes_selected, machines_selected,
+                                                           scale_factors_selected,
+                                                           result_dirs=result_dirs)
+        experiments_data_df = get_experiments_data(experiments_selected_df, granularity_selected, result_dirs=result_dirs)
+        experiments_data_df = experiments_data_df[experiments_data_df['event_id'].str.contains(regex, regex=True)]
+
+        if len(experiments_data_df) > 3000:
+            st.error(
+                "Too many rows in the result. 
" + "Please refine your dimension selection or apply a regex filter to narrow down the results.") + st.stop() + + # --- Plot the data --- # + chart = ( + alt.Chart(experiments_data_df) + .mark_bar() + .encode( + alt.X("configuration:N", axis=None, title='Configuration', stack=None), + alt.Y("time_diff_in_mins:Q", title='Latency (mins)', axis=alt.Axis(titleFontWeight='bold')), + alt.Color("configuration:N", legend=alt.Legend(titleFontWeight='bold', labelLimit=400), + title='Configuration'), + alt.Column("event_id:N", title="", + header=alt.Header(orient='bottom', labelFontWeight='bold', labelAlign='right', + labelAngle=-45, labelPadding=20), + sort=alt.SortField("event_start_time", order="ascending")) + ) + .configure_range( + category={'scheme': 'dark2'} + ) + ) + exec_time_tab.markdown('#') + exec_time_tab.altair_chart(chart, theme=None) + + if performance_degradation_tab is not None: + # --- Data manipulations --- # + experiments_selected_df = get_experiments_selected(workload_selected, systems_selected, table_formats_selected, + modes_selected, cluster_sizes_selected, machines_selected, + scale_factors_selected, result_dirs=result_dirs) + experiments_data_df = get_experiments_data(experiments_selected_df, 'phase', result_dirs=result_dirs) + # Filter rows with event_id following the format _ + experiments_data_df = experiments_data_df[experiments_data_df['event_id'].str.match(r'^.+_\d+$')] + # Extract name part from event_id + experiments_data_df['phase_type'] = experiments_data_df['event_id'].str.extract(r'^(.+)_\d+$') + # Group by each distinct 'configuration' and 'phase_type' + grouped_df = experiments_data_df.groupby(['configuration', 'phase_type']) + # Compute performance degradation + grouped_df = grouped_df['time_diff_in_mins'].agg(performance_degradation_rate=utils.performance_degradation) + grouped_df = grouped_df.reset_index() + + # --- Plot the data --- # + # X axis: phase type + # Y axis: configuration + # score: degradation rate + base = ( + alt.Chart(grouped_df) + .encode( + alt.X("phase_type:N", title='', axis=alt.Axis(labelFontWeight='bold', labelAngle=-45)), + alt.Y("configuration:N", title='Configuration', + axis=alt.Axis(titleFontWeight='bold', maxExtent=430, labelLimit=400)) + ) + ) + heatmap = ( + base.mark_rect() + .encode( + alt.Color('performance_degradation_rate:Q', + scale=alt.Scale(scheme='redblue', reverse=True), + title='Performance Degradation Rate', + legend=alt.Legend(titleFontWeight='bold', titleLimit=400, direction="horizontal")) + ) + .properties( + height={"step": 50}, + width={"step": 50} + ) + ) + text = ( + base.mark_text() + .encode( + alt.Text('performance_degradation_rate:Q', format=".2f"), + color=alt.condition(alt.datum.performance_degradation_rate > 0.8, alt.value("black"), alt.value("white")) + ) + ) + performance_degradation_tab.markdown('#') + performance_degradation_tab.altair_chart(heatmap + text, theme=None) + +if __name__ == '__main__': + # Parse arguments + parser = argparse.ArgumentParser(description='LST-Bench Dashboard') + parser.add_argument('--result_dirs', type=str, nargs='+', help='Directories containing the result files') + args = parser.parse_args() + run(result_dirs=args.result_dirs) diff --git a/metrics/app/requirements.txt b/metrics/app/requirements.txt new file mode 100644 index 00000000..419d2c1a --- /dev/null +++ b/metrics/app/requirements.txt @@ -0,0 +1,4 @@ +altair==5.2.0 +duckdb==0.9.2 +pandas==2.2.0 +streamlit==1.31.0 diff --git a/metrics/app/utils.py b/metrics/app/utils.py new file mode 100755 index 
00000000..82463ec6
--- /dev/null
+++ b/metrics/app/utils.py
@@ -0,0 +1,80 @@
+# Copyright (c) Microsoft Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime as dt
+
+import pandas as pd
+
+# -------- DATE MANIPULATIONS -------- #
+
+utc_format = '%Y-%m-%dT%H:%M:%S.%f%z'
+
+
+def time_diff_in_minutes(time_str1, time_str2):
+    d1 = dt.datetime.strptime(time_str1, utc_format)
+    d2 = dt.datetime.strptime(time_str2, utc_format)
+    # Use total_seconds() so that differences spanning more than a day are counted correctly.
+    return abs((d2 - d1).total_seconds()) / 60
+
+
+# -------- PERFORMANCE DEGRADATION -------- #
+def performance_degradation(values: pd.Series) -> float:
+    """
+    Performance degradation is measured as the average rate of change between consecutive values.
+
+    Formula:
+        degradation_rate = (Σ((M[i] - M[i-1]) / M[i-1])) / (n - 1)
+
+    Where:
+    - M[i] is the current value
+    - M[i-1] is the previous value
+    - n is the number of observations
+
+    Args:
+    - values (pd.Series): A series containing the values for which performance degradation is measured.
+
+    Returns:
+    - float: The average rate of performance degradation.
+    """
+
+    # Calculate the difference between each value and its previous value
+    diffs = values.diff()
+    # Remove the first row as it will be NaN
+    diffs = diffs.dropna()
+    # Divide each difference by the previous value
+    diffs = diffs.div(values.shift(1))
+    # Calculate the average rate of change
+    degradation_rate = diffs.mean()
+
+    # TODO: Consider incorporating variance to understand the variability in performance degradation.
+    # TODO: Handle multiple runs for more comprehensive analysis.
+
+    return degradation_rate
+
+
+# -------- SQL GENERATION -------- #
+def generate_sql_in_with_null(lhs: str, values: list, NA_value="N/A") -> str:
+    """
+    Generates a SQL predicate checking whether `lhs` matches any of the given
+    values, treating the NA placeholder as SQL NULL.
+
+    Args:
+    - lhs (str): The column or expression on the left-hand side of the predicate.
+    - values (list): The selected values; may include the NA placeholder.
+    - NA_value: The placeholder value that stands in for NULL (defaults to "N/A").
+
+    Returns:
+    - str: A SQL boolean expression.
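+
+    Example (with illustrative values):
+        generate_sql_in_with_null('mode', ['batch', 'N/A'])
+        # returns "(mode IN ('batch') OR mode IS NULL)"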
+ """ + + str_list = ', '.join(["'" + str(value) + "'" for value in values if value is not NA_value]) + null_predicate = '' if NA_value in values else 'NOT' + + return f"({lhs} IN ({str_list}) OR {lhs} IS {null_predicate} NULL)" \ No newline at end of file diff --git a/pom.xml b/pom.xml index 139a585d..890af0f3 100644 --- a/pom.xml +++ b/pom.xml @@ -18,27 +18,27 @@ org.apache.commons commons-text - 1.11.0 + 1.12.0 org.apache.commons commons-lang3 - 3.13.0 + 3.14.0 commons-cli commons-cli - 1.6.0 + 1.7.0 commons-io commons-io - 2.15.0 + 2.16.1 org.immutables value - 2.10.0 + 2.10.1 provided @@ -55,32 +55,32 @@ org.apache.logging.log4j log4j-api - 2.21.1 + 2.23.1 org.apache.logging.log4j log4j-core - 2.21.1 + 2.23.1 org.apache.logging.log4j log4j-slf4j-impl - 2.21.1 + 2.23.1 com.fasterxml.jackson.dataformat jackson-dataformat-yaml - 2.15.3 + 2.17.1 org.duckdb duckdb_jdbc - 0.9.1 + 0.9.2 com.networknt json-schema-validator - 1.0.87 + 1.0.88 org.apache.commons @@ -92,19 +92,19 @@ org.junit.jupiter junit-jupiter - 5.10.1 + 5.10.2 test org.junit-pioneer junit-pioneer - 2.1.0 + 2.2.0 test org.mockito mockito-core - 5.7.0 + 5.12.0 test @@ -141,7 +141,7 @@ com.databricks databricks-jdbc - 2.6.34 + 2.6.38 @@ -156,7 +156,7 @@ com.microsoft.azure msal4j - 1.14.0 + 1.14.2 @@ -208,7 +208,7 @@ io.trino trino-jdbc - 432 + 448 @@ -219,7 +219,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.2.2 + 3.2.5 @@ -250,7 +250,7 @@ com.diffplug.spotless spotless-maven-plugin - 2.40.0 + 2.43.0 diff --git a/run/README.md b/run/README.md new file mode 100644 index 00000000..aa45d28c --- /dev/null +++ b/run/README.md @@ -0,0 +1,46 @@ + + +# LST-Bench: Configurations and Results +This folder contains configurations for running LST-Bench on various systems as depicted in the [LST-Bench dashboard](/metrics/app), along with details about the setups used to generate those results. + +## Systems Included +- [x] Apache Spark 3.3.1 + - [x] Delta Lake 2.2.0 + - [x] Apache Hudi 0.12.2 + - [x] Apache Iceberg 1.1.0 +- [x] Trino 420 + - [x] Delta Lake + - [x] Apache Iceberg + +## Folder Structure +While the folder for each engine may have a slightly different structure, they generally contain the following: + +- `scripts/`: + This directory contains SQL files used to execute LST-Bench workloads on the respective engine. + Typically, these SQL files may vary slightly across engines and LSTs based on the supported SQL dialect. +- `config/`: + This directory houses LST-Bench configuration files required to execute the workload. + It includes LST-Bench phase/session/task libraries that reference the aforementioned SQL scripts. +- Additional infrastructure and configuration automation folders, e.g., `azure-pipelines/`: + These folders contain scripts or files facilitating automation for running the benchmark on a specific infrastructure/engine. + For instance, Azure Pipelines scripts to deploy an engine with different LSTs and executing LST-Bench. + Generally, these folders should include an additional README.md file offering further details. +- `results/`: + This folder stores the results of the LST-Bench runs as captured by LST-Bench telemetry using DuckDB. + These results are processed and visualized in the [LST-Bench dashboard](/metrics/app). 
diff --git a/src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat b/run/auxiliary/tpcds/data_maintenance/parameter_values.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat rename to run/auxiliary/tpcds/data_maintenance/parameter_values.dat diff --git a/src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat b/run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat rename to run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000000.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000000.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000000.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000000.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000001.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000001.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000001.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000001.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000002.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000002.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000002.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000002.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000003.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000003.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000003.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000003.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000004.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000004.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000004.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000004.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000005.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000005.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000005.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000005.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000006.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000006.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000006.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000006.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000007.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000007.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000007.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000007.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000008.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000008.dat similarity index 100% rename from 
src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000008.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000008.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000009.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000009.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000009.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000009.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000010.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000010.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000010.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000010.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000011.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000011.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000011.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000011.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000012.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000012.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000012.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000012.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000013.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000013.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000013.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000013.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000014.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000014.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000014.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000014.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000015.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000015.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000015.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000015.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000016.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000016.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000016.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000016.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000017.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000017.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000017.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000017.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000018.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000018.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000018.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000018.dat diff --git 
a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000019.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000019.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000019.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000019.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000020.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000020.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000020.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000020.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000021.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000021.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000021.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000021.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000022.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000022.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000022.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000022.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000023.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000023.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000023.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000023.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000024.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000024.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000024.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000024.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000025.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000025.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000025.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000025.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000026.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000026.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000026.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000026.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000027.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000027.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000027.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000027.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000028.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000028.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000028.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000028.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000029.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000029.dat similarity index 100% rename from 
src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000029.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000029.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000030.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000030.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000030.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000030.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000031.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000031.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000031.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000031.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000032.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000032.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000032.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000032.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000033.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000033.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000033.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000033.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000034.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000034.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000034.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000034.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000035.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000035.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000035.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000035.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000036.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000036.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000036.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000036.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000037.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000037.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000037.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000037.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000038.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000038.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000038.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000038.dat diff --git a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000039.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000039.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000039.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000039.dat diff --git 
a/src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000040.dat b/run/auxiliary/tpcds/single_user/permutation_orders/000040.dat similarity index 100% rename from src/main/resources/auxiliary/tpcds/single_user/permutation_orders/000040.dat rename to run/auxiliary/tpcds/single_user/permutation_orders/000040.dat diff --git a/src/main/resources/auxiliary/tpch/data_maintenance/parameter_values.dat b/run/auxiliary/tpch/data_maintenance/parameter_values.dat similarity index 100% rename from src/main/resources/auxiliary/tpch/data_maintenance/parameter_values.dat rename to run/auxiliary/tpch/data_maintenance/parameter_values.dat diff --git a/src/main/resources/auxiliary/tpch/setup_data_maintenance/parameter_values.dat b/run/auxiliary/tpch/setup_data_maintenance/parameter_values.dat similarity index 100% rename from src/main/resources/auxiliary/tpch/setup_data_maintenance/parameter_values.dat rename to run/auxiliary/tpch/setup_data_maintenance/parameter_values.dat diff --git a/run/spark-3.3.1/azure-pipelines/README.md b/run/spark-3.3.1/azure-pipelines/README.md new file mode 100644 index 00000000..6e5e17de --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/README.md @@ -0,0 +1,51 @@ + + +# Azure Pipelines Deployment for LST-Bench on Apache Spark 3.3.1 +This directory contains the tooling needed to execute LST-Bench on Apache Spark 3.3.1 with different LSTs using Azure Pipelines. The tooling consists of: +- `run-lst-bench.yml`: + An Azure Pipelines script that deploys Apache Spark with various LSTs and executes LST-Bench. +- `sh/`: + A directory containing shell scripts and engine configuration files that support deploying Spark with different LSTs and running the experiments. +- `config/`: + A directory with the LST-Bench configuration files required to execute the experiments included in the results. + +## Prerequisites +- Automated deployment of the Azure infrastructure needed to run LST-Bench is not implemented. As a result, the Azure Pipelines script expects the following setup: + - A VM named 'lst-bench-client', connected to the pipeline environment, to run the LST-Bench client. + - A VM named 'lst-bench-head', also connected to the pipeline environment, to run the head node of the Spark cluster. + - A VMSS cluster that will serve as the Spark worker nodes, within the same VNet as the head node. + - An Azure Storage Account accessible by both the VMSS and the head node. + - An Azure SQL Database (or SQL Server-flavored RDBMS) that will host the Hive Metastore. + The Hive Metastore schema for version 2.3.9 should already be installed in the instance. +- Prior to running the pipeline, the following variables need to be defined in your Azure Pipelines setup: + - `data_storage_account`: Name of the Azure Blob Storage account where the source data for the experiment is stored. + - `data_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the source data for the experiment is stored. + - `data_storage_account_container`: Name of the container in the Azure Blob Storage account where the source data for the experiment is stored. + - `hms_jdbc_driver`: JDBC driver for the Hive Metastore. + - `hms_jdbc_url`: JDBC URL for the Hive Metastore. + - `hms_jdbc_user`: Username for the Hive Metastore. + - `hms_jdbc_password` (secret): Password for the Hive Metastore. + - `hms_storage_account`: Name of the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog (can be the same as the data_storage_account).
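These variables (including the two `hms_storage_account_*` entries that follow) can also be pre-created with the Azure DevOps CLI instead of through the Web UI. A minimal sketch, assuming the `azure-devops` CLI extension is installed and authenticated and that `az devops` organization/project defaults are configured; the pipeline id and all values below are placeholders:

```bash
# Sketch: pre-create the pipeline variables listed above (placeholder values).
az pipelines variable create --pipeline-id 42 \
  --name data_storage_account --value mylstbenchdata
az pipelines variable create --pipeline-id 42 \
  --name hms_jdbc_url \
  --value "jdbc:sqlserver://myserver.database.windows.net:1433;database=hms"
# Secret variables are masked in logs and hidden in the Web UI:
az pipelines variable create --pipeline-id 42 \
  --name hms_jdbc_password --secret true --value "$HMS_JDBC_PASSWORD"
```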
+ - `hms_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog. + - `hms_storage_account_container`: Name of the container in the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog. +- The versions and configurations of the LSTs to run can be modified via the pipeline's input parameters, either in the Azure Pipelines YAML file or from the Web UI. + Default values are assigned to these parameters. + Parameters also include the experiment scale factor, machine type, and cluster size. + Note that these parameters are not used to deploy the data or the infrastructure, since that process is not automated in the pipeline. + Instead, they are recorded in the experiment telemetry so that results can be properly categorized and visualized later on. diff --git a/run/spark-3.3.1/azure-pipelines/config/connections_config.yaml b/run/spark-3.3.1/azure-pipelines/config/connections_config.yaml new file mode 100644 index 00000000..63128856 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/connections_config.yaml @@ -0,0 +1,7 @@ +# Description: Connections Configuration +--- +version: 1 +connections: +- id: spark_0 + driver: org.apache.hive.jdbc.HiveDriver + url: jdbc:hive2://${SPARK_MASTER_HOST}:10000 diff --git a/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-delta-2.2.0.yaml b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-delta-2.2.0.yaml new file mode 100644 index 00000000..3fd39e23 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-delta-2.2.0.yaml @@ -0,0 +1,29 @@ +# Description: Experiment Configuration +--- +version: 1 +id: "${EXP_NAME}" +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: spark + system_version: 3.3.1 + table_format: delta + table_format_version: 2.2.0 + scale_factor: "${EXP_SCALE_FACTOR}" + mode: cow + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. +parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' + catalog: spark_catalog + database: "${EXP_NAME}" + table_format: delta + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/delta/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: '' diff --git a/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-hudi-0.12.2.yaml b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-hudi-0.12.2.yaml new file mode 100644 index 00000000..3c8df376 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-hudi-0.12.2.yaml @@ -0,0 +1,29 @@ +# Description: Experiment Configuration +--- +version: 1 +id: "${EXP_NAME}" +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run.
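The `${EXP_NAME}`, `${EXP_SCALE_FACTOR}`, `${EXP_MACHINE}`, and `${EXP_CLUSTER_SIZE}` placeholders in these experiment configs are populated from the workload being run and from the pipeline's input parameters, whose defaults live in `run-lst-bench.yml` further below. To queue a run that overrides those defaults from the command line, something like the following should work; a sketch, again assuming the `azure-devops` CLI extension, with a placeholder pipeline name and values:

```bash
# Sketch: queue the pipeline with non-default input parameters.
az pipelines run --name run-lst-bench \
  --parameters exp_scale_factor=1000 exp_machine=Standard_E16s_v5 exp_cluster_size=16
```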
+metadata: + system: spark + system_version: 3.3.1 + table_format: hudi + table_format_version: 0.12.2 + scale_factor: "${EXP_SCALE_FACTOR}" + mode: cow + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. +parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' + catalog: spark_catalog + database: "${EXP_NAME}" + table_format: hudi + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: ', "type"="cow"' diff --git a/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-iceberg-1.1.0.yaml b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-iceberg-1.1.0.yaml new file mode 100644 index 00000000..506f40c7 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/experiment_config-cow-iceberg-1.1.0.yaml @@ -0,0 +1,29 @@ +# Description: Experiment Configuration +--- +version: 1 +id: "${EXP_NAME}" +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: spark + system_version: 3.3.1 + table_format: iceberg + table_format_version: 1.1.0 + scale_factor: "${EXP_SCALE_FACTOR}" + mode: cow + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. +parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' + catalog: spark_catalog + database: "${EXP_NAME}" + table_format: iceberg + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="copy-on-write", "write.update.mode"="copy-on-write", "write.merge.mode"="copy-on-write"' diff --git a/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-hudi-0.12.2.yaml b/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-hudi-0.12.2.yaml new file mode 100644 index 00000000..cbc82720 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-hudi-0.12.2.yaml @@ -0,0 +1,29 @@ +# Description: Experiment Configuration +--- +version: 1 +id: "${EXP_NAME}" +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: spark + system_version: 3.3.1 + table_format: hudi + table_format_version: 0.12.2 + scale_factor: "${EXP_SCALE_FACTOR}" + mode: mor + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. 
+parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' + catalog: spark_catalog + database: "${EXP_NAME}" + table_format: hudi + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/hudi/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: ', "type"="mor"' diff --git a/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-iceberg-1.1.0.yaml b/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-iceberg-1.1.0.yaml new file mode 100644 index 00000000..2b916227 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/experiment_config-mor-iceberg-1.1.0.yaml @@ -0,0 +1,29 @@ +# Description: Experiment Configuration +--- +version: 1 +id: "${EXP_NAME}" +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: spark + system_version: 3.3.1 + table_format: iceberg + table_format_version: 1.1.0 + scale_factor: "${EXP_SCALE_FACTOR}" + mode: mor + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. +parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' + catalog: spark_catalog + database: "${EXP_NAME}" + table_format: iceberg + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: ', "format-version"="2", "write.delete.mode"="merge-on-read", "write.update.mode"="merge-on-read", "write.merge.mode"="merge-on-read"' diff --git a/run/spark-3.3.1/azure-pipelines/config/setup_experiment_config.yaml b/run/spark-3.3.1/azure-pipelines/config/setup_experiment_config.yaml new file mode 100644 index 00000000..a4907102 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/setup_experiment_config.yaml @@ -0,0 +1,20 @@ +# Description: Experiment Configuration +--- +version: 1 +id: setup_experiment +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: spark + system_version: 3.3.1 + scale_factor: "${EXP_SCALE_FACTOR}" + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. 
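LST-Bench performs this `${...}` substitution itself when it loads a config, but previewing the resolved file can help catch a wrong storage account or scale factor before a run. A quick sketch using `envsubst`, with placeholder values for the environment variables:

```bash
# Sketch: preview how the placeholders in a config resolve. envsubst merely
# mimics the expansion here; LST-Bench does its own substitution at run time.
export EXP_SCALE_FACTOR=100
export DATA_STORAGE_ACCOUNT=mylstbenchdata
export DATA_STORAGE_ACCOUNT_CONTAINER=data
envsubst < run/spark-3.3.1/azure-pipelines/config/setup_experiment_config.yaml
```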
+parameter_values: + external_catalog: spark_catalog + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: csv + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: ',header="true"' + external_tblproperties_suffix: '' diff --git a/run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml b/run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml new file mode 100644 index 00000000..6e5f3400 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml @@ -0,0 +1,13 @@ +# Description: Telemetry Configuration +--- +version: 1 +connection: + id: duckdb_0 + driver: org.duckdb.DuckDBDriver + url: jdbc:duckdb:./telemetry-spark-3.3.1 +execute_ddl: true +ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' +insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' +# The following parameter values will be used to replace the variables in the logging statements. +parameter_values: + data_path: '' \ No newline at end of file diff --git a/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml b/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml new file mode 100644 index 00000000..1d63227e --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/run-lst-bench.yml @@ -0,0 +1,297 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
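An aside on the telemetry configuration above: its JDBC URL points at a local DuckDB file (`./telemetry-spark-3.3.1`), so after a run the collected events can be inspected directly with the DuckDB CLI. A sketch; the table layout is whatever `scripts/logging/duckdb/ddl.sql` creates, so list the tables first rather than assuming a name:

```bash
# Sketch: inspect the telemetry database produced by a run.
duckdb ./telemetry-spark-3.3.1 "SHOW TABLES;"
# Then query whichever table the DDL created (hypothetical table name):
# duckdb ./telemetry-spark-3.3.1 "SELECT * FROM experiment_telemetry LIMIT 10;"
```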
+ +trigger: none + +parameters: +- name: lsts + type: object + default: + - table_format: "delta" + version: "2.2.0" + mode: "cow" + - table_format: "iceberg" + version: "1.1.0" + mode: "cow" + - table_format: "iceberg" + version: "1.1.0" + mode: "mor" + - table_format: "hudi" + version: "0.12.2" + mode: "cow" + - table_format: "hudi" + version: "0.12.2" + mode: "mor" +- name: workloads + type: object + default: + - "wp1_longevity" + - "wp2_resilience" + - "wp3_rw_concurrency" + - "wp4_time_travel" +- name: exp_scale_factor + type: number + default: 100 +- name: exp_machine + type: string + default: "Standard_E8s_v5" +- name: exp_cluster_size + type: number + default: 8 + +variables: + MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository + MAVEN_OPTS: '-ntp -B -Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)' + EXP_SCALE_FACTOR: ${{ parameters.exp_scale_factor }} + EXP_MACHINE: ${{ parameters.exp_machine }} + EXP_CLUSTER_SIZE: ${{ parameters.exp_cluster_size }} + +stages: +# Build LST-Bench and create artifact to deploy to target VM +- stage: build + jobs: + - job: Build + pool: + vmImage: 'ubuntu-latest' + steps: + - task: Cache@2 + displayName: Cache Maven local repo + inputs: + key: 'maven | "$(Agent.OS)" | **/pom.xml' + restoreKeys: | + maven | "$(Agent.OS)" + maven + path: $(MAVEN_CACHE_FOLDER) + - task: Maven@4 + inputs: + mavenPomFile: 'pom.xml' + options: $(MAVEN_OPTS) + javaHomeOption: 'JDKVersion' + jdkVersionOption: '1.11' + publishJUnitResults: false + goals: 'package -DskipTests -Pspark-jdbc' + - task: CopyFiles@2 + displayName: 'Copy Artifacts to: $(TargetFolder)' + inputs: + SourceFolder: '$(Build.SourcesDirectory)' + TargetFolder: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/' + - task: PublishPipelineArtifact@1 + inputs: + targetPath: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/' + artifact: lst-bench-0.1-SNAPSHOT + +# Set up engine and deploy LST-Bench +- stage: deploy + jobs: + - deployment: EngineDeploy + displayName: 'Deploying engine' + workspace: + clean: all + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - bash: | + echo 'Deploy engine' + mkdir -p ~/spark-3.3.1 + cp $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/run/spark-3.3.1/azure-pipelines/sh/* ~/spark-3.3.1/ + cd ~/spark-3.3.1 + chmod +x ./* + spark_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + ./init.sh "${spark_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)" + ./hms.sh "$(hms_jdbc_driver)" "$(hms_jdbc_url)" "$(hms_jdbc_user)" "$(hms_jdbc_password)" "$(hms_storage_account)" "$(hms_storage_account_shared_key)" "$(hms_storage_account_container)" + ./dist-setup.sh + ./dist-exec.sh spark-3.3.1 init.sh "${spark_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)" + - deployment: ClientDeploy + displayName: 'Deploying LST-Bench client' + workspace: + clean: all + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + strategy: + runOnce: + deploy: + steps: + - bash: | + echo 'Deploy LST-Bench client' + sudo apt install -y openjdk-11-jdk + mkdir -p ~/lst-bench-0.1-SNAPSHOT + cp -rf $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/* ~/lst-bench-0.1-SNAPSHOT/ + chmod +x ~/lst-bench-0.1-SNAPSHOT/launcher.sh + +# Run LST-Bench (setup external tables) +- stage: setup_experiment + jobs: + - deployment: StartEngine + displayName: "Starting Engine" + environment: + name: 
'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + variables: + process.clean: false + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./stop-cluster.sh && ./start-cluster.sh + sleep 10 + spark_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + echo "##vso[task.setvariable variable=spark_head_node;isOutput=true]${spark_head_node}" + name: engine_start_step + - deployment: RunSetupExperiment + dependsOn: StartEngine + displayName: "Setup Experiment" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + variables: + spark_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.spark_head_node'] ] + timeoutInMinutes: 0 + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/lst-bench-0.1-SNAPSHOT + ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \ + -e run/spark-3.3.1/azure-pipelines/config/setup_experiment_config.yaml \ + -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \ + -l run/spark-3.3.1/config/tpcds/library.yaml \ + -w run/spark-3.3.1/config/tpcds/setup_experiment.yaml + - deployment: StopEngine + dependsOn: RunSetupExperiment + displayName: "Stopping Engine" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./stop-cluster.sh + +# Run LST-Bench +# TODO: Enable time travel for Hudi (see HUDI-7274) +- ${{ each lst in parameters.lsts }}: + - stage: setup_${{ lst.mode }}_${{ lst.table_format }} + jobs: + - deployment: SetupEngine + displayName: "Setup Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./${{ lst.table_format }}-${{ lst.version }}.sh + ./dist-exec.sh spark-3.3.1 ${{ lst.table_format }}-${{ lst.version }}.sh + - ${{ each workload in parameters.workloads }}: + - ${{ if or(ne(lst.table_format, 'hudi'),ne(workload, 'wp4_time_travel')) }}: + - stage: test_${{ lst.mode }}_${{ lst.table_format }}_${{ workload }} + jobs: + - deployment: StartEngine + displayName: "Starting Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + variables: + process.clean: false + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./stop-cluster.sh && ./start-cluster.sh ${{ lst.table_format }} + sleep 10 + spark_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + echo "##vso[task.setvariable variable=spark_head_node;isOutput=true]${spark_head_node}" + name: engine_start_step + - deployment: RunExperiment + dependsOn: StartEngine + displayName: "Running Experiment (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + variables: + spark_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.spark_head_node'] ] + timeoutInMinutes: 0 + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | 
+ cd ~/lst-bench-0.1-SNAPSHOT + echo "${{ workload }}" + export EXP_NAME="${{ workload }}" + ./launcher.sh -c run/spark-3.3.1/azure-pipelines/config/connections_config.yaml \ + -e run/spark-3.3.1/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}-${{ lst.version }}.yaml \ + -t run/spark-3.3.1/azure-pipelines/config/telemetry_config.yaml \ + -l run/spark-3.3.1/config/tpcds/library.yaml \ + -w run/spark-3.3.1/config/tpcds/${{ workload }}-${{ lst.table_format }}-${{ lst.version }}.yaml + - deployment: StopEngine + dependsOn: RunExperiment + displayName: "Stopping Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./stop-cluster.sh + - stage: cleanup_${{ lst.mode }}_${{ lst.table_format }} + jobs: + - deployment: CleanupEngine + displayName: "Cleanup Engine (${{ lst.mode }}, ${{ lst.table_format }}-${{ lst.version }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/spark-3.3.1 + ./cleanup-${{ lst.table_format }}-${{ lst.version }}.sh + ./dist-exec.sh spark-3.3.1 cleanup-${{ lst.table_format }}-${{ lst.version }}.sh diff --git a/run/spark-3.3.1/azure-pipelines/sh/cleanup-delta-2.2.0.sh b/run/spark-3.3.1/azure-pipelines/sh/cleanup-delta-2.2.0.sh new file mode 100755 index 00000000..c8eacccd --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/cleanup-delta-2.2.0.sh @@ -0,0 +1,9 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +rm $SPARK_HOME/jars/delta-core.jar +rm $SPARK_HOME/jars/delta-storage.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/cleanup-hudi-0.12.2.sh b/run/spark-3.3.1/azure-pipelines/sh/cleanup-hudi-0.12.2.sh new file mode 100755 index 00000000..ab6aee49 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/cleanup-hudi-0.12.2.sh @@ -0,0 +1,8 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +rm $SPARK_HOME/jars/hudi-spark-bundle.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/cleanup-iceberg-1.1.0.sh b/run/spark-3.3.1/azure-pipelines/sh/cleanup-iceberg-1.1.0.sh new file mode 100755 index 00000000..e0a01cd8 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/cleanup-iceberg-1.1.0.sh @@ -0,0 +1,8 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +rm $SPARK_HOME/jars/iceberg-spark-runtime.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/delta-2.2.0.sh b/run/spark-3.3.1/azure-pipelines/sh/delta-2.2.0.sh new file mode 100755 index 00000000..c9e4f015 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/delta-2.2.0.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." 
+ exit 1 +fi + +wget -nv -N https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.2.0/delta-core_2.12-2.2.0.jar +wget -nv -N https://repo1.maven.org/maven2/io/delta/delta-storage/2.2.0/delta-storage-2.2.0.jar + +ln -sf $(pwd)/delta-core_2.12-2.2.0.jar $SPARK_HOME/jars/delta-core.jar +ln -sf $(pwd)/delta-storage-2.2.0.jar $SPARK_HOME/jars/delta-storage.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/dist-exec.sh b/run/spark-3.3.1/azure-pipelines/sh/dist-exec.sh new file mode 100755 index 00000000..bd7c3ca6 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/dist-exec.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e +source env.sh +if [ -z "${HOSTS}" ]; then + echo "ERROR: HOSTS is not defined." + exit 1 +fi + +if [ "$#" -lt 2 ]; then + echo "ERROR: Usage: $0 DEPLOY_DIR SCRIPT_FILE [ARGS...]" + exit 1 +fi +deploy_dir=$1 +script_file=$2 + +for node in $HOSTS ; do ssh -t $node "mkdir -p ~/$deploy_dir" ; done +for node in $HOSTS ; do scp *.template $node:~/$deploy_dir ; done +for node in $HOSTS ; do scp $script_file $node:~/$deploy_dir ; done +for node in $HOSTS ; do ssh -t $node "cd ~/$deploy_dir && chmod +x ./$script_file && ./$script_file ${@:3}" ; done diff --git a/run/spark-3.3.1/azure-pipelines/sh/dist-setup.sh b/run/spark-3.3.1/azure-pipelines/sh/dist-setup.sh new file mode 100755 index 00000000..fda4f282 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/dist-setup.sh @@ -0,0 +1,28 @@ +#!/bin/bash -e +source env.sh +if [ -z "${HOME}" ]; then + echo "ERROR: HOME is not defined." + exit 1 +fi +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +# Install packages +sudo apt install -y net-tools nmap + +# Configure hosts +my_ip=$(/sbin/ifconfig eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') +ip_range=${my_ip%.*}.* +nmap -sn $ip_range | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | grep -v "^$my_ip$" > $HOME/hostiplist + +cp $HOME/hostiplist $SPARK_HOME/conf/workers + +export HOSTS=$(<$HOME/hostiplist) + +for node in $HOSTS ; do scp ~/.ssh/id_rsa* $node:~/.ssh/ ; done + +# Push to environment +echo "export HOSTS=\"${HOSTS}\"" >> env.sh +echo "source $(pwd)/env.sh" >> ~/.bashrc diff --git a/run/spark-3.3.1/azure-pipelines/sh/hive-site.xml.template b/run/spark-3.3.1/azure-pipelines/sh/hive-site.xml.template new file mode 100644 index 00000000..0e79ed7b --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/hive-site.xml.template @@ -0,0 +1,36 @@ +<configuration> + <property> + <name>javax.jdo.option.ConnectionURL</name> + <value>${HMS_JDBC_URL}</value> + </property> + + <property> + <name>javax.jdo.option.ConnectionDriverName</name> + <value>${HMS_JDBC_DRIVER}</value> + </property> + + <property> + <name>javax.jdo.option.ConnectionUserName</name> + <value>${HMS_JDBC_USER}</value> + </property> + + <property> + <name>javax.jdo.option.ConnectionPassword</name> + <value>${HMS_JDBC_PASSWORD}</value> + </property> + + <property> + <name>hive.metastore.warehouse.dir</name> + <value>abfss://${HMS_STORAGE_ACCOUNT_CONTAINER}@${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net/hive/warehouse</value> + </property> + + <property> + <name>fs.azure.account.auth.type.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name> + <value>SharedKey</value> + </property> + + <property> + <name>fs.azure.account.key.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name> + <value>${HMS_STORAGE_ACCOUNT_SHARED_KEY}</value> + </property> +</configuration> \ No newline at end of file diff --git a/run/spark-3.3.1/azure-pipelines/sh/hms.sh b/run/spark-3.3.1/azure-pipelines/sh/hms.sh new file mode 100755 index 00000000..531b57a0 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/hms.sh @@ -0,0 +1,49 @@ +#!/bin/bash -e +if [ "$#" -ne 7 ]; then + echo "Usage: $0 HMS_JDBC_DRIVER HMS_JDBC_URL HMS_JDBC_USER HMS_JDBC_PASSWORD HMS_STORAGE_ACCOUNT HMS_STORAGE_ACCOUNT_SHARED_KEY HMS_STORAGE_ACCOUNT_CONTAINER" + exit 1 +fi + +source env.sh +if [ -z "${USER}" ]; then + echo "ERROR: 
USER is not defined." + exit 1 +fi +if [ -z "${HADOOP_HOME}" ]; then + echo "ERROR: HADOOP_HOME is not defined." + exit 1 +fi +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +export HMS_JDBC_DRIVER=$1 +export HMS_JDBC_URL=$2 +export HMS_JDBC_USER=$3 +export HMS_JDBC_PASSWORD=$4 +export HMS_STORAGE_ACCOUNT=$5 +export HMS_STORAGE_ACCOUNT_SHARED_KEY=$6 +export HMS_STORAGE_ACCOUNT_CONTAINER=$7 +export HIVE_HOME=/home/$USER/hive + +# Install Hive (needed for HMS) +rm -rf apache-hive-2.3.9-bin +wget -nv -N https://downloads.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz +tar -xzf apache-hive-2.3.9-bin.tar.gz +ln -sf $(pwd)/apache-hive-2.3.9-bin $HIVE_HOME + +# Configure HMS +envsubst < "hive-site.xml.template" > "$HIVE_HOME/conf/hive-site.xml" +ln -sf $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/hive-site.xml + +# Copy Azure dependencies to Hive classpath +cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-azure* $HIVE_HOME/lib/ + +# Install MSSQL driver +wget -nv -N https://repo1.maven.org/maven2/com/microsoft/sqlserver/mssql-jdbc/6.2.1.jre8/mssql-jdbc-6.2.1.jre8.jar +ln -sf $(pwd)/mssql-jdbc-6.2.1.jre8.jar $SPARK_HOME/jars/mssql-jdbc.jar + +# Push to environment +echo "export HIVE_HOME=${HIVE_HOME}" >> env.sh +echo "source $(pwd)/env.sh" >> ~/.bashrc diff --git a/run/spark-3.3.1/azure-pipelines/sh/hudi-0.12.2.sh b/run/spark-3.3.1/azure-pipelines/sh/hudi-0.12.2.sh new file mode 100755 index 00000000..7c9166c5 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/hudi-0.12.2.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +wget -nv -N https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark3.3-bundle_2.12/0.12.2/hudi-spark3.3-bundle_2.12-0.12.2.jar + +ln -sf $(pwd)/hudi-spark3.3-bundle_2.12-0.12.2.jar $SPARK_HOME/jars/hudi-spark-bundle.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/iceberg-1.1.0.sh b/run/spark-3.3.1/azure-pipelines/sh/iceberg-1.1.0.sh new file mode 100755 index 00000000..61d6c4d5 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/iceberg-1.1.0.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +wget -nv -N https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.1.0/iceberg-spark-runtime-3.3_2.12-1.1.0.jar + +ln -sf $(pwd)/iceberg-spark-runtime-3.3_2.12-1.1.0.jar $SPARK_HOME/jars/iceberg-spark-runtime.jar diff --git a/run/spark-3.3.1/azure-pipelines/sh/init.sh b/run/spark-3.3.1/azure-pipelines/sh/init.sh new file mode 100755 index 00000000..282753a5 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/init.sh @@ -0,0 +1,60 @@ +#!/bin/bash -e +if [ "$#" -ne 3 ]; then + echo "Usage: $0 SPARK_MASTER_HOST DATA_STORAGE_ACCOUNT DATA_STORAGE_ACCOUNT_SHARED_KEY" + exit 1 +fi + +if [ -z "${USER}" ]; then + echo "ERROR: USER is not defined." 
+ exit 1 +fi + +export SPARK_MASTER_HOST=$1 +export SPARK_HOME=/home/$USER/spark +export HADOOP_HOME=/home/$USER/hadoop +export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64 +export DATA_STORAGE_ACCOUNT=$2 +export DATA_STORAGE_ACCOUNT_SHARED_KEY=$3 + +# Update dependencies and install packages +sudo apt update -y +sudo apt install -y openjdk-8-jdk wget + +# Install Hadoop +rm -rf hadoop-3.3.1 +wget -nv -N https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz +tar -xzf hadoop-3.3.1.tar.gz +ln -sf $(pwd)/hadoop-3.3.1 $HADOOP_HOME + +# Install Spark +rm -rf spark-3.3.1-bin-hadoop3 +wget -nv -N https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz +tar -xf spark-3.3.1-bin-hadoop3.tgz +ln -sf $(pwd)/spark-3.3.1-bin-hadoop3 $SPARK_HOME + +# Configure Spark +sudo mkdir -p /opt/spark-events +sudo chown $USER:$USER /opt/spark-events/ + +cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh +cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf + +envsubst < "spark-defaults.conf.template" > "$SPARK_HOME/conf/spark-defaults.conf" + +envsubst < "spark-env.sh.template" > "$SPARK_HOME/conf/spark-env.sh" + +sudo mkdir -p /mnt/local_resource/ +sudo mkdir -p /mnt/local_resource/data/ +sudo chown $USER:$USER /mnt/local_resource/data +sudo mkdir -p /mnt/local_resource/tmp/ +sudo chown $USER:$USER /mnt/local_resource/tmp + +# Copy Azure dependencies to Spark classpath +cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-azure* $SPARK_HOME/jars/ + +# Push to environment +echo "export HADOOP_HOME=${HADOOP_HOME} +export SPARK_HOME=${SPARK_HOME} +export JAVA_HOME=${JAVA_HOME} +export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin" >> env.sh +echo "source $(pwd)/env.sh" >> ~/.bashrc diff --git a/run/spark-3.3.1/azure-pipelines/sh/spark-defaults.conf.template b/run/spark-3.3.1/azure-pipelines/sh/spark-defaults.conf.template new file mode 100644 index 00000000..67909343 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/spark-defaults.conf.template @@ -0,0 +1,16 @@ +spark.master spark://${SPARK_MASTER_HOST}:7077 +spark.driver.cores 4 +spark.driver.memory 45992m +spark.executor.cores 7 +spark.executor.memory 11754m +spark.memory.offHeap.enabled true +spark.memory.offHeap.size 36974886912 +spark.eventLog.enabled true +spark.eventLog.dir file:/opt/spark-events +spark.history.fs.logDirectory file:/opt/spark-events +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.kryoserializer.buffer 1024k +spark.kryoserializer.buffer.max 1024m +spark.sql.parquet.compression.codec gzip +spark.hadoop.fs.azure.account.auth.type.${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net SharedKey +spark.hadoop.fs.azure.account.key.${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net ${DATA_STORAGE_ACCOUNT_SHARED_KEY} \ No newline at end of file diff --git a/run/spark-3.3.1/azure-pipelines/sh/spark-env.sh.template b/run/spark-3.3.1/azure-pipelines/sh/spark-env.sh.template new file mode 100644 index 00000000..18ea7d39 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/spark-env.sh.template @@ -0,0 +1,2 @@ +SPARK_MASTER_HOST=$SPARK_MASTER_HOST +JAVA_HOME=$JAVA_HOME \ No newline at end of file diff --git a/run/spark-3.3.1/azure-pipelines/sh/start-cluster.sh b/run/spark-3.3.1/azure-pipelines/sh/start-cluster.sh new file mode 100755 index 00000000..353e0b5f --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/start-cluster.sh @@ -0,0 +1,32 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not 
defined." + exit 1 +fi + +cd $SPARK_HOME + +echo "Starting Spark cluster" +./sbin/start-all.sh + +echo "Starting history server" +./sbin/start-history-server.sh + +echo "Starting thrift server" +if [ "$#" == 0 ]; then + echo "No LST provided" + ./sbin/start-thriftserver.sh +elif [ "$1" == "delta" ]; then + echo "Using delta catalog" + ./sbin/start-thriftserver.sh --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog --conf spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension +elif [ "$1" == "iceberg" ]; then + echo "Using iceberg catalog" + ./sbin/start-thriftserver.sh --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog --conf spark.sql.catalog.spark_catalog.type=hive --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +elif [ "$1" == "hudi" ]; then + echo "Using hudi catalog" + ./sbin/start-thriftserver.sh --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension +else + echo "Invalid LST" + exit 1 +fi diff --git a/run/spark-3.3.1/azure-pipelines/sh/stop-cluster.sh b/run/spark-3.3.1/azure-pipelines/sh/stop-cluster.sh new file mode 100755 index 00000000..68502692 --- /dev/null +++ b/run/spark-3.3.1/azure-pipelines/sh/stop-cluster.sh @@ -0,0 +1,17 @@ +#!/bin/bash -e +source env.sh +if [ -z "${SPARK_HOME}" ]; then + echo "ERROR: SPARK_HOME is not defined." + exit 1 +fi + +cd $SPARK_HOME + +echo "Stopping thrift server" +./sbin/stop-thriftserver.sh + +echo "Stopping history server" +./sbin/stop-history-server.sh + +echo "Stopping spark cluster" +./sbin/stop-all.sh \ No newline at end of file diff --git a/run/spark-3.3.1/config/tpcds/library.yaml b/run/spark-3.3.1/config/tpcds/library.yaml new file mode 100644 index 00000000..23f9200e --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/library.yaml @@ -0,0 +1,529 @@ +# Description: Library +--- +version: 1 +task_templates: +# Create external tables needed for benchmark +- id: setup + files: + - run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql +# Create data maintenance external tables needed for benchmark +- id: setup_data_maintenance + files: + - run/spark-3.3.1/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql + parameter_values_file: run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat +# Create schema and drop existing tables +- id: init + files: + - run/spark-3.3.1/scripts/tpcds/init/init.sql +# Create benchmark tables and load data into them +- id: build + files: + - run/spark-3.3.1/scripts/tpcds/build/1_create_call_center.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_customer.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_customer_address.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_customer_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_date_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_household_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_income_band.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_inventory.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_item.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_promotion.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_reason.sql + - 
run/spark-3.3.1/scripts/tpcds/build/1_create_ship_mode.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_store.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_store_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_store_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_time_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_warehouse.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_web_page.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_web_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_web_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/1_create_web_site.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_call_center.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_page.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_customer_address.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_customer_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_date_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_household_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_income_band.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_inventory.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_item.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_promotion.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_reason.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_ship_mode.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_store.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_store_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_store_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_time_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_warehouse.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_web_page.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_web_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_web_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/2_load_web_site.sql +# Compute statistics for tables +- id: analyze + files: + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_call_center.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_page.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_address.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_date_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_household_demographics.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_income_band.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_inventory.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_item.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_promotion.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_reason.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_ship_mode.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_store.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_time_dim.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_warehouse.sql + - 
run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_page.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_returns.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_sales.sql + - run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_site.sql +# Execution of TPC-DS queries (possibly in a previous point-in-time) +- id: single_user + files: + - run/spark-3.3.1/scripts/tpcds/single_user/query1.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query2.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query3.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query4.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query5.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query6.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query7.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query8.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query9.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query10.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query11.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query12.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query13.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query14.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query15.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query16.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query17.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query18.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query19.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query20.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query21.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query22.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query23.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query24.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query25.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query26.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query27.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query28.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query29.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query30.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query31.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query32.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query33.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query34.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query35.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query36.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query37.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query38.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query39.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query40.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query41.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query42.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query43.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query44.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query45.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query46.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query47.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query48.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query49.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query50.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query51.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query52.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query53.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query54.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query55.sql + - 
run/spark-3.3.1/scripts/tpcds/single_user/query56.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query57.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query58.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query59.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query60.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query61.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query62.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query63.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query64.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query65.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query66.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query67.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query68.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query69.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query70.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query71.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query72.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query73.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query74.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query75.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query76.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query77.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query78.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query79.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query80.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query81.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query82.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query83.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query84.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query85.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query86.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query87.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query88.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query89.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query90.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query91.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query92.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query93.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query94.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query95.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query96.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query97.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query98.sql + - run/spark-3.3.1/scripts/tpcds/single_user/query99.sql + permutation_orders_path: run/auxiliary/tpcds/single_user/permutation_orders/ + supports_time_travel: true +# Execution of TPC-DS data maintenance queries (Delta) +- id: data_maintenance_delta + files: + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-merge.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-merge.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-merge.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-merge.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_I.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WS.sql + parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat +# Execution of TPC-DS data 
maintenance queries (Iceberg) +- id: data_maintenance_iceberg + files: + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_I.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WS.sql + parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat +# Execution of TPC-DS data maintenance queries (Hudi) +- id: data_maintenance_hudi + files: + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-mixed.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-mixed.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-mixed.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-mixed.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_I.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SS.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WR.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WS.sql + parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat +# Execution of optimize on all benchmark tables (Delta) +- id: optimize_delta + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-delta.sql +# Execution of optimize on all benchmark tables but splitting optimization +# of partitioned tables into batches by relying on dependent task executor (Delta) +- id: 
optimize_split_delta + custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-delta.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-delta.sql +# Execution of optimize on all benchmark tables (Iceberg) +- id: optimize_iceberg + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-iceberg.sql + - 
run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-iceberg.sql +# Execution of optimize on all benchmark tables but splitting optimization +# of partitioned tables into batches by relying on dependent task executor (Iceberg) +- id: optimize_split_iceberg + custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-iceberg.sql + - 
run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-iceberg.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-iceberg.sql +# Execution of optimize on all benchmark tables (Hudi) +- id: optimize_hudi + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-hudi.sql +# Execution of optimize on all benchmark tables but splitting optimization +# of partitioned tables into batches by relying on dependent task executor (Hudi) +- id: optimize_split_hudi + custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor + files: + - run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-hudi.sql + - 
run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_item-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_reason-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_store-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-hudi.sql + - run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-hudi.sql +# Execution of dependent TPC-DS data maintenance queries +- id: data_maintenance_dependent + custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor + files: + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_1.sql + - 
run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_2.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_insert.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_1.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_2.sql + - 
run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_3.sql + - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_insert.sql + parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat \ No newline at end of file diff --git a/run/spark-3.3.1/config/tpcds/setup_experiment.yaml b/run/spark-3.3.1/config/tpcds/setup_experiment.yaml new file mode 100644 index 00000000..d122811f --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/setup_experiment.yaml @@ -0,0 +1,32 @@ +# Description: Setup experiment +--- +version: 1 +id: setup_experiment +phases: +- id: setup + sessions: + - tasks: + - template_id: setup +- id: setup_data_maintenance + sessions: + - tasks: + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance diff --git a/src/main/resources/config/spark/tpcds/w0_tpcds-delta.yaml b/run/spark-3.3.1/config/tpcds/w0_tpcds-delta-2.2.0.yaml similarity index 98% rename from src/main/resources/config/spark/tpcds/w0_tpcds-delta.yaml rename to run/spark-3.3.1/config/tpcds/w0_tpcds-delta-2.2.0.yaml index 8c55b511..dd975408 100644 --- a/src/main/resources/config/spark/tpcds/w0_tpcds-delta.yaml +++ b/run/spark-3.3.1/config/tpcds/w0_tpcds-delta-2.2.0.yaml @@ -1,7 +1,7 @@ # Description: W0: Original TPC-DS sequence --- version: 1 -id: w0_tpcds_delta +id: w0_tpcds phases: - id: setup sessions: diff --git a/src/main/resources/config/spark/tpcds/w0_tpcds-hudi.yaml b/run/spark-3.3.1/config/tpcds/w0_tpcds-hudi-0.12.2.yaml similarity index 98% rename from src/main/resources/config/spark/tpcds/w0_tpcds-hudi.yaml rename to run/spark-3.3.1/config/tpcds/w0_tpcds-hudi-0.12.2.yaml index d4508627..0e81b4fd 100644 --- a/src/main/resources/config/spark/tpcds/w0_tpcds-hudi.yaml +++ b/run/spark-3.3.1/config/tpcds/w0_tpcds-hudi-0.12.2.yaml @@ -1,7 +1,7 @@ # Description: W0: Original TPC-DS sequence --- version: 1 -id: w0_tpcds_hudi +id: w0_tpcds phases: - id: setup sessions: diff --git a/src/main/resources/config/spark/tpcds/w0_tpcds-iceberg.yaml b/run/spark-3.3.1/config/tpcds/w0_tpcds-iceberg-1.1.0.yaml similarity index 98% rename from src/main/resources/config/spark/tpcds/w0_tpcds-iceberg.yaml rename to run/spark-3.3.1/config/tpcds/w0_tpcds-iceberg-1.1.0.yaml index 4fd0b4f0..ab43a8ef 100644 --- a/src/main/resources/config/spark/tpcds/w0_tpcds-iceberg.yaml +++ b/run/spark-3.3.1/config/tpcds/w0_tpcds-iceberg-1.1.0.yaml @@ -1,7 +1,7 @@ # Description: W0: Original TPC-DS sequence --- version: 1 -id: w0_tpcds_iceberg +id: w0_tpcds phases: - id: setup sessions: diff --git a/src/main/resources/config/spark/tpcds/wp1_longevity.yaml b/run/spark-3.3.1/config/tpcds/wp1_longevity-delta-2.2.0.yaml similarity index 70% rename from src/main/resources/config/spark/tpcds/wp1_longevity.yaml rename to run/spark-3.3.1/config/tpcds/wp1_longevity-delta-2.2.0.yaml index 
f12d1d63..b0498bce 100644 --- a/src/main/resources/config/spark/tpcds/wp1_longevity.yaml +++ b/run/spark-3.3.1/config/tpcds/wp1_longevity-delta-2.2.0.yaml @@ -3,23 +3,6 @@ version: 1 id: wp1_longevity phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: diff --git a/run/spark-3.3.1/config/tpcds/wp1_longevity-hudi-0.12.2.yaml b/run/spark-3.3.1/config/tpcds/wp1_longevity-hudi-0.12.2.yaml new file mode 100644 index 00000000..88c784c7 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp1_longevity-hudi-0.12.2.yaml @@ -0,0 +1,65 @@ +# Description: WP1: Longevity +--- +version: 1 +id: wp1_longevity +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build + replace_regex: + - pattern: '(?i)varchar\(.*\)|char\(.*\)' + replacement: 'string' +- id: single_user_1 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_2 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_2 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_3 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_4 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_4 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_5 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_5 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_6 + sessions: + - tasks: + - template_id: single_user diff --git a/run/spark-3.3.1/config/tpcds/wp1_longevity-iceberg-1.1.0.yaml b/run/spark-3.3.1/config/tpcds/wp1_longevity-iceberg-1.1.0.yaml new file mode 100644 index 00000000..721e3474 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp1_longevity-iceberg-1.1.0.yaml @@ -0,0 +1,62 @@ +# Description: WP1: Longevity +--- +version: 1 +id: wp1_longevity +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build +- id: single_user_1 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_2 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_2 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_3 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_4 + sessions: + - tasks: + - template_id: single_user +- id: 
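Note on the build-phase rewrite above: the Hudi workload files in this patch attach a replace_regex rule to the build task, while the Delta and Iceberg variants do not. The rule rewrites fixed-width character types in the shared TPC-DS build scripts on the fly; for example, a column declared as char(20) or varchar(100) in the DDL is created as string instead, which lets all three table formats share a single set of build scripts.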
data_maintenance_4 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_5 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_5 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_6 + sessions: + - tasks: + - template_id: single_user diff --git a/src/main/resources/config/spark/tpcds/wp1_longevity_trickle_1k_batches.yaml b/run/spark-3.3.1/config/tpcds/wp1_longevity_trickle_1k_batches.yaml similarity index 97% rename from src/main/resources/config/spark/tpcds/wp1_longevity_trickle_1k_batches.yaml rename to run/spark-3.3.1/config/tpcds/wp1_longevity_trickle_1k_batches.yaml index bc309edb..7d81df86 100644 --- a/src/main/resources/config/spark/tpcds/wp1_longevity_trickle_1k_batches.yaml +++ b/run/spark-3.3.1/config/tpcds/wp1_longevity_trickle_1k_batches.yaml @@ -1,7 +1,7 @@ # Description: WP1: Longevity --- version: 1 -id: wp1_longevity +id: wp1_longevity_trickle phases: - id: setup sessions: @@ -95,7 +95,7 @@ phases: - id: data_maintenance_dependent_5 sessions: - tasks: - - template_id: template_id: data_maintenance_dependent + - template_id: data_maintenance_dependent custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor task_executor_arguments: dependent_task_batch_size: 1000 diff --git a/src/main/resources/config/spark/tpcds/wp2_resilience.yaml b/run/spark-3.3.1/config/tpcds/wp2_resilience-delta-2.2.0.yaml similarity index 71% rename from src/main/resources/config/spark/tpcds/wp2_resilience.yaml rename to run/spark-3.3.1/config/tpcds/wp2_resilience-delta-2.2.0.yaml index 9ed97a4c..86f38527 100644 --- a/src/main/resources/config/spark/tpcds/wp2_resilience.yaml +++ b/run/spark-3.3.1/config/tpcds/wp2_resilience-delta-2.2.0.yaml @@ -3,25 +3,6 @@ version: 1 id: wp2_resilience phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: diff --git a/run/spark-3.3.1/config/tpcds/wp2_resilience-hudi-0.12.2.yaml b/run/spark-3.3.1/config/tpcds/wp2_resilience-hudi-0.12.2.yaml new file mode 100644 index 00000000..ff73de34 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp2_resilience-hudi-0.12.2.yaml @@ -0,0 +1,77 @@ +# Description: WP2: Resilience +--- +version: 1 +id: wp2_resilience +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build + replace_regex: + - pattern: '(?i)varchar\(.*\)|char\(.*\)' + replacement: 'string' +- id: single_user_1 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_2 + sessions: + - tasks: + - template_id: single_user +- id: optimize_1 + sessions: + - tasks: + - template_id: optimize_hudi +- id: single_user_2o + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_2 + sessions: + - tasks: 
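Note on the dependent task executor: several templates in this patch (the optimize_split_* and data_maintenance_dependent templates) set custom_task_executor to com.microsoft.lst_bench.task.custom.DependentTaskExecutor and order their files so that a value-producing script precedes the scripts that consume its output (e.g., o_store_sales_SELECT.sql before o_store_sales_IN-*.sql, or DF_SS_1.sql before DF_SS_delete.sql). The wp1_longevity_trickle_1k_batches workload additionally passes task_executor_arguments with dependent_task_batch_size: 1000, which, going by its name, bounds how many dependent values are fed into each consumer statement per batch. A minimal sketch of the relevant task configuration, assembled only from settings visible in this patch (the phase id is illustrative; 1000 is the batch size the trickle workload uses):

- id: data_maintenance_dependent_1
  sessions:
  - tasks:
    - template_id: data_maintenance_dependent
      custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
      task_executor_arguments:
        dependent_task_batch_size: 1000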
+ - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_3 + sessions: + - tasks: + - template_id: single_user +- id: optimize_2 + sessions: + - tasks: + - template_id: optimize_hudi +- id: single_user_3o + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_4 + sessions: + - tasks: + - template_id: single_user +- id: optimize_3 + sessions: + - tasks: + - template_id: optimize_hudi +- id: single_user_4o + sessions: + - tasks: + - template_id: single_user diff --git a/run/spark-3.3.1/config/tpcds/wp2_resilience-iceberg-1.1.0.yaml b/run/spark-3.3.1/config/tpcds/wp2_resilience-iceberg-1.1.0.yaml new file mode 100644 index 00000000..974730b5 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp2_resilience-iceberg-1.1.0.yaml @@ -0,0 +1,74 @@ +# Description: WP2: Resilience +--- +version: 1 +id: wp2_resilience +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build +- id: single_user_1 + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_2 + sessions: + - tasks: + - template_id: single_user +- id: optimize_1 + sessions: + - tasks: + - template_id: optimize_iceberg +- id: single_user_2o + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_2 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_3 + sessions: + - tasks: + - template_id: single_user +- id: optimize_2 + sessions: + - tasks: + - template_id: optimize_iceberg +- id: single_user_3o + sessions: + - tasks: + - template_id: single_user +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_4 + sessions: + - tasks: + - template_id: single_user +- id: optimize_3 + sessions: + - tasks: + - template_id: optimize_iceberg +- id: single_user_4o + sessions: + - tasks: + - template_id: single_user diff --git a/src/main/resources/config/spark/tpcds/wp3_rw_concurrency.yaml b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-delta-2.2.0.yaml similarity index 69% rename from src/main/resources/config/spark/tpcds/wp3_rw_concurrency.yaml rename to run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-delta-2.2.0.yaml index 28b93990..f84b48bd 100644 --- a/src/main/resources/config/spark/tpcds/wp3_rw_concurrency.yaml +++ b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-delta-2.2.0.yaml @@ -3,25 +3,6 @@ version: 1 id: wp3_rw_concurrency phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: 
setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: diff --git a/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-hudi-0.12.2.yaml b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-hudi-0.12.2.yaml new file mode 100644 index 00000000..c5934f51 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-hudi-0.12.2.yaml @@ -0,0 +1,61 @@ +# Description: WP3: R/W concurrency +--- +version: 1 +id: wp3_rw_concurrency +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build + replace_regex: + - pattern: '(?i)varchar\(.*\)|char\(.*\)' + replacement: 'string' +- id: single_user_1_data_maintenance_1 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_2_optimize_1 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_hudi +- id: single_user_2o_data_maintenance_2 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_3_optimize_2 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_hudi +- id: single_user_3o_data_maintenance_3 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_4_optimize_3 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_hudi diff --git a/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-iceberg-1.1.0.yaml b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-iceberg-1.1.0.yaml new file mode 100644 index 00000000..c0be11da --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-iceberg-1.1.0.yaml @@ -0,0 +1,58 @@ +# Description: WP3: R/W concurrency +--- +version: 1 +id: wp3_rw_concurrency +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build +- id: single_user_1_data_maintenance_1 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_2_optimize_1 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_iceberg +- id: single_user_2o_data_maintenance_2 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_3_optimize_2 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_iceberg +- id: single_user_3o_data_maintenance_3 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: 
data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_4_optimize_3 + sessions: + - tasks: + - template_id: single_user + - tasks: + - template_id: optimize_iceberg diff --git a/src/main/resources/config/spark/tpcds/wp3_rw_concurrency_multi.yaml b/run/spark-3.3.1/config/tpcds/wp3_rw_concurrency_multi-delta-2.2.0.yaml similarity index 100% rename from src/main/resources/config/spark/tpcds/wp3_rw_concurrency_multi.yaml rename to run/spark-3.3.1/config/tpcds/wp3_rw_concurrency_multi-delta-2.2.0.yaml diff --git a/src/main/resources/config/spark/tpcds/wp4_time_travel.yaml b/run/spark-3.3.1/config/tpcds/wp4_time_travel-delta-2.2.0.yaml similarity index 80% rename from src/main/resources/config/spark/tpcds/wp4_time_travel.yaml rename to run/spark-3.3.1/config/tpcds/wp4_time_travel-delta-2.2.0.yaml index 0b6c186b..64027647 100644 --- a/src/main/resources/config/spark/tpcds/wp4_time_travel.yaml +++ b/run/spark-3.3.1/config/tpcds/wp4_time_travel-delta-2.2.0.yaml @@ -3,21 +3,6 @@ version: 1 id: wp4_time_travel phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: diff --git a/run/spark-3.3.1/config/tpcds/wp4_time_travel-hudi-0.12.2.yaml b/run/spark-3.3.1/config/tpcds/wp4_time_travel-hudi-0.12.2.yaml new file mode 100644 index 00000000..b0d7c545 --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp4_time_travel-hudi-0.12.2.yaml @@ -0,0 +1,86 @@ +# Description: WP4: Time travel +--- +version: 1 +id: wp4_time_travel +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build + replace_regex: + - pattern: '(?i)varchar\(.*\)|char\(.*\)' + replacement: 'string' +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_2_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_2 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_3_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_3_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_4_2 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_2 +- id: single_user_4_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_4_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_4 + sessions: + - tasks: + - template_id: data_maintenance_hudi + - template_id: data_maintenance_hudi +- id: single_user_5_3 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_3 +- id: single_user_5_2 + sessions: + - tasks: + - template_id: single_user + 
time_travel_phase_id: data_maintenance_2 +- id: single_user_5_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_5_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build diff --git a/run/spark-3.3.1/config/tpcds/wp4_time_travel-iceberg-1.1.0.yaml b/run/spark-3.3.1/config/tpcds/wp4_time_travel-iceberg-1.1.0.yaml new file mode 100644 index 00000000..0e91ad7f --- /dev/null +++ b/run/spark-3.3.1/config/tpcds/wp4_time_travel-iceberg-1.1.0.yaml @@ -0,0 +1,83 @@ +# Description: WP4: Time travel +--- +version: 1 +id: wp4_time_travel +phases: +- id: init + sessions: + - tasks: + - template_id: init +- id: build + sessions: + - tasks: + - template_id: build +- id: data_maintenance_1 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_2_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_2 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_3_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_3_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_3 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_4_2 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_2 +- id: single_user_4_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_4_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build +- id: data_maintenance_4 + sessions: + - tasks: + - template_id: data_maintenance_iceberg + - template_id: data_maintenance_iceberg +- id: single_user_5_3 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_3 +- id: single_user_5_2 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_2 +- id: single_user_5_1 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: data_maintenance_1 +- id: single_user_5_0 + sessions: + - tasks: + - template_id: single_user + time_travel_phase_id: build diff --git a/run/spark-3.3.1/config/tpch/library.yaml b/run/spark-3.3.1/config/tpch/library.yaml new file mode 100644 index 00000000..6efa2040 --- /dev/null +++ b/run/spark-3.3.1/config/tpch/library.yaml @@ -0,0 +1,87 @@ +# Description: Tasks Library +--- +version: 1 +task_templates: +# Create external tables needed for benchmark +- id: setup + files: + - run/spark-3.3.1/scripts/tpch/setup/ddl-external-tables.sql +# Create data maintenance external tables needed for benchmark +- id: setup_data_maintenance + files: + - run/spark-3.3.1/scripts/tpch/setup_data_maintenance/ddl-external-tables-refresh.sql + parameter_values_file: run/auxiliary/tpch/setup_data_maintenance/parameter_values.dat +# Create schema and drop existing tables +- id: init + files: + - run/spark-3.3.1/scripts/tpch/init/init.sql +# Create benchmark tables and load data into them +- id: build + files: + - run/spark-3.3.1/scripts/tpch/build/1_create_customer.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_lineitem.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_nation.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_orders.sql + - 
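Note on time travel: in the wp4_time_travel workloads above, each single_user_N_M phase re-runs the query set with time_travel_phase_id pointing at an earlier phase (build, data_maintenance_1, and so on), so the queries are evidently evaluated against the table snapshot left by that phase. Read single_user_5_2, for example, as "fifth query round, querying the state as of the end of data_maintenance_2".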
run/spark-3.3.1/scripts/tpch/build/1_create_part.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_partsupp.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_region.sql + - run/spark-3.3.1/scripts/tpch/build/1_create_supplier.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_customer.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_lineitem.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_nation.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_orders.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_part.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_partsupp.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_region.sql + - run/spark-3.3.1/scripts/tpch/build/2_load_supplier.sql +# Compute statistics for tables +- id: analyze + files: + - run/spark-3.3.1/scripts/tpch/build/3_analyze_customer.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_lineitem.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_nation.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_orders.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_part.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_partsupp.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_region.sql + - run/spark-3.3.1/scripts/tpch/build/3_analyze_supplier.sql +# Execution of TPC-H queries +- id: single_user + files: + - run/spark-3.3.1/scripts/tpch/single_user/query1.sql + - run/spark-3.3.1/scripts/tpch/single_user/query2.sql + - run/spark-3.3.1/scripts/tpch/single_user/query3.sql + - run/spark-3.3.1/scripts/tpch/single_user/query4.sql + - run/spark-3.3.1/scripts/tpch/single_user/query5.sql + - run/spark-3.3.1/scripts/tpch/single_user/query6.sql + - run/spark-3.3.1/scripts/tpch/single_user/query7.sql + - run/spark-3.3.1/scripts/tpch/single_user/query8.sql + - run/spark-3.3.1/scripts/tpch/single_user/query9.sql + - run/spark-3.3.1/scripts/tpch/single_user/query10.sql + - run/spark-3.3.1/scripts/tpch/single_user/query11.sql + - run/spark-3.3.1/scripts/tpch/single_user/query12.sql + - run/spark-3.3.1/scripts/tpch/single_user/query13.sql + - run/spark-3.3.1/scripts/tpch/single_user/query14.sql + - run/spark-3.3.1/scripts/tpch/single_user/query15.sql + - run/spark-3.3.1/scripts/tpch/single_user/query16.sql + - run/spark-3.3.1/scripts/tpch/single_user/query17.sql + - run/spark-3.3.1/scripts/tpch/single_user/query18.sql + - run/spark-3.3.1/scripts/tpch/single_user/query19.sql + - run/spark-3.3.1/scripts/tpch/single_user/query20.sql + - run/spark-3.3.1/scripts/tpch/single_user/query21.sql + - run/spark-3.3.1/scripts/tpch/single_user/query22.sql +# Execution of RF1 TPC-H data maintenance queries +- id: data_maintenance_1 + files: + - run/spark-3.3.1/scripts/tpch/data_maintenance/RF1.sql + parameter_values_file: run/auxiliary/tpch/data_maintenance/parameter_values.dat +# Execution of RF2 TPC-H data maintenance queries (using MERGE) +- id: data_maintenance_2_merge + files: + - run/spark-3.3.1/scripts/tpch/data_maintenance/RF2-merge.sql + parameter_values_file: run/auxiliary/tpch/data_maintenance/parameter_values.dat +# Execution of RF2 TPC-H data maintenance queries (using DELETE) +- id: data_maintenance_2_delete + files: + - run/spark-3.3.1/scripts/tpch/data_maintenance/RF2.sql + parameter_values_file: run/auxiliary/tpch/data_maintenance/parameter_values.dat diff --git a/src/main/resources/config/spark/tpch/w0_tpch-delta.yaml b/run/spark-3.3.1/config/tpch/w0_tpch-delta.yaml similarity index 100% rename from src/main/resources/config/spark/tpch/w0_tpch-delta.yaml rename to run/spark-3.3.1/config/tpch/w0_tpch-delta.yaml diff --git 
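Note on the TPC-H library: it mirrors the TPC-DS one, with setup, setup_data_maintenance, init, build, analyze, and single_user templates, plus three refresh variants: RF1 (the insert refresh) and RF2 implemented either with MERGE or with DELETE. A minimal sketch of a workload that strings these templates together, assuming the same phases/sessions/tasks schema as the w0/wp* workload files in this patch (the id and phase names below are illustrative and not files added by this patch):

---
version: 1
id: w0_tpch_sketch
phases:
- id: init
  sessions:
  - tasks:
    - template_id: init
- id: build
  sessions:
  - tasks:
    - template_id: build
    - template_id: analyze
- id: single_user_1
  sessions:
  - tasks:
    - template_id: single_user
- id: data_maintenance_1
  sessions:
  - tasks:
    - template_id: data_maintenance_1
    - template_id: data_maintenance_2_merge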
a/src/main/resources/config/spark/tpch/w0_tpch-hudi.yaml b/run/spark-3.3.1/config/tpch/w0_tpch-hudi.yaml similarity index 100% rename from src/main/resources/config/spark/tpch/w0_tpch-hudi.yaml rename to run/spark-3.3.1/config/tpch/w0_tpch-hudi.yaml diff --git a/src/main/resources/config/spark/tpch/w0_tpch-iceberg.yaml b/run/spark-3.3.1/config/tpch/w0_tpch-iceberg.yaml similarity index 100% rename from src/main/resources/config/spark/tpch/w0_tpch-iceberg.yaml rename to run/spark-3.3.1/config/tpch/w0_tpch-iceberg.yaml diff --git a/run/spark-3.3.1/results/spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb b/run/spark-3.3.1/results/spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb new file mode 100644 index 00000000..e4b5508f Binary files /dev/null and b/run/spark-3.3.1/results/spark-3.3.1-2024-02-01-8xStandard_E8s_v5.duckdb differ diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_call_center.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_call_center.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_call_center.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_call_center.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_catalog_page.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_catalog_page.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_catalog_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_catalog_returns.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_catalog_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_sales.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_catalog_sales.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_sales.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_customer.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_customer.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_customer.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_customer.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_customer_address.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_customer_address.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_customer_address.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_customer_address.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_customer_demographics.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_customer_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_customer_demographics.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_customer_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_date_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_date_dim.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_date_dim.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_date_dim.sql diff --git 
a/src/main/resources/scripts/tpcds/build/spark/1_create_household_demographics.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_household_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_household_demographics.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_household_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_income_band.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_income_band.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_income_band.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_income_band.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_inventory.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_inventory.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_inventory.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_inventory.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_item.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_item.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_item.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_item.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_promotion.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_promotion.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_promotion.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_promotion.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_reason.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_reason.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_reason.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_reason.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_ship_mode.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_ship_mode.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_ship_mode.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_ship_mode.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_store.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_store.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_store.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_store.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_store_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_store_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_store_returns.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_store_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_store_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_store_sales.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_store_sales.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_store_sales.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_time_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_time_dim.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_time_dim.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_time_dim.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_warehouse.sql 
b/run/spark-3.3.1/scripts/tpcds/build/1_create_warehouse.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_warehouse.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_warehouse.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_web_page.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_web_page.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_web_page.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_web_page.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_web_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_web_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_web_returns.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_web_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_web_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_web_sales.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_web_sales.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_web_sales.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/1_create_web_site.sql b/run/spark-3.3.1/scripts/tpcds/build/1_create_web_site.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/1_create_web_site.sql rename to run/spark-3.3.1/scripts/tpcds/build/1_create_web_site.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_call_center.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_call_center.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_call_center.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_call_center.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_catalog_page.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_page.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_catalog_page.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_page.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_catalog_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_catalog_returns.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_catalog_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_sales.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_catalog_sales.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_sales.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_customer.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_customer.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_customer_address.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_customer_address.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_customer_address.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_customer_address.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_customer_demographics.sql 
b/run/spark-3.3.1/scripts/tpcds/build/2_load_customer_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_customer_demographics.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_customer_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_date_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_date_dim.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_date_dim.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_date_dim.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_household_demographics.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_household_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_household_demographics.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_household_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_income_band.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_income_band.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_income_band.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_income_band.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_inventory.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_inventory.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_inventory.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_inventory.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_item.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_item.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_item.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_item.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_promotion.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_promotion.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_promotion.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_promotion.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_reason.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_reason.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_reason.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_reason.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_ship_mode.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_ship_mode.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_ship_mode.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_ship_mode.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_store.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_store.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_store.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_store.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_store_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_store_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/spark/2_load_store_returns.sql rename to run/spark-3.3.1/scripts/tpcds/build/2_load_store_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_store_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_store_sales.sql similarity index 100% rename from 
src/main/resources/scripts/tpcds/build/spark/2_load_store_sales.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_store_sales.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_time_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_time_dim.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_time_dim.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_time_dim.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_warehouse.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_warehouse.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_warehouse.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_warehouse.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_web_page.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_web_page.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_web_page.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_web_page.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_web_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_web_returns.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_web_returns.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_web_returns.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_web_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_web_sales.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_web_sales.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_web_sales.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/2_load_web_site.sql b/run/spark-3.3.1/scripts/tpcds/build/2_load_web_site.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/2_load_web_site.sql
rename to run/spark-3.3.1/scripts/tpcds/build/2_load_web_site.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_call_center.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_call_center.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_call_center.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_call_center.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_page.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_page.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_page.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_page.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_returns.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_returns.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_returns.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_sales.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_sales.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_catalog_sales.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_customer.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_customer.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_address.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_address.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_address.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_address.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_demographics.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_demographics.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_demographics.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_customer_demographics.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_date_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_date_dim.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_date_dim.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_date_dim.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_household_demographics.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_household_demographics.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_household_demographics.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_household_demographics.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_income_band.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_income_band.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_income_band.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_income_band.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_inventory.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_inventory.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_inventory.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_inventory.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_item.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_item.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_item.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_item.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_promotion.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_promotion.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_promotion.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_promotion.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_reason.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_reason.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_reason.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_reason.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_ship_mode.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_ship_mode.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_ship_mode.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_ship_mode.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_store.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_store.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_store.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_store_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_returns.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_store_returns.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_returns.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_store_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_sales.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_store_sales.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_store_sales.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_time_dim.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_time_dim.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_time_dim.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_time_dim.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_warehouse.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_warehouse.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_warehouse.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_warehouse.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_web_page.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_page.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_web_page.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_page.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_web_returns.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_returns.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_web_returns.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_returns.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_web_sales.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_sales.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_web_sales.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_sales.sql
diff --git a/src/main/resources/scripts/tpcds/build/spark/3_analyze_web_site.sql b/run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_site.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/spark/3_analyze_web_site.sql
rename to run/spark-3.3.1/scripts/tpcds/build/3_analyze_web_site.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-merge.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-merge.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-merge.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-merge.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-mixed.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-mixed.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-mixed.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS-mixed.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_CS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-merge.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-merge.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-merge.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-merge.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-mixed.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-mixed.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-mixed.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I-mixed.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_I.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-merge.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-merge.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-merge.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-merge.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-mixed.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-mixed.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-mixed.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS-mixed.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_SS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-merge.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-merge.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-merge.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-merge.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-mixed.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-mixed.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-mixed.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS-mixed.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/DF_WS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CR.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CR.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CR.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CR.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_I.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_I.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_I.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_I.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SR.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SR.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SR.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SR.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_SS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WR.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WR.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WR.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WR.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WS.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WS.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WS.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_WS.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_delete.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_delete.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql
new file mode 100644
index 00000000..b65f6035
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CS_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.catalog_sales
+WHERE (cs_item_sk, cs_order_number) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql
new file mode 100644
index 00000000..27fed460
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_I_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.inventory
+WHERE (inv_date_sk, inv_item_sk, inv_warehouse_sk) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql
new file mode 100644
index 00000000..b3c8da72
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SR_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.store_returns
+WHERE (sr_item_sk, sr_ticket_number) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql
new file mode 100644
index 00000000..8ff83ef5
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_SS_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.store_sales
+WHERE (ss_item_sk, ss_ticket_number) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql
new file mode 100644
index 00000000..624bf37b
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WR_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.web_returns
+WHERE (wr_item_sk, wr_order_number) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_3.sql
diff --git a/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql
new file mode 100644
index 00000000..540122d0
--- /dev/null
+++ b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_WS_delete.sql
@@ -0,0 +1,2 @@
+DELETE FROM ${catalog}.${database}.web_sales
+WHERE (ws_item_sk, ws_order_number) IN (${multi_values_clause});
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CR_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_CS_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_I_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SR_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_SS_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WR_insert.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_1.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_1.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_1.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_1.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_2.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_2.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_2.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_2.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_3.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_3.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_3.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_3.sql
diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_insert.sql b/run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_insert.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_insert.sql
rename to run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/LF_WS_insert.sql
diff --git a/src/main/resources/scripts/tpcds/init/spark/init.sql b/run/spark-3.3.1/scripts/tpcds/init/init.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/init/spark/init.sql
rename to run/spark-3.3.1/scripts/tpcds/init/init.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_call_center-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_call_center-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_call_center-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_call_center-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_call_center-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_call_center-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_call_center-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_page-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_returns-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_catalog_sales-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_address-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_customer_demographics-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_date_dim-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_household_demographics-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_income_band-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_income_band-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_income_band-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_income_band-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_income_band-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_income_band-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_income_band-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_inventory-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_inventory-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_inventory-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_inventory-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_inventory-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_inventory-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_inventory-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_item-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_item-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_item-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_item-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_item-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_item-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_item-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_item-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_item-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_item-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_item-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_item-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_promotion-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_promotion-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_promotion-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_promotion-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_promotion-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_promotion-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_promotion-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_reason-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_reason-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_reason-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_reason-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_reason-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_reason-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_reason-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_reason-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_reason-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_reason-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_returns-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_store_sales-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_time_dim-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_warehouse-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_page-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_page-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_page-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_page-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_page-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_page-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_page-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_returns-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_sales-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_site-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_site-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_site-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_site-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize/spark/o_web_site-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize/spark/o_web_site-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize/o_web_site-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_IN-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_NULL-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_SELECT.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_IN-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_NULL-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_SELECT.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_inventory_SELECT.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_IN-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_NULL-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_SELECT.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_IN-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-iceberg.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-iceberg.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_NULL-iceberg.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_SELECT.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-delta.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-delta.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-delta.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-hudi.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-hudi.sql
rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-hudi.sql
diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-iceberg.sql
similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-iceberg.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_IN-iceberg.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-delta.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-delta.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-delta.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-hudi.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-hudi.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-hudi.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-iceberg.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-iceberg.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_NULL-iceberg.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_SELECT.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-delta.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-delta.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-delta.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-hudi.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-hudi.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-hudi.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-iceberg.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-iceberg.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_IN-iceberg.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-delta.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-delta.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-delta.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-delta.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-hudi.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-hudi.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-hudi.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-hudi.sql diff --git 
a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-iceberg.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-iceberg.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-iceberg.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_NULL-iceberg.sql diff --git a/src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_SELECT.sql b/run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql similarity index 100% rename from src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_SELECT.sql rename to run/spark-3.3.1/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql diff --git a/src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql b/run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql similarity index 100% rename from src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql rename to run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql diff --git a/src/main/resources/scripts/tpcds/setup_data_maintenance/spark/ddl-external-tables-refresh.sql b/run/spark-3.3.1/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql similarity index 100% rename from src/main/resources/scripts/tpcds/setup_data_maintenance/spark/ddl-external-tables-refresh.sql rename to run/spark-3.3.1/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query1.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query1.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query1.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query1.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query10.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query10.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query10.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query10.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query11.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query11.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query11.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query11.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query12.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query12.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query12.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query12.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query13.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query13.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query13.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query13.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query14.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query14.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query14.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query14.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query15.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query15.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query15.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query15.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query16.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query16.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query16.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query16.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query17.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query17.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query17.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query17.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query18.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query18.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query18.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query18.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query19.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query19.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query19.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query19.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query2.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query2.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query2.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query2.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query20.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query20.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query20.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query20.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query21.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query21.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query21.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query21.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query22.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query22.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query22.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query22.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query23.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query23.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query23.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query23.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query24.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query24.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query24.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query24.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query25.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query25.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query25.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query25.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query26.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query26.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query26.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query26.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query27.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query27.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query27.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query27.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query28.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query28.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query28.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query28.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query29.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query29.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query29.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query29.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query3.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query3.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query3.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query3.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query30.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query30.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query30.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query30.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query31.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query31.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query31.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query31.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query32.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query32.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query32.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query32.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query33.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query33.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query33.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query33.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query34.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query34.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query34.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query34.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query35.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query35.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query35.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query35.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query36.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query36.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query36.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query36.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query37.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query37.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query37.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query37.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query38.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query38.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query38.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query38.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query39.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query39.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query39.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query39.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query4.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query4.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query4.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query4.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query40.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query40.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query40.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query40.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query41.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query41.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query41.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query41.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query42.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query42.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query42.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query42.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query43.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query43.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query43.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query43.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query44.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query44.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query44.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query44.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query45.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query45.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query45.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query45.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query46.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query46.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query46.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query46.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query47.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query47.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query47.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query47.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query48.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query48.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query48.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query48.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query49.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query49.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query49.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query49.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query5.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query5.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query5.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query5.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query50.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query50.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query50.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query50.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query51.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query51.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query51.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query51.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query52.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query52.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query52.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query52.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query53.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query53.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query53.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query53.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query54.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query54.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query54.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query54.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query55.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query55.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query55.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query55.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query56.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query56.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query56.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query56.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query57.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query57.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query57.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query57.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query58.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query58.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query58.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query58.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query59.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query59.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query59.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query59.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query6.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query6.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query6.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query6.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query60.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query60.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query60.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query60.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query61.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query61.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query61.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query61.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query62.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query62.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query62.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query62.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query63.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query63.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query63.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query63.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query64.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query64.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query64.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query64.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query65.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query65.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query65.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query65.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query66.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query66.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query66.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query66.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query67.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query67.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query67.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query67.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query68.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query68.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query68.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query68.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query69.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query69.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query69.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query69.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query7.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query7.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query7.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query7.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query70.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query70.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query70.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query70.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query71.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query71.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query71.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query71.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query72.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query72.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query72.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query72.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query73.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query73.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query73.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query73.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query74.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query74.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query74.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query74.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query75.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query75.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query75.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query75.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query76.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query76.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query76.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query76.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query77.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query77.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query77.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query77.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query78.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query78.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query78.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query78.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query79.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query79.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query79.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query79.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query8.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query8.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query8.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query8.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query80.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query80.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query80.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query80.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query81.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query81.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query81.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query81.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query82.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query82.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query82.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query82.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query83.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query83.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query83.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query83.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query84.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query84.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query84.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query84.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query85.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query85.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query85.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query85.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query86.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query86.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query86.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query86.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query87.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query87.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query87.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query87.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query88.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query88.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query88.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query88.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query89.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query89.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query89.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query89.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query9.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query9.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query9.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query9.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query90.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query90.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query90.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query90.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query91.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query91.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query91.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query91.sql diff --git 
a/src/main/resources/scripts/tpcds/single_user/spark/query92.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query92.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query92.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query92.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query93.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query93.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query93.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query93.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query94.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query94.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query94.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query94.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query95.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query95.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query95.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query95.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query96.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query96.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query96.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query96.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query97.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query97.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query97.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query97.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query98.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query98.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query98.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query98.sql diff --git a/src/main/resources/scripts/tpcds/single_user/spark/query99.sql b/run/spark-3.3.1/scripts/tpcds/single_user/query99.sql similarity index 100% rename from src/main/resources/scripts/tpcds/single_user/spark/query99.sql rename to run/spark-3.3.1/scripts/tpcds/single_user/query99.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_customer.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_customer.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_customer.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_customer.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_lineitem.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_lineitem.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_lineitem.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_lineitem.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_nation.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_nation.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_nation.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_nation.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_orders.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_orders.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_orders.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_orders.sql diff --git 
a/src/main/resources/scripts/tpch/build/spark/1_create_part.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_part.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_part.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_part.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_partsupp.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_partsupp.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_partsupp.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_partsupp.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_region.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_region.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_region.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_region.sql diff --git a/src/main/resources/scripts/tpch/build/spark/1_create_supplier.sql b/run/spark-3.3.1/scripts/tpch/build/1_create_supplier.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/1_create_supplier.sql rename to run/spark-3.3.1/scripts/tpch/build/1_create_supplier.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_customer.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_customer.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_customer.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_customer.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_lineitem.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_lineitem.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_lineitem.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_lineitem.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_nation.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_nation.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_nation.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_nation.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_orders.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_orders.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_orders.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_orders.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_part.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_part.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_part.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_part.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_partsupp.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_partsupp.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_partsupp.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_partsupp.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_region.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_region.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_region.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_region.sql diff --git a/src/main/resources/scripts/tpch/build/spark/2_load_supplier.sql b/run/spark-3.3.1/scripts/tpch/build/2_load_supplier.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/2_load_supplier.sql rename to run/spark-3.3.1/scripts/tpch/build/2_load_supplier.sql diff --git 
a/src/main/resources/scripts/tpch/build/spark/3_analyze_customer.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_customer.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_customer.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_customer.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_lineitem.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_lineitem.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_lineitem.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_lineitem.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_nation.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_nation.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_nation.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_nation.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_orders.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_orders.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_orders.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_orders.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_part.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_part.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_part.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_part.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_partsupp.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_partsupp.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_partsupp.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_partsupp.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_region.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_region.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_region.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_region.sql diff --git a/src/main/resources/scripts/tpch/build/spark/3_analyze_supplier.sql b/run/spark-3.3.1/scripts/tpch/build/3_analyze_supplier.sql similarity index 100% rename from src/main/resources/scripts/tpch/build/spark/3_analyze_supplier.sql rename to run/spark-3.3.1/scripts/tpch/build/3_analyze_supplier.sql diff --git a/src/main/resources/scripts/tpch/data_maintenance/spark/RF1.sql b/run/spark-3.3.1/scripts/tpch/data_maintenance/RF1.sql similarity index 100% rename from src/main/resources/scripts/tpch/data_maintenance/spark/RF1.sql rename to run/spark-3.3.1/scripts/tpch/data_maintenance/RF1.sql diff --git a/src/main/resources/scripts/tpch/data_maintenance/spark/RF2-merge.sql b/run/spark-3.3.1/scripts/tpch/data_maintenance/RF2-merge.sql similarity index 100% rename from src/main/resources/scripts/tpch/data_maintenance/spark/RF2-merge.sql rename to run/spark-3.3.1/scripts/tpch/data_maintenance/RF2-merge.sql diff --git a/src/main/resources/scripts/tpch/data_maintenance/spark/RF2.sql b/run/spark-3.3.1/scripts/tpch/data_maintenance/RF2.sql similarity index 100% rename from src/main/resources/scripts/tpch/data_maintenance/spark/RF2.sql rename to run/spark-3.3.1/scripts/tpch/data_maintenance/RF2.sql diff --git a/src/main/resources/scripts/tpch/init/spark/init.sql b/run/spark-3.3.1/scripts/tpch/init/init.sql similarity index 100% rename from src/main/resources/scripts/tpch/init/spark/init.sql rename to 
run/spark-3.3.1/scripts/tpch/init/init.sql diff --git a/src/main/resources/scripts/tpch/setup/spark/ddl-external-tables.sql b/run/spark-3.3.1/scripts/tpch/setup/ddl-external-tables.sql similarity index 100% rename from src/main/resources/scripts/tpch/setup/spark/ddl-external-tables.sql rename to run/spark-3.3.1/scripts/tpch/setup/ddl-external-tables.sql diff --git a/src/main/resources/scripts/tpch/setup_data_maintenance/spark/ddl-external-tables-refresh.sql b/run/spark-3.3.1/scripts/tpch/setup_data_maintenance/ddl-external-tables-refresh.sql similarity index 100% rename from src/main/resources/scripts/tpch/setup_data_maintenance/spark/ddl-external-tables-refresh.sql rename to run/spark-3.3.1/scripts/tpch/setup_data_maintenance/ddl-external-tables-refresh.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query1.sql b/run/spark-3.3.1/scripts/tpch/single_user/query1.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query1.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query1.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query10.sql b/run/spark-3.3.1/scripts/tpch/single_user/query10.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query10.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query10.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query11.sql b/run/spark-3.3.1/scripts/tpch/single_user/query11.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query11.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query11.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query12.sql b/run/spark-3.3.1/scripts/tpch/single_user/query12.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query12.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query12.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query13.sql b/run/spark-3.3.1/scripts/tpch/single_user/query13.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query13.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query13.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query14.sql b/run/spark-3.3.1/scripts/tpch/single_user/query14.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query14.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query14.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query15.sql b/run/spark-3.3.1/scripts/tpch/single_user/query15.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query15.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query15.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query16.sql b/run/spark-3.3.1/scripts/tpch/single_user/query16.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query16.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query16.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query17.sql b/run/spark-3.3.1/scripts/tpch/single_user/query17.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query17.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query17.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query18.sql b/run/spark-3.3.1/scripts/tpch/single_user/query18.sql similarity index 100% rename from 
src/main/resources/scripts/tpch/single_user/spark/query18.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query18.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query19.sql b/run/spark-3.3.1/scripts/tpch/single_user/query19.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query19.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query19.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query2.sql b/run/spark-3.3.1/scripts/tpch/single_user/query2.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query2.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query2.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query20.sql b/run/spark-3.3.1/scripts/tpch/single_user/query20.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query20.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query20.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query21.sql b/run/spark-3.3.1/scripts/tpch/single_user/query21.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query21.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query21.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query22.sql b/run/spark-3.3.1/scripts/tpch/single_user/query22.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query22.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query22.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query3.sql b/run/spark-3.3.1/scripts/tpch/single_user/query3.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query3.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query3.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query4.sql b/run/spark-3.3.1/scripts/tpch/single_user/query4.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query4.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query4.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query5.sql b/run/spark-3.3.1/scripts/tpch/single_user/query5.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query5.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query5.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query6.sql b/run/spark-3.3.1/scripts/tpch/single_user/query6.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query6.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query6.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query7.sql b/run/spark-3.3.1/scripts/tpch/single_user/query7.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query7.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query7.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query8.sql b/run/spark-3.3.1/scripts/tpch/single_user/query8.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query8.sql rename to run/spark-3.3.1/scripts/tpch/single_user/query8.sql diff --git a/src/main/resources/scripts/tpch/single_user/spark/query9.sql b/run/spark-3.3.1/scripts/tpch/single_user/query9.sql similarity index 100% rename from src/main/resources/scripts/tpch/single_user/spark/query9.sql rename to 
run/spark-3.3.1/scripts/tpch/single_user/query9.sql
diff --git a/run/trino-420/azure-pipelines/README.md b/run/trino-420/azure-pipelines/README.md
new file mode 100644
index 00000000..17330460
--- /dev/null
+++ b/run/trino-420/azure-pipelines/README.md
@@ -0,0 +1,56 @@
+
+# Azure Pipelines Deployment for LST-Bench on Trino 420
+This directory contains the tooling needed to execute LST-Bench on Trino 420 with different LSTs using Azure Pipelines. The included tooling consists of:
+- `run-lst-bench.yml`:
+  An Azure Pipelines script designed to deploy Trino and execute LST-Bench.
+- `sh/`:
+  A directory containing shell scripts and engine configuration files that support the deployment of Trino and the execution of experiments.
+- `config/`:
+  A directory with the LST-Bench configuration files needed to execute the experiments included in the results.
+
+## Prerequisites
+- Deployment of the Azure infrastructure needed to run LST-Bench is not automated. As a result, the Azure Pipelines script expects the following setup:
+  - A VM named 'lst-bench-client', connected to the pipeline environment, to run the LST-Bench client.
+  - A VM named 'lst-bench-head', also connected to the pipeline environment, to run the coordinator node of the Trino cluster.
+  - A VMSS cluster that will serve as the Trino worker nodes, deployed within the same VNet as the coordinator node.
+  - An Azure Storage Account accessible by both the VMSS and the coordinator node.
+  - An Azure SQL Database (or SQL Server-flavored RDBMS) that will run the Hive Metastore.
+    The Hive Metastore schema for version 2.3.9 should already be installed in the instance.
+- Prior to running the pipeline, the following variables need to be defined in your Azure Pipeline (an example variables block is shown at the end of this section):
+  - `data_storage_account`: Name of the Azure Blob Storage account where the source data for the experiment is stored.
+  - `data_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the source data for the experiment is stored.
+  - `data_storage_account_container`: Name of the container in the Azure Blob Storage account where the source data for the experiment is stored.
+  - `hms_jdbc_driver`: JDBC driver for the Hive Metastore.
+  - `hms_jdbc_url`: JDBC URL for the Hive Metastore.
+  - `hms_jdbc_user`: Username for the Hive Metastore.
+  - `hms_jdbc_password` (secret): Password for the Hive Metastore.
+  - `hms_storage_account`: Name of the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog (can be the same as the `data_storage_account`).
+  - `hms_storage_account_shared_key` (secret): Shared key for the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog.
+  - `hms_storage_account_container`: Name of the container in the Azure Blob Storage account where the Hive Metastore will store data associated with the catalog.
+- The LSTs to run experiments on can be modified via the pipeline's input parameters, either in the Azure Pipelines YAML file or from the Web UI.
+  Default values are assigned to these parameters.
+  Parameters also include the experiment scale factor, machine type, and cluster size.
+  Note that these parameters are not used to deploy the data or the infrastructure, as this process is not automated in the pipeline.
+  Instead, they are recorded in the experiment telemetry for proper categorization and visualization of the results later on.
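+
+For reference, below is a minimal sketch of how the non-secret variables might be declared directly in the pipeline YAML. All values are hypothetical placeholders; secrets such as shared keys and passwords should instead be defined as secret variables (or in a variable group) from the Azure Pipelines Web UI, since secret values cannot be stored in plain YAML:
+```yaml
+variables:
+  data_storage_account: 'mydatastorage'          # hypothetical storage account name
+  data_storage_account_container: 'tpc-ds'
+  hms_jdbc_driver: 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
+  hms_jdbc_url: 'jdbc:sqlserver://myhms.database.windows.net:1433;databaseName=hms'
+  hms_jdbc_user: 'hmsadmin'
+  hms_storage_account: 'mydatastorage'
+  hms_storage_account_container: 'hms'
+```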
+
+## Additional Notes
+For LST-Bench workloads that include an `optimize` step, particularly those involving partitioned tables, a [custom task](/docs/workloads.md#custom-tasks) is used to execute this step.
+The task divides the `optimize` operation into batches, each containing up to 100 partitions by default (the batch size is configurable via the `dependent_task_batch_size` task executor argument).
+This approach was implemented to address issues where Trino would crash if the optimization step was applied to the entire table at once.
diff --git a/run/trino-420/azure-pipelines/config/connections_config.yaml b/run/trino-420/azure-pipelines/config/connections_config.yaml
new file mode 100644
index 00000000..e73d8451
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/connections_config.yaml
@@ -0,0 +1,9 @@
+# Description: Connections Configuration
+---
+version: 1
+connections:
+- id: trino_0
+  driver: io.trino.jdbc.TrinoDriver
+  url: jdbc:trino://${TRINO_MASTER_HOST}:8080
+  username: admin
+  password: ''
diff --git a/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml b/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml
new file mode 100644
index 00000000..ef8dbd07
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/experiment_config-cow-delta.yaml
@@ -0,0 +1,30 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: "${EXP_NAME}"
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+metadata:
+  system: trino
+  system_version: 420
+  table_format: delta
+  table_format_version: undefined
+  scale_factor: "${EXP_SCALE_FACTOR}"
+  mode: cow
+  machine: "${EXP_MACHINE}"
+  cluster_size: "${EXP_CLUSTER_SIZE}"
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values:
+  external_catalog: hive
+  external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}"
+  external_table_format: textfile
+  external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/"
+  external_options_suffix: ''
+  external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1"
+  catalog: delta
+  database: "delta_${EXP_NAME}"
+  table_format: delta
+  data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/delta/sf_${EXP_SCALE_FACTOR}/'
+  options_suffix: ''
+  tblproperties_suffix: ''
+  partition_spec_keyword: 'partitioned_by'
diff --git a/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml b/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml
new file mode 100644
index 00000000..502f7fa8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/config/experiment_config-mor-iceberg.yaml
@@ -0,0 +1,30 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: "${EXP_NAME}"
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+metadata:
+  system: trino
+  system_version: 420
+  table_format: iceberg
+  table_format_version: undefined
+  scale_factor: "${EXP_SCALE_FACTOR}"
+  mode: mor
+  machine: "${EXP_MACHINE}"
+  cluster_size: "${EXP_CLUSTER_SIZE}"
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values: + external_catalog: hive + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: textfile + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: '' + external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1" + catalog: iceberg + database: "iceberg_${EXP_NAME}" + table_format: iceberg + data_path: 'abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/run/iceberg/sf_${EXP_SCALE_FACTOR}/' + options_suffix: '' + tblproperties_suffix: '' + partition_spec_keyword: 'partitioning' diff --git a/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml b/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml new file mode 100644 index 00000000..b164151b --- /dev/null +++ b/run/trino-420/azure-pipelines/config/setup_experiment_config.yaml @@ -0,0 +1,20 @@ +# Description: Experiment Configuration +--- +version: 1 +id: setup_experiment +repetitions: 1 +# Metadata accepts any key-value that we want to register together with the experiment run. +metadata: + system: trino + system_version: 420 + scale_factor: "${EXP_SCALE_FACTOR}" + machine: "${EXP_MACHINE}" + cluster_size: "${EXP_CLUSTER_SIZE}" +# The following parameter values will be used to replace the variables in the workload statements. +parameter_values: + external_catalog: hive + external_database: "external_tpcds_sf_${EXP_SCALE_FACTOR}" + external_table_format: textfile + external_data_path: "abfss://${DATA_STORAGE_ACCOUNT_CONTAINER}@${DATA_STORAGE_ACCOUNT}.dfs.core.windows.net/tpc-ds/csv/sf_${EXP_SCALE_FACTOR}/" + external_options_suffix: '' + external_tblproperties_suffix: ", textfile_field_separator=',', null_format='', skip_header_line_count=1" diff --git a/run/trino-420/azure-pipelines/config/telemetry_config.yaml b/run/trino-420/azure-pipelines/config/telemetry_config.yaml new file mode 100644 index 00000000..baa9e63c --- /dev/null +++ b/run/trino-420/azure-pipelines/config/telemetry_config.yaml @@ -0,0 +1,13 @@ +# Description: Telemetry Configuration +--- +version: 1 +connection: + id: duckdb_0 + driver: org.duckdb.DuckDBDriver + url: jdbc:duckdb:./telemetry-trino-420 +execute_ddl: true +ddl_file: 'src/main/resources/scripts/logging/duckdb/ddl.sql' +insert_file: 'src/main/resources/scripts/logging/duckdb/insert.sql' +# The following parameter values will be used to replace the variables in the logging statements. +parameter_values: + data_path: '' \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/run-lst-bench.yml b/run/trino-420/azure-pipelines/run-lst-bench.yml new file mode 100644 index 00000000..6e5e7871 --- /dev/null +++ b/run/trino-420/azure-pipelines/run-lst-bench.yml @@ -0,0 +1,249 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +trigger: none + +parameters: +- name: lsts + type: object + default: + - table_format: "delta" + mode: "cow" + - table_format: "iceberg" + mode: "mor" +- name: workloads + type: object + default: + - "wp1_longevity" + - "wp2_resilience" + - "wp3_rw_concurrency" +- name: exp_scale_factor + type: number + default: 100 +- name: exp_machine + type: string + default: "Standard_E8s_v5" +- name: exp_cluster_size + type: number + default: 8 + +variables: + MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository + MAVEN_OPTS: '-ntp -B -Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)' + EXP_SCALE_FACTOR: ${{ parameters.exp_scale_factor }} + EXP_MACHINE: ${{ parameters.exp_machine }} + EXP_CLUSTER_SIZE: ${{ parameters.exp_cluster_size }} + +stages: +# Build LST-Bench and create artifact to deploy to target VM +- stage: build + jobs: + - job: Build + pool: + vmImage: 'ubuntu-latest' + steps: + - task: Cache@2 + displayName: Cache Maven local repo + inputs: + key: 'maven | "$(Agent.OS)" | **/pom.xml' + restoreKeys: | + maven | "$(Agent.OS)" + maven + path: $(MAVEN_CACHE_FOLDER) + - task: Maven@4 + inputs: + mavenPomFile: 'pom.xml' + options: $(MAVEN_OPTS) + javaHomeOption: 'JDKVersion' + jdkVersionOption: '1.11' + publishJUnitResults: false + goals: 'package -DskipTests -Ptrino-jdbc' + - task: CopyFiles@2 + displayName: 'Copy Artifacts to: $(TargetFolder)' + inputs: + SourceFolder: '$(Build.SourcesDirectory)' + TargetFolder: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/' + - task: PublishPipelineArtifact@1 + inputs: + targetPath: '$(System.DefaultWorkingDirectory)/pipeline-artifacts/' + artifact: lst-bench-0.1-SNAPSHOT + +# Set up engine and deploy LST-Bench +- stage: deploy + jobs: + - deployment: EngineDeploy + displayName: 'Deploying engine' + workspace: + clean: all + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - bash: | + echo 'Deploy engine' + mkdir -p ~/trino-420 + cp $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/run/trino-420/azure-pipelines/sh/* ~/trino-420/ + cd ~/trino-420 + chmod +x ./* + trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + ./init.sh 'true' "${trino_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)" + ./hms.sh "$(hms_jdbc_driver)" "$(hms_jdbc_url)" "$(hms_jdbc_user)" "$(hms_jdbc_password)" "$(hms_storage_account)" "$(hms_storage_account_shared_key)" "$(hms_storage_account_container)" + ./dist-setup.sh + ./dist-exec.sh trino-420 init.sh 'false' "${trino_head_node}" "$(data_storage_account)" "$(data_storage_account_shared_key)" + - deployment: ClientDeploy + displayName: 'Deploying LST-Bench client' + workspace: + clean: all + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + strategy: + runOnce: + deploy: + steps: + - bash: | + echo 'Deploy LST-Bench client' + sudo apt install -y openjdk-11-jdk + mkdir -p ~/lst-bench-0.1-SNAPSHOT + cp -rf $(Pipeline.Workspace)/lst-bench-0.1-SNAPSHOT/* ~/lst-bench-0.1-SNAPSHOT/ + chmod +x ~/lst-bench-0.1-SNAPSHOT/launcher.sh + +# Run LST-Bench (setup external tables) +- stage: setup_experiment + jobs: + - deployment: StartEngine + displayName: "Starting Engine" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + variables: + process.clean: false + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/trino-420 + ./stop-cluster.sh && 
./start-cluster.sh + sleep 20 + trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + echo "##vso[task.setvariable variable=trino_head_node;isOutput=true]${trino_head_node}" + name: engine_start_step + - deployment: RunSetupExperiment + dependsOn: StartEngine + displayName: "Setup Experiment" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + variables: + trino_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.trino_head_node'] ] + timeoutInMinutes: 0 + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/lst-bench-0.1-SNAPSHOT + ./launcher.sh -c run/trino-420/azure-pipelines/config/connections_config.yaml \ + -e run/trino-420/azure-pipelines/config/setup_experiment_config.yaml \ + -t run/trino-420/azure-pipelines/config/telemetry_config.yaml \ + -l run/trino-420/config/tpcds/library.yaml \ + -w run/trino-420/config/tpcds/setup_experiment.yaml + - deployment: StopEngine + dependsOn: RunSetupExperiment + displayName: "Stopping Engine" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/trino-420 + ./stop-cluster.sh + +# Run LST-Bench +- ${{ each lst in parameters.lsts }}: + - ${{ each workload in parameters.workloads }}: + - stage: test_${{ lst.mode }}_${{ lst.table_format }}_${{ workload }} + jobs: + - deployment: StartEngine + displayName: "Starting Engine (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + variables: + process.clean: false + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/trino-420 + ./stop-cluster.sh && ./start-cluster.sh ${{ lst.table_format }} + sleep 20 + trino_head_node=$(ip addr show eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p') + echo "##vso[task.setvariable variable=trino_head_node;isOutput=true]${trino_head_node}" + name: engine_start_step + - deployment: RunExperiment + dependsOn: StartEngine + displayName: "Running Experiment (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-client' + variables: + trino_master_host: $[ dependencies.StartEngine.outputs['deploy_lst-bench-head.engine_start_step.trino_head_node'] ] + timeoutInMinutes: 0 + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/lst-bench-0.1-SNAPSHOT + echo "${{ workload }}" + export EXP_NAME="${{ workload }}" + ./launcher.sh -c run/trino-420/azure-pipelines/config/connections_config.yaml \ + -e run/trino-420/azure-pipelines/config/experiment_config-${{ lst.mode }}-${{ lst.table_format }}.yaml \ + -t run/trino-420/azure-pipelines/config/telemetry_config.yaml \ + -l run/trino-420/config/tpcds/library.yaml \ + -w run/trino-420/config/tpcds/${{ workload }}.yaml + - deployment: StopEngine + dependsOn: RunExperiment + displayName: "Stopping Engine (${{ lst.mode }}, ${{ lst.table_format }}, ${{ workload }})" + environment: + name: 'lst-bench-github' + resourceType: VirtualMachine + resourceName: 'lst-bench-head' + strategy: + runOnce: + deploy: + steps: + - download: none + - bash: | + cd ~/trino-420 + ./stop-cluster.sh diff --git a/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template 
b/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template new file mode 100644 index 00000000..a09f60f8 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/coordinator-config.properties.template @@ -0,0 +1,5 @@ +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=8080 +discovery.uri=http://$TRINO_MASTER_HOST:8080 +query.max-memory=378GB \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/delta.properties.template b/run/trino-420/azure-pipelines/sh/delta.properties.template new file mode 100644 index 00000000..efd7cef8 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/delta.properties.template @@ -0,0 +1,6 @@ +connector.name=delta_lake +hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083 +hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT} +hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY} +delta.max-partitions-per-writer=2500 +delta.compression-codec=GZIP \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/dist-exec.sh b/run/trino-420/azure-pipelines/sh/dist-exec.sh new file mode 100755 index 00000000..bd7c3ca6 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/dist-exec.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e +source env.sh +if [ -z "${HOSTS}" ]; then + echo "ERROR: HOSTS is not defined." + exit 1 +fi + +if [ "$#" -lt 2 ]; then + echo "Error: Please provide at least two input parameters." + exit 1 +fi +deploy_dir=$1 +script_file=$2 + +for node in $HOSTS ; do ssh -t $node "mkdir -p ~/$deploy_dir" ; done +for node in $HOSTS ; do scp *.template $node:~/$deploy_dir ; done +for node in $HOSTS ; do scp $script_file $node:~/$deploy_dir ; done +for node in $HOSTS ; do ssh -t $node "cd ~/$deploy_dir && chmod +x ./$script_file && ./$script_file ${@:3}" ; done diff --git a/run/trino-420/azure-pipelines/sh/dist-setup.sh b/run/trino-420/azure-pipelines/sh/dist-setup.sh new file mode 100755 index 00000000..99edc490 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/dist-setup.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e +if [ -z "${HOME}" ]; then + echo "ERROR: HOME is not defined." 
+  exit 1
+fi
+
+# Install packages
+sudo apt install -y net-tools nmap
+
+# Configure hosts
+my_ip=$(/sbin/ifconfig eth0 | sed -n 's/ *inet [^0-9]*\([0-9\.]\+\).*/\1/p')
+ip_range=${my_ip%.*}.*
+nmap -sn $ip_range | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | grep -v "^$my_ip$" > $HOME/hostiplist
+
+export HOSTS=$(<$HOME/hostiplist)
+
+for node in $HOSTS ; do scp ~/.ssh/id_rsa* $node:~/.ssh/ ; done
+
+# Push to environment
+echo "export HOSTS=\"${HOSTS}\"" >> env.sh
+echo "source $(pwd)/env.sh" >> ~/.bashrc
diff --git a/run/trino-420/azure-pipelines/sh/hive-site.xml.template b/run/trino-420/azure-pipelines/sh/hive-site.xml.template
new file mode 100644
index 00000000..0e79ed7b
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hive-site.xml.template
@@ -0,0 +1,36 @@
+<configuration>
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>${HMS_JDBC_URL}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionDriverName</name>
+    <value>${HMS_JDBC_DRIVER}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionUserName</name>
+    <value>${HMS_JDBC_USER}</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionPassword</name>
+    <value>${HMS_JDBC_PASSWORD}</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>abfss://${HMS_STORAGE_ACCOUNT_CONTAINER}@${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net/hive/warehouse</value>
+  </property>
+
+  <property>
+    <name>fs.azure.account.auth.type.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name>
+    <value>SharedKey</value>
+  </property>
+
+  <property>
+    <name>fs.azure.account.key.${HMS_STORAGE_ACCOUNT}.dfs.core.windows.net</name>
+    <value>${HMS_STORAGE_ACCOUNT_SHARED_KEY}</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/hive.properties.template b/run/trino-420/azure-pipelines/sh/hive.properties.template
new file mode 100644
index 00000000..c052a1c8
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hive.properties.template
@@ -0,0 +1,5 @@
+connector.name=hive
+hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083
+hive.allow-drop-table=true
+hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT}
+hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY}
\ No newline at end of file
diff --git a/run/trino-420/azure-pipelines/sh/hms.sh b/run/trino-420/azure-pipelines/sh/hms.sh
new file mode 100755
index 00000000..907c2bad
--- /dev/null
+++ b/run/trino-420/azure-pipelines/sh/hms.sh
@@ -0,0 +1,47 @@
+#!/bin/bash -e
+if [ "$#" -ne 7 ]; then
+  echo "Usage: $0 HMS_JDBC_DRIVER HMS_JDBC_URL HMS_JDBC_USER HMS_JDBC_PASSWORD HMS_STORAGE_ACCOUNT HMS_STORAGE_ACCOUNT_SHARED_KEY HMS_STORAGE_ACCOUNT_CONTAINER"
+  exit 1
+fi
+
+if [ -z "${USER}" ]; then
+  echo "ERROR: USER is not defined."
+ exit 1 +fi + +export HMS_JDBC_DRIVER=$1 +export HMS_JDBC_URL=$2 +export HMS_JDBC_USER=$3 +export HMS_JDBC_PASSWORD=$4 +export HMS_STORAGE_ACCOUNT=$5 +export HMS_STORAGE_ACCOUNT_SHARED_KEY=$6 +export HMS_STORAGE_ACCOUNT_CONTAINER=$7 +export HADOOP_HOME=/home/$USER/hadoop +export HIVE_HOME=/home/$USER/hive + +# Install Hadoop +rm -rf hadoop-3.3.1 +wget -nv -N https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz +tar -xzf hadoop-3.3.1.tar.gz +ln -sf $(pwd)/hadoop-3.3.1 $HADOOP_HOME + +# Install Hive (needed for HMS) +rm -rf apache-hive-2.3.9-bin +wget -nv -N https://downloads.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz +tar -xzf apache-hive-2.3.9-bin.tar.gz +ln -sf $(pwd)/apache-hive-2.3.9-bin $HIVE_HOME + +# Configure HMS +envsubst < "hive-site.xml.template" > "$HIVE_HOME/conf/hive-site.xml" + +# Copy Azure dependencies to Hive classpath +cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-azure* $HIVE_HOME/lib/ + +# Install MSSQL driver +wget -nv -N https://repo1.maven.org/maven2/com/microsoft/sqlserver/mssql-jdbc/6.2.1.jre8/mssql-jdbc-6.2.1.jre8.jar +ln -sf $(pwd)/mssql-jdbc-6.2.1.jre8.jar $HIVE_HOME/lib/mssql-jdbc.jar + +# Push to environment +echo "export HADOOP_HOME=${HADOOP_HOME} +export HIVE_HOME=${HIVE_HOME}" >> env.sh +echo "source $(pwd)/env.sh" >> ~/.bashrc diff --git a/run/trino-420/azure-pipelines/sh/iceberg.properties.template b/run/trino-420/azure-pipelines/sh/iceberg.properties.template new file mode 100644 index 00000000..d29aa613 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/iceberg.properties.template @@ -0,0 +1,7 @@ +connector.name=iceberg +hive.metastore.uri=thrift://${TRINO_MASTER_HOST}:9083 +hive.azure.abfs-storage-account=${DATA_STORAGE_ACCOUNT} +hive.azure.abfs-access-key=${DATA_STORAGE_ACCOUNT_SHARED_KEY} +iceberg.max-partitions-per-writer=2500 +iceberg.file-format=PARQUET +iceberg.compression-codec=GZIP \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/init.sh b/run/trino-420/azure-pipelines/sh/init.sh new file mode 100755 index 00000000..711cf653 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/init.sh @@ -0,0 +1,66 @@ +#!/bin/bash -e +if [ "$#" -ne 4 ]; then + echo "Usage: $0 IS_COORDINATOR TRINO_MASTER_HOST DATA_STORAGE_ACCOUNT DATA_STORAGE_ACCOUNT_SHARED_KEY" + exit 1 +fi + +if [ -z "${USER}" ]; then + echo "ERROR: USER is not defined." 
+ exit 1 +fi + +export HOSTNAME=$(hostname) +export IS_COORDINATOR=$1 +export TRINO_MASTER_HOST=$2 +export TRINO_HOME=/home/$USER/trino +export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 +export DATA_STORAGE_ACCOUNT=$3 +export DATA_STORAGE_ACCOUNT_SHARED_KEY=$4 + +# Update dependencies and install packages +sudo apt update -y +sudo apt install -y openjdk-17-jdk python wget + +# Install Trino +rm -rf trino-server-420 +wget -nv -N https://repo1.maven.org/maven2/io/trino/trino-server/420/trino-server-420.tar.gz +tar -xzf trino-server-420.tar.gz +ln -sf $(pwd)/trino-server-420 $TRINO_HOME + +# Configure Trino +sudo mkdir -p /mnt/local_resource/ +sudo mkdir -p /mnt/local_resource/trino_data/ +sudo chown $USER:$USER /mnt/local_resource/trino_data +sudo mkdir -p /mnt/local_resource/trino_tmp/ +sudo chown $USER:$USER /mnt/local_resource/trino_tmp + +sudo mkdir ${TRINO_HOME}/etc +sudo chown $USER:$USER ${TRINO_HOME}/etc/ +envsubst < "node.properties.template" > "$TRINO_HOME/etc/node.properties" +envsubst < "jvm.config.template" > "$TRINO_HOME/etc/jvm.config" +if [ "$IS_COORDINATOR" = true ]; then + envsubst < "coordinator-config.properties.template" > "$TRINO_HOME/etc/config.properties" +elif [ "$IS_COORDINATOR" = false ]; then + envsubst < "worker-config.properties.template" > "$TRINO_HOME/etc/config.properties" +else + echo "IS_COORDINATOR must be either 'true' or 'false'" + exit 1 +fi +envsubst < "log.properties.template" > "$TRINO_HOME/etc/log.properties" + +# Configure Trino connectors +sudo mkdir ${TRINO_HOME}/etc/catalog +sudo chown $USER:$USER ${TRINO_HOME}/etc/catalog/ +envsubst < "hive.properties.template" > "$TRINO_HOME/etc/catalog/hive.properties" +envsubst < "delta.properties.template" > "$TRINO_HOME/etc/catalog/delta.properties" +envsubst < "iceberg.properties.template" > "$TRINO_HOME/etc/catalog/iceberg.properties" + +# Set Linux OS limits required for Trino +echo "trino soft nofile 131072 +trino hard nofile 131072" | sudo tee -a /etc/security/limits.conf + +# Push to environment +echo "export TRINO_HOME=${TRINO_HOME} +export JAVA_HOME=${JAVA_HOME} +export PATH=${PATH}:${TRINO_HOME}/bin" >> env.sh +echo "source $(pwd)/env.sh" >> ~/.bashrc diff --git a/run/trino-420/azure-pipelines/sh/jvm.config.template b/run/trino-420/azure-pipelines/sh/jvm.config.template new file mode 100644 index 00000000..4a852a53 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/jvm.config.template @@ -0,0 +1,18 @@ +-server +-Xmx54G +-XX:InitialRAMPercentage=80 +-XX:MaxRAMPercentage=80 +-XX:G1HeapRegionSize=32M +-XX:+ExplicitGCInvokesConcurrent +-XX:+ExitOnOutOfMemoryError +-XX:+HeapDumpOnOutOfMemoryError +-XX:-OmitStackTraceInFastThrow +-XX:ReservedCodeCacheSize=512M +-XX:PerMethodRecompilationCutoff=10000 +-XX:PerBytecodeRecompilationCutoff=10000 +-Djdk.attach.allowAttachSelf=true +-Djdk.nio.maxCachedBufferSize=2000000 +-XX:+UnlockDiagnosticVMOptions +-XX:+UseAESCTRIntrinsics +# Disable Preventive GC for performance reasons (JDK-8293861) +-XX:-G1UsePreventiveGC \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/log.properties.template b/run/trino-420/azure-pipelines/sh/log.properties.template new file mode 100644 index 00000000..d253499a --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/log.properties.template @@ -0,0 +1 @@ +io.trino=INFO \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/node.properties.template b/run/trino-420/azure-pipelines/sh/node.properties.template new file mode 100644 index 00000000..a2a65764 --- /dev/null +++ 
b/run/trino-420/azure-pipelines/sh/node.properties.template @@ -0,0 +1,3 @@ +node.environment=production +node.id=$HOSTNAME +node.data-dir=/mnt/local_resource/trino_data \ No newline at end of file diff --git a/run/trino-420/azure-pipelines/sh/start-cluster.sh b/run/trino-420/azure-pipelines/sh/start-cluster.sh new file mode 100755 index 00000000..0ccbf698 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/start-cluster.sh @@ -0,0 +1,25 @@ +#!/bin/bash -e +source env.sh +if [ -z "${HIVE_HOME}" ]; then + echo "ERROR: HIVE_HOME is not defined." + exit 1 +fi +if [ -z "${TRINO_HOME}" ]; then + echo "ERROR: TRINO_HOME is not defined." + exit 1 +fi +if [ -z "${HOSTS}" ]; then + echo "ERROR: HOSTS is not defined." + exit 1 +fi + +echo "Starting HMS" +cd $HIVE_HOME +./bin/hive --service metastore & + +echo "Starting Trino cluster" +echo "Starting Trino coordinator" +cd $TRINO_HOME +./bin/launcher start +echo "Starting Trino workers" +for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher start" ; done diff --git a/run/trino-420/azure-pipelines/sh/stop-cluster.sh b/run/trino-420/azure-pipelines/sh/stop-cluster.sh new file mode 100755 index 00000000..000acd27 --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/stop-cluster.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +source env.sh +if [ -z "${HOSTS}" ]; then + echo "ERROR: HOSTS is not defined." + exit 1 +fi +if [ -z "${TRINO_HOME}" ]; then + echo "ERROR: TRINO_HOME is not defined." + exit 1 +fi + +echo "Stopping Trino cluster" +echo "Stopping Trino workers" +for node in $HOSTS ; do ssh -t $node "cd ${TRINO_HOME} && ./bin/launcher stop" ; done +echo "Stopping Trino coordinator" +cd $TRINO_HOME +./bin/launcher stop + +echo "Stopping HMS" +pkill -f "metastore" || true diff --git a/run/trino-420/azure-pipelines/sh/worker-config.properties.template b/run/trino-420/azure-pipelines/sh/worker-config.properties.template new file mode 100644 index 00000000..96a4c6fd --- /dev/null +++ b/run/trino-420/azure-pipelines/sh/worker-config.properties.template @@ -0,0 +1,3 @@ +coordinator=false +http-server.http.port=8080 +discovery.uri=http://$TRINO_MASTER_HOST:8080 \ No newline at end of file diff --git a/run/trino-420/config/tpcds/library.yaml b/run/trino-420/config/tpcds/library.yaml new file mode 100644 index 00000000..5fd99c01 --- /dev/null +++ b/run/trino-420/config/tpcds/library.yaml @@ -0,0 +1,283 @@ +# Description: Tasks Library +--- +version: 1 +task_templates: +# Create external tables needed for benchmark +- id: setup + files: + - run/trino-420/scripts/tpcds/setup/ddl-external-tables.sql +# Create data maintenance external tables needed for benchmark +- id: setup_data_maintenance + files: + - run/trino-420/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql + parameter_values_file: run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat +# Create schema and drop existing tables +- id: init + files: + - run/trino-420/scripts/tpcds/init/init.sql +# Create benchmark tables and load data into them +- id: build + files: + - run/trino-420/scripts/tpcds/build/1_create_call_center.sql + - run/trino-420/scripts/tpcds/build/1_create_catalog_page.sql + - run/trino-420/scripts/tpcds/build/1_create_catalog_returns.sql + - run/trino-420/scripts/tpcds/build/1_create_catalog_sales.sql + - run/trino-420/scripts/tpcds/build/1_create_customer.sql + - run/trino-420/scripts/tpcds/build/1_create_customer_address.sql + - run/trino-420/scripts/tpcds/build/1_create_customer_demographics.sql + - 
run/trino-420/scripts/tpcds/build/1_create_date_dim.sql + - run/trino-420/scripts/tpcds/build/1_create_household_demographics.sql + - run/trino-420/scripts/tpcds/build/1_create_income_band.sql + - run/trino-420/scripts/tpcds/build/1_create_inventory.sql + - run/trino-420/scripts/tpcds/build/1_create_item.sql + - run/trino-420/scripts/tpcds/build/1_create_promotion.sql + - run/trino-420/scripts/tpcds/build/1_create_reason.sql + - run/trino-420/scripts/tpcds/build/1_create_ship_mode.sql + - run/trino-420/scripts/tpcds/build/1_create_store.sql + - run/trino-420/scripts/tpcds/build/1_create_store_returns.sql + - run/trino-420/scripts/tpcds/build/1_create_store_sales.sql + - run/trino-420/scripts/tpcds/build/1_create_time_dim.sql + - run/trino-420/scripts/tpcds/build/1_create_warehouse.sql + - run/trino-420/scripts/tpcds/build/1_create_web_page.sql + - run/trino-420/scripts/tpcds/build/1_create_web_returns.sql + - run/trino-420/scripts/tpcds/build/1_create_web_sales.sql + - run/trino-420/scripts/tpcds/build/1_create_web_site.sql + - run/trino-420/scripts/tpcds/build/2_load_call_center.sql + - run/trino-420/scripts/tpcds/build/2_load_catalog_page.sql + - run/trino-420/scripts/tpcds/build/2_load_catalog_returns.sql + - run/trino-420/scripts/tpcds/build/2_load_catalog_sales.sql + - run/trino-420/scripts/tpcds/build/2_load_customer.sql + - run/trino-420/scripts/tpcds/build/2_load_customer_address.sql + - run/trino-420/scripts/tpcds/build/2_load_customer_demographics.sql + - run/trino-420/scripts/tpcds/build/2_load_date_dim.sql + - run/trino-420/scripts/tpcds/build/2_load_household_demographics.sql + - run/trino-420/scripts/tpcds/build/2_load_income_band.sql + - run/trino-420/scripts/tpcds/build/2_load_inventory.sql + - run/trino-420/scripts/tpcds/build/2_load_item.sql + - run/trino-420/scripts/tpcds/build/2_load_promotion.sql + - run/trino-420/scripts/tpcds/build/2_load_reason.sql + - run/trino-420/scripts/tpcds/build/2_load_ship_mode.sql + - run/trino-420/scripts/tpcds/build/2_load_store.sql + - run/trino-420/scripts/tpcds/build/2_load_store_returns.sql + - run/trino-420/scripts/tpcds/build/2_load_store_sales.sql + - run/trino-420/scripts/tpcds/build/2_load_time_dim.sql + - run/trino-420/scripts/tpcds/build/2_load_warehouse.sql + - run/trino-420/scripts/tpcds/build/2_load_web_page.sql + - run/trino-420/scripts/tpcds/build/2_load_web_returns.sql + - run/trino-420/scripts/tpcds/build/2_load_web_sales.sql + - run/trino-420/scripts/tpcds/build/2_load_web_site.sql +# Compute statistics for tables +- id: analyze + files: + - run/trino-420/scripts/tpcds/build/3_analyze_call_center.sql + - run/trino-420/scripts/tpcds/build/3_analyze_catalog_page.sql + - run/trino-420/scripts/tpcds/build/3_analyze_catalog_returns.sql + - run/trino-420/scripts/tpcds/build/3_analyze_catalog_sales.sql + - run/trino-420/scripts/tpcds/build/3_analyze_customer.sql + - run/trino-420/scripts/tpcds/build/3_analyze_customer_address.sql + - run/trino-420/scripts/tpcds/build/3_analyze_customer_demographics.sql + - run/trino-420/scripts/tpcds/build/3_analyze_date_dim.sql + - run/trino-420/scripts/tpcds/build/3_analyze_household_demographics.sql + - run/trino-420/scripts/tpcds/build/3_analyze_income_band.sql + - run/trino-420/scripts/tpcds/build/3_analyze_inventory.sql + - run/trino-420/scripts/tpcds/build/3_analyze_item.sql + - run/trino-420/scripts/tpcds/build/3_analyze_promotion.sql + - run/trino-420/scripts/tpcds/build/3_analyze_reason.sql + - run/trino-420/scripts/tpcds/build/3_analyze_ship_mode.sql + - 
run/trino-420/scripts/tpcds/build/3_analyze_store.sql + - run/trino-420/scripts/tpcds/build/3_analyze_store_returns.sql + - run/trino-420/scripts/tpcds/build/3_analyze_store_sales.sql + - run/trino-420/scripts/tpcds/build/3_analyze_time_dim.sql + - run/trino-420/scripts/tpcds/build/3_analyze_warehouse.sql + - run/trino-420/scripts/tpcds/build/3_analyze_web_page.sql + - run/trino-420/scripts/tpcds/build/3_analyze_web_returns.sql + - run/trino-420/scripts/tpcds/build/3_analyze_web_sales.sql + - run/trino-420/scripts/tpcds/build/3_analyze_web_site.sql +# Execution of TPC-DS queries +- id: single_user + files: + - run/trino-420/scripts/tpcds/single_user/query1.sql + - run/trino-420/scripts/tpcds/single_user/query2.sql + - run/trino-420/scripts/tpcds/single_user/query3.sql + - run/trino-420/scripts/tpcds/single_user/query4.sql + - run/trino-420/scripts/tpcds/single_user/query5.sql + - run/trino-420/scripts/tpcds/single_user/query6.sql + - run/trino-420/scripts/tpcds/single_user/query7.sql + - run/trino-420/scripts/tpcds/single_user/query8.sql + - run/trino-420/scripts/tpcds/single_user/query9.sql + - run/trino-420/scripts/tpcds/single_user/query10.sql + - run/trino-420/scripts/tpcds/single_user/query11.sql + - run/trino-420/scripts/tpcds/single_user/query12.sql + - run/trino-420/scripts/tpcds/single_user/query13.sql + - run/trino-420/scripts/tpcds/single_user/query14.sql + - run/trino-420/scripts/tpcds/single_user/query15.sql + - run/trino-420/scripts/tpcds/single_user/query16.sql + - run/trino-420/scripts/tpcds/single_user/query17.sql + - run/trino-420/scripts/tpcds/single_user/query18.sql + - run/trino-420/scripts/tpcds/single_user/query19.sql + - run/trino-420/scripts/tpcds/single_user/query20.sql + - run/trino-420/scripts/tpcds/single_user/query21.sql + - run/trino-420/scripts/tpcds/single_user/query22.sql + - run/trino-420/scripts/tpcds/single_user/query23.sql + - run/trino-420/scripts/tpcds/single_user/query24.sql + - run/trino-420/scripts/tpcds/single_user/query25.sql + - run/trino-420/scripts/tpcds/single_user/query26.sql + - run/trino-420/scripts/tpcds/single_user/query27.sql + - run/trino-420/scripts/tpcds/single_user/query28.sql + - run/trino-420/scripts/tpcds/single_user/query29.sql + - run/trino-420/scripts/tpcds/single_user/query30.sql + - run/trino-420/scripts/tpcds/single_user/query31.sql + - run/trino-420/scripts/tpcds/single_user/query32.sql + - run/trino-420/scripts/tpcds/single_user/query33.sql + - run/trino-420/scripts/tpcds/single_user/query34.sql + - run/trino-420/scripts/tpcds/single_user/query35.sql + - run/trino-420/scripts/tpcds/single_user/query36.sql + - run/trino-420/scripts/tpcds/single_user/query37.sql + - run/trino-420/scripts/tpcds/single_user/query38.sql + - run/trino-420/scripts/tpcds/single_user/query39.sql + - run/trino-420/scripts/tpcds/single_user/query40.sql + - run/trino-420/scripts/tpcds/single_user/query41.sql + - run/trino-420/scripts/tpcds/single_user/query42.sql + - run/trino-420/scripts/tpcds/single_user/query43.sql + - run/trino-420/scripts/tpcds/single_user/query44.sql + - run/trino-420/scripts/tpcds/single_user/query45.sql + - run/trino-420/scripts/tpcds/single_user/query46.sql + - run/trino-420/scripts/tpcds/single_user/query47.sql + - run/trino-420/scripts/tpcds/single_user/query48.sql + - run/trino-420/scripts/tpcds/single_user/query49.sql + - run/trino-420/scripts/tpcds/single_user/query50.sql + - run/trino-420/scripts/tpcds/single_user/query51.sql + - run/trino-420/scripts/tpcds/single_user/query52.sql + - 
run/trino-420/scripts/tpcds/single_user/query53.sql + - run/trino-420/scripts/tpcds/single_user/query54.sql + - run/trino-420/scripts/tpcds/single_user/query55.sql + - run/trino-420/scripts/tpcds/single_user/query56.sql + - run/trino-420/scripts/tpcds/single_user/query57.sql + - run/trino-420/scripts/tpcds/single_user/query58.sql + - run/trino-420/scripts/tpcds/single_user/query59.sql + - run/trino-420/scripts/tpcds/single_user/query60.sql + - run/trino-420/scripts/tpcds/single_user/query61.sql + - run/trino-420/scripts/tpcds/single_user/query62.sql + - run/trino-420/scripts/tpcds/single_user/query63.sql + - run/trino-420/scripts/tpcds/single_user/query64.sql + - run/trino-420/scripts/tpcds/single_user/query65.sql + - run/trino-420/scripts/tpcds/single_user/query66.sql + - run/trino-420/scripts/tpcds/single_user/query67.sql + - run/trino-420/scripts/tpcds/single_user/query68.sql + - run/trino-420/scripts/tpcds/single_user/query69.sql + - run/trino-420/scripts/tpcds/single_user/query70.sql + - run/trino-420/scripts/tpcds/single_user/query71.sql + - run/trino-420/scripts/tpcds/single_user/query72.sql + - run/trino-420/scripts/tpcds/single_user/query73.sql + - run/trino-420/scripts/tpcds/single_user/query74.sql + - run/trino-420/scripts/tpcds/single_user/query75.sql + - run/trino-420/scripts/tpcds/single_user/query76.sql + - run/trino-420/scripts/tpcds/single_user/query77.sql + - run/trino-420/scripts/tpcds/single_user/query78.sql + - run/trino-420/scripts/tpcds/single_user/query79.sql + - run/trino-420/scripts/tpcds/single_user/query80.sql + - run/trino-420/scripts/tpcds/single_user/query81.sql + - run/trino-420/scripts/tpcds/single_user/query82.sql + - run/trino-420/scripts/tpcds/single_user/query83.sql + - run/trino-420/scripts/tpcds/single_user/query84.sql + - run/trino-420/scripts/tpcds/single_user/query85.sql + - run/trino-420/scripts/tpcds/single_user/query86.sql + - run/trino-420/scripts/tpcds/single_user/query87.sql + - run/trino-420/scripts/tpcds/single_user/query88.sql + - run/trino-420/scripts/tpcds/single_user/query89.sql + - run/trino-420/scripts/tpcds/single_user/query90.sql + - run/trino-420/scripts/tpcds/single_user/query91.sql + - run/trino-420/scripts/tpcds/single_user/query92.sql + - run/trino-420/scripts/tpcds/single_user/query93.sql + - run/trino-420/scripts/tpcds/single_user/query94.sql + - run/trino-420/scripts/tpcds/single_user/query95.sql + - run/trino-420/scripts/tpcds/single_user/query96.sql + - run/trino-420/scripts/tpcds/single_user/query97.sql + - run/trino-420/scripts/tpcds/single_user/query98.sql + - run/trino-420/scripts/tpcds/single_user/query99.sql + permutation_orders_path: run/auxiliary/tpcds/single_user/permutation_orders/ +# Execution of TPC-DS data maintenance queries +- id: data_maintenance + files: + - run/trino-420/scripts/tpcds/data_maintenance/DF_CS.sql + - run/trino-420/scripts/tpcds/data_maintenance/DF_I.sql + - run/trino-420/scripts/tpcds/data_maintenance/DF_SS.sql + - run/trino-420/scripts/tpcds/data_maintenance/DF_WS.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_CR.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_CS.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_I.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_SR.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_SS.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_WR.sql + - run/trino-420/scripts/tpcds/data_maintenance/LF_WS.sql + parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat +# Execution of optimize on all 
benchmark tables +- id: optimize + files: + - run/trino-420/scripts/tpcds/optimize/o_call_center.sql + - run/trino-420/scripts/tpcds/optimize/o_catalog_page.sql + - run/trino-420/scripts/tpcds/optimize/o_catalog_returns.sql + - run/trino-420/scripts/tpcds/optimize/o_catalog_sales.sql + - run/trino-420/scripts/tpcds/optimize/o_customer.sql + - run/trino-420/scripts/tpcds/optimize/o_customer_address.sql + - run/trino-420/scripts/tpcds/optimize/o_customer_demographics.sql + - run/trino-420/scripts/tpcds/optimize/o_date_dim.sql + - run/trino-420/scripts/tpcds/optimize/o_household_demographics.sql + - run/trino-420/scripts/tpcds/optimize/o_income_band.sql + - run/trino-420/scripts/tpcds/optimize/o_inventory.sql + - run/trino-420/scripts/tpcds/optimize/o_item.sql + - run/trino-420/scripts/tpcds/optimize/o_promotion.sql + - run/trino-420/scripts/tpcds/optimize/o_reason.sql + - run/trino-420/scripts/tpcds/optimize/o_ship_mode.sql + - run/trino-420/scripts/tpcds/optimize/o_store.sql + - run/trino-420/scripts/tpcds/optimize/o_store_returns.sql + - run/trino-420/scripts/tpcds/optimize/o_store_sales.sql + - run/trino-420/scripts/tpcds/optimize/o_time_dim.sql + - run/trino-420/scripts/tpcds/optimize/o_warehouse.sql + - run/trino-420/scripts/tpcds/optimize/o_web_page.sql + - run/trino-420/scripts/tpcds/optimize/o_web_returns.sql + - run/trino-420/scripts/tpcds/optimize/o_web_sales.sql + - run/trino-420/scripts/tpcds/optimize/o_web_site.sql +# Execution of optimize on all benchmark tables but splitting optimization +# of partitioned tables into batches by relying on dependent task executor +- id: optimize_split + custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor + files: + - run/trino-420/scripts/tpcds/optimize/o_call_center.sql + - run/trino-420/scripts/tpcds/optimize/o_catalog_page.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_NULL.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_NULL.sql + - run/trino-420/scripts/tpcds/optimize/o_customer.sql + - run/trino-420/scripts/tpcds/optimize/o_customer_address.sql + - run/trino-420/scripts/tpcds/optimize/o_customer_demographics.sql + - run/trino-420/scripts/tpcds/optimize/o_date_dim.sql + - run/trino-420/scripts/tpcds/optimize/o_household_demographics.sql + - run/trino-420/scripts/tpcds/optimize/o_income_band.sql + - run/trino-420/scripts/tpcds/optimize_split/o_inventory_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_inventory_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_inventory_NULL.sql + - run/trino-420/scripts/tpcds/optimize/o_item.sql + - run/trino-420/scripts/tpcds/optimize/o_promotion.sql + - run/trino-420/scripts/tpcds/optimize/o_reason.sql + - run/trino-420/scripts/tpcds/optimize/o_ship_mode.sql + - run/trino-420/scripts/tpcds/optimize/o_store.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_returns_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_returns_NULL.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_sales_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_store_sales_NULL.sql + - 
run/trino-420/scripts/tpcds/optimize/o_time_dim.sql + - run/trino-420/scripts/tpcds/optimize/o_warehouse.sql + - run/trino-420/scripts/tpcds/optimize/o_web_page.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_returns_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_returns_NULL.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_sales_IN.sql + - run/trino-420/scripts/tpcds/optimize_split/o_web_sales_NULL.sql + - run/trino-420/scripts/tpcds/optimize/o_web_site.sql diff --git a/run/trino-420/config/tpcds/setup_experiment.yaml b/run/trino-420/config/tpcds/setup_experiment.yaml new file mode 100644 index 00000000..d122811f --- /dev/null +++ b/run/trino-420/config/tpcds/setup_experiment.yaml @@ -0,0 +1,32 @@ +# Description: Setup experiment +--- +version: 1 +id: setup_experiment +phases: +- id: setup + sessions: + - tasks: + - template_id: setup +- id: setup_data_maintenance + sessions: + - tasks: + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance + - template_id: setup_data_maintenance diff --git a/src/main/resources/config/trino/tpcds/w0_tpcds.yaml b/run/trino-420/config/tpcds/w0_tpcds.yaml similarity index 100% rename from src/main/resources/config/trino/tpcds/w0_tpcds.yaml rename to run/trino-420/config/tpcds/w0_tpcds.yaml diff --git a/src/main/resources/config/trino/tpcds/wp1_longevity.yaml b/run/trino-420/config/tpcds/wp1_longevity.yaml similarity index 69% rename from src/main/resources/config/trino/tpcds/wp1_longevity.yaml rename to run/trino-420/config/tpcds/wp1_longevity.yaml index 936169fd..1a200455 100644 --- a/src/main/resources/config/trino/tpcds/wp1_longevity.yaml +++ b/run/trino-420/config/tpcds/wp1_longevity.yaml @@ -3,23 +3,6 @@ version: 1 id: wp1_longevity phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: diff --git a/src/main/resources/config/trino/tpcds/wp2_resilience.yaml b/run/trino-420/config/tpcds/wp2_resilience.yaml similarity index 66% rename from src/main/resources/config/trino/tpcds/wp2_resilience.yaml rename to run/trino-420/config/tpcds/wp2_resilience.yaml index d95edafe..58b0bd7a 100644 --- a/src/main/resources/config/trino/tpcds/wp2_resilience.yaml +++ b/run/trino-420/config/tpcds/wp2_resilience.yaml @@ -3,25 
+3,6 @@ version: 1 id: wp2_resilience phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: @@ -46,7 +27,9 @@ phases: - id: optimize_1 sessions: - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 - id: single_user_2o sessions: - tasks: @@ -65,7 +48,9 @@ phases: - id: optimize_2 sessions: - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 - id: single_user_3o sessions: - tasks: @@ -86,7 +71,9 @@ phases: - id: optimize_3 sessions: - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 - id: single_user_4o sessions: - tasks: diff --git a/src/main/resources/config/trino/tpcds/wp3_rw_concurrency.yaml b/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml similarity index 63% rename from src/main/resources/config/trino/tpcds/wp3_rw_concurrency.yaml rename to run/trino-420/config/tpcds/wp3_rw_concurrency.yaml index a3ff60db..26afcbb5 100644 --- a/src/main/resources/config/trino/tpcds/wp3_rw_concurrency.yaml +++ b/run/trino-420/config/tpcds/wp3_rw_concurrency.yaml @@ -3,25 +3,6 @@ version: 1 id: wp3_rw_concurrency phases: -- id: setup - sessions: - - tasks: - - template_id: setup -- id: setup_data_maintenance - sessions: - - tasks: - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - - template_id: setup_data_maintenance - id: init sessions: - tasks: @@ -42,7 +23,9 @@ phases: - tasks: - template_id: single_user - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 - id: single_user_2o_data_maintenance_2 sessions: - tasks: @@ -57,7 +40,9 @@ phases: - tasks: - template_id: single_user - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 - id: single_user_3o_data_maintenance_3 sessions: - tasks: @@ -74,4 +59,6 @@ phases: - tasks: - template_id: single_user - tasks: - - template_id: optimize + - template_id: optimize_split + task_executor_arguments: + dependent_task_batch_size: 100 diff --git a/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb b/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb new file mode 100644 index 00000000..a23b7a21 Binary files /dev/null and b/run/trino-420/results/trino-420-2024-02-01-8xStandard_E8s_v5.duckdb differ diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_call_center.sql 
b/run/trino-420/scripts/tpcds/build/1_create_call_center.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_call_center.sql rename to run/trino-420/scripts/tpcds/build/1_create_call_center.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_catalog_page.sql b/run/trino-420/scripts/tpcds/build/1_create_catalog_page.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_catalog_page.sql rename to run/trino-420/scripts/tpcds/build/1_create_catalog_page.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_catalog_returns.sql b/run/trino-420/scripts/tpcds/build/1_create_catalog_returns.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_catalog_returns.sql rename to run/trino-420/scripts/tpcds/build/1_create_catalog_returns.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_catalog_sales.sql b/run/trino-420/scripts/tpcds/build/1_create_catalog_sales.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_catalog_sales.sql rename to run/trino-420/scripts/tpcds/build/1_create_catalog_sales.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_customer.sql b/run/trino-420/scripts/tpcds/build/1_create_customer.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_customer.sql rename to run/trino-420/scripts/tpcds/build/1_create_customer.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_customer_address.sql b/run/trino-420/scripts/tpcds/build/1_create_customer_address.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_customer_address.sql rename to run/trino-420/scripts/tpcds/build/1_create_customer_address.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_customer_demographics.sql b/run/trino-420/scripts/tpcds/build/1_create_customer_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_customer_demographics.sql rename to run/trino-420/scripts/tpcds/build/1_create_customer_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_date_dim.sql b/run/trino-420/scripts/tpcds/build/1_create_date_dim.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_date_dim.sql rename to run/trino-420/scripts/tpcds/build/1_create_date_dim.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_household_demographics.sql b/run/trino-420/scripts/tpcds/build/1_create_household_demographics.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_household_demographics.sql rename to run/trino-420/scripts/tpcds/build/1_create_household_demographics.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_income_band.sql b/run/trino-420/scripts/tpcds/build/1_create_income_band.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_income_band.sql rename to run/trino-420/scripts/tpcds/build/1_create_income_band.sql diff --git a/src/main/resources/scripts/tpcds/build/trino/1_create_inventory.sql b/run/trino-420/scripts/tpcds/build/1_create_inventory.sql similarity index 100% rename from src/main/resources/scripts/tpcds/build/trino/1_create_inventory.sql rename to run/trino-420/scripts/tpcds/build/1_create_inventory.sql diff --git 
a/src/main/resources/scripts/tpcds/build/trino/1_create_item.sql b/run/trino-420/scripts/tpcds/build/1_create_item.sql
similarity index 100%
rename from src/main/resources/scripts/tpcds/build/trino/1_create_item.sql
rename to run/trino-420/scripts/tpcds/build/1_create_item.sql
 rename src/main/resources/scripts/tpcds/build/trino/1_create_promotion.sql => run/trino-420/scripts/tpcds/build/1_create_promotion.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_reason.sql => run/trino-420/scripts/tpcds/build/1_create_reason.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_ship_mode.sql => run/trino-420/scripts/tpcds/build/1_create_ship_mode.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_store.sql => run/trino-420/scripts/tpcds/build/1_create_store.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_store_returns.sql => run/trino-420/scripts/tpcds/build/1_create_store_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_store_sales.sql => run/trino-420/scripts/tpcds/build/1_create_store_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_time_dim.sql => run/trino-420/scripts/tpcds/build/1_create_time_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_warehouse.sql => run/trino-420/scripts/tpcds/build/1_create_warehouse.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_web_page.sql => run/trino-420/scripts/tpcds/build/1_create_web_page.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_web_returns.sql => run/trino-420/scripts/tpcds/build/1_create_web_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_web_sales.sql => run/trino-420/scripts/tpcds/build/1_create_web_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/1_create_web_site.sql => run/trino-420/scripts/tpcds/build/1_create_web_site.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_call_center.sql => run/trino-420/scripts/tpcds/build/2_load_call_center.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_catalog_page.sql => run/trino-420/scripts/tpcds/build/2_load_catalog_page.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_catalog_returns.sql => run/trino-420/scripts/tpcds/build/2_load_catalog_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_catalog_sales.sql => run/trino-420/scripts/tpcds/build/2_load_catalog_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_customer.sql => run/trino-420/scripts/tpcds/build/2_load_customer.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_customer_address.sql => run/trino-420/scripts/tpcds/build/2_load_customer_address.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_customer_demographics.sql => run/trino-420/scripts/tpcds/build/2_load_customer_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_date_dim.sql => run/trino-420/scripts/tpcds/build/2_load_date_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_household_demographics.sql => run/trino-420/scripts/tpcds/build/2_load_household_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_income_band.sql => run/trino-420/scripts/tpcds/build/2_load_income_band.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_inventory.sql => run/trino-420/scripts/tpcds/build/2_load_inventory.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_item.sql => run/trino-420/scripts/tpcds/build/2_load_item.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_promotion.sql => run/trino-420/scripts/tpcds/build/2_load_promotion.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_reason.sql => run/trino-420/scripts/tpcds/build/2_load_reason.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_ship_mode.sql => run/trino-420/scripts/tpcds/build/2_load_ship_mode.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_store.sql => run/trino-420/scripts/tpcds/build/2_load_store.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_store_returns.sql => run/trino-420/scripts/tpcds/build/2_load_store_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_store_sales.sql => run/trino-420/scripts/tpcds/build/2_load_store_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_time_dim.sql => run/trino-420/scripts/tpcds/build/2_load_time_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_warehouse.sql => run/trino-420/scripts/tpcds/build/2_load_warehouse.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_web_page.sql => run/trino-420/scripts/tpcds/build/2_load_web_page.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_web_returns.sql => run/trino-420/scripts/tpcds/build/2_load_web_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_web_sales.sql => run/trino-420/scripts/tpcds/build/2_load_web_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/2_load_web_site.sql => run/trino-420/scripts/tpcds/build/2_load_web_site.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_call_center.sql => run/trino-420/scripts/tpcds/build/3_analyze_call_center.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_page.sql => run/trino-420/scripts/tpcds/build/3_analyze_catalog_page.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_returns.sql => run/trino-420/scripts/tpcds/build/3_analyze_catalog_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_sales.sql => run/trino-420/scripts/tpcds/build/3_analyze_catalog_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_customer.sql => run/trino-420/scripts/tpcds/build/3_analyze_customer.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_customer_address.sql => run/trino-420/scripts/tpcds/build/3_analyze_customer_address.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_customer_demographics.sql => run/trino-420/scripts/tpcds/build/3_analyze_customer_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_date_dim.sql => run/trino-420/scripts/tpcds/build/3_analyze_date_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_household_demographics.sql => run/trino-420/scripts/tpcds/build/3_analyze_household_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_income_band.sql => run/trino-420/scripts/tpcds/build/3_analyze_income_band.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_inventory.sql => run/trino-420/scripts/tpcds/build/3_analyze_inventory.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_item.sql => run/trino-420/scripts/tpcds/build/3_analyze_item.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_promotion.sql => run/trino-420/scripts/tpcds/build/3_analyze_promotion.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_reason.sql => run/trino-420/scripts/tpcds/build/3_analyze_reason.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_ship_mode.sql => run/trino-420/scripts/tpcds/build/3_analyze_ship_mode.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_store.sql => run/trino-420/scripts/tpcds/build/3_analyze_store.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_store_returns.sql => run/trino-420/scripts/tpcds/build/3_analyze_store_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_store_sales.sql => run/trino-420/scripts/tpcds/build/3_analyze_store_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_time_dim.sql => run/trino-420/scripts/tpcds/build/3_analyze_time_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_warehouse.sql => run/trino-420/scripts/tpcds/build/3_analyze_warehouse.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_web_page.sql => run/trino-420/scripts/tpcds/build/3_analyze_web_page.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_web_returns.sql => run/trino-420/scripts/tpcds/build/3_analyze_web_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_web_sales.sql => run/trino-420/scripts/tpcds/build/3_analyze_web_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/build/trino/3_analyze_web_site.sql => run/trino-420/scripts/tpcds/build/3_analyze_web_site.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/DF_CS.sql => run/trino-420/scripts/tpcds/data_maintenance/DF_CS.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/DF_I.sql => run/trino-420/scripts/tpcds/data_maintenance/DF_I.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/DF_SS.sql => run/trino-420/scripts/tpcds/data_maintenance/DF_SS.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/DF_WS.sql => run/trino-420/scripts/tpcds/data_maintenance/DF_WS.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_CR.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_CR.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_CS.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_CS.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_I.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_I.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_SR.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_SR.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_SS.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_SS.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_WR.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_WR.sql (100%)
 rename src/main/resources/scripts/tpcds/data_maintenance/trino/LF_WS.sql => run/trino-420/scripts/tpcds/data_maintenance/LF_WS.sql (100%)
 rename src/main/resources/scripts/tpcds/init/trino/init.sql => run/trino-420/scripts/tpcds/init/init.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_call_center.sql => run/trino-420/scripts/tpcds/optimize/o_call_center.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_catalog_page.sql => run/trino-420/scripts/tpcds/optimize/o_catalog_page.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_catalog_returns.sql => run/trino-420/scripts/tpcds/optimize/o_catalog_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_catalog_sales.sql => run/trino-420/scripts/tpcds/optimize/o_catalog_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_customer.sql => run/trino-420/scripts/tpcds/optimize/o_customer.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_customer_address.sql => run/trino-420/scripts/tpcds/optimize/o_customer_address.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_customer_demographics.sql => run/trino-420/scripts/tpcds/optimize/o_customer_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_date_dim.sql => run/trino-420/scripts/tpcds/optimize/o_date_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_household_demographics.sql => run/trino-420/scripts/tpcds/optimize/o_household_demographics.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_income_band.sql => run/trino-420/scripts/tpcds/optimize/o_income_band.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_inventory.sql => run/trino-420/scripts/tpcds/optimize/o_inventory.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_item.sql => run/trino-420/scripts/tpcds/optimize/o_item.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_promotion.sql => run/trino-420/scripts/tpcds/optimize/o_promotion.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_reason.sql => run/trino-420/scripts/tpcds/optimize/o_reason.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_ship_mode.sql => run/trino-420/scripts/tpcds/optimize/o_ship_mode.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_store.sql => run/trino-420/scripts/tpcds/optimize/o_store.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_store_returns.sql => run/trino-420/scripts/tpcds/optimize/o_store_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_store_sales.sql => run/trino-420/scripts/tpcds/optimize/o_store_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_time_dim.sql => run/trino-420/scripts/tpcds/optimize/o_time_dim.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_warehouse.sql => run/trino-420/scripts/tpcds/optimize/o_warehouse.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_web_page.sql => run/trino-420/scripts/tpcds/optimize/o_web_page.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_web_returns.sql => run/trino-420/scripts/tpcds/optimize/o_web_returns.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_web_sales.sql => run/trino-420/scripts/tpcds/optimize/o_web_sales.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize/trino/o_web_site.sql => run/trino-420/scripts/tpcds/optimize/o_web_site.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_catalog_sales_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_inventory_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_inventory_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_inventory_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_returns_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_returns_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_returns_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_sales_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_sales_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_store_sales_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_returns_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_returns_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_returns_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_IN.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_sales_IN.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_NULL.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_sales_NULL.sql (100%)
 rename src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_SELECT.sql => run/trino-420/scripts/tpcds/optimize_split/o_web_sales_SELECT.sql (100%)
 rename src/main/resources/scripts/tpcds/setup/trino/ddl-external-tables.sql => run/trino-420/scripts/tpcds/setup/ddl-external-tables.sql (100%)
 rename src/main/resources/scripts/tpcds/setup_data_maintenance/trino/ddl-external-tables-refresh.sql => run/trino-420/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query1.sql => run/trino-420/scripts/tpcds/single_user/query1.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query10.sql => run/trino-420/scripts/tpcds/single_user/query10.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query11.sql => run/trino-420/scripts/tpcds/single_user/query11.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query12.sql => run/trino-420/scripts/tpcds/single_user/query12.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query13.sql => run/trino-420/scripts/tpcds/single_user/query13.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query14.sql => run/trino-420/scripts/tpcds/single_user/query14.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query15.sql => run/trino-420/scripts/tpcds/single_user/query15.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query16.sql => run/trino-420/scripts/tpcds/single_user/query16.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query17.sql => run/trino-420/scripts/tpcds/single_user/query17.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query18.sql => run/trino-420/scripts/tpcds/single_user/query18.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query19.sql => run/trino-420/scripts/tpcds/single_user/query19.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query2.sql => run/trino-420/scripts/tpcds/single_user/query2.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query20.sql => run/trino-420/scripts/tpcds/single_user/query20.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query21.sql => run/trino-420/scripts/tpcds/single_user/query21.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query22.sql => run/trino-420/scripts/tpcds/single_user/query22.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query23.sql => run/trino-420/scripts/tpcds/single_user/query23.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query24.sql => run/trino-420/scripts/tpcds/single_user/query24.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query25.sql => run/trino-420/scripts/tpcds/single_user/query25.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query26.sql => run/trino-420/scripts/tpcds/single_user/query26.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query27.sql => run/trino-420/scripts/tpcds/single_user/query27.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query28.sql => run/trino-420/scripts/tpcds/single_user/query28.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query29.sql => run/trino-420/scripts/tpcds/single_user/query29.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query3.sql => run/trino-420/scripts/tpcds/single_user/query3.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query30.sql => run/trino-420/scripts/tpcds/single_user/query30.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query31.sql => run/trino-420/scripts/tpcds/single_user/query31.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query32.sql => run/trino-420/scripts/tpcds/single_user/query32.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query33.sql => run/trino-420/scripts/tpcds/single_user/query33.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query34.sql => run/trino-420/scripts/tpcds/single_user/query34.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query35.sql => run/trino-420/scripts/tpcds/single_user/query35.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query36.sql => run/trino-420/scripts/tpcds/single_user/query36.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query37.sql => run/trino-420/scripts/tpcds/single_user/query37.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query38.sql => run/trino-420/scripts/tpcds/single_user/query38.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query39.sql => run/trino-420/scripts/tpcds/single_user/query39.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query4.sql => run/trino-420/scripts/tpcds/single_user/query4.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query40.sql => run/trino-420/scripts/tpcds/single_user/query40.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query41.sql => run/trino-420/scripts/tpcds/single_user/query41.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query42.sql => run/trino-420/scripts/tpcds/single_user/query42.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query43.sql => run/trino-420/scripts/tpcds/single_user/query43.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query44.sql => run/trino-420/scripts/tpcds/single_user/query44.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query45.sql => run/trino-420/scripts/tpcds/single_user/query45.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query46.sql => run/trino-420/scripts/tpcds/single_user/query46.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query47.sql => run/trino-420/scripts/tpcds/single_user/query47.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query48.sql => run/trino-420/scripts/tpcds/single_user/query48.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query49.sql => run/trino-420/scripts/tpcds/single_user/query49.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query5.sql => run/trino-420/scripts/tpcds/single_user/query5.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query50.sql => run/trino-420/scripts/tpcds/single_user/query50.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query51.sql => run/trino-420/scripts/tpcds/single_user/query51.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query52.sql => run/trino-420/scripts/tpcds/single_user/query52.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query53.sql => run/trino-420/scripts/tpcds/single_user/query53.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query54.sql => run/trino-420/scripts/tpcds/single_user/query54.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query55.sql => run/trino-420/scripts/tpcds/single_user/query55.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query56.sql => run/trino-420/scripts/tpcds/single_user/query56.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query57.sql => run/trino-420/scripts/tpcds/single_user/query57.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query58.sql => run/trino-420/scripts/tpcds/single_user/query58.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query59.sql => run/trino-420/scripts/tpcds/single_user/query59.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query6.sql => run/trino-420/scripts/tpcds/single_user/query6.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query60.sql => run/trino-420/scripts/tpcds/single_user/query60.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query61.sql => run/trino-420/scripts/tpcds/single_user/query61.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query62.sql => run/trino-420/scripts/tpcds/single_user/query62.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query63.sql => run/trino-420/scripts/tpcds/single_user/query63.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query64.sql => run/trino-420/scripts/tpcds/single_user/query64.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query65.sql => run/trino-420/scripts/tpcds/single_user/query65.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query66.sql => run/trino-420/scripts/tpcds/single_user/query66.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query67.sql => run/trino-420/scripts/tpcds/single_user/query67.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query68.sql => run/trino-420/scripts/tpcds/single_user/query68.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query69.sql => run/trino-420/scripts/tpcds/single_user/query69.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query7.sql => run/trino-420/scripts/tpcds/single_user/query7.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query70.sql => run/trino-420/scripts/tpcds/single_user/query70.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query71.sql => run/trino-420/scripts/tpcds/single_user/query71.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query72.sql => run/trino-420/scripts/tpcds/single_user/query72.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query73.sql => run/trino-420/scripts/tpcds/single_user/query73.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query74.sql => run/trino-420/scripts/tpcds/single_user/query74.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query75.sql => run/trino-420/scripts/tpcds/single_user/query75.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query76.sql => run/trino-420/scripts/tpcds/single_user/query76.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query77.sql => run/trino-420/scripts/tpcds/single_user/query77.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query78.sql => run/trino-420/scripts/tpcds/single_user/query78.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query79.sql => run/trino-420/scripts/tpcds/single_user/query79.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query8.sql => run/trino-420/scripts/tpcds/single_user/query8.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query80.sql => run/trino-420/scripts/tpcds/single_user/query80.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query81.sql => run/trino-420/scripts/tpcds/single_user/query81.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query82.sql => run/trino-420/scripts/tpcds/single_user/query82.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query83.sql => run/trino-420/scripts/tpcds/single_user/query83.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query84.sql => run/trino-420/scripts/tpcds/single_user/query84.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query85.sql => run/trino-420/scripts/tpcds/single_user/query85.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query86.sql => run/trino-420/scripts/tpcds/single_user/query86.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query87.sql => run/trino-420/scripts/tpcds/single_user/query87.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query88.sql => run/trino-420/scripts/tpcds/single_user/query88.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query89.sql => run/trino-420/scripts/tpcds/single_user/query89.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query9.sql => run/trino-420/scripts/tpcds/single_user/query9.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query90.sql => run/trino-420/scripts/tpcds/single_user/query90.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query91.sql => run/trino-420/scripts/tpcds/single_user/query91.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query92.sql => run/trino-420/scripts/tpcds/single_user/query92.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query93.sql => run/trino-420/scripts/tpcds/single_user/query93.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query94.sql => run/trino-420/scripts/tpcds/single_user/query94.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query95.sql => run/trino-420/scripts/tpcds/single_user/query95.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query96.sql => run/trino-420/scripts/tpcds/single_user/query96.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query97.sql => run/trino-420/scripts/tpcds/single_user/query97.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query98.sql => run/trino-420/scripts/tpcds/single_user/query98.sql (100%)
 rename src/main/resources/scripts/tpcds/single_user/trino/query99.sql => run/trino-420/scripts/tpcds/single_user/query99.sql (100%)
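Every rename above follows one mechanical mapping: a Trino-specific script under src/main/resources/scripts/tpcds/<phase>/trino/ moves, unchanged (100% similarity), to the engine-versioned tree run/trino-420/scripts/tpcds/<phase>/. The following is a minimal, hypothetical one-off sketch of that mapping in Java; it is an illustration of the move, not a utility from the repository:

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;

public class MoveTrinoScripts {
  public static void main(String[] args) throws IOException {
    Path oldRoot = Paths.get("src/main/resources/scripts/tpcds");
    Path newRoot = Paths.get("run/trino-420/scripts/tpcds");
    try (Stream<Path> files = Files.walk(oldRoot)) {
      files
          .filter(Files::isRegularFile)
          .filter(p -> p.getParent() != null && p.getParent().endsWith("trino"))
          .forEach(p -> {
            // <phase>/trino/<file>.sql -> <phase>/<file>.sql under the new root.
            Path phase = oldRoot.relativize(p.getParent().getParent());
            Path target = newRoot.resolve(phase).resolve(p.getFileName());
            try {
              Files.createDirectories(target.getParent());
              Files.move(p, target);
            } catch (IOException e) {
              throw new UncheckedIOException(e);
            }
          });
    }
  }
}

In the actual history the moves would have been recorded with git (hence the rename entries); the sketch only reproduces the path arithmetic.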
@@ public class Driver { private static final Logger LOGGER = LoggerFactory.getLogger(Driver.class); - private static final String OPT_INPUT_TASK_LIBRARY_FILE = "task-library"; + private static final String OPT_INPUT_LIBRARY_FILE = "library"; private static final String OPT_INPUT_WORKLOAD_FILE = "workload"; private static final String OPT_INPUT_CONNECTION_CONFIG_FILE = "connections-config"; private static final String OPT_INPUT_EXPERIMENT_CONFIG_FILE = "experiment-config"; @@ -61,7 +61,7 @@ private Driver() {} /** Main method. */ public static void main(String[] args) throws Exception { - String inputTaskLibraryFile = null; + String inputLibraryFile = null; String inputWorkloadFile = null; String inputConnectionsConfigFile = null; String inputExperimentConfigFile = null; @@ -75,8 +75,8 @@ public static void main(String[] args) throws Exception { if (cmd.getOptions().length == 0) { usageAndHelp(); } else { - if (cmd.hasOption(OPT_INPUT_TASK_LIBRARY_FILE)) { - inputTaskLibraryFile = cmd.getOptionValue(OPT_INPUT_TASK_LIBRARY_FILE); + if (cmd.hasOption(OPT_INPUT_LIBRARY_FILE)) { + inputLibraryFile = cmd.getOptionValue(OPT_INPUT_LIBRARY_FILE); } if (cmd.hasOption(OPT_INPUT_WORKLOAD_FILE)) { inputWorkloadFile = cmd.getOptionValue(OPT_INPUT_WORKLOAD_FILE); @@ -97,14 +97,14 @@ public static void main(String[] args) throws Exception { } // Validate input values - Validate.notNull(inputTaskLibraryFile, "TaskExec library file is required."); + Validate.notNull(inputLibraryFile, "Library file is required."); Validate.notNull(inputWorkloadFile, "Workload file is required."); Validate.notNull(inputConnectionsConfigFile, "Connections config file is required."); Validate.notNull(inputExperimentConfigFile, "Experiment config file is required."); Validate.notNull(inputTelemetryConfigFile, "Telemetry config file is required."); // Create Java objects from input files - final TaskLibrary taskLibrary = FileParser.loadTaskLibrary(inputTaskLibraryFile); + final Library library = FileParser.loadLibrary(inputLibraryFile); final Workload workload = FileParser.loadWorkload(inputWorkloadFile); final ConnectionsConfig connectionsConfig = FileParser.loadConnectionsConfig(inputConnectionsConfigFile); @@ -113,12 +113,12 @@ public static void main(String[] args) throws Exception { final TelemetryConfig telemetryConfig = FileParser.loadTelemetryConfig(inputTelemetryConfigFile); - run(taskLibrary, workload, connectionsConfig, experimentConfig, telemetryConfig); + run(library, workload, connectionsConfig, experimentConfig, telemetryConfig); } /** Run benchmark. 
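For context, a minimal sketch of how the renamed inputs compose, mirroring the load-then-run flow of Driver.main above; the .yaml paths are hypothetical placeholders and the import locations are assumptions:

    import com.microsoft.lst_bench.Driver;
    import com.microsoft.lst_bench.input.Library;
    import com.microsoft.lst_bench.input.Workload;
    import com.microsoft.lst_bench.input.config.ConnectionsConfig;
    import com.microsoft.lst_bench.input.config.ExperimentConfig;
    import com.microsoft.lst_bench.input.config.TelemetryConfig;
    import com.microsoft.lst_bench.util.FileParser;

    public class RunExample {
      public static void main(String[] args) throws Exception {
        // Same sequence as Driver.main, after the TaskLibrary -> Library rename.
        Library library = FileParser.loadLibrary("config/library.yaml");
        Workload workload = FileParser.loadWorkload("config/workload.yaml");
        ConnectionsConfig connections = FileParser.loadConnectionsConfig("config/connections.yaml");
        ExperimentConfig experiment = FileParser.loadExperimentConfig("config/experiment.yaml");
        TelemetryConfig telemetry = FileParser.loadTelemetryConfig("config/telemetry.yaml");
        Driver.run(library, workload, connections, experiment, telemetry);
      }
    }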
*/ public static void run( - TaskLibrary taskLibrary, + Library library, Workload workload, ConnectionsConfig connectionsConfig, ExperimentConfig experimentConfig, @@ -151,7 +151,7 @@ public static void run( // Create experiment configuration final BenchmarkConfig benchmarkConfig = - BenchmarkObjectFactory.benchmarkConfig(experimentConfig, taskLibrary, workload); + BenchmarkObjectFactory.benchmarkConfig(experimentConfig, library, workload); // Run experiment final BenchmarkRunnable experiment = @@ -166,10 +166,10 @@ private static Options createOptions() { Option.builder() .required() .option("l") - .longOpt(OPT_INPUT_TASK_LIBRARY_FILE) + .longOpt(OPT_INPUT_LIBRARY_FILE) .hasArg() .argName("arg") - .desc("Path to input file containing the library with task templates") + .desc("Path to input file containing the library with templates") .build(); options.addOption(inputTaskLibraryFile); diff --git a/src/main/java/com/microsoft/lst_bench/client/JDBCConnection.java b/src/main/java/com/microsoft/lst_bench/client/JDBCConnection.java index 03ab71dc..a15e5f86 100644 --- a/src/main/java/com/microsoft/lst_bench/client/JDBCConnection.java +++ b/src/main/java/com/microsoft/lst_bench/client/JDBCConnection.java @@ -17,7 +17,10 @@ import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.SQLWarning; import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; import org.apache.commons.lang3.exception.ExceptionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,10 +32,12 @@ public class JDBCConnection implements Connection { private final java.sql.Connection connection; private final int maxNumRetries; + private final boolean showWarnings; - public JDBCConnection(java.sql.Connection connection, int maxNumRetries) { + public JDBCConnection(java.sql.Connection connection, int maxNumRetries, boolean showWarnings) { this.connection = connection; this.maxNumRetries = maxNumRetries; + this.showWarnings = showWarnings; } @Override @@ -49,9 +54,12 @@ private QueryResult execute(String sqlText, boolean ignoreResults) throws Client QueryResult queryResult = null; int errorCount = 0; - // Retry count is in addition to the 1 default try, thus '<='. - while (errorCount <= this.maxNumRetries) { - try (Statement s = connection.createStatement()) { + // Infinite retries if number of retries is set to '-1', otherwise retry count is in addition to + // the 1 default try, thus '<='. + while (this.maxNumRetries == -1 || errorCount <= this.maxNumRetries) { + Statement s = null; + try { + s = connection.createStatement(); boolean hasResults = s.execute(sqlText); if (hasResults) { ResultSet rs = s.getResultSet(); @@ -64,15 +72,31 @@ private QueryResult execute(String sqlText, boolean ignoreResults) throws Client queryResult.populate(rs); } } - // Return here if successful + // Log verbosely, if enabled. + if (this.showWarnings && LOGGER.isWarnEnabled()) { + LOGGER.warn( + createWarningString( + s, + /* prefix= */ errorCount > 0 + ? ("Retried query, error count: " + errorCount) + : "")); + } + // Return here if successful. 
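A hedged sketch of the new connection semantics (the JDBC URL is an illustrative assumption): maxNumRetries set to -1 requests unbounded retries, and showWarnings set to true logs any SQLWarning attached to the statement after each attempt.

    import com.microsoft.lst_bench.client.Connection;
    import com.microsoft.lst_bench.client.JDBCConnectionManager;

    public class ConnectionExample {
      public static void main(String[] args) throws Exception {
        JDBCConnectionManager manager =
            new JDBCConnectionManager(
                "jdbc:duckdb:", /* maxNumRetries= */ -1, /* showWarnings= */ true,
                /* username= */ null, /* password= */ null);
        Connection connection = manager.createConnection();
        connection.execute("SELECT 1"); // retried until it succeeds
        connection.close();
      }
    }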
return queryResult; } catch (Exception e) { queryResult = null; String lastErrorMsg = - "Query execution (" - + this.maxNumRetries - + " retries) unsuccessful; stack trace: " + "Query execution attempt " + + (errorCount + 1) + + " unsuccessful, will retry " + + (this.maxNumRetries == -1 ? "indefinitely" : (this.maxNumRetries - errorCount) + " more times") + + "; " + + createWarningString(s, /* prefix= */ "") + + "stack trace: " + ExceptionUtils.getStackTrace(e); + + // Log execution error and any pending warnings associated with this statement, useful for + // debugging. if (errorCount == this.maxNumRetries) { LOGGER.error(lastErrorMsg); throw new ClientException(lastErrorMsg); @@ -80,6 +104,22 @@ private QueryResult execute(String sqlText, boolean ignoreResults) throws Client LOGGER.warn(lastErrorMsg); } errorCount++; + } finally { + if (s != null) { + try { + s.close(); + } catch (Exception e) { + String closingError = "Error when closing statement."; + // Only throw error if it has not been thrown in the try block to avoid overwriting the + // error. + if (errorCount != this.maxNumRetries) { + LOGGER.error(closingError); + throw new ClientException(closingError); + } else { + LOGGER.warn(closingError); + } + } + } } } // Return here if max retries reached without success @@ -94,4 +134,23 @@ public void close() throws ClientException { throw new ClientException(e); } } + + private String createWarningString(Statement s, String prefix) throws ClientException { + List<String> warningList = new ArrayList<>(); + + if (s != null) { + SQLWarning warning; + try { + warning = s.getWarnings(); + while (warning != null) { + warningList.add(warning.getMessage()); + warning = warning.getNextWarning(); + } + } catch (SQLException e) { + throw new ClientException(e.getMessage()); + } + } + + return prefix + ";" + String.join("; ", warningList); + } } diff --git a/src/main/java/com/microsoft/lst_bench/client/JDBCConnectionManager.java b/src/main/java/com/microsoft/lst_bench/client/JDBCConnectionManager.java index ce2c8ec5..9f3bdf4b 100644 --- a/src/main/java/com/microsoft/lst_bench/client/JDBCConnectionManager.java +++ b/src/main/java/com/microsoft/lst_bench/client/JDBCConnectionManager.java @@ -24,15 +24,18 @@ public class JDBCConnectionManager implements ConnectionManager { private final String url; - private final int max_num_retries; + private final int maxNumRetries; + private final boolean showWarnings; @Nullable private final String username; @Nullable private final String password; - public JDBCConnectionManager(String url, int max_num_retries, String username, String password) { + public JDBCConnectionManager( + String url, int maxNumRetries, boolean showWarnings, String username, String password) { this.url = url; - this.max_num_retries = max_num_retries; + this.maxNumRetries = maxNumRetries; + this.showWarnings = showWarnings; this.username = username; this.password = password; } @@ -41,10 +44,13 @@ public JDBCConnectionManager(String url, int max_num_retries, String username, S public Connection createConnection() throws ClientException { try { if (StringUtils.isEmpty(username)) { - return new JDBCConnection(DriverManager.getConnection(url), this.max_num_retries); + return new JDBCConnection( + DriverManager.getConnection(url), this.maxNumRetries, this.showWarnings); } else { return new JDBCConnection( - DriverManager.getConnection(url, username, password), this.max_num_retries); + DriverManager.getConnection(url, username, password), + this.maxNumRetries, + this.showWarnings); } } catch (SQLException e) { throw new
ClientException(e); diff --git a/src/main/java/com/microsoft/lst_bench/client/QueryResult.java b/src/main/java/com/microsoft/lst_bench/client/QueryResult.java index 5c49f935..e6e58cab 100644 --- a/src/main/java/com/microsoft/lst_bench/client/QueryResult.java +++ b/src/main/java/com/microsoft/lst_bench/client/QueryResult.java @@ -19,24 +19,32 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.Map.Entry; import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; /** - * Represents the query result of a query issued against a source. Query result entries should be - * mapped to column name -> list of column values. + * Represents the result of a query issued against a data source. If the query result contains a + * single column, the entries will be mapped to the column name. Otherwise, the entries will be + * mapped to a special key "multi_values_clause" which is used to represent a multi-column result. */ public class QueryResult { - private final Map<String, List<Object>> valueList; + private final List<String> columnNames; + private final List<Integer> columnTypes; + private final List<List<Object>> valueList; + private static final String MULTI_VALUES_KEY = "multi_values_clause"; private static final String RESULT = "Result"; public QueryResult() { - this.valueList = new HashMap<>(); + this(new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); + } + + QueryResult(List<String> columnNames, List<Integer> columnTypes, List<List<Object>> valueList) { + this.columnNames = columnNames; + this.columnTypes = columnTypes; + this.valueList = valueList; } // TODO: Determine whether this can be done lazily i.e., after the statement has finished @@ -45,45 +53,70 @@ public void populate(ResultSet rs) throws SQLException { ResultSetMetaData rsmd = rs.getMetaData(); for (int j = 1; j <= rsmd.getColumnCount(); j++) { - valueList.put(rsmd.getColumnName(j), new ArrayList<>()); + columnNames.add(rsmd.getColumnName(j)); + columnTypes.add(rsmd.getColumnType(j)); + valueList.add(new ArrayList<>()); } while (rs.next()) { for (int j = 1; j <= rsmd.getColumnCount(); j++) { - valueList.get(rsmd.getColumnName(j)).add(rs.getObject(j)); + valueList.get(j - 1).add(rs.getObject(j)); } } } public Integer getValueListSize() { Integer size = null; - for (Entry<String, List<Object>> pair : valueList.entrySet()) { - size = pair.getValue().size(); + for (List<Object> values : valueList) { + size = values.size(); break; } return size; } public boolean containsEmptyResultColumnOnly() { - if (valueList.keySet().size() == 1 - && valueList.containsKey(RESULT) - && valueList.get(RESULT).size() == 0) { - return true; - } - return false; + return columnNames.size() == 1 + && columnNames.get(0).equals(RESULT) + && valueList.get(0).isEmpty(); } - public Map<String, String> getStringMappings(int listMin, int listMax) { - Map<String, String> result = new HashMap<>(); - for (String key : this.valueList.keySet()) { + public Pair<String, String> getStringMappings(int listMin, int listMax) { + if (columnNames.size() == 1) { List<String> localList = - this.valueList.get(key).subList(listMin, listMax).stream() - .map(s -> s.toString()) + valueList.get(0).subList(listMin, listMax).stream() + .map(Object::toString) + .map(s -> wrapString(s, columnTypes.get(0))) + .collect(Collectors.toUnmodifiableList()); - // TODO: This assumes a VARCHAR type (or implicit casting by the engine), - // we should probably handle it more generically using data types.
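The new contract is easiest to see with invented data; a hedged sketch that would live alongside the class (the constructor is package-private), showing that a single-column result keeps its column name as the key while a multi-column result collapses into multi_values_clause, with numeric types left unquoted by wrapString:

    package com.microsoft.lst_bench.client;

    import java.sql.Types;
    import java.util.List;
    import org.apache.commons.lang3.tuple.Pair;

    public class QueryResultExample {
      public static void main(String[] args) {
        QueryResult result =
            new QueryResult(
                List.of("id", "name"),
                List.of(Types.INTEGER, Types.VARCHAR),
                List.of(List.<Object>of(1, 2), List.<Object>of("a", "b")));
        Pair<String, String> mapping = result.getStringMappings(0, 2);
        // mapping -> ("multi_values_clause", "(1,'a'),(2,'b')"); a single
        // INTEGER column [1, 2, 3] would instead yield ("id", "1,2,3").
        System.out.println(mapping.getKey() + " -> " + mapping.getValue());
      }
    }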
- result.put(key, "'" + String.join("','", localList) + "'"); + return Pair.of(columnNames.get(0), String.join(",", localList)); + } + StringBuilder multiValuesClause = new StringBuilder(); + for (int i = listMin; i < listMax; i++) { + multiValuesClause.append("("); + for (int j = 0; j < valueList.size(); j++) { + multiValuesClause + .append(wrapString(valueList.get(j).get(i).toString(), columnTypes.get(j))) + .append(","); + } + // Remove trailing comma + multiValuesClause.setLength(multiValuesClause.length() - 1); + multiValuesClause.append("),"); + } + // Remove trailing comma + multiValuesClause.setLength(multiValuesClause.length() - 1); + return Pair.of(MULTI_VALUES_KEY, multiValuesClause.toString()); + } + + private String wrapString(String value, int type) { + switch (type) { + case java.sql.Types.BIGINT: + case java.sql.Types.INTEGER: + case java.sql.Types.SMALLINT: + case java.sql.Types.TINYINT: + return value; + default: + // Currently assumes String for all other types. + // TODO: Better handling and testing of data types across engines. + return "'" + value + "'"; } - return result; } } diff --git a/src/main/java/com/microsoft/lst_bench/common/BenchmarkConfig.java b/src/main/java/com/microsoft/lst_bench/common/BenchmarkConfig.java index 41adca17..e4f1ecd5 100644 --- a/src/main/java/com/microsoft/lst_bench/common/BenchmarkConfig.java +++ b/src/main/java/com/microsoft/lst_bench/common/BenchmarkConfig.java @@ -26,19 +26,13 @@ public class BenchmarkConfig { private final String id; private final int repetitions; private final Map metadata; - private final Map arguments; private final WorkloadExec workload; public BenchmarkConfig( - String id, - int repetitions, - Map metadata, - Map arguments, - WorkloadExec workload) { + String id, int repetitions, Map metadata, WorkloadExec workload) { this.id = id; this.repetitions = repetitions; this.metadata = Collections.unmodifiableMap(metadata == null ? new HashMap<>() : metadata); - this.arguments = Collections.unmodifiableMap(arguments == null ? 
new HashMap<>() : arguments); this.workload = workload; } @@ -54,10 +48,6 @@ public Map getMetadata() { return metadata; } - public Map getArguments() { - return arguments; - } - public WorkloadExec getWorkload() { return workload; } diff --git a/src/main/java/com/microsoft/lst_bench/common/SessionExecutor.java b/src/main/java/com/microsoft/lst_bench/common/SessionExecutor.java index 657e6b73..ab018ad5 100644 --- a/src/main/java/com/microsoft/lst_bench/common/SessionExecutor.java +++ b/src/main/java/com/microsoft/lst_bench/common/SessionExecutor.java @@ -21,6 +21,7 @@ import com.microsoft.lst_bench.exec.SessionExec; import com.microsoft.lst_bench.exec.TaskExec; import com.microsoft.lst_bench.task.TaskExecutor; +import com.microsoft.lst_bench.task.util.TaskExecutorArguments; import com.microsoft.lst_bench.telemetry.EventInfo; import com.microsoft.lst_bench.telemetry.EventInfo.EventType; import com.microsoft.lst_bench.telemetry.EventInfo.Status; @@ -121,7 +122,8 @@ private TaskExecutor getTaskExecutor(TaskExec task) { try { Constructor constructor = Class.forName(task.getCustomTaskExecutor()) - .getDeclaredConstructor(SQLTelemetryRegistry.class, String.class, Map.class); + .getDeclaredConstructor( + SQLTelemetryRegistry.class, String.class, TaskExecutorArguments.class); return (TaskExecutor) constructor.newInstance( this.telemetryRegistry, this.experimentStartTime, task.getTaskExecutorArguments()); diff --git a/src/main/java/com/microsoft/lst_bench/exec/TaskExec.java b/src/main/java/com/microsoft/lst_bench/exec/TaskExec.java index 78846814..64e398cb 100644 --- a/src/main/java/com/microsoft/lst_bench/exec/TaskExec.java +++ b/src/main/java/com/microsoft/lst_bench/exec/TaskExec.java @@ -15,8 +15,8 @@ */ package com.microsoft.lst_bench.exec; +import com.microsoft.lst_bench.task.util.TaskExecutorArguments; import java.util.List; -import java.util.Map; import javax.annotation.Nullable; import org.immutables.value.Value; @@ -33,7 +33,7 @@ public interface TaskExec { @Nullable String getTimeTravelPhaseId(); @Value.Parameter(false) - @Nullable Map getTaskExecutorArguments(); + @Nullable TaskExecutorArguments getTaskExecutorArguments(); @Value.Parameter(false) @Nullable String getCustomTaskExecutor(); diff --git a/src/main/java/com/microsoft/lst_bench/input/BenchmarkObjectFactory.java b/src/main/java/com/microsoft/lst_bench/input/BenchmarkObjectFactory.java index a93d56a5..24bc2d18 100644 --- a/src/main/java/com/microsoft/lst_bench/input/BenchmarkObjectFactory.java +++ b/src/main/java/com/microsoft/lst_bench/input/BenchmarkObjectFactory.java @@ -32,9 +32,11 @@ import com.microsoft.lst_bench.input.config.JDBCConnectionConfig; import com.microsoft.lst_bench.input.config.SparkConnectionConfig; import com.microsoft.lst_bench.sql.SQLParser; +import com.microsoft.lst_bench.task.util.TaskExecutorArguments; import com.microsoft.lst_bench.util.FileParser; import com.microsoft.lst_bench.util.StringUtils; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -75,6 +77,7 @@ private static JDBCConnectionManager jdbcConnectionManager( return new JDBCConnectionManager( connectionConfig.getUrl(), connectionConfig.getMaxNumRetries(), + connectionConfig.showWarnings(), connectionConfig.getUsername(), connectionConfig.getPassword()); } @@ -89,126 +92,185 @@ private static SparkConnectionManager sparkConnectionManager( * workload. 
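Since SessionExecutor now reflectively resolves a (SQLTelemetryRegistry, String, TaskExecutorArguments) constructor, any custom task executor must expose exactly that signature; a hedged skeleton (class name hypothetical):

    package com.microsoft.lst_bench.task.custom;

    import com.microsoft.lst_bench.task.TaskExecutor;
    import com.microsoft.lst_bench.task.util.TaskExecutorArguments;
    import com.microsoft.lst_bench.telemetry.SQLTelemetryRegistry;

    public class NoOpTaskExecutor extends TaskExecutor {
      // This constructor is what Class.forName(...).getDeclaredConstructor(...)
      // looks up in SessionExecutor.getTaskExecutor.
      public NoOpTaskExecutor(
          SQLTelemetryRegistry telemetryRegistry,
          String experimentStartTime,
          TaskExecutorArguments arguments) {
        super(telemetryRegistry, experimentStartTime, arguments);
      }
    }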
* * @param experimentConfig the experiment configuration - * @param taskLibrary the task library + * @param library the library * @param workload the workload * @return a benchmark configuration */ public static BenchmarkConfig benchmarkConfig( - ExperimentConfig experimentConfig, TaskLibrary taskLibrary, Workload workload) { - Map idToTaskTemplate = parseTaskLibrary(taskLibrary); + ExperimentConfig experimentConfig, Library library, Workload workload) { + InternalLibrary internalLibrary = parseLibrary(library); ImmutableWorkloadExec workloadExec = - createWorkloadExec(workload, idToTaskTemplate, experimentConfig); + createWorkloadExec(workload, internalLibrary, experimentConfig); return new BenchmarkConfig( experimentConfig.getId(), experimentConfig.getRepetitions(), experimentConfig.getMetadata(), - experimentConfig.getArguments(), workloadExec); } - /** - * Parses the task library to create a map of task templates with unique IDs. - * - * @param taskLibrary the task library to parse - * @return a map of task templates with unique IDs - * @throws IllegalArgumentException if there are duplicate task template IDs - */ - private static Map parseTaskLibrary(TaskLibrary taskLibrary) { - Map idToTaskTemplate = new HashMap<>(); - for (TaskTemplate taskTemplate : taskLibrary.getTaskTemplates()) { - if (idToTaskTemplate.containsKey(taskTemplate.getId())) { - throw new IllegalArgumentException("Duplicate task template id: " + taskTemplate.getId()); - } - idToTaskTemplate.put(taskTemplate.getId(), taskTemplate); - } - return idToTaskTemplate; - } - /** * Creates a workload execution from the workload and task library. * * @param workload the workload to execute - * @param idToTaskTemplate a map of task templates with unique IDs + * @param internalLibrary a library with task, session, and phase templates * @param experimentConfig the experiment configuration * @return a workload execution * @throws IllegalArgumentException if the workload contains an invalid task template ID */ private static ImmutableWorkloadExec createWorkloadExec( - Workload workload, - Map idToTaskTemplate, - ExperimentConfig experimentConfig) { + Workload workload, InternalLibrary internalLibrary, ExperimentConfig experimentConfig) { + List phases = workload.getPhases(); Map taskTemplateIdToPermuteOrderCounter = new HashMap<>(); Map taskTemplateIdToParameterValuesCounter = new HashMap<>(); - List phases = new ArrayList<>(); - for (Phase phase : workload.getPhases()) { + List phaseExecList = new ArrayList<>(); + for (Phase phase : phases) { PhaseExec phaseExec = createPhaseExec( phase, - idToTaskTemplate, + internalLibrary, experimentConfig, taskTemplateIdToPermuteOrderCounter, taskTemplateIdToParameterValuesCounter); - phases.add(phaseExec); + phaseExecList.add(phaseExec); } - return ImmutableWorkloadExec.of(workload.getId(), phases); + return ImmutableWorkloadExec.of(workload.getId(), phaseExecList); } private static PhaseExec createPhaseExec( Phase phase, - Map idToTaskTemplate, + InternalLibrary internalLibrary, ExperimentConfig experimentConfig, Map taskTemplateIdToPermuteOrderCounter, Map taskTemplateIdToParameterValuesCounter) { - List sessions = new ArrayList<>(); - for (int i = 0; i < phase.getSessions().size(); i++) { - Session session = phase.getSessions().get(i); + List sessions; + if (phase.getSessions() != null) { + sessions = phase.getSessions(); + } else if (phase.getTemplateId() != null) { + PhaseTemplate phaseTemplate = + internalLibrary.getIdToPhaseTemplate().get(phase.getTemplateId()); + if (phaseTemplate == 
null) { + throw new IllegalArgumentException("Unknown phase template id: " + phase.getTemplateId()); + } + sessions = phaseTemplate.getSessions(); + } else { + throw new IllegalStateException("Unknown phase type"); + } + List sessionExecList = new ArrayList<>(); + for (int i = 0; i < sessions.size(); i++) { + Session session = sessions.get(i); String sessionId = String.valueOf(i); SessionExec sessionExec = createSessionExec( sessionId, session, - idToTaskTemplate, + internalLibrary, experimentConfig, taskTemplateIdToPermuteOrderCounter, taskTemplateIdToParameterValuesCounter); - sessions.add(sessionExec); + sessionExecList.add(sessionExec); } - return ImmutablePhaseExec.of(phase.getId(), sessions); + return ImmutablePhaseExec.of(phase.getId(), sessionExecList); } private static SessionExec createSessionExec( String sessionId, Session session, - Map idToTaskTemplate, + InternalLibrary internalLibrary, ExperimentConfig experimentConfig, Map taskTemplateIdToPermuteOrderCounter, Map taskTemplateIdToParameterValuesCounter) { - List tasks = new ArrayList<>(); - for (int j = 0; j < session.getTasks().size(); j++) { - Task task = session.getTasks().get(j); + List tasks = getTasksFromSession(session, internalLibrary); + List taskExecList = new ArrayList<>(); + for (int j = 0; j < tasks.size(); j++) { + Task task = tasks.get(j); String taskId = task.getTemplateId() + "_" + j; TaskExec taskExec = createTaskExec( taskId, task, - idToTaskTemplate, + internalLibrary, experimentConfig, taskTemplateIdToPermuteOrderCounter, taskTemplateIdToParameterValuesCounter); - tasks.add(taskExec); + taskExecList.add(taskExec); } return ImmutableSessionExec.of( - sessionId, tasks, ObjectUtils.defaultIfNull(session.getTargetEndpoint(), 0)); + sessionId, taskExecList, ObjectUtils.defaultIfNull(session.getTargetEndpoint(), 0)); + } + + private static List getTasksFromSession(Session session, InternalLibrary internalLibrary) { + if (session.getTasks() != null) { + return session.getTasks(); + } else if (session.getTemplateId() != null) { + return getTasksFromSessionTemplate(session.getTemplateId(), internalLibrary); + } else if (session.getTasksSequences() != null) { + return expandTasksSequences(session.getTasksSequences(), internalLibrary); + } + throw new IllegalStateException("Unknown session type"); + } + + private static List getTasksFromSessionTemplate( + String templateId, InternalLibrary internalLibrary) { + SessionTemplate sessionTemplate = internalLibrary.getIdToSessionTemplate().get(templateId); + if (sessionTemplate == null) { + throw new IllegalArgumentException("Unknown session template id: " + templateId); + } + if (sessionTemplate.getTasks() != null) { + return sessionTemplate.getTasks(); + } else if (sessionTemplate.getTasksSequences() != null) { + return expandTasksSequences(sessionTemplate.getTasksSequences(), internalLibrary); + } + throw new IllegalStateException("Unknown session type"); + } + + /** + * Expands tasks sequences into a list of tasks. TODO: Nested sequences. 
+ * + * @param tasksSequences the tasks sequences to expand + * @param internalLibrary a library with task, session, and phase templates + * @return a list of tasks with tasks sequences expanded + * @throws IllegalArgumentException if a task references an unknown tasks sequence ID + */ + private static List<Task> expandTasksSequences( + List<TasksSequence> tasksSequences, InternalLibrary internalLibrary) { + List<Task> expandedTasks = new ArrayList<>(); + for (TasksSequence tasksSequence : tasksSequences) { + if (tasksSequence.getPreparedTasksSequenceId() != null) { + TasksSequence preparedTasksSequence = + internalLibrary.getIdToTasksSequence().get(tasksSequence.getPreparedTasksSequenceId()); + if (preparedTasksSequence == null) { + throw new IllegalArgumentException( + "Unknown prepared tasks sequence id: " + tasksSequence.getPreparedTasksSequenceId()); + } + expandedTasks.addAll(preparedTasksSequence.getTasks()); + } else { + expandedTasks.addAll(tasksSequence.getTasks()); + } + } + return Collections.unmodifiableList(expandedTasks); } private static TaskExec createTaskExec( String taskId, Task task, - Map<String, TaskTemplate> idToTaskTemplate, + InternalLibrary internalLibrary, ExperimentConfig experimentConfig, Map<String, Integer> taskTemplateIdToPermuteOrderCounter, Map<String, Integer> taskTemplateIdToParameterValuesCounter) { - TaskTemplate taskTemplate = idToTaskTemplate.get(task.getTemplateId()); + if (task.getPreparedTaskId() != null) { + Task preparedTask = internalLibrary.getIdToPreparedTask().get(task.getPreparedTaskId()); + if (preparedTask == null) { + throw new IllegalArgumentException("Unknown prepared task id: " + task.getPreparedTaskId()); + } + return createTaskExec( + taskId, + preparedTask, + internalLibrary, + experimentConfig, + taskTemplateIdToPermuteOrderCounter, + taskTemplateIdToParameterValuesCounter); + } + TaskTemplate taskTemplate = internalLibrary.getIdToTaskTemplate().get(task.getTemplateId()); if (taskTemplate == null) { throw new IllegalArgumentException("Unknown task template id: " + task.getTemplateId()); } @@ -219,10 +281,18 @@ private static TaskExec createTaskExec( experimentConfig, taskTemplateIdToPermuteOrderCounter, taskTemplateIdToParameterValuesCounter); + + // TODO: Figure out whether we should turn this into a class variable to avoid recomputation. + // Allow globally defined arguments (from the experiment config) to be merged into the task's + // arguments. + TaskExecutorArguments taskExecutorArguments = + new TaskExecutorArguments(experimentConfig.getTaskExecutorArguments()); + taskExecutorArguments.addArguments(task.getTaskExecutorArguments()); + return ImmutableTaskExec.of(taskId, files) .withTimeTravelPhaseId(task.getTimeTravelPhaseId()) .withCustomTaskExecutor(taskTemplate.getCustomTaskExecutor()) - .withTaskExecutorArguments(task.getTaskExecutorArguments()); + .withTaskExecutorArguments(taskExecutorArguments); } private static List<FileExec> createFileExecList( @@ -311,4 +381,107 @@ private static List<FileExec> applyParameterValues( .map(f -> StringUtils.replaceParameters(f, parameterValues)) .collect(Collectors.toList()); } + + /** + * Parses the library.
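To make the reuse mechanics concrete, a hedged sketch using the Immutables-generated builders (the builder API is assumed from @Value.Immutable; ids are hypothetical): a prepared task is declared once in the library and referenced by id, and a session can pull in a whole prepared tasks sequence the same way.

    package com.microsoft.lst_bench.input;

    import java.util.List;

    public class LibraryReuseExample {
      // Declared once under prepared_tasks in the library.
      static Task preparedTask() {
        return ImmutableTask.builder().id("optimize_all").templateId("optimize").build();
      }

      // Referenced from a workload; createTaskExec resolves "optimize_all"
      // through InternalLibrary and recurses with the prepared definition.
      static Task taskReference() {
        return ImmutableTask.builder().preparedTaskId("optimize_all").build();
      }

      // A session that inlines a prepared tasks sequence via expandTasksSequences.
      static Session sessionFromSequence() {
        return ImmutableSession.builder()
            .tasksSequences(
                List.of(
                    ImmutableTasksSequence.builder()
                        .preparedTasksSequenceId("delta_maintenance")
                        .build()))
            .build();
      }
    }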
+ * + * @param library the library to parse + * @return the library internal representation + * @throws IllegalArgumentException if there are duplicate IDs + */ + private static InternalLibrary parseLibrary(Library library) { + Map<String, TaskTemplate> idToTaskTemplate = new HashMap<>(); + for (TaskTemplate taskTemplate : library.getTaskTemplates()) { + if (idToTaskTemplate.containsKey(taskTemplate.getId())) { + throw new IllegalArgumentException("Duplicate task template id: " + taskTemplate.getId()); + } + idToTaskTemplate.put(taskTemplate.getId(), taskTemplate); + } + Map<String, SessionTemplate> idToSessionTemplate = new HashMap<>(); + if (library.getSessionTemplates() != null) { + for (SessionTemplate sessionTemplate : library.getSessionTemplates()) { + if (idToSessionTemplate.containsKey(sessionTemplate.getId())) { + throw new IllegalArgumentException( + "Duplicate session template id: " + sessionTemplate.getId()); + } + idToSessionTemplate.put(sessionTemplate.getId(), sessionTemplate); + } + } + Map<String, PhaseTemplate> idToPhaseTemplate = new HashMap<>(); + if (library.getPhaseTemplates() != null) { + for (PhaseTemplate phaseTemplate : library.getPhaseTemplates()) { + if (idToPhaseTemplate.containsKey(phaseTemplate.getId())) { + throw new IllegalArgumentException( + "Duplicate phase template id: " + phaseTemplate.getId()); + } + idToPhaseTemplate.put(phaseTemplate.getId(), phaseTemplate); + } + } + Map<String, Task> idToPreparedTask = new HashMap<>(); + if (library.getPreparedTasks() != null) { + for (Task preparedTask : library.getPreparedTasks()) { + if (idToPreparedTask.containsKey(preparedTask.getId())) { + throw new IllegalArgumentException("Duplicate prepared task id: " + preparedTask.getId()); + } + idToPreparedTask.put(preparedTask.getId(), preparedTask); + } + } + Map<String, TasksSequence> idToPreparedTasksSequence = new HashMap<>(); + if (library.getPreparedTasksSequences() != null) { + for (TasksSequence preparedTasksSequence : library.getPreparedTasksSequences()) { + if (idToPreparedTasksSequence.containsKey(preparedTasksSequence.getId())) { + throw new IllegalArgumentException( + "Duplicate prepared tasks sequence id: " + preparedTasksSequence.getId()); + } + idToPreparedTasksSequence.put(preparedTasksSequence.getId(), preparedTasksSequence); + } + } + return new InternalLibrary( + idToTaskTemplate, + idToSessionTemplate, + idToPhaseTemplate, + idToPreparedTask, + idToPreparedTasksSequence); + } + + private static class InternalLibrary { + private final Map<String, TaskTemplate> idToTaskTemplate; + private final Map<String, SessionTemplate> idToSessionTemplate; + private final Map<String, PhaseTemplate> idToPhaseTemplate; + private final Map<String, Task> idToPreparedTask; + private final Map<String, TasksSequence> idToTasksSequence; + + InternalLibrary( + Map<String, TaskTemplate> idToTaskTemplate, + Map<String, SessionTemplate> idToSessionTemplate, + Map<String, PhaseTemplate> idToPhaseTemplate, + Map<String, Task> idToPreparedTask, + Map<String, TasksSequence> idToTasksSequence) { + this.idToTaskTemplate = Collections.unmodifiableMap(idToTaskTemplate); + this.idToSessionTemplate = Collections.unmodifiableMap(idToSessionTemplate); + this.idToPhaseTemplate = Collections.unmodifiableMap(idToPhaseTemplate); + this.idToPreparedTask = Collections.unmodifiableMap(idToPreparedTask); + this.idToTasksSequence = Collections.unmodifiableMap(idToTasksSequence); + } + + private Map<String, TaskTemplate> getIdToTaskTemplate() { + return idToTaskTemplate; + } + + private Map<String, SessionTemplate> getIdToSessionTemplate() { + return idToSessionTemplate; + } + + private Map<String, PhaseTemplate> getIdToPhaseTemplate() { + return idToPhaseTemplate; + } + + private Map<String, Task> getIdToPreparedTask() { + return idToPreparedTask; + } + + private Map<String, TasksSequence> getIdToTasksSequence() { + return idToTasksSequence; + } } } diff --git
a/src/main/java/com/microsoft/lst_bench/input/Library.java b/src/main/java/com/microsoft/lst_bench/input/Library.java new file mode 100644 index 00000000..537363fa --- /dev/null +++ b/src/main/java/com/microsoft/lst_bench/input/Library.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) Microsoft Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.microsoft.lst_bench.input; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; +import javax.annotation.Nullable; +import org.immutables.value.Value; + +/** + * Represents an input library containing task, session, and phase templates, as well as prepared + * tasks and prepared tasks sequences, that can be instantiated to create workloads. + */ +@Value.Immutable +@Value.Style(jdkOnly = true) +@JsonSerialize(as = ImmutableLibrary.class) +@JsonDeserialize(as = ImmutableLibrary.class) +@JsonInclude(JsonInclude.Include.NON_NULL) +public interface Library { + int getVersion(); + + @JsonProperty("task_templates") + List<TaskTemplate> getTaskTemplates(); + + @JsonProperty("session_templates") + @Nullable List<SessionTemplate> getSessionTemplates(); + + @JsonProperty("phase_templates") + @Nullable List<PhaseTemplate> getPhaseTemplates(); + + @JsonProperty("prepared_tasks") + @Nullable List<Task> getPreparedTasks(); + + @JsonProperty("prepared_tasks_sequences") + @Nullable List<TasksSequence> getPreparedTasksSequences(); +} diff --git a/src/main/java/com/microsoft/lst_bench/input/Phase.java b/src/main/java/com/microsoft/lst_bench/input/Phase.java index 6600f93e..b48c71a4 100644 --- a/src/main/java/com/microsoft/lst_bench/input/Phase.java +++ b/src/main/java/com/microsoft/lst_bench/input/Phase.java @@ -16,9 +16,11 @@ package com.microsoft.lst_bench.input; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import java.util.List; +import javax.annotation.Nullable; import org.immutables.value.Value; /** POJO class meant to be used to deserialize an input phase. */ @@ -30,5 +32,18 @@ public interface Phase { String getId(); - List<Session> getSessions(); + @JsonProperty("template_id") + @Nullable String getTemplateId(); + + @Nullable List<Session> getSessions(); + + /** Validates that a phase has exactly one of template ID or list of sessions defined.
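The @Value.Check below guards programmatic construction as well as deserialization; a hedged illustration (generated builder API assumed, ids hypothetical):

    package com.microsoft.lst_bench.input;

    public class PhaseCheckExample {
      static Phase fromTemplate() {
        // Valid: exactly one of template_id / sessions is defined.
        return ImmutablePhase.builder().id("single_user").templateId("single_user_phase").build();
      }
      // Calling both templateId(...) and sessions(...) on the builder would make
      // build() fail with IllegalStateException from Phase#check.
    }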
*/ + @Value.Check + default void check() { + boolean onlyOneTrue = getTemplateId() != null ^ getSessions() != null; + if (!onlyOneTrue) { + throw new IllegalStateException( + "Must have exactly one of template id or list of sessions defined"); + } + } } diff --git a/src/main/java/com/microsoft/lst_bench/input/TaskLibrary.java b/src/main/java/com/microsoft/lst_bench/input/PhaseTemplate.java similarity index 71% rename from src/main/java/com/microsoft/lst_bench/input/TaskLibrary.java rename to src/main/java/com/microsoft/lst_bench/input/PhaseTemplate.java index eae1c9f1..34940ae6 100644 --- a/src/main/java/com/microsoft/lst_bench/input/TaskLibrary.java +++ b/src/main/java/com/microsoft/lst_bench/input/PhaseTemplate.java @@ -16,24 +16,22 @@ package com.microsoft.lst_bench.input; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import java.util.List; import org.immutables.value.Value; /** - * Represents an input task library containing task templates that can be instantiated to create - * tasks. + * A phase template is a template for a phase. Importantly, it references the sessions that are + * required to run the phase. */ @Value.Immutable @Value.Style(jdkOnly = true) -@JsonSerialize(as = ImmutableTaskLibrary.class) -@JsonDeserialize(as = ImmutableTaskLibrary.class) +@JsonSerialize(as = ImmutablePhaseTemplate.class) +@JsonDeserialize(as = ImmutablePhaseTemplate.class) @JsonInclude(JsonInclude.Include.NON_NULL) -public interface TaskLibrary { - int getVersion(); +public interface PhaseTemplate { + String getId(); - @JsonProperty("task_templates") - List getTaskTemplates(); + List getSessions(); } diff --git a/src/main/java/com/microsoft/lst_bench/input/Session.java b/src/main/java/com/microsoft/lst_bench/input/Session.java index 59f1353d..14774027 100644 --- a/src/main/java/com/microsoft/lst_bench/input/Session.java +++ b/src/main/java/com/microsoft/lst_bench/input/Session.java @@ -30,8 +30,31 @@ @JsonDeserialize(as = ImmutableSession.class) @JsonInclude(JsonInclude.Include.NON_NULL) public interface Session { - List getTasks(); + + @JsonProperty("template_id") + @Nullable String getTemplateId(); + + @Nullable List getTasks(); + + @JsonProperty("tasks_sequences") + @Nullable List getTasksSequences(); @JsonProperty("target_endpoint") @Nullable Integer getTargetEndpoint(); + + /** + * Validates that a session has exactly one of template ID, list of tasks, or list of tasks + * sequences defined. + */ + @Value.Check + default void check() { + boolean onlyOneTrue = + (getTemplateId() != null && getTasks() == null && getTasksSequences() == null) + || (getTemplateId() == null && getTasks() != null && getTasksSequences() == null) + || (getTemplateId() == null && getTasks() == null && getTasksSequences() != null); + if (!onlyOneTrue) { + throw new IllegalStateException( + "Must have exactly one of template id, list of tasks, or list of tasks sequences defined"); + } + } } diff --git a/src/main/java/com/microsoft/lst_bench/input/SessionTemplate.java b/src/main/java/com/microsoft/lst_bench/input/SessionTemplate.java new file mode 100644 index 00000000..b860e1e8 --- /dev/null +++ b/src/main/java/com/microsoft/lst_bench/input/SessionTemplate.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) Microsoft Corporation. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.microsoft.lst_bench.input; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; +import javax.annotation.Nullable; +import org.immutables.value.Value; + +/** + * A session template is a template for a session. Importantly, it references the tasks that are + * required to run the session. + */ +@Value.Immutable +@Value.Style(jdkOnly = true) +@JsonSerialize(as = ImmutableSessionTemplate.class) +@JsonDeserialize(as = ImmutableSessionTemplate.class) +@JsonInclude(JsonInclude.Include.NON_NULL) +public interface SessionTemplate { + String getId(); + + @Nullable List getTasks(); + + @JsonProperty("tasks_sequences") + @Nullable List getTasksSequences(); + + @Value.Check + default void check() { + boolean onlyOneTrue = getTasks() != null ^ getTasksSequences() != null; + if (!onlyOneTrue) { + throw new IllegalStateException( + "Must have exactly one of list of tasks sequences or list of tasks defined"); + } + } +} diff --git a/src/main/java/com/microsoft/lst_bench/input/Task.java b/src/main/java/com/microsoft/lst_bench/input/Task.java index 6023d83d..792ca75e 100644 --- a/src/main/java/com/microsoft/lst_bench/input/Task.java +++ b/src/main/java/com/microsoft/lst_bench/input/Task.java @@ -31,8 +31,15 @@ @JsonDeserialize(as = ImmutableTask.class) @JsonInclude(JsonInclude.Include.NON_NULL) public interface Task { + + @JsonProperty("id") + @Nullable String getId(); + + @JsonProperty("prepared_task_id") + @Nullable String getPreparedTaskId(); + @JsonProperty("template_id") - String getTemplateId(); + @Nullable String getTemplateId(); @JsonProperty("permute_order") @Nullable Boolean isPermuteOrder(); @@ -41,7 +48,7 @@ public interface Task { @Nullable String getTimeTravelPhaseId(); @JsonProperty("task_executor_arguments") - @Nullable Map getTaskExecutorArguments(); + @Nullable Map getTaskExecutorArguments(); @JsonProperty("replace_regex") @Nullable List getReplaceRegex(); @@ -54,4 +61,14 @@ interface ReplaceRegex { String getReplacement(); } + + /** Validates that a task has exactly one of template ID or prepared task ID defined. */ + @Value.Check + default void check() { + boolean onlyOneTrue = getTemplateId() != null ^ getPreparedTaskId() != null; + if (!onlyOneTrue) { + throw new IllegalStateException( + "Must have exactly one of template id or prepared task id defined"); + } + } } diff --git a/src/main/java/com/microsoft/lst_bench/input/TasksSequence.java b/src/main/java/com/microsoft/lst_bench/input/TasksSequence.java new file mode 100644 index 00000000..2f2400fa --- /dev/null +++ b/src/main/java/com/microsoft/lst_bench/input/TasksSequence.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) Microsoft Corporation. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.microsoft.lst_bench.input; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; +import javax.annotation.Nullable; +import org.immutables.value.Value; + +/** Represents a sequence of tasks to be executed in order. */ +@Value.Immutable +@Value.Style(jdkOnly = true) +@JsonSerialize(as = ImmutableTasksSequence.class) +@JsonDeserialize(as = ImmutableTasksSequence.class) +@JsonInclude(JsonInclude.Include.NON_NULL) +public interface TasksSequence { + @JsonProperty("id") + @Nullable String getId(); + + @JsonProperty("prepared_tasks_sequence_id") + @Nullable String getPreparedTasksSequenceId(); + + @Nullable List getTasks(); + + @Value.Check + default void check() { + boolean onlyOneTrue = getPreparedTasksSequenceId() != null ^ getTasks() != null; + if (!onlyOneTrue) { + throw new IllegalStateException( + "Must have exactly one of prepared tasks sequence id or list of tasks defined"); + } + } +} diff --git a/src/main/java/com/microsoft/lst_bench/input/config/ExperimentConfig.java b/src/main/java/com/microsoft/lst_bench/input/config/ExperimentConfig.java index 930353d1..9b414a0e 100644 --- a/src/main/java/com/microsoft/lst_bench/input/config/ExperimentConfig.java +++ b/src/main/java/com/microsoft/lst_bench/input/config/ExperimentConfig.java @@ -41,6 +41,6 @@ public interface ExperimentConfig { @JsonProperty("parameter_values") @Nullable Map getParameterValues(); - @JsonProperty("arguments") - @Nullable Map getArguments(); + @JsonProperty("task_executor_arguments") + @Nullable Map getTaskExecutorArguments(); } diff --git a/src/main/java/com/microsoft/lst_bench/input/config/JDBCConnectionConfig.java b/src/main/java/com/microsoft/lst_bench/input/config/JDBCConnectionConfig.java index ae5f73e7..40d04668 100644 --- a/src/main/java/com/microsoft/lst_bench/input/config/JDBCConnectionConfig.java +++ b/src/main/java/com/microsoft/lst_bench/input/config/JDBCConnectionConfig.java @@ -37,6 +37,12 @@ default int getMaxNumRetries() { return 0; } + @JsonProperty("show_warnings") + @Value.Default + default boolean showWarnings() { + return false; + } + @Nullable String getUsername(); @Nullable String getPassword(); diff --git a/src/main/java/com/microsoft/lst_bench/task/TaskExecutor.java b/src/main/java/com/microsoft/lst_bench/task/TaskExecutor.java index 516ee8ef..998efa07 100644 --- a/src/main/java/com/microsoft/lst_bench/task/TaskExecutor.java +++ b/src/main/java/com/microsoft/lst_bench/task/TaskExecutor.java @@ -21,6 +21,7 @@ import com.microsoft.lst_bench.exec.FileExec; import com.microsoft.lst_bench.exec.StatementExec; import com.microsoft.lst_bench.exec.TaskExec; +import com.microsoft.lst_bench.task.util.TaskExecutorArguments; import com.microsoft.lst_bench.telemetry.EventInfo; import 
com.microsoft.lst_bench.telemetry.EventInfo.EventType; import com.microsoft.lst_bench.telemetry.EventInfo.Status; @@ -44,43 +45,17 @@ public class TaskExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(TaskExecutor.class); - private final String SKIP_ERRONEOUS_QUERY_DELIMITER = ";"; - private final String SKIP_ERRONEOUS_QUERY_STRINGS_KEY = "skip_erroneous_query_strings"; - protected final SQLTelemetryRegistry telemetryRegistry; protected final String experimentStartTime; - protected final Map arguments; - - protected final String[] exceptionStrings; + protected final TaskExecutorArguments arguments; public TaskExecutor( SQLTelemetryRegistry telemetryRegistry, String experimentStartTime, - Map arguments) { + TaskExecutorArguments arguments) { this.experimentStartTime = experimentStartTime; this.telemetryRegistry = telemetryRegistry; this.arguments = arguments; - this.exceptionStrings = getExceptionStrings(); - } - - protected Map getArguments() { - return this.arguments; - } - - private String[] getExceptionStrings() { - // Check whether there are any strings that errors are allowed to contain. In that case, we skip - // the erroneous query and log a warning. - String[] exceptionStrings; - if (this.getArguments() == null - || this.getArguments().get(SKIP_ERRONEOUS_QUERY_STRINGS_KEY) == null) { - exceptionStrings = new String[] {}; - } else { - exceptionStrings = - this.getArguments() - .get(SKIP_ERRONEOUS_QUERY_STRINGS_KEY) - .split(SKIP_ERRONEOUS_QUERY_DELIMITER); - } - return exceptionStrings; } public void executeTask(Connection connection, TaskExec task, Map values) @@ -106,50 +81,63 @@ protected final QueryResult executeStatement( Map values, boolean ignoreResults) throws ClientException { - boolean skip = false; + boolean execute = true; QueryResult queryResult = null; Instant statementStartTime = Instant.now(); - try { - if (ignoreResults) { - connection.execute(StringUtils.replaceParameters(statement, values).getStatement()); - } else { - queryResult = - connection.executeQuery( - StringUtils.replaceParameters(statement, values).getStatement()); - } - } catch (Exception e) { - String loggedError = - "Exception executing statement: " - + statement.getId() - + ", statement text: " - + statement.getStatement() - + "; error message: " - + e.getMessage(); - for (String skipException : exceptionStrings) { - if (e.getMessage().contains(skipException)) { - LOGGER.warn(loggedError); + + while (execute) { + try { + if (ignoreResults) { + connection.execute(StringUtils.replaceParameters(statement, values).getStatement()); + } else { + queryResult = + connection.executeQuery( + StringUtils.replaceParameters(statement, values).getStatement()); + } + execute = false; + writeStatementEvent( + statementStartTime, statement.getId(), Status.SUCCESS, /* payload= */ null); + + } catch (Exception e) { + String loggedError = + "Exception executing statement: " + + statement.getId() + + ", statement text: " + + statement.getStatement() + + "; error message: " + + e.getMessage(); + + if (containsException(e.getMessage(), this.arguments.getRetryExceptionStrings())) { + // If retry is specified, log a warning and continue. 
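For context on where getRetryExceptionStrings/getSkipExceptionStrings come from, a hedged sketch of the backing map (the key names match TaskExecutorArgumentsParser further below; the error substrings are invented):

    import com.microsoft.lst_bench.task.util.TaskExecutorArguments;
    import java.util.HashMap;
    import java.util.Map;

    public class RetrySkipExample {
      public static void main(String[] args) {
        Map<String, Object> raw = new HashMap<>();
        // ';'-delimited substrings matched against e.getMessage() above.
        raw.put("retry_erroneous_query_strings", "CONCURRENT_UPDATE;OPERATION_TIMED_OUT");
        raw.put("skip_erroneous_query_strings", "TABLE_ALREADY_EXISTS");
        TaskExecutorArguments parsed = new TaskExecutorArguments(raw);
        // parsed.getRetryExceptionStrings() -> {"CONCURRENT_UPDATE", "OPERATION_TIMED_OUT"}
        // parsed.getSkipExceptionStrings()  -> {"TABLE_ALREADY_EXISTS"}
      }
    }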
+ LOGGER.warn("Query failed but retry mechanism is set: {}", loggedError); writeStatementEvent( statementStartTime, statement.getId(), Status.WARN, /* payload= */ loggedError); - - skip = true; - break; + continue; + } else if (containsException(e.getMessage(), this.arguments.getSkipExceptionStrings())) { + // If skip is specified, log a warning and stop query execution. + LOGGER.warn("Query failed but skip mechanism is set: " + loggedError); + writeStatementEvent( + statementStartTime, statement.getId(), Status.WARN, /* payload= */ loggedError); + execute = false; + } else { + LOGGER.error(loggedError); + writeStatementEvent( + statementStartTime, statement.getId(), Status.FAILURE, /* payload= */ loggedError); + throw e; } } + } - if (!skip) { - LOGGER.error(loggedError); - writeStatementEvent( - statementStartTime, statement.getId(), Status.FAILURE, /* payload= */ loggedError); + return queryResult; + } - throw e; + private boolean containsException(String message, String[] exceptionStrings) { + for (String exception : exceptionStrings) { + if (message.contains(exception)) { + return true; } } - // Only log success if we have not skipped execution. - if (!skip) { - writeStatementEvent( - statementStartTime, statement.getId(), Status.SUCCESS, /* payload= */ null); - } - return queryResult; + return false; } protected final EventInfo writeFileEvent(Instant startTime, String id, Status status) { diff --git a/src/main/java/com/microsoft/lst_bench/task/custom/DependentTaskExecutor.java b/src/main/java/com/microsoft/lst_bench/task/custom/DependentTaskExecutor.java index ac44611f..5cccd036 100644 --- a/src/main/java/com/microsoft/lst_bench/task/custom/DependentTaskExecutor.java +++ b/src/main/java/com/microsoft/lst_bench/task/custom/DependentTaskExecutor.java @@ -22,11 +22,14 @@ import com.microsoft.lst_bench.exec.StatementExec; import com.microsoft.lst_bench.exec.TaskExec; import com.microsoft.lst_bench.task.TaskExecutor; +import com.microsoft.lst_bench.task.util.TaskExecutorArguments; import com.microsoft.lst_bench.telemetry.EventInfo.Status; import com.microsoft.lst_bench.telemetry.SQLTelemetryRegistry; +import com.microsoft.lst_bench.util.TaskExecutorArgumentsParser; import java.time.Instant; import java.util.HashMap; import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,24 +46,22 @@ public class DependentTaskExecutor extends TaskExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(DependentTaskExecutor.class); - private final int DEFAULT_BATCH_SIZE = 1; - private final String DEPENDENT_TASK_BATCH_SIZE = "dependent_task_batch_size"; + private final DependentTaskExecutorArguments dependentArguments; public DependentTaskExecutor( SQLTelemetryRegistry telemetryRegistry, String experimentStartTime, - Map arguments) { + TaskExecutorArguments arguments) { super(telemetryRegistry, experimentStartTime, arguments); + dependentArguments = new DependentTaskExecutorArguments(arguments.getArguments()); } @Override public void executeTask(Connection connection, TaskExec task, Map values) throws ClientException { - int batchSize; - if (this.getArguments() == null || this.getArguments().get(DEPENDENT_TASK_BATCH_SIZE) == null) { - batchSize = DEFAULT_BATCH_SIZE; - } else { - batchSize = Integer.valueOf(this.getArguments().get(DEPENDENT_TASK_BATCH_SIZE)); + Integer batchSize = dependentArguments.getBatchSize(); + if (batchSize == null) { + throw new ClientException("Batch size needs to be set for dependent task 
execution."); } QueryResult queryResult = null; @@ -88,7 +89,8 @@ public void executeTask(Connection connection, TaskExec task, Map size ? size : (j + batchSize); Map localValues = new HashMap<>(values); - localValues.putAll(queryResult.getStringMappings(j, localMax)); + Pair batch = queryResult.getStringMappings(j, localMax); + localValues.put(batch.getKey(), batch.getValue()); executeStatement(connection, statement, localValues, true); } // Reset query result. @@ -102,4 +104,18 @@ public void executeTask(Connection connection, TaskExec task, Map arguments) { + super(arguments); + this.batchSize = TaskExecutorArgumentsParser.parseBatchSize(arguments); + } + + public Integer getBatchSize() { + return this.batchSize; + } + } } diff --git a/src/main/java/com/microsoft/lst_bench/task/util/TaskExecutorArguments.java b/src/main/java/com/microsoft/lst_bench/task/util/TaskExecutorArguments.java new file mode 100644 index 00000000..976a4778 --- /dev/null +++ b/src/main/java/com/microsoft/lst_bench/task/util/TaskExecutorArguments.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) Microsoft Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.microsoft.lst_bench.task.util; + +import com.microsoft.lst_bench.util.TaskExecutorArgumentsParser; +import java.util.Map; +import java.util.stream.Stream; + +public class TaskExecutorArguments { + + private String[] retryExceptionStrings; + private String[] skipExceptionStrings; + private Map arguments; + + public TaskExecutorArguments(Map arguments) { + this.retryExceptionStrings = TaskExecutorArgumentsParser.parseRetryExceptionStrings(arguments); + this.skipExceptionStrings = TaskExecutorArgumentsParser.parseSkipExceptionStrings(arguments); + this.arguments = arguments; + } + + public String[] getRetryExceptionStrings() { + return this.retryExceptionStrings; + } + + public String[] getSkipExceptionStrings() { + return this.skipExceptionStrings; + } + + public Map getArguments() { + return this.arguments; + } + + // Added arguments are automatically appended if possible. 
+ public void addArguments(Map arguments) { + if (arguments == null) { + return; + } else if (this.arguments == null) { + this.arguments = arguments; + } else { + this.arguments.putAll(arguments); + } + + this.retryExceptionStrings = + Stream.of( + this.getRetryExceptionStrings(), + TaskExecutorArgumentsParser.parseRetryExceptionStrings(arguments)) + .flatMap(Stream::of) + .toArray(String[]::new); + + this.skipExceptionStrings = + Stream.of( + this.getSkipExceptionStrings(), + TaskExecutorArgumentsParser.parseSkipExceptionStrings(arguments)) + .flatMap(Stream::of) + .toArray(String[]::new); + } +} diff --git a/src/main/java/com/microsoft/lst_bench/util/FileParser.java b/src/main/java/com/microsoft/lst_bench/util/FileParser.java old mode 100644 new mode 100755 index 757fd1c6..f6fb950a --- a/src/main/java/com/microsoft/lst_bench/util/FileParser.java +++ b/src/main/java/com/microsoft/lst_bench/util/FileParser.java @@ -18,7 +18,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; -import com.microsoft.lst_bench.input.TaskLibrary; +import com.microsoft.lst_bench.input.Library; import com.microsoft.lst_bench.input.Workload; import com.microsoft.lst_bench.input.config.ConnectionsConfig; import com.microsoft.lst_bench.input.config.ExperimentConfig; @@ -30,10 +30,10 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -45,15 +45,7 @@ public class FileParser { private static final ObjectMapper YAML_MAPPER = new YAMLMapper(); - private static final String SCHEMAS_PATH = - "src" - + File.separator - + "main" - + File.separator - + "resources" - + File.separator - + "schemas" - + File.separator; + private static final String SCHEMAS_PATH = "schemas" + File.separator; private FileParser() { // Defeat instantiation @@ -127,9 +119,13 @@ public static Map getParameterValues(String parameterValuesFile, /** * Reads the YAML file and replaces all environment variables (if present). Validates the YAML - * file according to the schema. Creates and returns a `TaskLibrary` object. + * file according to the schema. Creates and returns a `Library` object. + * + * <p>
Exports LIB_PATH for the directory of the file, so that the file contents can reference it + * as ${LIB_PATH}. */ - public static TaskLibrary loadTaskLibrary(String filePath) throws IOException { - return createObject(filePath, TaskLibrary.class, SCHEMAS_PATH + "task_library.json"); + public static Library loadLibrary(String filePath) throws IOException { + exportFilePath(filePath, "LIB_PATH"); + return createObject(filePath, Library.class, SCHEMAS_PATH + "library.json"); } /** @@ -137,6 +133,7 @@ public static TaskLibrary loadTaskLibrary(String filePath) throws IOException { * file according to the schema. Creates and returns a `Workload` object. */ public static Workload loadWorkload(String filePath) throws IOException { + exportFilePath(filePath, "WL_PATH"); return createObject(filePath, Workload.class, SCHEMAS_PATH + "workload.json"); } @@ -145,6 +142,7 @@ public static Workload loadWorkload(String filePath) throws IOException { * file according to the schema. Creates and returns a `ConnectionsConfig` object. */ public static ConnectionsConfig loadConnectionsConfig(String filePath) throws IOException { + exportFilePath(filePath, "CON_PATH"); return createObject( filePath, ConnectionsConfig.class, SCHEMAS_PATH + "connections_config.json"); } @@ -154,6 +152,7 @@ public static ConnectionsConfig loadConnectionsConfig(String filePath) throws IO * file according to the schema. Creates and returns a `ExperimentConfig` object. */ public static ExperimentConfig loadExperimentConfig(String filePath) throws IOException { + exportFilePath(filePath, "EXP_PATH"); return createObject(filePath, ExperimentConfig.class, SCHEMAS_PATH + "experiment_config.json"); } @@ -162,6 +161,7 @@ public static ExperimentConfig loadExperimentConfig(String filePath) throws IOEx * file according to the schema. Creates and returns a `TelemetryConfig` object. 
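A hedged example of the path-export behavior (file layout invented): loading an input file publishes its directory as a JVM property, which the YAML can then reference through the substitution in StringUtils.replaceEnvVars.

    import com.microsoft.lst_bench.input.Library;
    import com.microsoft.lst_bench.util.FileParser;

    public class LibPathExample {
      public static void main(String[] args) throws Exception {
        // Loading /cfg/library.yaml exports LIB_PATH=/cfg, so the YAML may
        // contain references such as ${LIB_PATH}/scripts/setup.sql.
        Library library = FileParser.loadLibrary("/cfg/library.yaml");
        System.out.println(System.getProperty("LIB_PATH")); // -> /cfg
      }
    }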
    */
   public static TelemetryConfig loadTelemetryConfig(String filePath) throws IOException {
+    exportFilePath(filePath, "TEL_PATH");
     return createObject(filePath, TelemetryConfig.class, SCHEMAS_PATH + "telemetry_config.json");
   }

@@ -172,13 +172,31 @@ public static TelemetryConfig loadTelemetryConfig(String filePath) throws IOExce
    */
   private static <T> T createObject(String filePath, Class<T> objectType, String schemaFilePath)
       throws IOException {
-    String resolvedYAMLContent = StringUtils.replaceEnvVars(new File(filePath));
+
+    // Verify that files exist
+    File file = new File(filePath);
+    if (!file.exists()) {
+      throw new IllegalArgumentException("File does not exist: " + filePath);
+    }
+
+    InputStream schemaInputStream =
+        FileParser.class.getClassLoader().getResourceAsStream(schemaFilePath);
+    if (schemaInputStream == null) {
+      throw new IllegalArgumentException("Schema file does not exist: " + schemaFilePath);
+    }
+
+    String resolvedYAMLContent = StringUtils.replaceEnvVars(file);
+
+    if (resolvedYAMLContent == null) {
+      throw new IllegalArgumentException("Error resolving environment variables in YAML file");
+    }
+
     // Validate YAML file contents
     JsonSchemaFactory factory =
         JsonSchemaFactory.builder(JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012))
             .objectMapper(YAML_MAPPER)
             .build();
-    JsonSchema schema = factory.getSchema(Files.newInputStream(Paths.get(schemaFilePath)));
+    JsonSchema schema = factory.getSchema(schemaInputStream);
     JsonNode jsonNodeDirect = YAML_MAPPER.readTree(resolvedYAMLContent);
     Set<ValidationMessage> errorsFromFile = schema.validate(jsonNodeDirect);
     if (!errorsFromFile.isEmpty()) {
@@ -187,4 +205,11 @@ private static <T> T createObject(String filePath, Class<T> objectType, String s
     // Create and return POJO
     return YAML_MAPPER.treeToValue(jsonNodeDirect, objectType);
   }
+
+  /** Exports the directory of the file as a JVM system property. */
+  private static void exportFilePath(String file, String variableName) {
+    File f = new File(file);
+    String directory = f.isDirectory() ? file : f.getParent();
+    System.setProperty(variableName, directory);
+  }
 }
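To illustrate the new LIB_PATH behavior (a sketch with hypothetical paths, not part of this change): loading a file exports its parent directory as a JVM system property, which the StringUtils substitution below then resolves inside the YAML contents:

    // Suppose the library lives at /opt/lst-bench/config/library.yaml and one of
    // its task templates points at ${LIB_PATH}/scripts/q1.sql.
    Library library = FileParser.loadLibrary("/opt/lst-bench/config/library.yaml");
    // loadLibrary first runs exportFilePath(filePath, "LIB_PATH"), i.e.
    // System.setProperty("LIB_PATH", "/opt/lst-bench/config"), so the reference
    // resolves to /opt/lst-bench/config/scripts/q1.sql.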
diff --git a/src/main/java/com/microsoft/lst_bench/util/StringUtils.java b/src/main/java/com/microsoft/lst_bench/util/StringUtils.java
index e81e21ff..eccddcf6 100644
--- a/src/main/java/com/microsoft/lst_bench/util/StringUtils.java
+++ b/src/main/java/com/microsoft/lst_bench/util/StringUtils.java
@@ -22,6 +22,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -92,9 +93,10 @@ public static FileExec replaceRegex(FileExec f, String regex, String replacement
   }

   /**
-   * Reads the contents of the `sourceFile` and replaces any environment variables if present. If
-   * the environment variable is not set, the default value is used if specified. All other
-   * parameters are ignored.
+   * Reads the contents of the `sourceFile` and replaces any environment variables and JVM
+   * properties if present. JVM properties take precedence over environment variables. If an
+   * environment variable is not set, the default value is used if specified. All other parameters
+   * are ignored.
    */
   public static String replaceEnvVars(File sourceFile) throws IOException {
     if (sourceFile == null || !sourceFile.isFile()) {
@@ -102,7 +104,9 @@ public static String replaceEnvVars(File sourceFile) throws IOException {
       LOGGER.debug("replaceEnvVars received a null or missing file.");
       return null;
     }
-    StringSubstitutor envSub = new StringSubstitutor(System.getenv());
+    Map<String, String> env = new HashMap<>(System.getenv());
+    System.getProperties().forEach((k, v) -> env.put(k.toString(), v.toString()));
+    StringSubstitutor envSub = new StringSubstitutor(env);
     return envSub.replace(FileUtils.readFileToString(sourceFile, StandardCharsets.UTF_8));
   }
 }
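A sketch of the resulting precedence rules (hypothetical variable and file names, not part of this change): JVM properties now shadow environment variables of the same name, and the `${VAR:-default}` form still falls back when neither is set:

    // Assume the environment defines DATA_PATH=/mnt/env-data.
    System.setProperty("DATA_PATH", "/mnt/jvm-data");
    // For a YAML file containing "path: ${DATA_PATH}", the property wins:
    String resolved = StringUtils.replaceEnvVars(new File("workload.yaml"));
    // -> "path: /mnt/jvm-data"; without the property it would be /mnt/env-data,
    // and "${DATA_PATH:-/tmp/data}" would yield /tmp/data if neither were set.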
diff --git a/src/main/java/com/microsoft/lst_bench/util/TaskExecutorArgumentsParser.java b/src/main/java/com/microsoft/lst_bench/util/TaskExecutorArgumentsParser.java
new file mode 100644
index 00000000..ac931241
--- /dev/null
+++ b/src/main/java/com/microsoft/lst_bench/util/TaskExecutorArgumentsParser.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.microsoft.lst_bench.util;
+
+import java.util.Map;
+
+public class TaskExecutorArgumentsParser {
+
+  private static final String RETRY_ERRONEOUS_QUERY_DELIMITER = ";";
+  private static final String RETRY_ERRONEOUS_QUERY_STRINGS_KEY = "retry_erroneous_query_strings";
+  private static final String SKIP_ERRONEOUS_QUERY_DELIMITER = ";";
+  private static final String SKIP_ERRONEOUS_QUERY_STRINGS_KEY = "skip_erroneous_query_strings";
+  private static final int DEFAULT_BATCH_SIZE = 1;
+  private static final String DEPENDENT_TASK_BATCH_SIZE = "dependent_task_batch_size";
+
+  public static String[] parseSkipExceptionStrings(Map<String, Object> arguments) {
+    // Check whether there are any strings that errors are allowed to contain. In that case, we skip
+    // the erroneous query and log a warning.
+    String[] exceptionStrings;
+    if (arguments == null || arguments.get(SKIP_ERRONEOUS_QUERY_STRINGS_KEY) == null) {
+      exceptionStrings = new String[] {};
+    } else {
+      exceptionStrings =
+          arguments
+              .get(SKIP_ERRONEOUS_QUERY_STRINGS_KEY)
+              .toString()
+              .split(SKIP_ERRONEOUS_QUERY_DELIMITER);
+    }
+    return exceptionStrings;
+  }
+
+  public static String[] parseRetryExceptionStrings(Map<String, Object> arguments) {
+    // Check whether there are any strings that tell us that we should continue to retry this query
+    // until successful.
+    String[] exceptionStrings;
+    if (arguments == null || arguments.get(RETRY_ERRONEOUS_QUERY_STRINGS_KEY) == null) {
+      exceptionStrings = new String[] {};
+    } else {
+      exceptionStrings =
+          arguments
+              .get(RETRY_ERRONEOUS_QUERY_STRINGS_KEY)
+              .toString()
+              .split(RETRY_ERRONEOUS_QUERY_DELIMITER);
+    }
+    return exceptionStrings;
+  }
+
+  public static Integer parseBatchSize(Map<String, Object> arguments) {
+    // Parses the batch size, currently used for dependent task execution.
+    Integer batchSize = null;
+    if (arguments == null || arguments.get(DEPENDENT_TASK_BATCH_SIZE) == null) {
+      batchSize = DEFAULT_BATCH_SIZE;
+    } else {
+      batchSize = Integer.valueOf(arguments.get(DEPENDENT_TASK_BATCH_SIZE).toString());
+    }
+    return batchSize;
+  }
+}
diff --git a/src/main/resources/config/spark/sample_connections_config.yaml b/src/main/resources/config/spark/sample_connections_config.yaml
index fc6551f6..60d9c4ae 100644
--- a/src/main/resources/config/spark/sample_connections_config.yaml
+++ b/src/main/resources/config/spark/sample_connections_config.yaml
@@ -6,6 +6,7 @@ connections:
   driver: org.apache.hive.jdbc.HiveDriver
   url: jdbc:hive2://127.0.0.1:10000
   max_num_retries: 3
+  show_warnings: true
   username: ${DATABASE_USER:-spark_admin}
   password: ${DATABASE_PASSWORD}
 - id: spark_1
diff --git a/src/main/resources/config/spark/tpcds/task_library.yaml b/src/main/resources/config/spark/tpcds/task_library.yaml
deleted file mode 100644
index a90f126b..00000000
--- a/src/main/resources/config/spark/tpcds/task_library.yaml
+++ /dev/null
@@ -1,529 +0,0 @@
-# Description: Tasks Library
----
-version: 1
-task_templates:
-# Create external tables needed for benchmark
-- id: setup
-  files:
-  - src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql
-# Create data maintenance external tables needed for benchmark
-- id: setup_data_maintenance
-  files:
-  - src/main/resources/scripts/tpcds/setup_data_maintenance/spark/ddl-external-tables-refresh.sql
-  parameter_values_file: src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat
-# Create schema and drop existing tables
-- id: init
-  files:
-  - src/main/resources/scripts/tpcds/init/spark/init.sql
-# Create benchmark tables and load data into them
-- id: build
-  files:
-  - src/main/resources/scripts/tpcds/build/spark/1_create_call_center.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer_address.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_date_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_household_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_income_band.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_inventory.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_item.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_promotion.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_reason.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_ship_mode.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_time_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_warehouse.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_site.sql
-  - 
src/main/resources/scripts/tpcds/build/spark/2_load_call_center.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_page.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_returns.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_sales.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_customer.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_customer_address.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_customer_demographics.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_date_dim.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_household_demographics.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_income_band.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_inventory.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_item.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_promotion.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_reason.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_ship_mode.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_store.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_store_returns.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_store_sales.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_time_dim.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_warehouse.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_web_page.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_web_returns.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_web_sales.sql - - src/main/resources/scripts/tpcds/build/spark/2_load_web_site.sql -# Compute statistics for tables -- id: analyze - files: - - src/main/resources/scripts/tpcds/build/spark/3_analyze_call_center.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_page.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_returns.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_catalog_sales.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_customer.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_address.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_customer_demographics.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_date_dim.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_household_demographics.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_income_band.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_inventory.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_item.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_promotion.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_reason.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_ship_mode.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_store.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_store_returns.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_store_sales.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_time_dim.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_warehouse.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_web_page.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_web_returns.sql - - src/main/resources/scripts/tpcds/build/spark/3_analyze_web_sales.sql - - 
src/main/resources/scripts/tpcds/build/spark/3_analyze_web_site.sql -# Execution of TPC-DS queries (possibly in a previous point-in-time) -- id: single_user - files: - - src/main/resources/scripts/tpcds/single_user/spark/query1.sql - - src/main/resources/scripts/tpcds/single_user/spark/query2.sql - - src/main/resources/scripts/tpcds/single_user/spark/query3.sql - - src/main/resources/scripts/tpcds/single_user/spark/query4.sql - - src/main/resources/scripts/tpcds/single_user/spark/query5.sql - - src/main/resources/scripts/tpcds/single_user/spark/query6.sql - - src/main/resources/scripts/tpcds/single_user/spark/query7.sql - - src/main/resources/scripts/tpcds/single_user/spark/query8.sql - - src/main/resources/scripts/tpcds/single_user/spark/query9.sql - - src/main/resources/scripts/tpcds/single_user/spark/query10.sql - - src/main/resources/scripts/tpcds/single_user/spark/query11.sql - - src/main/resources/scripts/tpcds/single_user/spark/query12.sql - - src/main/resources/scripts/tpcds/single_user/spark/query13.sql - - src/main/resources/scripts/tpcds/single_user/spark/query14.sql - - src/main/resources/scripts/tpcds/single_user/spark/query15.sql - - src/main/resources/scripts/tpcds/single_user/spark/query16.sql - - src/main/resources/scripts/tpcds/single_user/spark/query17.sql - - src/main/resources/scripts/tpcds/single_user/spark/query18.sql - - src/main/resources/scripts/tpcds/single_user/spark/query19.sql - - src/main/resources/scripts/tpcds/single_user/spark/query20.sql - - src/main/resources/scripts/tpcds/single_user/spark/query21.sql - - src/main/resources/scripts/tpcds/single_user/spark/query22.sql - - src/main/resources/scripts/tpcds/single_user/spark/query23.sql - - src/main/resources/scripts/tpcds/single_user/spark/query24.sql - - src/main/resources/scripts/tpcds/single_user/spark/query25.sql - - src/main/resources/scripts/tpcds/single_user/spark/query26.sql - - src/main/resources/scripts/tpcds/single_user/spark/query27.sql - - src/main/resources/scripts/tpcds/single_user/spark/query28.sql - - src/main/resources/scripts/tpcds/single_user/spark/query29.sql - - src/main/resources/scripts/tpcds/single_user/spark/query30.sql - - src/main/resources/scripts/tpcds/single_user/spark/query31.sql - - src/main/resources/scripts/tpcds/single_user/spark/query32.sql - - src/main/resources/scripts/tpcds/single_user/spark/query33.sql - - src/main/resources/scripts/tpcds/single_user/spark/query34.sql - - src/main/resources/scripts/tpcds/single_user/spark/query35.sql - - src/main/resources/scripts/tpcds/single_user/spark/query36.sql - - src/main/resources/scripts/tpcds/single_user/spark/query37.sql - - src/main/resources/scripts/tpcds/single_user/spark/query38.sql - - src/main/resources/scripts/tpcds/single_user/spark/query39.sql - - src/main/resources/scripts/tpcds/single_user/spark/query40.sql - - src/main/resources/scripts/tpcds/single_user/spark/query41.sql - - src/main/resources/scripts/tpcds/single_user/spark/query42.sql - - src/main/resources/scripts/tpcds/single_user/spark/query43.sql - - src/main/resources/scripts/tpcds/single_user/spark/query44.sql - - src/main/resources/scripts/tpcds/single_user/spark/query45.sql - - src/main/resources/scripts/tpcds/single_user/spark/query46.sql - - src/main/resources/scripts/tpcds/single_user/spark/query47.sql - - src/main/resources/scripts/tpcds/single_user/spark/query48.sql - - src/main/resources/scripts/tpcds/single_user/spark/query49.sql - - src/main/resources/scripts/tpcds/single_user/spark/query50.sql - - 
src/main/resources/scripts/tpcds/single_user/spark/query51.sql - - src/main/resources/scripts/tpcds/single_user/spark/query52.sql - - src/main/resources/scripts/tpcds/single_user/spark/query53.sql - - src/main/resources/scripts/tpcds/single_user/spark/query54.sql - - src/main/resources/scripts/tpcds/single_user/spark/query55.sql - - src/main/resources/scripts/tpcds/single_user/spark/query56.sql - - src/main/resources/scripts/tpcds/single_user/spark/query57.sql - - src/main/resources/scripts/tpcds/single_user/spark/query58.sql - - src/main/resources/scripts/tpcds/single_user/spark/query59.sql - - src/main/resources/scripts/tpcds/single_user/spark/query60.sql - - src/main/resources/scripts/tpcds/single_user/spark/query61.sql - - src/main/resources/scripts/tpcds/single_user/spark/query62.sql - - src/main/resources/scripts/tpcds/single_user/spark/query63.sql - - src/main/resources/scripts/tpcds/single_user/spark/query64.sql - - src/main/resources/scripts/tpcds/single_user/spark/query65.sql - - src/main/resources/scripts/tpcds/single_user/spark/query66.sql - - src/main/resources/scripts/tpcds/single_user/spark/query67.sql - - src/main/resources/scripts/tpcds/single_user/spark/query68.sql - - src/main/resources/scripts/tpcds/single_user/spark/query69.sql - - src/main/resources/scripts/tpcds/single_user/spark/query70.sql - - src/main/resources/scripts/tpcds/single_user/spark/query71.sql - - src/main/resources/scripts/tpcds/single_user/spark/query72.sql - - src/main/resources/scripts/tpcds/single_user/spark/query73.sql - - src/main/resources/scripts/tpcds/single_user/spark/query74.sql - - src/main/resources/scripts/tpcds/single_user/spark/query75.sql - - src/main/resources/scripts/tpcds/single_user/spark/query76.sql - - src/main/resources/scripts/tpcds/single_user/spark/query77.sql - - src/main/resources/scripts/tpcds/single_user/spark/query78.sql - - src/main/resources/scripts/tpcds/single_user/spark/query79.sql - - src/main/resources/scripts/tpcds/single_user/spark/query80.sql - - src/main/resources/scripts/tpcds/single_user/spark/query81.sql - - src/main/resources/scripts/tpcds/single_user/spark/query82.sql - - src/main/resources/scripts/tpcds/single_user/spark/query83.sql - - src/main/resources/scripts/tpcds/single_user/spark/query84.sql - - src/main/resources/scripts/tpcds/single_user/spark/query85.sql - - src/main/resources/scripts/tpcds/single_user/spark/query86.sql - - src/main/resources/scripts/tpcds/single_user/spark/query87.sql - - src/main/resources/scripts/tpcds/single_user/spark/query88.sql - - src/main/resources/scripts/tpcds/single_user/spark/query89.sql - - src/main/resources/scripts/tpcds/single_user/spark/query90.sql - - src/main/resources/scripts/tpcds/single_user/spark/query91.sql - - src/main/resources/scripts/tpcds/single_user/spark/query92.sql - - src/main/resources/scripts/tpcds/single_user/spark/query93.sql - - src/main/resources/scripts/tpcds/single_user/spark/query94.sql - - src/main/resources/scripts/tpcds/single_user/spark/query95.sql - - src/main/resources/scripts/tpcds/single_user/spark/query96.sql - - src/main/resources/scripts/tpcds/single_user/spark/query97.sql - - src/main/resources/scripts/tpcds/single_user/spark/query98.sql - - src/main/resources/scripts/tpcds/single_user/spark/query99.sql - permutation_orders_path: src/main/resources/auxiliary/tpcds/single_user/permutation_orders/ - supports_time_travel: true -# Execution of TPC-DS data maintenance queries (Delta) -- id: data_maintenance_delta - files: - - 
src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-merge.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-merge.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-merge.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-merge.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WS.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat -# Execution of TPC-DS data maintenance queries (Iceberg) -- id: data_maintenance_iceberg - files: - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WS.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat -# Execution of TPC-DS data maintenance queries (Hudi) -- id: data_maintenance_hudi - files: - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_CS-mixed.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_I-mixed.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_SS-mixed.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/DF_WS-mixed.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WR.sql - - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_WS.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat -# Execution of optimize on all benchmark tables (Delta) -- id: optimize_delta - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-delta.sql - - 
src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_inventory-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-delta.sql -# Execution of optimize on all benchmark tables but splitting optimization -# of partitioned tables into batches by relying on dependent task executor (Delta) -- id: optimize_split_delta - custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-delta.sql - - 
src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-delta.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-delta.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-delta.sql -# Execution of optimize on all benchmark tables (Iceberg) -- id: optimize_iceberg - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_inventory-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-iceberg.sql -# Execution of optimize on all benchmark tables but splitting optimization -# of partitioned tables into batches by relying on dependent task executor (Iceberg) -- id: optimize_split_iceberg - custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql - - 
src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-iceberg.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-iceberg.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-iceberg.sql -# Execution of optimize on all benchmark tables (Hudi) -- id: optimize_hudi - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_returns-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_sales-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-hudi.sql - - 
src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_inventory-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_returns-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store_sales-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_returns-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_sales-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-hudi.sql -# Execution of optimize on all benchmark tables but splitting optimization -# of partitioned tables into batches by relying on dependent task executor (Hudi) -- id: optimize_split_hudi - custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor - files: - - src/main/resources/scripts/tpcds/optimize/spark/o_call_center-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_catalog_page-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_sales_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_address-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_customer_demographics-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_date_dim-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_household_demographics-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_income_band-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_inventory_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_item-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_promotion-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_reason-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_store-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_returns_NULL-hudi.sql - - 
src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_store_sales_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_time_dim-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_warehouse-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_page-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_returns_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_SELECT.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_IN-hudi.sql - - src/main/resources/scripts/tpcds/optimize_split/spark/o_web_sales_NULL-hudi.sql - - src/main/resources/scripts/tpcds/optimize/spark/o_web_site-hudi.sql -# Execution of dependent TPC-DS data maintenance queries -- id: data_maintenance_dependent - custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor - files: - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_3.sql - - 
src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CR_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_CS_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_I_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SR_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_SS_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_3.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WR_insert.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_1.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_2.sql - - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_3.sql - - 
src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/LF_WS_insert.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat \ No newline at end of file diff --git a/src/main/resources/config/spark/tpch/task_library.yaml b/src/main/resources/config/spark/tpch/task_library.yaml deleted file mode 100644 index 426d96dd..00000000 --- a/src/main/resources/config/spark/tpch/task_library.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Description: Tasks Library ---- -version: 1 -task_templates: -# Create external tables needed for benchmark -- id: setup - files: - - src/main/resources/scripts/tpch/setup/spark/ddl-external-tables.sql -# Create data maintenance external tables needed for benchmark -- id: setup_data_maintenance - files: - - src/main/resources/scripts/tpch/setup_data_maintenance/spark/ddl-external-tables-refresh.sql - parameter_values_file: src/main/resources/auxiliary/tpch/setup_data_maintenance/parameter_values.dat -# Create schema and drop existing tables -- id: init - files: - - src/main/resources/scripts/tpch/init/spark/init.sql -# Create benchmark tables and load data into them -- id: build - files: - - src/main/resources/scripts/tpch/build/spark/1_create_customer.sql - - src/main/resources/scripts/tpch/build/spark/1_create_lineitem.sql - - src/main/resources/scripts/tpch/build/spark/1_create_nation.sql - - src/main/resources/scripts/tpch/build/spark/1_create_orders.sql - - src/main/resources/scripts/tpch/build/spark/1_create_part.sql - - src/main/resources/scripts/tpch/build/spark/1_create_partsupp.sql - - src/main/resources/scripts/tpch/build/spark/1_create_region.sql - - src/main/resources/scripts/tpch/build/spark/1_create_supplier.sql - - src/main/resources/scripts/tpch/build/spark/2_load_customer.sql - - src/main/resources/scripts/tpch/build/spark/2_load_lineitem.sql - - src/main/resources/scripts/tpch/build/spark/2_load_nation.sql - - src/main/resources/scripts/tpch/build/spark/2_load_orders.sql - - src/main/resources/scripts/tpch/build/spark/2_load_part.sql - - src/main/resources/scripts/tpch/build/spark/2_load_partsupp.sql - - src/main/resources/scripts/tpch/build/spark/2_load_region.sql - - src/main/resources/scripts/tpch/build/spark/2_load_supplier.sql -# Compute statistics for tables -- id: analyze - files: - - src/main/resources/scripts/tpch/build/spark/3_analyze_customer.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_lineitem.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_nation.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_orders.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_part.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_partsupp.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_region.sql - - src/main/resources/scripts/tpch/build/spark/3_analyze_supplier.sql -# Execution of TPC-H queries -- id: single_user - files: - - src/main/resources/scripts/tpch/single_user/spark/query1.sql - - src/main/resources/scripts/tpch/single_user/spark/query2.sql - - src/main/resources/scripts/tpch/single_user/spark/query3.sql - - src/main/resources/scripts/tpch/single_user/spark/query4.sql - - src/main/resources/scripts/tpch/single_user/spark/query5.sql - - src/main/resources/scripts/tpch/single_user/spark/query6.sql - - src/main/resources/scripts/tpch/single_user/spark/query7.sql - - src/main/resources/scripts/tpch/single_user/spark/query8.sql - - src/main/resources/scripts/tpch/single_user/spark/query9.sql - - 
src/main/resources/scripts/tpch/single_user/spark/query10.sql - - src/main/resources/scripts/tpch/single_user/spark/query11.sql - - src/main/resources/scripts/tpch/single_user/spark/query12.sql - - src/main/resources/scripts/tpch/single_user/spark/query13.sql - - src/main/resources/scripts/tpch/single_user/spark/query14.sql - - src/main/resources/scripts/tpch/single_user/spark/query15.sql - - src/main/resources/scripts/tpch/single_user/spark/query16.sql - - src/main/resources/scripts/tpch/single_user/spark/query17.sql - - src/main/resources/scripts/tpch/single_user/spark/query18.sql - - src/main/resources/scripts/tpch/single_user/spark/query19.sql - - src/main/resources/scripts/tpch/single_user/spark/query20.sql - - src/main/resources/scripts/tpch/single_user/spark/query21.sql - - src/main/resources/scripts/tpch/single_user/spark/query22.sql -# Execution of RF1 TPC-H data maintenance queries -- id: data_maintenance_1 - files: - - src/main/resources/scripts/tpch/data_maintenance/spark/RF1.sql - parameter_values_file: src/main/resources/auxiliary/tpch/data_maintenance/parameter_values.dat -# Execution of RF2 TPC-H data maintenance queries (using MERGE) -- id: data_maintenance_2_merge - files: - - src/main/resources/scripts/tpch/data_maintenance/spark/RF2-merge.sql - parameter_values_file: src/main/resources/auxiliary/tpch/data_maintenance/parameter_values.dat -# Execution of RF2 TPC-H data maintenance queries (using DELETE) -- id: data_maintenance_2_delete - files: - - src/main/resources/scripts/tpch/data_maintenance/spark/RF2.sql - parameter_values_file: src/main/resources/auxiliary/tpch/data_maintenance/parameter_values.dat diff --git a/src/main/resources/config/trino/tpcds/task_library.yaml b/src/main/resources/config/trino/tpcds/task_library.yaml deleted file mode 100644 index 79c6047c..00000000 --- a/src/main/resources/config/trino/tpcds/task_library.yaml +++ /dev/null @@ -1,282 +0,0 @@ -# Description: Tasks Library ---- -version: 1 -task_templates: -# Create external tables needed for benchmark -- id: setup - files: - - src/main/resources/scripts/tpcds/setup/trino/ddl-external-tables.sql -# Create data maintenance external tables needed for benchmark -- id: setup_data_maintenance - files: - - src/main/resources/scripts/tpcds/setup_data_maintenance/trino/ddl-external-tables-refresh.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat -# Create schema and drop existing tables -- id: init - files: - - src/main/resources/scripts/tpcds/init/trino/init.sql -# Create benchmark tables and load data into them -- id: build - files: - - src/main/resources/scripts/tpcds/build/trino/1_create_call_center.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_catalog_page.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_catalog_returns.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_catalog_sales.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_customer.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_customer_address.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_customer_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_date_dim.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_household_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_income_band.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_inventory.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_item.sql - - 
src/main/resources/scripts/tpcds/build/trino/1_create_promotion.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_reason.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_ship_mode.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_store.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_store_returns.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_store_sales.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_time_dim.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_warehouse.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_web_page.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_web_returns.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_web_sales.sql - - src/main/resources/scripts/tpcds/build/trino/1_create_web_site.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_call_center.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_catalog_page.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_catalog_returns.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_catalog_sales.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_customer.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_customer_address.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_customer_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_date_dim.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_household_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_income_band.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_inventory.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_item.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_promotion.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_reason.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_ship_mode.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_store.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_store_returns.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_store_sales.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_time_dim.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_warehouse.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_web_page.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_web_returns.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_web_sales.sql - - src/main/resources/scripts/tpcds/build/trino/2_load_web_site.sql -# Compute statistics for tables -- id: analyze - files: - - src/main/resources/scripts/tpcds/build/trino/3_analyze_call_center.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_page.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_returns.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_catalog_sales.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_customer.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_customer_address.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_customer_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_date_dim.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_household_demographics.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_income_band.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_inventory.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_item.sql - 
- src/main/resources/scripts/tpcds/build/trino/3_analyze_promotion.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_reason.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_ship_mode.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_store.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_store_returns.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_store_sales.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_time_dim.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_warehouse.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_web_page.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_web_returns.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_web_sales.sql - - src/main/resources/scripts/tpcds/build/trino/3_analyze_web_site.sql -# Execution of TPC-DS queries -- id: single_user - files: - - src/main/resources/scripts/tpcds/single_user/trino/query1.sql - - src/main/resources/scripts/tpcds/single_user/trino/query2.sql - - src/main/resources/scripts/tpcds/single_user/trino/query3.sql - - src/main/resources/scripts/tpcds/single_user/trino/query4.sql - - src/main/resources/scripts/tpcds/single_user/trino/query5.sql - - src/main/resources/scripts/tpcds/single_user/trino/query6.sql - - src/main/resources/scripts/tpcds/single_user/trino/query7.sql - - src/main/resources/scripts/tpcds/single_user/trino/query8.sql - - src/main/resources/scripts/tpcds/single_user/trino/query9.sql - - src/main/resources/scripts/tpcds/single_user/trino/query10.sql - - src/main/resources/scripts/tpcds/single_user/trino/query11.sql - - src/main/resources/scripts/tpcds/single_user/trino/query12.sql - - src/main/resources/scripts/tpcds/single_user/trino/query13.sql - - src/main/resources/scripts/tpcds/single_user/trino/query14.sql - - src/main/resources/scripts/tpcds/single_user/trino/query15.sql - - src/main/resources/scripts/tpcds/single_user/trino/query16.sql - - src/main/resources/scripts/tpcds/single_user/trino/query17.sql - - src/main/resources/scripts/tpcds/single_user/trino/query18.sql - - src/main/resources/scripts/tpcds/single_user/trino/query19.sql - - src/main/resources/scripts/tpcds/single_user/trino/query20.sql - - src/main/resources/scripts/tpcds/single_user/trino/query21.sql - - src/main/resources/scripts/tpcds/single_user/trino/query22.sql - - src/main/resources/scripts/tpcds/single_user/trino/query23.sql - - src/main/resources/scripts/tpcds/single_user/trino/query24.sql - - src/main/resources/scripts/tpcds/single_user/trino/query25.sql - - src/main/resources/scripts/tpcds/single_user/trino/query26.sql - - src/main/resources/scripts/tpcds/single_user/trino/query27.sql - - src/main/resources/scripts/tpcds/single_user/trino/query28.sql - - src/main/resources/scripts/tpcds/single_user/trino/query29.sql - - src/main/resources/scripts/tpcds/single_user/trino/query30.sql - - src/main/resources/scripts/tpcds/single_user/trino/query31.sql - - src/main/resources/scripts/tpcds/single_user/trino/query32.sql - - src/main/resources/scripts/tpcds/single_user/trino/query33.sql - - src/main/resources/scripts/tpcds/single_user/trino/query34.sql - - src/main/resources/scripts/tpcds/single_user/trino/query35.sql - - src/main/resources/scripts/tpcds/single_user/trino/query36.sql - - src/main/resources/scripts/tpcds/single_user/trino/query37.sql - - src/main/resources/scripts/tpcds/single_user/trino/query38.sql - - src/main/resources/scripts/tpcds/single_user/trino/query39.sql - - 
src/main/resources/scripts/tpcds/single_user/trino/query40.sql - - src/main/resources/scripts/tpcds/single_user/trino/query41.sql - - src/main/resources/scripts/tpcds/single_user/trino/query42.sql - - src/main/resources/scripts/tpcds/single_user/trino/query43.sql - - src/main/resources/scripts/tpcds/single_user/trino/query44.sql - - src/main/resources/scripts/tpcds/single_user/trino/query45.sql - - src/main/resources/scripts/tpcds/single_user/trino/query46.sql - - src/main/resources/scripts/tpcds/single_user/trino/query47.sql - - src/main/resources/scripts/tpcds/single_user/trino/query48.sql - - src/main/resources/scripts/tpcds/single_user/trino/query49.sql - - src/main/resources/scripts/tpcds/single_user/trino/query50.sql - - src/main/resources/scripts/tpcds/single_user/trino/query51.sql - - src/main/resources/scripts/tpcds/single_user/trino/query52.sql - - src/main/resources/scripts/tpcds/single_user/trino/query53.sql - - src/main/resources/scripts/tpcds/single_user/trino/query54.sql - - src/main/resources/scripts/tpcds/single_user/trino/query55.sql - - src/main/resources/scripts/tpcds/single_user/trino/query56.sql - - src/main/resources/scripts/tpcds/single_user/trino/query57.sql - - src/main/resources/scripts/tpcds/single_user/trino/query58.sql - - src/main/resources/scripts/tpcds/single_user/trino/query59.sql - - src/main/resources/scripts/tpcds/single_user/trino/query60.sql - - src/main/resources/scripts/tpcds/single_user/trino/query61.sql - - src/main/resources/scripts/tpcds/single_user/trino/query62.sql - - src/main/resources/scripts/tpcds/single_user/trino/query63.sql - - src/main/resources/scripts/tpcds/single_user/trino/query64.sql - - src/main/resources/scripts/tpcds/single_user/trino/query65.sql - - src/main/resources/scripts/tpcds/single_user/trino/query66.sql - - src/main/resources/scripts/tpcds/single_user/trino/query67.sql - - src/main/resources/scripts/tpcds/single_user/trino/query68.sql - - src/main/resources/scripts/tpcds/single_user/trino/query69.sql - - src/main/resources/scripts/tpcds/single_user/trino/query70.sql - - src/main/resources/scripts/tpcds/single_user/trino/query71.sql - - src/main/resources/scripts/tpcds/single_user/trino/query72.sql - - src/main/resources/scripts/tpcds/single_user/trino/query73.sql - - src/main/resources/scripts/tpcds/single_user/trino/query74.sql - - src/main/resources/scripts/tpcds/single_user/trino/query75.sql - - src/main/resources/scripts/tpcds/single_user/trino/query76.sql - - src/main/resources/scripts/tpcds/single_user/trino/query77.sql - - src/main/resources/scripts/tpcds/single_user/trino/query78.sql - - src/main/resources/scripts/tpcds/single_user/trino/query79.sql - - src/main/resources/scripts/tpcds/single_user/trino/query80.sql - - src/main/resources/scripts/tpcds/single_user/trino/query81.sql - - src/main/resources/scripts/tpcds/single_user/trino/query82.sql - - src/main/resources/scripts/tpcds/single_user/trino/query83.sql - - src/main/resources/scripts/tpcds/single_user/trino/query84.sql - - src/main/resources/scripts/tpcds/single_user/trino/query85.sql - - src/main/resources/scripts/tpcds/single_user/trino/query86.sql - - src/main/resources/scripts/tpcds/single_user/trino/query87.sql - - src/main/resources/scripts/tpcds/single_user/trino/query88.sql - - src/main/resources/scripts/tpcds/single_user/trino/query89.sql - - src/main/resources/scripts/tpcds/single_user/trino/query90.sql - - src/main/resources/scripts/tpcds/single_user/trino/query91.sql - - src/main/resources/scripts/tpcds/single_user/trino/query92.sql - - 
src/main/resources/scripts/tpcds/single_user/trino/query93.sql - - src/main/resources/scripts/tpcds/single_user/trino/query94.sql - - src/main/resources/scripts/tpcds/single_user/trino/query95.sql - - src/main/resources/scripts/tpcds/single_user/trino/query96.sql - - src/main/resources/scripts/tpcds/single_user/trino/query97.sql - - src/main/resources/scripts/tpcds/single_user/trino/query98.sql - - src/main/resources/scripts/tpcds/single_user/trino/query99.sql - permutation_orders_path: src/main/resources/auxiliary/tpcds/single_user/permutation_orders/ -# Execution of TPC-DS data maintenance queries -- id: data_maintenance - files: - - src/main/resources/scripts/tpcds/data_maintenance/trino/DF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/DF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/DF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/DF_WS.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_CR.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_CS.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_I.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_SR.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_SS.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_WR.sql - - src/main/resources/scripts/tpcds/data_maintenance/trino/LF_WS.sql - parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat -# Execution of optimize on all benchmark tables -- id: optimize - files: - - src/main/resources/scripts/tpcds/optimize/trino/o_call_center.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_catalog_page.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_catalog_returns.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_catalog_sales.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_customer.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_customer_address.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_customer_demographics.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_date_dim.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_household_demographics.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_income_band.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_inventory.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_item.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_promotion.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_reason.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_ship_mode.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_store.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_store_returns.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_store_sales.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_time_dim.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_warehouse.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_web_page.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_web_returns.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_web_sales.sql - - src/main/resources/scripts/tpcds/optimize/trino/o_web_site.sql -# Execution of optimize on all benchmark tables but splitting optimization -# of partitioned tables into batches by relying on dependent task executor -- id: optimize_split - files: - - src/main/resources/scripts/tpcds/optimize/trino/o_call_center.sql - - 
src/main/resources/scripts/tpcds/optimize/trino/o_catalog_page.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_returns_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_catalog_sales_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_customer.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_customer_address.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_customer_demographics.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_date_dim.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_household_demographics.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_income_band.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_inventory_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_item.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_promotion.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_reason.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_ship_mode.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_store.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_returns_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_store_sales_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_time_dim.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_warehouse.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_web_page.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_returns_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_IN.sql
-  - src/main/resources/scripts/tpcds/optimize_split/trino/o_web_sales_NULL.sql
-  - src/main/resources/scripts/tpcds/optimize/trino/o_web_site.sql
diff --git a/src/main/resources/schemas/connections_config.json b/src/main/resources/schemas/connections_config.json
index fe201224..1088a228 100644
--- a/src/main/resources/schemas/connections_config.json
+++ b/src/main/resources/schemas/connections_config.json
@@ -32,7 +32,11 @@
     },
     "max_num_retries": {
       "type": "integer",
-      "title": "The number of times a query can be retried (default: 0)"
+      "title": "The number of times a query can be retried (default: 0). If set to -1, it will retry until successful"
+    },
+    "show_warnings": {
+      "type": "boolean",
+      "title": "Whether statements are logged in a verbose manner (may incur execution overhead). Set to 'false' by default."
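To make the two new connection options concrete, the sketch below shows how a connection entry could opt into retry-until-success and warning logging. This is an illustration only: the driver class, URL, and surrounding file layout are assumptions for the example, not values taken from this change.

```yaml
# Hypothetical connection entry illustrating the new options.
version: 1
connections:
- id: spark_0
  driver: org.apache.hive.jdbc.HiveDriver  # placeholder driver, not from this diff
  url: jdbc:hive2://127.0.0.1:10000        # placeholder endpoint, not from this diff
  max_num_retries: -1    # -1 retries a failed statement until it succeeds
  show_warnings: false   # the default; 'true' enables verbose statement logging
```

Leaving max_num_retries unset keeps the previous behavior of not retrying at all, since its default remains 0.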
}, "username": { "type": "string", diff --git a/src/main/resources/schemas/experiment_config.json b/src/main/resources/schemas/experiment_config.json index 0854cf7c..5882ef18 100644 --- a/src/main/resources/schemas/experiment_config.json +++ b/src/main/resources/schemas/experiment_config.json @@ -25,6 +25,11 @@ "type": "object", "title": "Experiment parameter values", "description": "Map of parameter name-value pairs. When a parameter name is referenced in a SQL script used in the experiment, it will be replaced by its corresponding value" + }, + "task_executor_arguments": { + "type": "object", + "title": "Task executor arguments", + "description": "Any arguments passed to a (custom) task executor can be defined here. Arguments set here are defined globally for all tasks." } }, "additionalProperties": false diff --git a/src/main/resources/schemas/instance.json b/src/main/resources/schemas/instance.json new file mode 100644 index 00000000..97951115 --- /dev/null +++ b/src/main/resources/schemas/instance.json @@ -0,0 +1,189 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "title": "Workload entity instances", + "$defs" : { + "task": { + "type": "object", + "title": "Task", + "oneOf": [ + { + "type": "object", + "required": [ "prepared_task_id" ], + "properties": { + "prepared_task_id": { + "type": "string", + "title": "Identifier of a prepared task" + } + } + }, + { + "allOf": [ + { + "oneOf": [ + { + "$ref": "resource:/schemas/template.json#/$defs/task_template" + }, + { + "type": "object", + "required": [ "template_id" ], + "properties": { + "template_id": { + "type": "string", + "title": "Template identifier", + "description": "Identifier of the template that this task is based on" + } + } + } + ] + }, + { + "type": "object", + "properties": { + "permute_order": { + "type": "boolean", + "title": "Enable scripts order permutation", + "description": "If task template specified a permutation orders path, this property can be used to specify whether the permutation should be applied or not" + }, + "time_travel_phase_id": { + "type": "string", + "title": "Time travel phase identifier", + "description": "If the task template supports time travel, this property can be used to specify the version of the data that will be queried by this task" + }, + "task_executor_arguments": { + "type": "object", + "title": "Task executor arguments", + "description": "Any arguments passed to a (custom) task executor can be defined here. Arguments set here are defined locally for a single task." 
+ }, + "replace_regex": { + "type": "array", + "title": "List of regex to match and replace", + "description": "List of regex to match and corresponding replacement in the SQL scripts associated with this task", + "items": { + "type": "object", + "title": "Regex to replace", + "required": [ "pattern", "replacement" ], + "properties": { + "pattern": { + "type": "string", + "title": "Regex pattern to match" + }, + "replacement": { + "type": "string", + "title": "Replacement for each match" + } + }, + "additionalProperties": false + } + } + } + } + ] + } + ] + }, + "tasks_sequence": { + "type": "object", + "title": "Tasks sequence", + "oneOf": [ + { + "type": "object", + "required": [ "prepared_tasks_sequence_id" ], + "properties": { + "prepared_tasks_sequence_id": { + "type": "string", + "title": "Identifier of a prepared tasks sequence" + } + } + }, + { + "type": "object", + "required": [ "tasks" ], + "properties": { + "tasks": { + "type": "array", + "title": "List of tasks", + "description": "List of tasks to be executed in sequence", + "items": { + "type": "object", + "allOf": [ + { + "$ref": "resource:/schemas/instance.json#/$defs/task" + } + ] + } + } + } + } + ] + }, + "session": { + "type": "object", + "title": "Session", + "allOf": [ + { + "oneOf": [ + { + "$ref": "resource:/schemas/template.json#/$defs/session_template" + }, + { + "type": "object", + "required": [ "template_id" ], + "properties": { + "template_id": { + "type": "string", + "title": "Template identifier", + "description": "Identifier of the template that this session is based on" + } + } + } + ] + }, + { + "type": "object", + "properties": { + "target_endpoint": { + "type": "integer", + "title": "Target endpoint index (default: 0)", + "description": "The positional index (starting from 0) of the connection manager within the connections configuration file" + } + } + } + ] + }, + "phase": { + "type": "object", + "title": "Phase", + "allOf": [ + { + "oneOf": [ + { + "$ref": "resource:/schemas/template.json#/$defs/phase_template" + }, + { + "type": "object", + "required": [ "template_id" ], + "properties": { + "template_id": { + "type": "string", + "title": "Template identifier", + "description": "Identifier of the template that this phase is based on" + } + } + } + ] + }, + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the phase" + } + } + } + ] + } + } +} \ No newline at end of file diff --git a/src/main/resources/schemas/library.json b/src/main/resources/schemas/library.json new file mode 100644 index 00000000..874d6247 --- /dev/null +++ b/src/main/resources/schemas/library.json @@ -0,0 +1,154 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "title": "Schema for library definition file", + "required": [ "version", "task_templates" ], + "properties": { + "version": { + "type": "integer", + "title": "File format version" + }, + "task_templates": { + "type": "array", + "title": "List of task templates", + "items": { + "$ref": "#/$defs/id_task_template" + }, + "additionalProperties": false + }, + "session_templates": { + "type": "array", + "title": "List of session templates", + "items": { + "$ref": "#/$defs/id_session_template" + }, + "additionalProperties": false + }, + "phase_templates": { + "type": "array", + "title": "List of phase templates", + "items": { + "$ref": "#/$defs/id_phase_template" + }, + "additionalProperties": false + }, + "prepared_tasks": { + "type": "array", + "title": "List of prepared 
tasks", + "items": { + "$ref": "#/$defs/id_prepared_task" + }, + "additionalProperties": false + }, + "prepared_tasks_sequences": { + "type": "array", + "title": "List of sequences of prepared tasks", + "items": { + "$ref": "#/$defs/id_tasks_sequence" + } + } + }, + "additionalProperties": false, + "$defs" : { + "id_task_template": { + "type": "object", + "title": "Task template with identifier", + "allOf": [ + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the task template" + } + } + }, + { + "$ref": "resource:/schemas/template.json#/$defs/task_template" + } + ], + "unevaluatedProperties": false + }, + "id_session_template": { + "type": "object", + "title": "Session template with identifier", + "allOf": [ + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the session template" + } + } + }, + { + "$ref": "resource:/schemas/template.json#/$defs/session_template" + } + ], + "unevaluatedProperties": false + }, + "id_phase_template": { + "type": "object", + "title": "Phase template with identifier", + "allOf": [ + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the phase template" + } + } + }, + { + "$ref": "resource:/schemas/template.json#/$defs/phase_template" + } + ], + "unevaluatedProperties": false + }, + "id_prepared_task": { + "type": "object", + "title": "Prepared task with identifier", + "allOf": [ + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the task" + } + } + }, + { + "$ref": "resource:/schemas/instance.json#/$defs/task" + } + ], + "unevaluatedProperties": false + }, + "id_tasks_sequence": { + "type": "object", + "title": "Tasks sequence with identifier", + "allOf": [ + { + "type": "object", + "required": [ "id" ], + "properties": { + "id": { + "type": "string", + "title": "Identifier for the tasks sequence" + } + } + }, + { + "$ref": "resource:/schemas/instance.json#/$defs/tasks_sequence" + } + ], + "unevaluatedProperties": false + } + } +} \ No newline at end of file diff --git a/src/main/resources/schemas/task_library.json b/src/main/resources/schemas/task_library.json deleted file mode 100644 index 89905176..00000000 --- a/src/main/resources/schemas/task_library.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "type": "object", - "title": "Schema for task library definition file", - "required": [ "version", "task_templates" ], - "properties": { - "version": { - "type": "integer", - "title": "File format version" - }, - "task_templates": { - "type": "array", - "title": "List of task templates", - "items": { - "type": "object", - "title": "Task template", - "required": [ "id", "files" ], - "properties": { - "id": { - "type": "string", - "title": "Identifier for the task template" - }, - "files": { - "type": "array", - "title": "SQL script file paths", - "description": "List of paths to SQL script files that will be executed as part of the task", - "items": { - "type": "string", - "title": "Path to SQL script file" - } - }, - "parameter_values_file": { - "type": "string", - "title": "Parameter values file path", - "description": "Path to CSV file containing parameter values. Concretely, the schema of the file consists of parameter names that can be referenced in a SQL script used in the task. 
Row $i$ contains the values that will be used to replace the parameter names in execution $i$ of a task based on this template during the experiment" - }, - "permutation_orders_path": { - "type": "string", - "title": "Permutation orders directory path", - "description": "Path to directory containing a list of files, each with a permutation order of the SQL scripts associated with this template. File $0$ will be used as the permutation order when 'permute_order' property for the task in the workload is either undefined or false, while file $i$ with $i>0$ will be used in execution $i-1$ of a task based on this template during the experiment when 'permute_order' property for the task in the workload is true" - }, - "supports_time_travel": { - "type": "boolean", - "title": "Supports time travel", - "description": "Whether a task based on this template can specify a 'time_travel_phase_id' that will be used to inject time travel information into the SQL scripts executed by the task during the experiment" - }, - "custom_task_executor": { - "type": "string", - "title": "Identifier for a custom task executor", - "description": "If the task template should be executed with a custom task executor, this property can be used to specify which executor to use" - } - }, - "additionalProperties": false - } - } - }, - "additionalProperties": false -} \ No newline at end of file diff --git a/src/main/resources/schemas/template.json b/src/main/resources/schemas/template.json new file mode 100644 index 00000000..8e3b1b78 --- /dev/null +++ b/src/main/resources/schemas/template.json @@ -0,0 +1,105 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "title": "Workload entity templates", + "$defs": { + "task_template": { + "type": "object", + "title": "Task template", + "required": [ "files" ], + "properties": { + "files": { + "type": "array", + "title": "SQL script file paths", + "description": "List of paths to SQL script files that will be executed as part of the task", + "items": { + "type": "string", + "title": "Path to SQL script file" + } + }, + "parameter_values_file": { + "type": "string", + "title": "Parameter values file path", + "description": "Path to CSV file containing parameter values. Concretely, the schema of the file consists of parameter names that can be referenced in a SQL script used in the task. Row $i$ contains the values that will be used to replace the parameter names in execution $i$ of a task based on this template during the experiment" + }, + "permutation_orders_path": { + "type": "string", + "title": "Permutation orders directory path", + "description": "Path to directory containing a list of files, each with a permutation order of the SQL scripts associated with this template. 
File $0$ will be used as the permutation order when 'permute_order' property for the task in the workload is either undefined or false, while file $i$ with $i>0$ will be used in execution $i-1$ of a task based on this template during the experiment when 'permute_order' property for the task in the workload is true" + }, + "supports_time_travel": { + "type": "boolean", + "title": "Supports time travel", + "description": "Whether a task based on this template can specify a 'time_travel_phase_id' that will be used to inject time travel information into the SQL scripts executed by the task during the experiment" + }, + "custom_task_executor": { + "type": "string", + "title": "Identifier for a custom task executor", + "description": "If the task template should be executed with a custom task executor, this property can be used to specify which executor to use" + } + } + }, + "session_template": { + "type": "object", + "title": "Session template", + "oneOf": [ + { + "type": "object", + "required": [ "tasks" ], + "properties": { + "tasks": { + "type": "array", + "title": "List of tasks", + "items": { + "type": "object", + "allOf": [ + { + "$ref": "resource:/schemas/instance.json#/$defs/task" + } + ] + } + } + } + }, + { + "type": "object", + "required": [ "tasks_sequences" ], + "properties": { + "tasks_sequences": { + "type": "array", + "title": "List of tasks sequences", + "items": { + "type": "object", + "allOf": [ + { + "$ref": "resource:/schemas/instance.json#/$defs/tasks_sequence" + } + ] + } + } + } + } + ] + }, + "phase_template": { + "type": "object", + "title": "Phase template", + "required": [ "sessions" ], + "properties": { + "sessions": { + "type": "array", + "title": "List of sessions", + "items": { + "type": "object", + "allOf": [ + { + "$ref": "resource:/schemas/instance.json#/$defs/session" + } + ], + "unevaluatedProperties": false + } + } + } + } + } +} \ No newline at end of file diff --git a/src/main/resources/schemas/workload.json b/src/main/resources/schemas/workload.json index f94fb328..65900d46 100644 --- a/src/main/resources/schemas/workload.json +++ b/src/main/resources/schemas/workload.json @@ -2,6 +2,7 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "title": "Schema for workload definition file", + "required": [ "version", "id", "phases" ], "properties": { "version": { "type": "integer", @@ -15,85 +16,9 @@ "type": "array", "title": "List of phases", "items": { - "type": "object", - "title": "Phase", - "required": [ "id", "sessions" ], - "properties": { - "id": { - "type": "string", - "title": "Identifier for the phase" - }, - "sessions": { - "type": "array", - "title": "List of sessions", - "items": { - "type": "object", - "title": "Session", - "required": [ "tasks" ], - "properties": { - "tasks": { - "type": "array", - "title": "List of tasks", - "items": { - "type": "object", - "title": "Task", - "required": [ "template_id" ], - "properties": { - "template_id": { - "type": "string", - "title": "Template identifier", - "description": "Identifier of the template that this task is based on" - }, - "permute_order": { - "type": "boolean", - "title": "Enable scripts order permutation", - "description": "If task template specified a permutation orders path, this property can be used to specify whether the permutation should be applied or not" - }, - "time_travel_phase_id": { - "type": "string", - "title": "Time travel phase identifier", - "description": "If the task template supports time travel, this property can be used to specify the 
version of the data that will be queried by this task" - }, - "task_executor_arguments": { - "type": "object", - "title": "Task executor arguments", - "description": "Any arguments passed to a (custom) task executor need to be defined here." - }, - "replace_regex": { - "type": "array", - "title": "List of regex to match and replace", - "description": "List of regex to match and corresponding replacement in the SQL scripts associated with this task", - "items": { - "type": "object", - "title": "Regex to replace", - "required": [ "pattern", "replacement" ], - "properties": { - "pattern": { - "type": "string", - "title": "Regex pattern to match" - }, - "replacement": { - "type": "string", - "title": "Replacement for each match" - } - } - } - } - } - } - }, - "target_endpoint": { - "type": "integer", - "title": "Target endpoint index (default: 0)", - "description": "The positional index (starting from 0) of the connection manager within the connections configuration file" - } - } - } - } - } + "$ref": "resource:/schemas/instance.json#/$defs/phase" } } }, - "required": [ "version", "id", "phases" ], "additionalProperties": false } \ No newline at end of file diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql deleted file mode 100644 index 6e4594df..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CS_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.catalog_sales -WHERE cs_item_sk IN (${cs_item_sk}) AND cs_order_number IN (${cs_order_number}); diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql deleted file mode 100644 index edee53fa..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_I_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.inventory -WHERE inv_date_sk IN (${inv_date_sk}) AND inv_item_sk IN (${inv_item_sk}) AND inv_warehouse_sk IN (${inv_warehouse_sk}); diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql deleted file mode 100644 index 8a6886be..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SR_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.store_returns -WHERE sr_item_sk IN (${sr_item_sk}) AND sr_ticket_number IN (${sr_ticket_number}); diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql deleted file mode 100644 index 24747e0b..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_SS_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.store_sales -WHERE ss_item_sk IN (${ss_item_sk}) AND ss_ticket_number IN (${ss_ticket_number}); diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql deleted file mode 100644 index 6a75a967..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WR_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.web_returns -WHERE wr_item_sk IN 
(${wr_item_sk}) AND wr_order_number IN (${wr_order_number}); diff --git a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql b/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql deleted file mode 100644 index d3b16eb6..00000000 --- a/src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_WS_delete.sql +++ /dev/null @@ -1,2 +0,0 @@ -DELETE FROM ${catalog}.${database}.web_sales -WHERE ws_item_sk IN (${ws_item_sk}) AND ws_order_number IN (${ws_order_number}); diff --git a/src/test/java/com/microsoft/lst_bench/DriverSparkTest.java b/src/test/java/com/microsoft/lst_bench/DriverSparkTest.java index 69e723b7..5a8fe870 100644 --- a/src/test/java/com/microsoft/lst_bench/DriverSparkTest.java +++ b/src/test/java/com/microsoft/lst_bench/DriverSparkTest.java @@ -15,7 +15,7 @@ */ package com.microsoft.lst_bench; -import com.microsoft.lst_bench.input.TaskLibrary; +import com.microsoft.lst_bench.input.Library; import com.microsoft.lst_bench.input.Workload; import com.microsoft.lst_bench.input.config.ConnectionsConfig; import com.microsoft.lst_bench.input.config.ExperimentConfig; @@ -45,7 +45,7 @@ public void testJDBCTPCDSAllTasksDelta() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds-delta.yaml"); } @@ -57,7 +57,7 @@ public void testJDBCTPCDSAllTasksHudi() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds-hudi.yaml"); } @@ -69,7 +69,7 @@ public void testJDBCTPCDSAllTasksIceberg() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds-iceberg.yaml"); } @@ -81,7 +81,7 @@ public void testJDBCTPCHAllTasksDelta() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpch/task_library.yaml", + "run/spark-3.3.1/config/tpch/library.yaml", "src/test/resources/config/spark/w_all_tpch-delta.yaml"); } @@ -93,7 +93,7 @@ public void testJDBCTPCHAllTasksHudi() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpch/task_library.yaml", + "run/spark-3.3.1/config/tpch/library.yaml", "src/test/resources/config/spark/w_all_tpch-hudi.yaml"); } @@ -105,7 +105,7 @@ public void testJDBCTPCHAllTasksIceberg() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", 
"src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpch/task_library.yaml", + "run/spark-3.3.1/config/tpch/library.yaml", "src/test/resources/config/spark/w_all_tpch-iceberg.yaml"); } @@ -117,7 +117,7 @@ public void testJDBCMultiConnectionDelta() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_multi_connection-delta.yaml"); } @@ -129,7 +129,7 @@ public void testJDBCMultiConnectionHudi() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_multi_connection-hudi.yaml"); } @@ -141,7 +141,7 @@ public void testJDBCMultiConnectionIceberg() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_multi_connection-iceberg.yaml"); } @@ -153,7 +153,7 @@ public void testJDBCSessionDelta() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session_jdbc-delta.yaml"); } @@ -165,7 +165,7 @@ public void testJDBCSessionHudi() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session_jdbc-hudi.yaml"); } @@ -177,7 +177,7 @@ public void testJDBCSessionIceberg() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session_jdbc-iceberg.yaml"); } @@ -189,7 +189,7 @@ public void testJDBCSkipFailedQueriesDelta() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_faulty_query_test.yaml"); } @@ -201,7 +201,7 @@ public void testJDBCSkipFailedQueriesHudi() throws Exception { 
"src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_faulty_query_test.yaml"); } @@ -213,10 +213,22 @@ public void testJDBCSkipFailedQueriesIceberg() throws Exception { "src/test/resources/config/spark/jdbc_connection_config.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", "src/test/resources/config/spark/telemetry_config.yaml", - "src/test/resources/config/spark/simplified_task_library.yaml", + "src/test/resources/config/spark/simplified_library.yaml", "src/test/resources/config/spark/w_faulty_query_test.yaml"); } + @Test + @EnabledIfSystemProperty(named = "lst-bench.test.lst", matches = "delta") + @EnabledIfSystemProperty(named = "lst-bench.test.connection", matches = "jdbc") + public void testJDBCSkipFailedQueriesGlobalDelta() throws Exception { + runDriver( + "src/test/resources/config/spark/jdbc_connection_config.yaml", + "src/test/resources/config/spark/experiment_config-delta-skip-faulty.yaml", + "src/test/resources/config/spark/telemetry_config.yaml", + "src/test/resources/config/spark/simplified_library.yaml", + "src/test/resources/config/spark/w_faulty_query_test2.yaml"); + } + private void runDriver(String arg0, String arg1, String arg2, String arg3, String arg4) throws Exception { Driver.main(new String[] {"-c", arg0, "-e", arg1, "-t", arg2, "-l", arg3, "-w", arg4}); @@ -227,7 +239,7 @@ private void runDriver(String arg0, String arg1, String arg2, String arg3, Strin @EnabledIfSystemProperty(named = "lst-bench.test.connection", matches = "spark") public void testSparkSessionDelta(@TempDir Path tempDir) throws Exception { testSparkSession( - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session-delta.yaml", "src/test/resources/config/spark/spark_connection_config-delta.yaml", "src/test/resources/config/spark/experiment_config-delta.yaml", @@ -240,7 +252,7 @@ public void testSparkSessionDelta(@TempDir Path tempDir) throws Exception { @EnabledIfSystemProperty(named = "lst-bench.test.connection", matches = "spark") public void testSparkSessionHudi(@TempDir Path tempDir) throws Exception { testSparkSession( - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session-hudi.yaml", "src/test/resources/config/spark/spark_connection_config-hudi.yaml", "src/test/resources/config/spark/experiment_config-hudi.yaml", @@ -253,7 +265,7 @@ public void testSparkSessionHudi(@TempDir Path tempDir) throws Exception { @EnabledIfSystemProperty(named = "lst-bench.test.connection", matches = "spark") public void testSparkSessionIceberg(@TempDir Path tempDir) throws Exception { testSparkSession( - "src/main/resources/config/spark/tpcds/task_library.yaml", + "run/spark-3.3.1/config/tpcds/library.yaml", "src/test/resources/config/spark/w_all_tpcds_single_session-iceberg.yaml", "src/test/resources/config/spark/spark_connection_config-iceberg.yaml", "src/test/resources/config/spark/experiment_config-iceberg.yaml", @@ -265,7 +277,7 @@ private void testSparkSession( String arg0, String arg1, String arg2, String arg3, String arg4, Path tempDir) throws Exception { // Create Java objects 
from input files - TaskLibrary taskLibrary = FileParser.loadTaskLibrary(arg0); + Library taskLibrary = FileParser.loadLibrary(arg0); Workload workload = FileParser.loadWorkload(arg1); ConnectionsConfig connectionsConfig = FileParser.loadConnectionsConfig(arg2); ExperimentConfig experimentConfig = FileParser.loadExperimentConfig(arg3); diff --git a/src/test/java/com/microsoft/lst_bench/client/QueryResultTest.java b/src/test/java/com/microsoft/lst_bench/client/QueryResultTest.java new file mode 100644 index 00000000..ee16a70a --- /dev/null +++ b/src/test/java/com/microsoft/lst_bench/client/QueryResultTest.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) Microsoft Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.microsoft.lst_bench.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Test; + +public class QueryResultTest { + + @Test + public void testGetStringMappingsSingleColumn() { + // Given + List columnNames = Collections.singletonList("ColumnName"); + List columnTypes = Collections.singletonList(java.sql.Types.VARCHAR); + List> valueList = + List.of( + Arrays.asList( + "Value1", "Value2", "Value3", "Value4", "Value5", "Value6", "Value7", "Value8")); + QueryResult queryResult = new QueryResult(columnNames, columnTypes, valueList); + + // When + Pair result = queryResult.getStringMappings(0, 4); + + // Then + assertEquals("ColumnName", result.getKey()); + assertEquals("'Value1','Value2','Value3','Value4'", result.getValue()); + } + + @Test + public void testGetStringMappingsMultiColumn() { + // Given + List columnNames = Arrays.asList("Column1", "Column2"); + List columnTypes = Arrays.asList(java.sql.Types.VARCHAR, java.sql.Types.INTEGER); + List> valueList = + Arrays.asList(Arrays.asList("Value1", "Value2", "Value3"), Arrays.asList(1, 2, 3)); + QueryResult queryResult = new QueryResult(columnNames, columnTypes, valueList); + + // When + Pair result = queryResult.getStringMappings(0, 3); + + // Then + assertEquals("multi_values_clause", result.getKey()); + assertEquals("('Value1',1),('Value2',2),('Value3',3)", result.getValue()); + } +} diff --git a/src/test/java/com/microsoft/lst_bench/common/LSTBenchmarkExecutorTest.java b/src/test/java/com/microsoft/lst_bench/common/LSTBenchmarkExecutorTest.java index 3ae38134..310143ee 100644 --- a/src/test/java/com/microsoft/lst_bench/common/LSTBenchmarkExecutorTest.java +++ b/src/test/java/com/microsoft/lst_bench/common/LSTBenchmarkExecutorTest.java @@ -15,13 +15,15 @@ */ package com.microsoft.lst_bench.common; +import static org.mockito.Mockito.doThrow; + import com.microsoft.lst_bench.client.ClientException; import com.microsoft.lst_bench.client.Connection; import com.microsoft.lst_bench.client.ConnectionManager; import com.microsoft.lst_bench.input.BenchmarkObjectFactory; -import com.microsoft.lst_bench.input.ImmutableTaskLibrary; +import 
com.microsoft.lst_bench.input.ImmutableLibrary; import com.microsoft.lst_bench.input.ImmutableWorkload; -import com.microsoft.lst_bench.input.TaskLibrary; +import com.microsoft.lst_bench.input.Library; import com.microsoft.lst_bench.input.Workload; import com.microsoft.lst_bench.input.config.ExperimentConfig; import com.microsoft.lst_bench.input.config.ImmutableExperimentConfig; @@ -76,10 +78,10 @@ void testNoOpSetup() throws Exception { var idToConnectionManager = new ArrayList(); ExperimentConfig experimentConfig = ImmutableExperimentConfig.builder().id("nooptest").version(1).repetitions(1).build(); - TaskLibrary taskLibrary = ImmutableTaskLibrary.builder().version(1).build(); + Library library = ImmutableLibrary.builder().version(1).build(); Workload workload = ImmutableWorkload.builder().id("nooptest").version(1).build(); - var config = BenchmarkObjectFactory.benchmarkConfig(experimentConfig, taskLibrary, workload); + var config = BenchmarkObjectFactory.benchmarkConfig(experimentConfig, library, workload); SQLTelemetryRegistry telemetryRegistry = getTelemetryRegistry(); @@ -108,17 +110,16 @@ void testExperimentTimelineTelemetry() throws Exception { ExperimentConfig experimentConfig = ImmutableExperimentConfig.builder().id("telemetryTest").version(1).repetitions(1).build(); - URL taskLibFile = - getClass().getClassLoader().getResource("./config/samples/task_library_0.yaml"); - Assertions.assertNotNull(taskLibFile); - TaskLibrary taskLibrary = FileParser.loadTaskLibrary(taskLibFile.getFile()); + URL libFile = getClass().getClassLoader().getResource("./config/samples/library_0.yaml"); + Assertions.assertNotNull(libFile); + Library library = FileParser.loadLibrary(libFile.getFile()); URL workloadFile = getClass().getClassLoader().getResource("./config/spark/w_all_tpcds-delta.yaml"); Assertions.assertNotNull(workloadFile); Workload workload = FileParser.loadWorkload(workloadFile.getFile()); - var config = BenchmarkObjectFactory.benchmarkConfig(experimentConfig, taskLibrary, workload); + var config = BenchmarkObjectFactory.benchmarkConfig(experimentConfig, library, workload); SQLTelemetryRegistry telemetryRegistry = getTelemetryRegistry(); @@ -140,6 +141,44 @@ void testExperimentTimelineTelemetry() throws Exception { } } + /** + * This test checks whether erroneous execution of a query will lead to a (successful) retry if + * the workload specifies that a specific error is permitted. 
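+   *
+   * <p>For illustration only, a minimal workload exercising this path could look like the
+   * sketch below, using the structure the refactored workload schema requires (version,
+   * id, phases, sessions, tasks). All identifiers are hypothetical; the actual contents
+   * of w_retry_query_test.yaml and library_retry.yaml are not shown in this diff.
+   *
+   * <pre>
+   * version: 1
+   * id: retry_query_test
+   * phases:
+   * - id: retry_phase
+   *   sessions:
+   *   - tasks:
+   *     - template_id: retry_task
+   * </pre>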
+ */ + @Test + void testExperimentRetry() throws Exception { + final String queryString = "SELECT * FROM test;"; + final String errorString = "testError"; + + Connection mockConnection = Mockito.mock(Connection.class); + ConnectionManager mockConnectionManager = Mockito.mock(ConnectionManager.class); + Mockito.when(mockConnectionManager.createConnection()).thenReturn(mockConnection); + doThrow(new ClientException(errorString)).doNothing().when(mockConnection).execute(queryString); + + var connectionManagers = new ArrayList(); + connectionManagers.add(mockConnectionManager); + + ExperimentConfig experimentConfig = + ImmutableExperimentConfig.builder().id("retryTest").version(1).repetitions(1).build(); + + URL libFile = getClass().getClassLoader().getResource("./config/samples/library_retry.yaml"); + Assertions.assertNotNull(libFile); + Library library = FileParser.loadLibrary(libFile.getFile()); + + URL workloadFile = + getClass().getClassLoader().getResource("./config/spark/w_retry_query_test.yaml"); + Assertions.assertNotNull(workloadFile); + Workload workload = FileParser.loadWorkload(workloadFile.getFile()); + + var config = BenchmarkObjectFactory.benchmarkConfig(experimentConfig, library, workload); + + SQLTelemetryRegistry telemetryRegistry = getTelemetryRegistry(); + + LSTBenchmarkExecutor benchmark = + new LSTBenchmarkExecutor(connectionManagers, config, telemetryRegistry); + benchmark.run(); + } + private SQLTelemetryRegistry getTelemetryRegistry() throws ClientException, IOException { URL telemetryConfigFile = getClass().getClassLoader().getResource("./config/spark/telemetry_config.yaml"); diff --git a/src/test/java/com/microsoft/lst_bench/input/ParserTest.java b/src/test/java/com/microsoft/lst_bench/input/ParserTest.java index 8a6ba7b8..bdd818de 100644 --- a/src/test/java/com/microsoft/lst_bench/input/ParserTest.java +++ b/src/test/java/com/microsoft/lst_bench/input/ParserTest.java @@ -44,6 +44,15 @@ public class ParserTest { + File.separator + "spark" + File.separator; + private static final String TPCDS_PATH = + "run" + + File.separator + + "spark-3.3.1" + + File.separator + + "config" + + File.separator + + "tpcds" + + File.separator; @Test public void testParseExperimentConfig() throws IOException { @@ -113,15 +122,14 @@ public void testParseConnectionConfig() throws IOException { @Test public void testParseTaskLibrary() throws IOException { - TaskLibrary taskLibrary = - FileParser.loadTaskLibrary(CONFIG_PATH + "tpcds" + File.separator + "task_library.yaml"); - Assertions.assertEquals(1, taskLibrary.getVersion()); - Assertions.assertEquals(16, taskLibrary.getTaskTemplates().size()); - for (TaskTemplate taskTemplate : taskLibrary.getTaskTemplates()) { + Library library = FileParser.loadLibrary(TPCDS_PATH + "library.yaml"); + Assertions.assertEquals(1, library.getVersion()); + Assertions.assertEquals(16, library.getTaskTemplates().size()); + for (TaskTemplate taskTemplate : library.getTaskTemplates()) { switch (taskTemplate.getId()) { case "setup": Assertions.assertEquals( - "src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql", + "run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql", taskTemplate.getFiles().get(0)); Assertions.assertNull(taskTemplate.getParameterValuesFile()); Assertions.assertNull(taskTemplate.getPermutationOrdersDirectory()); @@ -129,12 +137,12 @@ public void testParseTaskLibrary() throws IOException { break; case "setup_data_maintenance": Assertions.assertEquals( - 
"src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat", + "run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat", taskTemplate.getParameterValuesFile()); break; case "single_user": Assertions.assertEquals( - "src/main/resources/auxiliary/tpcds/single_user/permutation_orders/", + "run/auxiliary/tpcds/single_user/permutation_orders/", taskTemplate.getPermutationOrdersDirectory()); Assertions.assertEquals(Boolean.TRUE, taskTemplate.supportsTimeTravel()); break; @@ -167,10 +175,9 @@ public void testParseTaskLibrary() throws IOException { @Test public void testParseW0Delta() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "w0_tpcds-delta.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "w0_tpcds-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); - Assertions.assertEquals("w0_tpcds_delta", workload.getId()); + Assertions.assertEquals("w0_tpcds", workload.getId()); Assertions.assertEquals(9, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { @@ -228,10 +235,9 @@ public void testParseW0Delta() throws IOException { @Test public void testParseW0Hudi() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "w0_tpcds-hudi.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "w0_tpcds-hudi-0.12.2.yaml"); Assertions.assertEquals(1, workload.getVersion()); - Assertions.assertEquals("w0_tpcds_hudi", workload.getId()); + Assertions.assertEquals("w0_tpcds", workload.getId()); Assertions.assertEquals(9, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { @@ -304,10 +310,9 @@ public void testParseW0Hudi() throws IOException { @Test public void testParseW0Iceberg() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "w0_tpcds-iceberg.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "w0_tpcds-iceberg-1.1.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); - Assertions.assertEquals("w0_tpcds_iceberg", workload.getId()); + Assertions.assertEquals("w0_tpcds", workload.getId()); Assertions.assertEquals(9, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { @@ -365,24 +370,20 @@ public void testParseW0Iceberg() throws IOException { @Test public void testParseWP1Longevity() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "wp1_longevity.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "wp1_longevity-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); Assertions.assertEquals("wp1_longevity", workload.getId()); - Assertions.assertEquals(15, workload.getPhases().size()); + Assertions.assertEquals(13, workload.getPhases().size()); } @Test public void testParseWP2Resilience() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "wp2_resilience.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "wp2_resilience-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); Assertions.assertEquals("wp2_resilience", workload.getId()); - Assertions.assertEquals(17, workload.getPhases().size()); + Assertions.assertEquals(15, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { - case 
"setup": - case "setup_data_maintenance": case "init": case "build": case "single_user_1": @@ -408,11 +409,10 @@ public void testParseWP2Resilience() throws IOException { @Test public void testParseWP3RWConcurrency() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "wp3_rw_concurrency.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "wp3_rw_concurrency-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); Assertions.assertEquals("wp3_rw_concurrency", workload.getId()); - Assertions.assertEquals(10, workload.getPhases().size()); + Assertions.assertEquals(8, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { case "single_user_1_data_maintenance_1": @@ -456,8 +456,6 @@ public void testParseWP3RWConcurrency() throws IOException { Assertions.assertNull(taskO.getTimeTravelPhaseId()); } break; - case "setup": - case "setup_data_maintenance": case "init": case "build": case "single_user_2o_data_maintenance_2": @@ -475,8 +473,7 @@ public void testParseWP3RWConcurrency() throws IOException { @Test public void testParseWP3RWConcurrencyMulti() throws IOException { Workload workload = - FileParser.loadWorkload( - CONFIG_PATH + "tpcds" + File.separator + "wp3_rw_concurrency_multi.yaml"); + FileParser.loadWorkload(TPCDS_PATH + "wp3_rw_concurrency_multi-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); Assertions.assertEquals("wp3_rw_concurrency_multi", workload.getId()); Assertions.assertEquals(10, workload.getPhases().size()); @@ -518,11 +515,10 @@ public void testParseWP3RWConcurrencyMulti() throws IOException { @Test public void testParseWP4TimeTravel() throws IOException { - Workload workload = - FileParser.loadWorkload(CONFIG_PATH + "tpcds" + File.separator + "wp4_time_travel.yaml"); + Workload workload = FileParser.loadWorkload(TPCDS_PATH + "wp4_time_travel-delta-2.2.0.yaml"); Assertions.assertEquals(1, workload.getVersion()); Assertions.assertEquals("wp4_time_travel", workload.getId()); - Assertions.assertEquals(18, workload.getPhases().size()); + Assertions.assertEquals(16, workload.getPhases().size()); for (Phase phase : workload.getPhases()) { switch (phase.getId()) { case "single_user_2_0": @@ -547,15 +543,6 @@ public void testParseWP4TimeTravel() throws IOException { Assertions.assertNotNull(task.getTimeTravelPhaseId()); } break; - case "setup_data_maintenance": - { - List sessions = phase.getSessions(); - Assertions.assertEquals(1, sessions.size()); - List tasks = sessions.get(0).getTasks(); - Assertions.assertEquals(8, tasks.size()); - } - break; - case "setup": case "init": case "build": case "data_maintenance_1": diff --git a/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java b/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java index 7d03d742..d25b9adf 100644 --- a/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java +++ b/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java @@ -30,8 +30,10 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Set; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.DisabledIfSystemProperty; import org.junit.jupiter.api.condition.EnabledOnOs; import org.junit.jupiter.api.condition.OS; @@ -143,29 +145,29 @@ private void testValidationConnectionsConfig(String configFilePath) throws IOExc 
diff --git a/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java b/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java
index 7d03d742..d25b9adf 100644
--- a/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java
+++ b/src/test/java/com/microsoft/lst_bench/input/ValidationTest.java
@@ -30,8 +30,10 @@
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
+import java.util.ArrayList;
 import java.util.Set;
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 import org.junit.jupiter.api.condition.EnabledOnOs;
 import org.junit.jupiter.api.condition.OS;
@@ -143,29 +145,29 @@ private void testValidationConnectionsConfig(String configFilePath) throws IOExc
   @EnabledOnOs({OS.LINUX, OS.MAC})
   @ValueSource(
       strings = {
-        "src/main/resources/config/spark/tpcds/task_library.yaml",
-        "src/main/resources/config/trino/tpcds/task_library.yaml",
-        "src/main/resources/config/spark/tpch/task_library.yaml"
+        "run/spark-3.3.1/config/tpcds/library.yaml",
+        "run/trino-420/config/tpcds/library.yaml",
+        "run/spark-3.3.1/config/tpch/library.yaml"
       })
-  public void testValidationTaskLibraryUnix(String taskLibraryPath) throws IOException {
-    testValidationTaskLibrary(taskLibraryPath);
+  public void testValidationLibraryUnix(String libraryPath) throws IOException {
+    testValidationLibrary(libraryPath);
   }
 
   @ParameterizedTest
   @EnabledOnOs({OS.WINDOWS})
   @ValueSource(
       strings = {
-        "src\\main\\resources\\config\\spark\\tpcds\\task_library.yaml",
-        "src\\main\\resources\\config\\trino\\tpcds\\task_library.yaml",
-        "src\\main\\resources\\config\\spark\\tpch\\task_library.yaml"
+        "run\\spark-3.3.1\\config\\tpcds\\library.yaml",
+        "run\\trino-420\\config\\tpcds\\library.yaml",
+        "run\\spark-3.3.1\\config\\tpch\\library.yaml"
       })
-  public void testValidationTaskLibraryWin(String taskLibraryPath) throws IOException {
-    testValidationTaskLibrary(taskLibraryPath);
+  public void testValidationLibraryWin(String libraryPath) throws IOException {
+    testValidationLibrary(libraryPath);
   }
 
-  private void testValidationTaskLibrary(String taskLibraryPath) throws IOException {
+  private void testValidationLibrary(String libraryPath) throws IOException {
     // Validate YAML file contents and create POJO object
-    TaskLibrary taskLibrary = FileParser.loadTaskLibrary(taskLibraryPath);
+    Library taskLibrary = FileParser.loadLibrary(libraryPath);
     // Validate YAML generated from POJO object
     ObjectMapper mapper = new YAMLMapper();
     JsonSchemaFactory factory =
@@ -173,7 +175,7 @@ private void testValidationTaskLibrary(String taskLibraryPath) throws IOExceptio
             .objectMapper(mapper)
             .build();
     JsonSchema schema =
-        factory.getSchema(Files.newInputStream(Paths.get(SCHEMAS_PATH + "task_library.json")));
+        factory.getSchema(Files.newInputStream(Paths.get(SCHEMAS_PATH + "library.json")));
     JsonNode jsonNodeObject = mapper.convertValue(taskLibrary, JsonNode.class);
     Set<ValidationMessage> errorsFromPOJO = schema.validate(jsonNodeObject);
     Assertions.assertEquals(
@@ -184,21 +186,21 @@ private void testValidationTaskLibrary(String taskLibraryPath) throws IOExceptio
   @EnabledOnOs({OS.LINUX, OS.MAC})
   @ValueSource(
       strings = {
-        "src/main/resources/config/spark/tpcds/w0_tpcds-delta.yaml",
-        "src/main/resources/config/spark/tpcds/w0_tpcds-hudi.yaml",
-        "src/main/resources/config/spark/tpcds/w0_tpcds-iceberg.yaml",
-        "src/main/resources/config/spark/tpcds/wp1_longevity.yaml",
-        "src/main/resources/config/spark/tpcds/wp2_resilience.yaml",
-        "src/main/resources/config/spark/tpcds/wp3_rw_concurrency.yaml",
-        "src/main/resources/config/spark/tpcds/wp3_rw_concurrency_multi.yaml",
-        "src/main/resources/config/spark/tpcds/wp4_time_travel.yaml",
-        "src/main/resources/config/trino/tpcds/w0_tpcds.yaml",
-        "src/main/resources/config/trino/tpcds/wp1_longevity.yaml",
-        "src/main/resources/config/trino/tpcds/wp2_resilience.yaml",
-        "src/main/resources/config/trino/tpcds/wp3_rw_concurrency.yaml",
-        "src/main/resources/config/spark/tpch/w0_tpch-delta.yaml",
-        "src/main/resources/config/spark/tpch/w0_tpch-hudi.yaml",
-        "src/main/resources/config/spark/tpch/w0_tpch-iceberg.yaml"
+        "run/spark-3.3.1/config/tpcds/w0_tpcds-delta-2.2.0.yaml",
+        "run/spark-3.3.1/config/tpcds/w0_tpcds-hudi-0.12.2.yaml",
+        "run/spark-3.3.1/config/tpcds/w0_tpcds-iceberg-1.1.0.yaml",
+ "run/spark-3.3.1/config/tpcds/wp1_longevity-delta-2.2.0.yaml", + "run/spark-3.3.1/config/tpcds/wp2_resilience-delta-2.2.0.yaml", + "run/spark-3.3.1/config/tpcds/wp3_rw_concurrency-delta-2.2.0.yaml", + "run/spark-3.3.1/config/tpcds/wp3_rw_concurrency_multi-delta-2.2.0.yaml", + "run/spark-3.3.1/config/tpcds/wp4_time_travel-delta-2.2.0.yaml", + "run/trino-420/config/tpcds/w0_tpcds.yaml", + "run/trino-420/config/tpcds/wp1_longevity.yaml", + "run/trino-420/config/tpcds/wp2_resilience.yaml", + "run/trino-420/config/tpcds/wp3_rw_concurrency.yaml", + "run/spark-3.3.1/config/tpch/w0_tpch-delta.yaml", + "run/spark-3.3.1/config/tpch/w0_tpch-hudi.yaml", + "run/spark-3.3.1/config/tpch/w0_tpch-iceberg.yaml" }) public void testValidationWorkloadUnix(String workloadFilePath) throws IOException { testValidationWorkload(workloadFilePath); @@ -208,21 +210,21 @@ public void testValidationWorkloadUnix(String workloadFilePath) throws IOExcepti @EnabledOnOs({OS.WINDOWS}) @ValueSource( strings = { - "src\\main\\resources\\config\\spark\\tpcds\\w0_tpcds-delta.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\w0_tpcds-hudi.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\w0_tpcds-iceberg.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\wp1_longevity.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\wp2_resilience.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\wp3_rw_concurrency.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\wp3_rw_concurrency_multi.yaml", - "src\\main\\resources\\config\\spark\\tpcds\\wp4_time_travel.yaml", - "src\\main\\resources\\config\\trino\\tpcds\\w0_tpcds.yaml", - "src\\main\\resources\\config\\trino\\tpcds\\wp1_longevity.yaml", - "src\\main\\resources\\config\\trino\\tpcds\\wp2_resilience.yaml", - "src\\main\\resources\\config\\trino\\tpcds\\wp3_rw_concurrency.yaml", - "src\\main\\resources\\config\\spark\\tpch\\w0_tpch-delta.yaml", - "src\\main\\resources\\config\\spark\\tpch\\w0_tpch-hudi.yaml", - "src\\main\\resources\\config\\spark\\tpch\\w0_tpch-iceberg.yaml" + "run\\spark-3.3.1\\config\\tpcds\\w0_tpcds-delta-2.2.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\w0_tpcds-hudi-0.12.2.yaml", + "run\\spark-3.3.1\\config\\tpcds\\w0_tpcds-iceberg-1.1.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\wp1_longevity-delta-2.2.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\wp2_resilience-delta-2.2.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\wp3_rw_concurrency-delta-2.2.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\wp3_rw_concurrency_multi-delta-2.2.0.yaml", + "run\\spark-3.3.1\\config\\tpcds\\wp4_time_travel-delta-2.2.0.yaml", + "run\\trino-420\\config\\tpcds\\w0_tpcds.yaml", + "run\\trino-420\\config\\tpcds\\wp1_longevity.yaml", + "run\\trino-420\\config\\tpcds\\wp2_resilience.yaml", + "run\\trino-420\\config\\tpcds\\wp3_rw_concurrency.yaml", + "run\\spark-3.3.1\\config\\tpch\\w0_tpch-delta.yaml", + "run\\spark-3.3.1\\config\\tpch\\w0_tpch-hudi.yaml", + "run\\spark-3.3.1\\config\\tpch\\w0_tpch-iceberg.yaml" }) public void testValidationWorkloadWin(String workloadFilePath) throws IOException { testValidationWorkload(workloadFilePath); @@ -305,4 +307,45 @@ private void testValidationIncorrectTelemetryConfig(String configFilePath) { Assertions.assertThrows( IllegalArgumentException.class, () -> FileParser.loadTelemetryConfig(configFilePath)); } + + @Test + public void testIncorrectTaskCreation() { + ImmutableTask.Builder builder = + ImmutableTask.builder().preparedTaskId("pt_id").templateId("t_id"); + Assertions.assertThrows(IllegalStateException.class, builder::build); + 
@@ -305,4 +307,45 @@ private void testValidationIncorrectTelemetryConfig(String configFilePath) {
     Assertions.assertThrows(
         IllegalArgumentException.class, () -> FileParser.loadTelemetryConfig(configFilePath));
   }
+
+  @Test
+  public void testIncorrectTaskCreation() {
+    ImmutableTask.Builder builder =
+        ImmutableTask.builder().preparedTaskId("pt_id").templateId("t_id");
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+    builder = ImmutableTask.builder();
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+  }
+
+  @Test
+  public void testIncorrectTasksSequenceCreation() {
+    ImmutableTasksSequence.Builder builder =
+        ImmutableTasksSequence.builder().preparedTasksSequenceId("pts_id").tasks(new ArrayList<>());
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+    builder = ImmutableTasksSequence.builder();
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+  }
+
+  @Test
+  public void testIncorrectSessionCreation() {
+    ImmutableSession.Builder builder =
+        ImmutableSession.builder()
+            .templateId("t_id")
+            .tasksSequences(new ArrayList<>())
+            .tasks(new ArrayList<>());
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+    builder = ImmutableSession.builder().templateId("t_id").tasks(new ArrayList<>());
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+    builder = ImmutableSession.builder();
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+  }
+
+  @Test
+  public void testIncorrectPhaseCreation() {
+    ImmutablePhase.Builder builder =
+        ImmutablePhase.builder().templateId("t_id").sessions(new ArrayList<>());
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+    builder = ImmutablePhase.builder();
+    Assertions.assertThrows(IllegalStateException.class, builder::build);
+  }
 }
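The four new negative tests pin down the Immutables-generated builders: a Task that carries both a template_id and a prepared_task_id, or neither, must fail at build() time instead of surfacing as an ambiguous reference during execution, and the sequence, session, and phase builders enforce analogous rules, rejecting definitions that mix a template reference with an inline body or provide neither. The positive case is only implied here; a hypothetical companion test, assuming getTemplateId() is the accessor Immutables generates for template_id:

    @Test
    public void testCorrectTaskCreation() {
      // Hypothetical check, not part of this patch: exactly one identifying
      // field set should satisfy the builder's validation.
      Task task = ImmutableTask.builder().templateId("t_id").build();
      Assertions.assertEquals("t_id", task.getTemplateId());
    }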
diff --git a/src/test/resources/config/samples/library_0.yaml b/src/test/resources/config/samples/library_0.yaml
new file mode 100644
index 00000000..20d20b22
--- /dev/null
+++ b/src/test/resources/config/samples/library_0.yaml
@@ -0,0 +1,73 @@
+# Description: Tasks Library
+---
+version: 1
+task_templates:
+# Create external tables needed for benchmark
+- id: setup
+  files:
+  - run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql
+# Create data maintenance external tables needed for benchmark
+- id: setup_data_maintenance
+  files:
+  - run/spark-3.3.1/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql
+  parameter_values_file: run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat
+# Create schema and drop existing tables
+- id: init
+  files:
+  - run/spark-3.3.1/scripts/tpcds/init/init.sql
+# Create benchmark tables and load data into them
+- id: build
+  files:
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_call_center.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql
+# Execution of TPC-DS queries (possibly in a previous point-in-time)
+- id: single_user
+  files:
+  - run/spark-3.3.1/scripts/tpcds/single_user/query7.sql
+  - run/spark-3.3.1/scripts/tpcds/single_user/query15.sql
+  supports_time_travel: true
+# Execution of TPC-DS data maintenance queries (Delta)
+- id: data_maintenance_delta
+  files:
+  - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql
+- id: data_maintenance_dependent
+  files:
+  - run/spark-3.3.1/scripts/tpcds/data_maintenance_dependent/DF_CR_1.sql
+  custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
+- id: optimize_delta
+  files:
+  - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql
+- id: optimize_split_delta
+  files:
+  - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_SELECT.sql
+  - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_IN-delta.sql
+  - run/spark-3.3.1/scripts/tpcds/optimize_split/o_catalog_returns_NULL-delta.sql
+  custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
+prepared_tasks:
+# Prepared instance of the optimize split task
+- id: optimize_split_delta
+  template_id: optimize_split_delta
+  task_executor_arguments:
+    dependent_task_batch_size: 100
+    # TODO: Remove this once #182 is fixed
+    skip_erroneous_query_strings: "[DELTA_FAILED_RECOGNIZE_PREDICATE]"
+prepared_tasks_sequences:
+# Data maintenance tasks sequence
+- id: seq_data_maintenance_delta
+  tasks:
+  - template_id: data_maintenance_delta
+  - template_id: data_maintenance_delta
+session_templates:
+# Data maintenance session (combines prepared task sequence with inlined sequence of tasks)
+- id: session_data_maintenance_delta
+  tasks_sequences:
+  - prepared_tasks_sequence_id: seq_data_maintenance_delta
+  - tasks:
+    - template_id: data_maintenance_delta
+    - template_id: data_maintenance_delta
+phase_templates:
+# Data maintenance phase
+- id: phase_data_maintenance_delta
+  sessions:
+  - template_id: session_data_maintenance_delta
diff --git a/src/test/resources/config/samples/library_retry.yaml b/src/test/resources/config/samples/library_retry.yaml
new file mode 100644
index 00000000..3ace4e6f
--- /dev/null
+++ b/src/test/resources/config/samples/library_retry.yaml
@@ -0,0 +1,7 @@
+# Description: Tasks Library
+---
+version: 1
+task_templates:
+- id: retry_query
+  files:
+  - src/test/resources/scripts/retry_test_query.sql
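library_0.yaml exercises every layer of the library format this patch introduces: task_templates are the raw building blocks, prepared_tasks bind a template to reusable executor arguments, prepared_tasks_sequences and session_templates compose tasks for reuse, and phase_templates package whole sessions; library_retry.yaml is the minimal single-template counterpart used by the retry test. Per the validation tests above, such files load through FileParser.loadLibrary; a minimal usage sketch, where the getVersion() accessor on Library is an assumption mirroring the Workload accessor used earlier:

    // Sketch: parse the sample library added in this patch and spot-check it.
    Library library = FileParser.loadLibrary("src/test/resources/config/samples/library_0.yaml");
    Assertions.assertEquals(1, library.getVersion());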
diff --git a/src/test/resources/config/samples/task_library_0.yaml b/src/test/resources/config/samples/task_library_0.yaml
deleted file mode 100644
index 8827ed40..00000000
--- a/src/test/resources/config/samples/task_library_0.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Description: Tasks Library
----
-version: 1
-task_templates:
-# Create external tables needed for benchmark
-- id: setup
-  files:
-  - src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql
-# Create data maintenance external tables needed for benchmark
-- id: setup_data_maintenance
-  files:
-  - src/main/resources/scripts/tpcds/setup_data_maintenance/spark/ddl-external-tables-refresh.sql
-  parameter_values_file: src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat
-# Create schema and drop existing tables
-- id: init
-  files:
-  - src/main/resources/scripts/tpcds/init/spark/init.sql
-# Create benchmark tables and load data into them
-- id: build
-  files:
-  - src/main/resources/scripts/tpcds/build/spark/1_create_call_center.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_customer.sql
-# Execution of TPC-DS queries (possibly in a previous point-in-time)
-- id: single_user
-  files:
-  - src/main/resources/scripts/tpcds/single_user/spark/query7.sql
-  - src/main/resources/scripts/tpcds/single_user/spark/query15.sql
-  supports_time_travel: true
-# Execution of TPC-DS data maintenance queries (Delta)
-- id: data_maintenance_delta
-  files:
-  - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql
-- id: data_maintenance_dependent
-  files:
-  - src/main/resources/scripts/tpcds/data_maintenance_dependent/spark/DF_CR_1.sql
-  custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
-- id: optimize_delta
-  files:
-  - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql
-- id: optimize_split_delta
-  files:
-  - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_SELECT.sql
-  - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_IN-delta.sql
-  - src/main/resources/scripts/tpcds/optimize_split/spark/o_catalog_returns_NULL-delta.sql
-  custom_task_executor: com.microsoft.lst_bench.task.custom.DependentTaskExecutor
diff --git a/src/test/resources/config/spark/experiment_config-delta-skip-faulty.yaml b/src/test/resources/config/spark/experiment_config-delta-skip-faulty.yaml
new file mode 100644
index 00000000..0eb5f344
--- /dev/null
+++ b/src/test/resources/config/spark/experiment_config-delta-skip-faulty.yaml
@@ -0,0 +1,31 @@
+# Description: Experiment Configuration
+---
+version: 1
+id: spark_w_all_delta_sf_001
+repetitions: 1
+# Metadata accepts any key-value that we want to register together with the experiment run.
+# TODO: In the future, many of these could be automatically generated by the framework.
+metadata:
+  system: spark
+  system_version: 3.3.2
+  table_format: delta
+  table_format_version: 2.2.0
+  scale_factor: 0.01
+  mode: cow
+# The following parameter values will be used to replace the variables in the workload statements.
+parameter_values:
+  external_catalog: spark_catalog
+  external_database: external_sf_001
+  external_table_format: csv
+  external_data_path: '/sf_001/'
+  external_options_suffix: ',header="true"'
+  external_tblproperties_suffix: ''
+  catalog: spark_catalog
+  database: w_all_delta_sf_001
+  table_format: delta
+  data_path: '/delta/sf_001/'
+  options_suffix: ''
+  tblproperties_suffix: ''
+  scale_factor: 0.01
+task_executor_arguments:
+  skip_erroneous_query_strings: this is a nonsense string;column
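The comment inside experiment_config-delta-skip-faulty.yaml states that parameter_values are substituted for variables in the workload statements. The placeholder syntax itself is not shown anywhere in this patch, so the following is an illustration of the mechanism only, under an assumed ${name} convention:

    import java.util.Map;

    final class ParameterSubstitution {
      // Illustration only: replace each ${key} occurrence with its configured value.
      static String substitute(String sql, Map<String, String> parameterValues) {
        for (Map.Entry<String, String> e : parameterValues.entrySet()) {
          sql = sql.replace("${" + e.getKey() + "}", e.getValue());
        }
        return sql;
      }
    }

For instance, substitute("CREATE DATABASE ${database}", Map.of("database", "w_all_delta_sf_001")) would then yield "CREATE DATABASE w_all_delta_sf_001".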
diff --git a/src/test/resources/config/spark/simplified_library.yaml b/src/test/resources/config/spark/simplified_library.yaml
new file mode 100644
index 00000000..607d5f2d
--- /dev/null
+++ b/src/test/resources/config/spark/simplified_library.yaml
@@ -0,0 +1,115 @@
+# Description: Tasks Library
+---
+version: 1
+task_templates:
+# Create external tables needed for benchmark
+- id: setup
+  files:
+  - run/spark-3.3.1/scripts/tpcds/setup/ddl-external-tables.sql
+# Create data maintenance external tables needed for benchmark
+- id: setup_data_maintenance
+  files:
+  - run/spark-3.3.1/scripts/tpcds/setup_data_maintenance/ddl-external-tables-refresh.sql
+  parameter_values_file: run/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat
+# Create schema and drop existing tables
+- id: init
+  files:
+  - run/spark-3.3.1/scripts/tpcds/init/init.sql
+# Create benchmark tables and load data into them
+- id: build
+  files:
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_call_center.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_page.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_catalog_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_customer.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_customer_address.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_customer_demographics.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_date_dim.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_household_demographics.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_income_band.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_inventory.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_item.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_promotion.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_reason.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_ship_mode.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_store.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_store_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_store_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_time_dim.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_warehouse.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_web_page.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_web_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_web_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/1_create_web_site.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_call_center.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_page.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_catalog_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_customer.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_customer_address.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_customer_demographics.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_date_dim.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_household_demographics.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_income_band.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_inventory.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_item.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_promotion.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_reason.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_ship_mode.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_store.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_store_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_store_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_time_dim.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_warehouse.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_web_page.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_web_returns.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_web_sales.sql
+  - run/spark-3.3.1/scripts/tpcds/build/2_load_web_site.sql
+# Execution of TPC-DS queries (possibly in a previous point-in-time)
+- id: single_user
+  files:
+  - run/spark-3.3.1/scripts/tpcds/single_user/query7.sql
+  - run/spark-3.3.1/scripts/tpcds/single_user/query15.sql
+  supports_time_travel: true
+# Execution of TPC-DS data maintenance queries
+- id: data_maintenance
+  files:
+  - run/spark-3.3.1/scripts/tpcds/data_maintenance/LF_CS.sql
+  parameter_values_file: run/auxiliary/tpcds/data_maintenance/parameter_values.dat
+# Execution of optimize (Delta)
+- id: optimize_delta
+  files:
+  - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-delta.sql
+# Execution of optimize (Iceberg)
+- id: optimize_iceberg
+  files:
+  - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-iceberg.sql
+# Execution of optimize (Hudi)
+- id: optimize_hudi
+  files:
+  - run/spark-3.3.1/scripts/tpcds/optimize/o_ship_mode-hudi.sql
+# Execution of faulty TPC-DS query
+- id: faulty_query
+  files:
+  - src/test/resources/scripts/faulty_test_query.sql
+prepared_tasks:
+# Task instance of execution of faulty TPC-DS query
+- id: task_faulty_query
+  template_id: faulty_query
+  task_executor_arguments:
+    skip_erroneous_query_strings: this is a nonsense string;column
+session_templates:
+# Data maintenance session
+- id: session_data_maintenance
+  tasks:
+  - template_id: data_maintenance
+phase_templates:
+# Multi single-user phase
+- id: phase_multi_single_user
+  sessions:
+  - tasks:
+    - template_id: single_user
+  - tasks:
+    - template_id: single_user
+    target_endpoint: 1
diff --git a/src/test/resources/config/spark/simplified_task_library.yaml b/src/test/resources/config/spark/simplified_task_library.yaml
deleted file mode 100644
index 3f95220c..00000000
--- a/src/test/resources/config/spark/simplified_task_library.yaml
+++ /dev/null
@@ -1,95 +0,0 @@
-# Description: Tasks Library
----
-version: 1
-task_templates:
-# Create external tables needed for benchmark
-- id: setup
-  files:
-  - src/main/resources/scripts/tpcds/setup/spark/ddl-external-tables.sql
-# Create data maintenance external tables needed for benchmark
-- id: setup_data_maintenance
-  files:
-  - src/main/resources/scripts/tpcds/setup_data_maintenance/spark/ddl-external-tables-refresh.sql
-  parameter_values_file: src/main/resources/auxiliary/tpcds/setup_data_maintenance/parameter_values.dat
-# Create schema and drop existing tables
-- id: init
-  files:
-  - src/main/resources/scripts/tpcds/init/spark/init.sql
-# Create benchmark tables and load data into them
-- id: build
-  files:
-  - src/main/resources/scripts/tpcds/build/spark/1_create_call_center.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_catalog_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer_address.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_customer_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_date_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_household_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_income_band.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_inventory.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_item.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_promotion.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_reason.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_ship_mode.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_store_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_time_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_warehouse.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/1_create_web_site.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_call_center.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_catalog_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_customer.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_customer_address.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_customer_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_date_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_household_demographics.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_income_band.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_inventory.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_item.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_promotion.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_reason.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_ship_mode.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_store.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_store_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_store_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_time_dim.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_warehouse.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_web_page.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_web_returns.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_web_sales.sql
-  - src/main/resources/scripts/tpcds/build/spark/2_load_web_site.sql
-# Execution of TPC-DS queries (possibly in a previous point-in-time)
-- id: single_user
-  files:
-  - src/main/resources/scripts/tpcds/single_user/spark/query7.sql
-  - src/main/resources/scripts/tpcds/single_user/spark/query15.sql
-  supports_time_travel: true
-# Execution of TPC-DS data maintenance queries
-- id: data_maintenance
-  files:
-  - src/main/resources/scripts/tpcds/data_maintenance/spark/LF_CS.sql
-  parameter_values_file: src/main/resources/auxiliary/tpcds/data_maintenance/parameter_values.dat
-# Execution of optimize (Delta)
-- id: optimize_delta
-  files:
-  - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-delta.sql
-# Execution of optimize (Iceberg)
-- id: optimize_iceberg
-  files:
-  - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-iceberg.sql
-# Execution of optimize (Hudi)
-- id: optimize_hudi
-  files:
-  - src/main/resources/scripts/tpcds/optimize/spark/o_ship_mode-hudi.sql
-# Execution of faulty TPC-DS query
-- id: faulty_query
-  files:
-  - src/test/resources/scripts/faulty_test_query.sql
\ No newline at end of file
diff --git a/src/test/resources/config/spark/w_faulty_query_test.yaml b/src/test/resources/config/spark/w_faulty_query_test.yaml
index a3d60369..1658f82f 100644
--- a/src/test/resources/config/spark/w_faulty_query_test.yaml
+++ b/src/test/resources/config/spark/w_faulty_query_test.yaml
@@ -6,6 +6,4 @@ phases:
 - id: test
   sessions:
   - tasks:
-    - template_id: faulty_query
-      task_executor_arguments:
-        skip_erroneous_query_strings: this is a nonsense string;column
+    - prepared_task_id: task_faulty_query
diff --git a/src/test/resources/config/spark/w_faulty_query_test2.yaml b/src/test/resources/config/spark/w_faulty_query_test2.yaml
new file mode 100644
index 00000000..3e4852f2
--- /dev/null
+++ b/src/test/resources/config/spark/w_faulty_query_test2.yaml
@@ -0,0 +1,9 @@
+# Description: Workload for test: Failure handling via SkipFailedQueryTaskExecutor
+---
+version: 1
+id: w_faulty_query_test
+phases:
+- id: test
+  sessions:
+  - tasks:
+    - template_id: faulty_query
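With the change to w_faulty_query_test.yaml, the skip list no longer rides on the inline task: it lives on the prepared task task_faulty_query in simplified_library.yaml, so any workload referencing that prepared task inherits the same arguments, while w_faulty_query_test2.yaml keeps a bare template reference to cover the path without executor arguments. The value reads as a semicolon-separated list of substrings to match against error messages; a hedged sketch of the matching this presumably implies, not the project's actual executor code:

    import java.util.Arrays;

    final class SkipPolicy {
      // True if the error message contains any configured substring.
      static boolean skippable(String errorMessage, String skipErroneousQueryStrings) {
        return Arrays.stream(skipErroneousQueryStrings.split(";"))
            .anyMatch(errorMessage::contains);
      }
    }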
diff --git a/src/test/resources/config/spark/w_multi_connection-delta.yaml b/src/test/resources/config/spark/w_multi_connection-delta.yaml
index 5fc2d713..49ae7689 100644
--- a/src/test/resources/config/spark/w_multi_connection-delta.yaml
+++ b/src/test/resources/config/spark/w_multi_connection-delta.yaml
@@ -20,19 +20,13 @@ phases:
   - tasks:
     - template_id: build
 - id: multi_single_user
-  sessions:
-  - tasks:
-    - template_id: single_user
-  - tasks:
-    - template_id: single_user
-    target_endpoint: 1
+  template_id: phase_multi_single_user
 - id: multi_mixed_1
   sessions:
   - tasks:
     - template_id: single_user
     target_endpoint: 0
-  - tasks:
-    - template_id: data_maintenance
+  - template_id: session_data_maintenance
     target_endpoint: 1
 - id: multi_mixed_2
   sessions:
diff --git a/src/test/resources/config/spark/w_multi_connection-hudi.yaml b/src/test/resources/config/spark/w_multi_connection-hudi.yaml
index 56054170..4079c878 100644
--- a/src/test/resources/config/spark/w_multi_connection-hudi.yaml
+++ b/src/test/resources/config/spark/w_multi_connection-hudi.yaml
@@ -23,19 +23,13 @@ phases:
     - pattern: '(?i)varchar\(.*\)|char\(.*\)'
       replacement: 'string'
 - id: multi_single_user
-  sessions:
-  - tasks:
-    - template_id: single_user
-  - tasks:
-    - template_id: single_user
-    target_endpoint: 1
+  template_id: phase_multi_single_user
 - id: multi_mixed_1
   sessions:
   - tasks:
     - template_id: single_user
     target_endpoint: 0
-  - tasks:
-    - template_id: data_maintenance
+  - template_id: session_data_maintenance
     target_endpoint: 1
 - id: multi_mixed_2
   sessions:
diff --git a/src/test/resources/config/spark/w_multi_connection-iceberg.yaml b/src/test/resources/config/spark/w_multi_connection-iceberg.yaml
index 049054a7..1f5b0b5b 100644
--- a/src/test/resources/config/spark/w_multi_connection-iceberg.yaml
+++ b/src/test/resources/config/spark/w_multi_connection-iceberg.yaml
@@ -23,19 +23,13 @@ phases:
     - pattern: '(?i)options\((.|\n)*?\)'
       replacement: ''
 - id: multi_single_user
-  sessions:
-  - tasks:
-    - template_id: single_user
-  - tasks:
-    - template_id: single_user
-    target_endpoint: 1
+  template_id: phase_multi_single_user
 - id: multi_mixed_1
   sessions:
   - tasks:
     - template_id: single_user
     target_endpoint: 0
-  - tasks:
-    - template_id: data_maintenance
+  - template_id: session_data_maintenance
     target_endpoint: 1
 - id: multi_mixed_2
   sessions:
diff --git a/src/test/resources/config/spark/w_retry_query_test.yaml b/src/test/resources/config/spark/w_retry_query_test.yaml
new file mode 100644
index 00000000..05562b30
--- /dev/null
+++ b/src/test/resources/config/spark/w_retry_query_test.yaml
@@ -0,0 +1,11 @@
+# Description: Workload for test: Failure handling via retry of erroneous queries
+---
+version: 1
+id: w_retry_query_test
+phases:
+- id: test
+  sessions:
+  - tasks:
+    - template_id: retry_query
+      task_executor_arguments:
+        retry_erroneous_query_strings: testError
\ No newline at end of file
diff --git a/src/test/resources/scripts/retry_test_query.sql b/src/test/resources/scripts/retry_test_query.sql
new file mode 100644
index 00000000..4b9ba8e2
--- /dev/null
+++ b/src/test/resources/scripts/retry_test_query.sql
@@ -0,0 +1 @@
+SELECT * FROM test;
\ No newline at end of file
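w_retry_query_test.yaml introduces the retry counterpart of the skip mechanism: retry_erroneous_query_strings lists error substrings that should trigger re-execution of the statement rather than a skip. The executor's real behavior is not part of this diff; a conceptual sketch under the same substring-matching assumption, with a bounded number of attempts:

    import java.util.Arrays;

    final class RetryPolicy {
      // Re-run the query while the failure matches a configured substring,
      // giving up after maxAttempts tries.
      static void executeWithRetry(Runnable query, String retryStrings, int maxAttempts) {
        for (int attempt = 1; ; attempt++) {
          try {
            query.run();
            return;
          } catch (RuntimeException e) {
            String message = e.getMessage() == null ? "" : e.getMessage();
            boolean retryable =
                Arrays.stream(retryStrings.split(";")).anyMatch(message::contains);
            if (!retryable || attempt >= maxAttempts) {
              throw e;
            }
          }
        }
      }
    }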