Merge branch 'main' into fabric

microsoft · May 16, 2024 · be4c39b
2 parents c22de92 + a9e7ef1
commit be4c39b
Show file tree

Hide file tree

Showing 847 changed files with 5,933 additions and 1,734 deletions.
diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
@@ -26,3 +26,7 @@ updates:
       interval: "daily"
     assignees:
       - "jcamachor"
+    groups:
+      log4j:
+        patterns:
+          - "org.apache.logging.log4j*"
diff --git a/.github/workflows/maven.yaml b/.github/workflows/maven.yaml
@@ -33,7 +33,7 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
       - name: Set up JDK
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           java-version: ${{ env.JAVA_VERSION }}
           distribution: 'temurin'
@@ -55,7 +55,7 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
       - name: Set up JDK
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           java-version: ${{ env.JAVA_VERSION }}
           distribution: 'temurin'
@@ -77,7 +77,7 @@ jobs:
       - name: Check out repository code
         uses: actions/checkout@v4
       - name: Set up JDK
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           java-version: ${{ env.JAVA_VERSION }}
           distribution: 'temurin'

diff --git a/.github/workflows/webapp-deploy.yaml b/.github/workflows/webapp-deploy.yaml
@@ -0,0 +1,108 @@
+# Copyright (c) Microsoft Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# More GitHub Actions for Azure: https://github.com/Azure/actions
+# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
+
+name: Build and deploy Web App - lst-bench
+
+on:
+  push:
+    paths:
+      - metrics/**
+      - run/**
+    branches:
+      - main
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  AZURE_WEBAPP_NAME: lst-bench
+  WORKING_DIRECTORY: './metrics/app'
+  STARTUP_COMMAND: 'python -m streamlit run main.py --server.port 8000 --server.address 0.0.0.0 --client.toolbarMode minimal'
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: 'Set up Python version'
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: 'Create and start virtual environment'
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+
+      - name: 'Install dependencies'
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          pip install setuptools
+          pip install -r requirements.txt
+
+      - name: 'Copy .duckdb files from ./run/'
+        run: |
+          find ./run -type f -name "*.duckdb" -exec cp {} ${{ env.WORKING_DIRECTORY }} \;
+
+      - name: Zip artifact for deployment
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: zip release.zip ./* -r
+
+      - name: Upload artifact for deployment jobs
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-app
+          path: |
+            ${{ env.WORKING_DIRECTORY }}/release.zip
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    environment:
+      name: 'webapp-deploy'
+      url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
+    permissions:
+      id-token: write  # Required to request the OIDC JWT used by the Azure login step
+
+    steps:
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: python-app
+          path: .
+
+      - name: Unzip artifact for deployment
+        run: unzip release.zip
+
+      - name: Login to Azure
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_33D9610570044F3DA4CC10BFC44E822C }}
+          tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_B6D8A47890014FE18CA30533FD44F9A3 }}
+          subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_16D6B2652AF543ADA2A0CBFD17A3F482 }}
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v3
+        id: deploy-to-webapp
+        with:
+          app-name: ${{ env.AZURE_WEBAPP_NAME }}
+          startup-command: ${{ env.STARTUP_COMMAND }}
diff --git a/.gitignore b/.gitignore
@@ -57,6 +57,9 @@ bin/
 # Local configuration file (sdk path, etc)
 local.properties
 
+# Python
+*.pyc
+
 # Others
 *~
 .DS_Store

diff --git a/CITATION.bib b/CITATION.bib
@@ -0,0 +1,11 @@
+@article{2024lstbench,
+    author = {Jes\'{u}s Camacho-Rodr\'{\i}guez and Ashvin Agrawal and Anja Gruenheid and
+            Ashit Gosalia and Cristian Petculescu and Josep Aguilar-Saborit and
+            Avrilia Floratou and Carlo Curino and Raghu Ramakrishnan},
+    title = {LST-Bench: Benchmarking Log-Structured Tables in the Cloud},
+    journal = {Proc. ACM Manag. Data},
+    volume = {2},
+    number = {1},
+    year = {2024},
+    url = {https://doi.org/10.1145/3639314}
+}
diff --git a/README.md b/README.md
@@ -69,8 +69,8 @@ usage: ./launcher.sh -c <arg> -e <arg> -l <arg> -t <arg> -w <arg>
                                  connections config details
  -e,--experiment-config <arg>    [required] Path to input file containing
                                  the experiment config details
- -l,--task-library <arg>         [required] Path to input file containing
-                                 the library with task templates
+ -l,--library <arg>              [required] Path to input file containing
+                                 the library with templates
  -t,--input-log-config <arg>     [required] Path to input file containing
                                  the telemetry gathering config details
  -w,--workload <arg>             [required] Path to input file containing
@@ -111,22 +111,7 @@ The LST-Bench code is organized into two modules:
    The Python module performs data processing, analysis, and visualization to facilitate a deeper understanding of the experimental results.
 
 ### LST-Bench Concepts
-In LST-Bench, the following concepts are used to define and organize SQL workloads:
-
-- **Task**: A task is a collection of SQL statements grouped together in a sequence of files. Each file represents a step or subtask within the overall task.
-
-- **Session**: A session refers to a sequence of tasks. It represents a logical unit of work or a user session.
-
-- **Phase**: A phase consists of multiple concurrent sessions that need to be completed before proceeding to the next phase. Phases help simulate concurrent workload scenarios.
-
-- **Workload**: A workload is a sequence of phases, defining the complete set of tasks, sessions, and phases to be executed during the evaluation.
-
-In LST-Bench, tasks are generated using task templates predefined in the task library.
-LST-Bench includes a default task library that encompasses tasks derived from the TPC-DS benchmark, along with workload definitions representing the original TPC-DS and multiple workload patterns. These resources can be located [here](src/main/resources/config/tpcds).
-
-Although LST-Bench provides this set of tasks and workload patterns,
-users have the flexibility to incorporate additional task templates or even create a completely new task library to model specific scenarios.
-This flexible model allows for the easy creation of diverse SQL workloads for evaluation purposes without the need to modify the application itself.
+In LST-Bench, we utilize specific concepts to define and organize SQL workloads, with a focus on maximizing flexibility and facilitating reusability across various workloads. For detailed information, refer to our [documentation](docs/workloads.md).
 
 ### Telemetry and Metrics Processor
 LST-Bench captures execution telemetry during workload execution at multiple levels, including per experiment, phase, session, task, file, and statement.
@@ -142,17 +127,21 @@ Alternatively, if the LST-Bench [Metrics Processor](metrics) is used, you can si
 The processor will then analyze and visualize the results, providing a streamlined solution for result analysis and visualization.
 
 ## Documentation
-For more details about LST-Bench, please refer to the accompanying [technical report](https://arxiv.org/pdf/2305.01120):
+For more details about LST-Bench, please refer to the accompanying [technical report](https://arxiv.org/pdf/2305.01120).
+
+If you are writing an academic paper, you can cite this work as:
 
 ```bibtex
-@article{2023lstbench,
-    title={LST-Bench: Benchmarking Log-Structured Tables in the Cloud},
-    author={Jesús Camacho-Rodríguez and Ashvin Agrawal and Anja Gruenheid and
+@article{2024lstbench,
+    author = {Jes\'{u}s Camacho-Rodr\'{\i}guez and Ashvin Agrawal and Anja Gruenheid and
             Ashit Gosalia and Cristian Petculescu and Josep Aguilar-Saborit and
             Avrilia Floratou and Carlo Curino and Raghu Ramakrishnan},
-    year={2023},
-    journal={arXiv preprint arXiv:2305.01120},
-    url={https://arxiv.org/abs/2305.01120},
+    title = {LST-Bench: Benchmarking Log-Structured Tables in the Cloud},
+    journal = {Proc. ACM Manag. Data},
+    volume = {2},
+    number = {1},
+    year = {2024},
+    url = {https://doi.org/10.1145/3639314}
 }
 ```