Run pandas ASV on CI (#1)

pandas-dev · Dec 18, 2024 · 42aa0b5 · 42aa0b5
1 parent 5a8a21a
commit 42aa0b5
Show file tree

Hide file tree

Showing 3 changed files with 132 additions and 0 deletions.
diff --git a/.github/workflows/asv_test.yaml b/.github/workflows/asv_test.yaml
@@ -0,0 +1,98 @@
+name: Run ASV
+on:
+  schedule:
+    - cron: "0 0 * * *"  # minute 0, hour 0, every day (GitHub evaluates schedules in UTC)
+
+env:
+  ENV_FILE: environment.yml  # NOTE(review): presumably consumed by the setup-conda action - confirm
+  PANDAS_CI: 1
+  BRANCH_NAME: test  # branch that process-asv-benchmarks checks out and commits results to
+
+permissions:
+  contents: read  # workflow-wide default; process-asv-benchmarks overrides it with write
+
+jobs:
+  produce-asv-benchmarks:  # runs the benchmarks and uploads the raw ASV JSON as an artifact
+    name: ASV Benchmarks
+    runs-on: ubuntu-24.04
+    defaults:
+      run:
+        shell: bash -el {0}  # -e: abort on the first failing command, -l: login shell
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+      with:
+        repository: pandas-dev/pandas  # explicitly check out the pandas sources to benchmark
+        fetch-depth: 0  # fetch full history instead of a shallow clone
+
+    - name: Set up Conda
+      uses: ./.github/actions/setup-conda  # local action path, resolved inside the checkout above
+
+    - name: Build pandas
+      uses: ./.github/actions/build_pandas  # local action path, resolved inside the checkout above
+
+    - name: Run ASV Benchmarks
+      run: |
+        cd asv_bench
+        asv machine --machine=asvrunner --yes
+        asv run --machine=asvrunner --python=same --set-commit-hash=$(git rev-parse HEAD) -b ^groupby.GroupByCythonAgg 
+        # Move to a standard location
+        mv results/asvrunner/$(git rev-parse --short=8 HEAD)*.json results.json
+
+    - name: Save JSON results as an artifact
+      uses: actions/upload-artifact@v4
+      with:
+        name: results.json  # artifact name that process-asv-benchmarks downloads
+        path: asv_bench/results.json  # file produced by the `mv` in the previous step
+        retention-days: 14
+
+  process-asv-benchmarks:  # converts the uploaded JSON to parquet and commits it to BRANCH_NAME
+    name: Process ASV Benchmarks
+    needs: produce-asv-benchmarks  # waits for the job that uploads the results.json artifact
+    runs-on: ubuntu-24.04
+    defaults:
+      run:
+        shell: bash -el {0}
+    permissions:
+      contents: write  # the "Commit results" step pushes to the repository
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ env.BRANCH_NAME }}  # no `repository` given, unlike the produce job's checkout
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Upgrade build dependencies
+        run: pip install -U pip
+
+      - name: Install dependencies
+        run: pip install pandas pyarrow  # the two third-party imports of ci/process_results.py
+
+      - name: Download JSON results
+        uses: actions/download-artifact@v4
+        with:
+          name: results.json  # artifact uploaded by produce-asv-benchmarks
+
+      - name: Process ASV results
+        run: |
+          python ci/process_results.py
+          
+      - name: Save parquet results as an artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: results.parquet
+          path: data/results.parquet  # output written by ci/process_results.py
+          retention-days: 14
+
+      - name: Commit results
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          commit_message: Results
+          branch: ${{ env.BRANCH_NAME }}
+          file_pattern: data/results.parquet  # restrict the commit to the parquet output
diff --git a/ci/process_results.py b/ci/process_results.py
@@ -0,0 +1,34 @@
+"""Flatten ASV's results.json into rows and append them to data/results.parquet."""
+import json
+import datetime as dt
+import pyarrow as pa
+import itertools as it
+import pandas as pd
+import os
+
+input_filename = "results.json"
+output_filename = "results.parquet"
+output_path = f"data/{output_filename}"  # one path for the exists check, the read and the write
+with open(input_filename) as f:  # context manager so the handle is closed after parsing
+    results = json.load(f)
+commit_hash = results["commit_hash"]
+columns = results["result_columns"]
+buf = {"name": [], "params": [], "result": []}
+for name, benchmark in results['results'].items():
+    data = dict(zip(columns, benchmark))  # label the positional fields with their column names
+    result = data["result"]
+    params = list(it.product(*data["params"]))  # cartesian product of the parameter lists
+    buf["name"].extend([name] * len(result))  # repeat the benchmark name once per result value
+    buf["params"].extend(params)
+    buf["result"].extend(result)
+buf["name"] = pd.array(buf["name"], dtype="string[pyarrow]")
+buf["params"] = pd.array(buf["params"], dtype=pd.ArrowDtype(pa.list_(pa.string())))
+buf["result"] = pd.array(buf["result"], dtype="float64[pyarrow]")
+df = pd.DataFrame(buf)
+df["date"] = pd.array([dt.datetime.today()] * len(df), dtype=pd.ArrowDtype(pa.timestamp("us")))
+df["sha"] = pd.array([commit_hash] * len(df), dtype="string[pyarrow]")
+df = df[["date", "sha", "name", "params", "result"]]
+
+if os.path.exists(output_path):  # previously read "results.parquet", not the path checked here
+    df = pd.concat([pd.read_parquet(output_path), df])  # existing history first, new rows after
+df.to_parquet(output_path)
diff --git a/data/.gitignore b/data/.gitignore