Skip to content

Commit

Permalink
Run pandas ASV on CI (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored Dec 18, 2024
1 parent 5a8a21a commit 42aa0b5
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 0 deletions.
98 changes: 98 additions & 0 deletions .github/workflows/asv_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Scheduled workflow in two stages:
#   1. produce-asv-benchmarks: check out pandas, build it, run a subset of the
#      ASV benchmarks and publish the raw JSON as an artifact.
#   2. process-asv-benchmarks: convert that JSON with ci/process_results.py
#      and commit the resulting parquet file to BRANCH_NAME.
name: Run ASV

on:
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 0 * * *'

env:
  ENV_FILE: environment.yml
  # Quoted so the value is unambiguously a string; it is exported as "1".
  PANDAS_CI: "1"
  # Branch checked out by the processing job and targeted by the commit step.
  BRANCH_NAME: test

# Workflow-wide default; the processing job overrides this for itself.
permissions:
  contents: read

jobs:
  produce-asv-benchmarks:
    name: ASV Benchmarks
    runs-on: ubuntu-24.04
    defaults:
      run:
        # Login shell (-l) that exits on the first failing command (-e).
        shell: bash -el {0}

    steps:
      # Checks out pandas itself (with full history), not this repository.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          repository: pandas-dev/pandas

      # The two local actions below are referenced by workspace-relative
      # paths, so they are loaded from the repository checked out above.
      - name: Set up Conda
        uses: ./.github/actions/setup-conda

      - name: Build pandas
        uses: ./.github/actions/build_pandas

      # Runs only benchmarks matching ^groupby.GroupByCythonAgg against the
      # current HEAD, then renames the per-commit result file to results.json.
      - name: Run ASV Benchmarks
        run: |
          cd asv_bench
          asv machine --machine=asvrunner --yes
          asv run --machine=asvrunner --python=same --set-commit-hash=$(git rev-parse HEAD) -b ^groupby.GroupByCythonAgg
          # Move to a standard location
          mv results/asvrunner/$(git rev-parse --short=8 HEAD)*.json results.json

      - name: Save JSON results as an artifact
        uses: actions/upload-artifact@v4
        with:
          name: results.json
          path: asv_bench/results.json
          retention-days: 14

  process-asv-benchmarks:
    name: Process ASV Benchmarks
    needs: produce-asv-benchmarks
    runs-on: ubuntu-24.04
    # Write access is required because this job commits the results.
    permissions:
      contents: write
    defaults:
      run:
        shell: bash -el {0}

    steps:
      # Checks out this repository at the results branch.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ env.BRANCH_NAME }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Upgrade build dependencies
        run: pip install -U pip

      - name: Install dependencies
        run: pip install pandas pyarrow

      # No path is given, so the artifact is extracted into the workspace
      # root, where ci/process_results.py looks for results.json.
      - name: Download JSON results
        uses: actions/download-artifact@v4
        with:
          name: results.json

      - name: Process ASV results
        run: |
          python ci/process_results.py

      - name: Save parquet results as an artifact
        uses: actions/upload-artifact@v4
        with:
          name: results.parquet
          path: data/results.parquet
          retention-days: 14

      # Only the parquet file is staged; everything else in the workspace
      # (including the downloaded results.json) is left out of the commit.
      - name: Commit results
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          branch: ${{ env.BRANCH_NAME }}
          commit_message: Results
          file_pattern: data/results.parquet
34 changes: 34 additions & 0 deletions ci/process_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json
import datetime as dt
import pyarrow as pa
import itertools as it
import pandas as pd
import os

# Flatten one ASV results file (results.json in the working directory) into
# one row per (benchmark, parameter combination) and append those rows to the
# accumulated history stored at data/results.parquet.
input_filename = "results.json"
output_filename = "results.parquet"
# Single path used for the existence check, the read AND the write. The
# original checked "data/results.parquet" but then read "results.parquet"
# from the working directory, so existing history could never be loaded.
output_path = f"data/{output_filename}"

# Context manager so the input handle is closed instead of leaked.
with open(input_filename) as f:
    results = json.load(f)
commit_hash = results["commit_hash"]
# Names of the positional fields inside each entry of results["results"].
columns = results["result_columns"]

buf = {"name": [], "params": [], "result": []}
for name, benchmark in results["results"].items():
    # Pair the positional entry with its column names for keyed access.
    data = dict(zip(columns, benchmark))
    result = data["result"]
    # Cartesian product of the per-axis parameter values.
    # NOTE(review): assumes "result" is a list holding one value per
    # parameter combination, in the same order as this product -- confirm
    # how failed or skipped benchmarks are represented in the input.
    params = list(it.product(*data["params"]))
    buf["name"].extend([name] * len(result))
    buf["params"].extend(params)
    buf["result"].extend(result)

buf["name"] = pd.array(buf["name"], dtype="string[pyarrow]")
buf["params"] = pd.array(buf["params"], dtype=pd.ArrowDtype(pa.list_(pa.string())))
buf["result"] = pd.array(buf["result"], dtype="float64[pyarrow]")
df = pd.DataFrame(buf)
# Every row of this run shares the same timestamp and commit hash.
df["date"] = pd.array([dt.datetime.today()] * len(df), dtype=pd.ArrowDtype(pa.timestamp("us")))
df["sha"] = pd.array([commit_hash] * len(df), dtype="string[pyarrow]")
df = df[["date", "sha", "name", "params", "result"]]

if os.path.exists(output_path):
    existing = pd.read_parquet(output_path)
    # ignore_index: both frames carry their own 0..n-1 labels; without it the
    # combined frame would have repeated labels that get written to the file.
    final = pd.concat([existing, df], ignore_index=True)
else:
    final = df

# Make sure the target directory exists before writing into it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final.to_parquet(output_path)
Empty file added data/.gitignore
Empty file.

0 comments on commit 42aa0b5

Please sign in to comment.