Tracks benchmark (#953)
* wip

* lint

* wip

* wip

* show/hide dataset details

* add wrapped generators

* wip

* wip before the beach

* make metrics generic

* fix conflicts

* add first results

* improve benchmarks

* fix time

* wip

* wip

* add benchmarks to docs

* new results

* add shelve

* Update voting.py

* new benchmark results added

* housekeeping

* more housekeeping

* json needed due to different computer chips + shelve

* add models and datasets details

* refactor run

* Update run.py

* should work

* Update tracks.py

Co-authored-by: smastelini <saulomastelini@gmail.com>
Former-commit-id: a4b17e8
MaxHalford and smastelini authored Jun 6, 2022
1 parent 85e037d commit c3fa6b0
Showing 24 changed files with 4,909 additions and 135 deletions.
17 changes: 0 additions & 17 deletions benchmarks.html

This file was deleted.

20 changes: 15 additions & 5 deletions benchmarks/README.md
@@ -1,11 +1,21 @@
 # Benchmarks
 
-Navigate to this directory and create a `conda` virtual environment, as so:
+Navigate to the root of this repo and create a `conda` virtual environment, as so:
 
 ```sh
-$ conda create -n river-benchmarks -y python==3.8.5
-$ conda activate river-benchmarks
-$ pip install -r requirements.txt
+conda create -n river-benchmarks -y python==3.8.5
+conda activate river-benchmarks
+pip install -e ".[benchmarks]"
 ```
 
-Note that this will install the development version of `river` from GitHub. You can change this behaviour by modifying `requirements.txt` before creating the virtual environment.
+Then run the benchmarks:
+
+```sh
+python run.py
+```
+
+This creates a `results.json` file. To generate the page that gets displayed in the docs, do this:
+
+```sh
+python render.py
+```
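For orientation, `results.json` maps each track name to a list of result rows; that shape is what the rendering script below iterates over (`benchmarks.items()` and `results[0]`), and the `Dataset`, `Model`, `Memory` (bytes), and `Time` (milliseconds) columns plus the track's metrics are the fields it formats. The concrete track, model, and metric values here are invented for illustration only:

```python
# Hypothetical excerpt of results.json, for illustration only; a real run of
# run.py produces the actual tracks, models, and metric columns.
example_results = {
    "Binary classification": [
        {
            "Dataset": "Phishing",
            "Model": "Logistic regression",
            "Memory": 5120,    # bytes, pretty-printed by the docs page
            "Time": 61250,     # milliseconds
            "Accuracy": 0.888,
            "F1": 0.872,
        }
    ]
}
```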
75 changes: 75 additions & 0 deletions benchmarks/details.json

Large diffs are not rendered by default.

132 changes: 132 additions & 0 deletions benchmarks/render.py
@@ -0,0 +1,132 @@
import json

import dominate
from dominate.tags import *
from river import datasets
from slugify import slugify
from watermark import watermark

# Benchmark results produced by run.py
with open('results.json') as f:
    benchmarks = json.load(f)

# Human-readable descriptions of each track's datasets and models
with open("details.json") as f:
    models = json.load(f)

with open('../docs/benchmarks/index.md', 'w') as f:
    print_ = lambda x: print(x, file=f, end='\n\n')
    print_("""---
hide:
- navigation
---
""")
    print_('# Benchmarks')

    print_('## Environment')
    print_(pre(watermark(python=True, packages='river,numpy,scikit-learn,pandas,scipy', machine=True)))

    # Load Tabulator, which renders the interactive results tables
    imports = div()
    imports.add(link(href="https://unpkg.com/tabulator-tables@5.2.6/dist/css/tabulator.min.css", rel="stylesheet"))
    imports.add(script(type="text/javascript", src="https://unpkg.com/tabulator-tables@5.2.6/dist/js/tabulator.min.js"))
    print_(imports)

    print_(script(dominate.util.raw("""
let baseColumns
let metrics
let columns
""")))

    # One section per track: a results table, then collapsible dataset and model details
    for track_name, results in benchmarks.items():
        print_(f'## {track_name}')

        print_("### Results")
        print_(div(id=f"{slugify(track_name)}-results"))

        print_("### Datasets")
        for name, desc in models[track_name]["Dataset"].items():
            _details = details()
            _details.add(summary(name))
            _details.add(pre(desc))
            print_(_details)

        print_("### Models")
        for name, desc in models[track_name]["Model"].items():
            _details = details()
            _details.add(summary(name))
            _details.add(pre(desc))
            print_(_details)

        # Inline JavaScript that feeds the track's results into a Tabulator table,
        # with per-column formatters for memory, time, and metric values
        print_(script(dominate.util.raw(f"""
var results = {results}
baseColumns = [
    "Dataset",
    "Model",
    "Memory",
    "Time"
]
metrics = Object.keys(results[0]).filter(x => !baseColumns.includes(x)).sort();
columns = [...baseColumns, ...metrics].map(x => ({{title: x, field: x}}))
function formatBytes(bytes, decimals = 2) {{
    if (bytes === 0) return '0 Bytes'
    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];
    const i = Math.floor(Math.log(bytes) / Math.log(k));
    return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' + sizes[i];
}}
function msToTime(s) {{
    function pad(n, z) {{
        z = z || 2;
        return ('00' + n).slice(-z);
    }}
    var ms = s % 1000;
    s = (s - ms) / 1000;
    var secs = s % 60;
    s = (s - secs) / 60;
    var mins = s % 60;
    var hrs = (s - mins) / 60;
    return pad(hrs) + ':' + pad(mins) + ':' + pad(secs) + '.' + pad(ms, 3);
}}
columns.map((x, i) => {{
    if (x.title === 'Dataset') {{
        columns[i]["headerFilter"] = true
    }}
    if (x.title === 'Model') {{
        columns[i]["headerFilter"] = true
    }}
    if (x.title === 'Memory') {{
        columns[i]["formatter"] = function(cell, formatterParams, onRendered){{
            return formatBytes(cell.getValue())
        }}
    }}
    if (x.title === 'Time') {{
        columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
            return msToTime(cell.getValue())
        }}
    }}
    if (['Accuracy', 'F1'].includes(x.title)) {{
        columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
            return (100 * cell.getValue()).toFixed(2) + "%"
        }}
    }}
    if (['MAE', 'RMSE', 'R2'].includes(x.title)) {{
        columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
            return cell.getValue().toFixed(3)
        }}
    }}
}})
new Tabulator('#{slugify(track_name)}-results', {{
    data: results,
    layout: 'fitColumns',
    columns: columns
}})
""")))
8 changes: 0 additions & 8 deletions benchmarks/requirements.txt

This file was deleted.

Binary file added benchmarks/results.db
Binary file not shown.