benches/tables.py

#!/usr/bin/env python3
# Generates markdown tables from criterion textual output.
# Outputs to stdout or to the README.md file if provided as an argument.

import re
import sys

from tabulate import tabulate

# Line that main() searches for in the output file: the file content up to and
# including this marker is kept, and everything after it is replaced by the
# generated tables.
MARKER = "<!-- AUTOGENERATED MARKER -->\n"


def main():
    """Render criterion benchmark output read from stdin as markdown tables.

    Without a command-line argument the markdown is printed to stdout. With
    one argument, that file is rewritten in place: its content up to and
    including ``MARKER`` is kept and everything after it is replaced by the
    generated tables.

    Returns early, producing no output, when stdin has no usable lines.

    Raises:
        ValueError: if no ``empty``/``solc`` measurement is present, or if the
            output file does not contain ``MARKER``.
    """
    out_file = sys.argv[1] if len(sys.argv) > 1 else None

    lines = sys.stdin.readlines()
    # Drop everything before the first line that contains a colon; the
    # benchmark header lines are matched from there on. Input without any
    # colon yields an empty list instead of indexing past the end.
    first = next((i for i, line in enumerate(lines) if ":" in line), len(lines))
    lines = [line for line in lines[first:] if "Warning" not in line]
    if not lines:
        return

    benchmarks = _parse_benchmarks(lines)
    data, min_times = _parse_measurements(lines)
    # NOTE(review): min_times is collected before the solc adjustment below,
    # so an adjusted solc time can fall under the recorded minimum and show a
    # relative value below 1.00x -- confirm whether that is intended.
    _subtract_solc_overhead(data)
    out_s = _render_tables(benchmarks, data, min_times)

    if out_file:
        with open(out_file, "r") as f:
            content = f.read()
        marker_at = content.find(MARKER)
        if marker_at == -1:
            # Checked before the file is reopened for writing, so a missing
            # marker never truncates the file.
            raise ValueError(f"{out_file} does not contain the marker {MARKER!r}")
        idx = marker_at + len(MARKER)
        with open(out_file, "w") as f:
            f.write(content[:idx] + "\n" + out_s + "\n")
    else:
        print(out_s)


def _parse_benchmarks(lines):
    """Collect ``(name, loc, bytes)`` tuples from the leading header lines.

    Scanning stops at the first blank line; lines that do not look like
    ``name: N LoC, M bytes`` are skipped.
    """
    benchmark_re = re.compile(r"(\w+): (\d+) LoC, (\d+) bytes")
    benchmarks = []
    for line in lines:
        if line.strip() == "":
            break
        match = benchmark_re.match(line)
        if match:
            name, loc, num_bytes = match.groups()
            benchmarks.append((name, int(loc), int(num_bytes)))
    return benchmarks


def _parse_measurements(lines):
    """Extract every ``parser/<bench>/<parser>/<kind>`` timing from ``lines``.

    Returns ``(data, min_times)``. ``data`` holds one mutable row
    ``[bench_name, parser, kind, time_text, time_ns]`` per measurement, in
    input order. ``min_times[bench_name][kind]`` is the smallest ``time_ns``
    seen for that pair.
    """
    time = r"(\s*[\d\.]+ \w+)"
    data_re = re.compile(
        rf"parser/(\w+)/(\w+)/(\w+)\s*time:\s*\n?\s*\[{time}{time}{time}\]",
        flags=re.MULTILINE,
    )
    data = []
    min_times = {}
    for match in data_re.findall("\n".join(lines)):
        # Three values are printed inside the brackets; the middle one is
        # used as the measurement.
        bench_name, parser, kind, _low, middle, _high = match
        time_ns = parse_time_s(middle)
        # setdefault: a measurement whose benchmark has no header line must
        # not crash the run (such benchmarks are simply never rendered).
        per_kind = min_times.setdefault(bench_name, {})
        per_kind[kind] = min(per_kind.get(kind, time_ns), time_ns)
        data.append([bench_name, parser, kind, middle, time_ns])
    return data, min_times


def _subtract_solc_overhead(data):
    """Remove solc's base overhead from every solc row of ``data`` in place.

    The overhead is the first ``empty``/``solc`` measurement minus 1us, so
    that row itself ends up at 1us. The time text of each solc row is
    regenerated from the adjusted nanosecond value.

    Raises:
        ValueError: if ``data`` has no ``empty``/``solc`` row.
    """
    base_solc_ns = next(
        (row[4] for row in data if row[0] == "empty" and row[1] == "solc"),
        None,
    )
    if base_solc_ns is None:
        raise ValueError("Couldn't find base solc time")
    overhead_ns = base_solc_ns - 1_000  # keep 1us
    for row in data:
        if row[1] == "solc":
            row[4] -= overhead_ns
            row[3] = format_ns(row[4])


def _render_tables(benchmarks, data, min_times):
    """Build the markdown: one section per benchmark, one table per kind.

    Rows are ordered by their time relative to ``min_times``, fastest first.
    """
    parsers = sorted({row[1] for row in data})
    # First row per (bench, parser, kind); the first occurrence wins.
    first_rows = {}
    for row in data:
        first_rows.setdefault((row[0], row[1], row[2]), row)

    header = ["Parser", "Relative", "Time", "LoC/s", "Bytes/s"]
    parts = []
    for bench_name, loc, num_bytes in benchmarks:
        parts.append(f"### {bench_name} ({loc} LoC, {num_bytes} bytes)\n\n")

        for kind in ["lex", "parse"]:
            rows = []
            for parser in parsers:
                related = first_rows.get((bench_name, parser, kind))
                if related is None:
                    continue

                time_ns = related[4]
                ratio = time_ns / min_times[bench_name][kind]
                rows.append(
                    (
                        ratio,
                        [
                            parser,
                            format_number(ratio) + "x",
                            related[3],
                            get_per_second(loc, time_ns),
                            get_per_second(num_bytes, time_ns),
                        ],
                    )
                )

            # Sort on the numeric ratio rather than parsing the formatted
            # cell back to a float: ratios of 1000 or more are formatted
            # with a "K"/"M"/"B" suffix, which float() cannot parse.
            rows.sort(key=lambda entry: entry[0])
            table = [header] + [cells for _, cells in rows]

            parts.append(f"#### {kind.capitalize()}\n")
            parts.append(tabulate(table, headers="firstrow", tablefmt="pipe"))
            parts.append("\n\n")

    return "".join(parts).rstrip()


def parse_time_s(time: str):
    """Convert a time string such as ``"1.5 ms"`` into whole nanoseconds.

    Surrounding whitespace is ignored; the number and the unit must be
    separated by exactly one space. Supported units are ``s``, ``ms``,
    ``us``/``µs`` and ``ns``. The result is truncated to an ``int``.

    Raises:
        ValueError: if the text is not ``"<number> <unit>"`` or the unit is
            not one of the supported ones.
    """
    number_text, suffix = time.strip().split(" ")
    # Convert the number first so a malformed value is reported before an
    # unknown unit is.
    magnitude = float(number_text)
    ns_per_unit = {
        "s": 1_000_000_000,
        "ms": 1_000_000,
        "us": 1_000,
        "µs": 1_000,
        "ns": 1,
    }
    if suffix not in ns_per_unit:
        raise ValueError(f"Unknown unit: {suffix}")
    return int(magnitude * ns_per_unit[suffix])


def get_per_second(total: int, ns: int):
    """Return ``total`` per second, formatted with ``format_number``.

    Args:
        total: Amount of work done (e.g. lines or bytes).
        ns: Time taken, in nanoseconds.

    Returns:
        The formatted rate, or ``"N/A"`` when ``total`` is zero or ``ns`` is
        not a positive duration.
    """
    # A zero duration would divide by zero and a negative one would produce a
    # meaningless negative rate; both are reported as unavailable, which also
    # covers the -1 sentinel.
    if total == 0 or ns <= 0:
        return "N/A"

    seconds = ns / 1_000_000_000
    return format_number(total / seconds)


def format_number(n: float):
    """Format ``n`` with two decimals and a ``K``/``M``/``B`` magnitude suffix.

    The largest threshold (thousand, million, billion) that ``n`` reaches
    determines the suffix and the divisor; values below 1000 get no suffix.
    """
    suffix = ""
    scales = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for threshold, letter in scales:
        if n >= threshold:
            n, suffix = n / threshold, letter
            break
    return f"{n:.2f}{suffix}"


def format_ns(ns: int):
    """Format a nanosecond count as a short ``"<value> <unit>"`` string.

    The value is scaled to the largest of ``s``, ``ms``, ``µs`` that it
    reaches (otherwise left in ``ns``), written in fixed-point notation and
    cut to its first six characters.
    """
    scales = ((1_000_000_000, "s"), (1_000_000, "ms"), (1_000, "µs"))
    for divisor, unit in scales:
        if ns >= divisor:
            scaled = ns / divisor
            break
    else:
        # Below one microsecond: keep the value as given.
        scaled, unit = ns, "ns"
    digits = format(scaled, "f")[:6]
    return f"{digits} {unit}"


# Generate the tables only when executed as a script, not when imported.
if __name__ == "__main__":
    main()