diff --git a/README.md b/README.md
index bf21d796..3a0a8900 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,10 @@ The next steps differ based on what kind of project you have.
All scripts will become available to use in your terminal with the `reccmp-` prefix. Note that these scripts need to be executed in the directory where `reccmp-build.yml` is located.
+* [`aggregate`](/reccmp/tools/aggregate.py): Combines JSON reports into a single file.
+ * Aggregate using highest accuracy score: `reccmp-aggregate --samples ./sample0.json ./sample1.json ./sample2.json --output ./combined.json`
+ * Diff two saved reports: `reccmp-aggregate --diff ./before.json ./after.json`
+ * Diff against the aggregate: `reccmp-aggregate --samples ./sample0.json ./sample1.json ./sample2.json --diff ./before.json`
* [`decomplint`](/reccmp/tools/decomplint.py): Checks the decompilation annotations (see above)
* e.g. `reccmp-decomplint --module LEGO1 LEGO1`
* [`reccmp`](/reccmp/tools/asmcmp.py): Compares an original binary with a recompiled binary, provided a PDB file. For example:
diff --git a/reccmp/assets/template.html b/reccmp/assets/template.html
index 3ac15b45..6518c5a6 100644
--- a/reccmp/assets/template.html
+++ b/reccmp/assets/template.html
@@ -180,7 +180,10 @@
margin-bottom: 0;
}
-
+
diff --git a/reccmp/isledecomp/compare/diff.py b/reccmp/isledecomp/compare/diff.py
index aebe1b6a..19c412bd 100644
--- a/reccmp/isledecomp/compare/diff.py
+++ b/reccmp/isledecomp/compare/diff.py
@@ -1,6 +1,5 @@
from difflib import SequenceMatcher
-from typing import TypedDict
-from typing_extensions import NotRequired
+from typing_extensions import NotRequired, TypedDict
CombinedDiffInput = list[tuple[str, str]]
diff --git a/reccmp/isledecomp/compare/report.py b/reccmp/isledecomp/compare/report.py
new file mode 100644
index 00000000..fe5ece9b
--- /dev/null
+++ b/reccmp/isledecomp/compare/report.py
@@ -0,0 +1,189 @@
+from datetime import datetime
+from dataclasses import dataclass
+from typing import Literal, Iterable, Iterator
+from pydantic import BaseModel, ValidationError
+from pydantic_core import from_json
+from .diff import CombinedDiffOutput
+
+
+class ReccmpReportDeserializeError(Exception):
+ """The given file is not a serialized reccmp report file"""
+
+
+class ReccmpReportSameSourceError(Exception):
+ """Tried to aggregate reports derived from different source files."""
+
+
+@dataclass
+class ReccmpComparedEntity:
+ orig_addr: str
+ name: str
+ accuracy: float
+ recomp_addr: str | None = None
+ is_effective_match: bool = False
+ is_stub: bool = False
+ diff: CombinedDiffOutput | None = None
+
+
+class ReccmpStatusReport:
+ # The filename of the original binary.
+ # This is here to avoid comparing reports derived from different files.
+ # TODO: in the future, we may want to use the hash instead
+ filename: str
+
+ # Creation date of the report file.
+ timestamp: datetime
+
+ # Using orig addr as the key.
+ entities: dict[str, ReccmpComparedEntity]
+
+ def __init__(self, filename: str, timestamp: datetime | None = None) -> None:
+ self.filename = filename
+ if timestamp is not None:
+ self.timestamp = timestamp
+ else:
+ self.timestamp = datetime.now().replace(microsecond=0)
+
+ self.entities = {}
+
+
+def _get_entity_for_addr(
+ samples: Iterable[ReccmpStatusReport], addr: str
+) -> Iterator[ReccmpComparedEntity]:
+    """Helper to return entities from the given reports that have the given address."""
+ for sample in samples:
+ if addr in sample.entities:
+ yield sample.entities[addr]
+
+
+def _accuracy_sort_key(entity: ReccmpComparedEntity) -> float:
+ """Helper to sort entity samples by accuracy score.
+ 100% match is preferred over effective match.
+ Effective match is preferred over any accuracy.
+ Stubs rank lower than any accuracy score."""
+ if entity.is_stub:
+ return -1.0
+
+ if entity.accuracy == 1.0:
+ if not entity.is_effective_match:
+ return 1000.0
+
+ if entity.is_effective_match:
+ return 1.0
+
+ return entity.accuracy
+
+
+def combine_reports(samples: list[ReccmpStatusReport]) -> ReccmpStatusReport:
+ """Combines the sample reports into a single report.
+ The current strategy is to use the entity with the highest
+ accuracy score from any report."""
+ assert len(samples) > 0
+
+ if not all(samples[0].filename == s.filename for s in samples):
+ raise ReccmpReportSameSourceError
+
+ output = ReccmpStatusReport(filename=samples[0].filename)
+
+ # Combine every orig addr used in any of the reports.
+ orig_addr_set = {key for sample in samples for key in sample.entities.keys()}
+
+ all_orig_addrs = sorted(list(orig_addr_set))
+
+ for addr in all_orig_addrs:
+ e_list = list(_get_entity_for_addr(samples, addr))
+ assert len(e_list) > 0
+
+ # Our aggregate accuracy score is the highest from any report.
+ e_list.sort(key=_accuracy_sort_key, reverse=True)
+
+ output.entities[addr] = e_list[0]
+
+ # Recomp addr will most likely vary between samples, so clear it
+ output.entities[addr].recomp_addr = None
+
+ return output
+
+
+#### JSON schemas and conversion functions ####
+
+
+@dataclass
+class JSONEntityVersion1:
+ address: str
+ name: str
+ matching: float
+ # Optional fields
+ recomp: str | None = None
+ stub: bool = False
+ effective: bool = False
+ diff: CombinedDiffOutput | None = None
+
+
+class JSONReportVersion1(BaseModel):
+ file: str
+ format: Literal[1]
+ timestamp: float
+ data: list[JSONEntityVersion1]
+
+
+def _serialize_version_1(
+ report: ReccmpStatusReport, diff_included: bool = False
+) -> JSONReportVersion1:
+    """Convert the report to the version 1 schema. The HTML file needs the diff data, but it is omitted from the JSON report."""
+ entities = [
+ JSONEntityVersion1(
+ address=addr, # prefer dict key over redundant value in entity
+ name=e.name,
+ matching=e.accuracy,
+ recomp=e.recomp_addr,
+ stub=e.is_stub,
+ effective=e.is_effective_match,
+ diff=e.diff if diff_included else None,
+ )
+ for addr, e in report.entities.items()
+ ]
+
+ return JSONReportVersion1(
+ file=report.filename,
+ format=1,
+ timestamp=report.timestamp.timestamp(),
+ data=entities,
+ )
+
+
+def _deserialize_version_1(obj: JSONReportVersion1) -> ReccmpStatusReport:
+ report = ReccmpStatusReport(
+ filename=obj.file, timestamp=datetime.fromtimestamp(obj.timestamp)
+ )
+
+ for e in obj.data:
+ report.entities[e.address] = ReccmpComparedEntity(
+ orig_addr=e.address,
+ name=e.name,
+ accuracy=e.matching,
+ recomp_addr=e.recomp,
+ is_stub=e.stub,
+ is_effective_match=e.effective,
+ )
+
+ return report
+
+
+def deserialize_reccmp_report(json_str: str) -> ReccmpStatusReport:
+ try:
+ obj = JSONReportVersion1.model_validate(from_json(json_str))
+ return _deserialize_version_1(obj)
+ except ValidationError as ex:
+ raise ReccmpReportDeserializeError from ex
+
+
+def serialize_reccmp_report(
+ report: ReccmpStatusReport, diff_included: bool = False
+) -> str:
+ """Create a JSON string for the report so it can be written to a file."""
+ now = datetime.now().replace(microsecond=0)
+ report.timestamp = now
+ obj = _serialize_version_1(report, diff_included=diff_included)
+
+ return obj.model_dump_json(exclude_defaults=True)
diff --git a/reccmp/isledecomp/utils.py b/reccmp/isledecomp/utils.py
index fc7215d9..26b8f37a 100644
--- a/reccmp/isledecomp/utils.py
+++ b/reccmp/isledecomp/utils.py
@@ -1,7 +1,7 @@
from datetime import datetime
import logging
-from pathlib import Path
import colorama
+from reccmp.isledecomp.compare.report import ReccmpStatusReport, ReccmpComparedEntity
def print_combined_diff(udiff, plain: bool = False, show_both: bool = False):
@@ -129,7 +129,9 @@ def diff_json_display(show_both_addrs: bool = False, is_plain: bool = False):
"""Generate a function that will display the diff according to
the reccmp display preferences."""
- def formatter(orig_addr, saved, new) -> str:
+ def formatter(
+ orig_addr, saved: ReccmpComparedEntity, new: ReccmpComparedEntity
+ ) -> str:
old_pct = "new"
new_pct = "gone"
name = ""
@@ -138,29 +140,25 @@ def formatter(orig_addr, saved, new) -> str:
if new is not None:
new_pct = (
"stub"
- if new.get("stub", False)
- else percent_string(
- new["matching"], new.get("effective", False), is_plain
- )
+ if new.is_stub
+ else percent_string(new.accuracy, new.is_effective_match, is_plain)
)
# Prefer the current name of this function if we have it.
# We are using the original address as the key.
# A function being renamed is not of interest here.
- name = new.get("name", "")
- recomp_addr = new.get("recomp", "n/a")
+ name = new.name
+ recomp_addr = new.recomp_addr or "n/a"
if saved is not None:
old_pct = (
"stub"
- if saved.get("stub", False)
- else percent_string(
- saved["matching"], saved.get("effective", False), is_plain
- )
+ if saved.is_stub
+ else percent_string(saved.accuracy, saved.is_effective_match, is_plain)
)
if name == "":
- name = saved.get("name", "")
+ name = saved.name
if show_both_addrs:
addr_string = f"{orig_addr} / {recomp_addr:10}"
@@ -176,29 +174,25 @@ def formatter(orig_addr, saved, new) -> str:
def diff_json(
- saved_data,
- new_data,
- orig_file: Path,
+ saved_data: ReccmpStatusReport,
+ new_data: ReccmpStatusReport,
show_both_addrs: bool = False,
is_plain: bool = False,
):
- """Using a saved copy of the diff summary and the current data, print a
- report showing which functions/symbols have changed match percentage."""
+ """Compare two status report files, determine what items changed, and print the result."""
# Don't try to diff a report generated for a different binary file
- base_file = orig_file.name.lower()
-
- if saved_data.get("file") != base_file:
+ if saved_data.filename != new_data.filename:
logging.getLogger().error(
"Diff report for '%s' does not match current file '%s'",
- saved_data.get("file"),
- base_file,
+ saved_data.filename,
+ new_data.filename,
)
return
- if "timestamp" in saved_data:
+ if saved_data.timestamp is not None:
now = datetime.now().replace(microsecond=0)
- then = datetime.fromtimestamp(saved_data["timestamp"]).replace(microsecond=0)
+ then = saved_data.timestamp.replace(microsecond=0)
print(
" ".join(
@@ -213,8 +207,8 @@ def diff_json(
print()
# Convert to dict, using orig_addr as key
- saved_invert = {obj["address"]: obj for obj in saved_data["data"]}
- new_invert = {obj["address"]: obj for obj in new_data}
+ saved_invert = saved_data.entities
+ new_invert = new_data.entities
all_addrs = set(saved_invert.keys()).union(new_invert.keys())
@@ -227,60 +221,56 @@ def diff_json(
for addr in sorted(all_addrs)
}
+ DiffSubsectionType = dict[
+ str, tuple[ReccmpComparedEntity | None, ReccmpComparedEntity | None]
+ ]
+
# The criteria for diff judgement is in these dict comprehensions:
# Any function not in the saved file
- new_functions = {
+ new_functions: DiffSubsectionType = {
key: (saved, new) for key, (saved, new) in combined.items() if saved is None
}
# Any function now missing from the saved file
# or a non-stub -> stub conversion
- dropped_functions = {
+ dropped_functions: DiffSubsectionType = {
key: (saved, new)
for key, (saved, new) in combined.items()
if new is None
- or (
- new is not None
- and saved is not None
- and new.get("stub", False)
- and not saved.get("stub", False)
- )
+ or (new is not None and saved is not None and new.is_stub and not saved.is_stub)
}
# TODO: move these two into functions if the assessment gets more complex
# Any function with increased match percentage
# or stub -> non-stub conversion
- improved_functions = {
+ improved_functions: DiffSubsectionType = {
key: (saved, new)
for key, (saved, new) in combined.items()
if saved is not None
and new is not None
- and (
- new["matching"] > saved["matching"]
- or (not new.get("stub", False) and saved.get("stub", False))
- )
+ and (new.accuracy > saved.accuracy or (not new.is_stub and saved.is_stub))
}
# Any non-stub function with decreased match percentage
- degraded_functions = {
+ degraded_functions: DiffSubsectionType = {
key: (saved, new)
for key, (saved, new) in combined.items()
if saved is not None
and new is not None
- and new["matching"] < saved["matching"]
- and not saved.get("stub")
- and not new.get("stub")
+ and new.accuracy < saved.accuracy
+ and not saved.is_stub
+ and not new.is_stub
}
# Any function with former or current "effective" match
- entropy_functions = {
+ entropy_functions: DiffSubsectionType = {
key: (saved, new)
for key, (saved, new) in combined.items()
if saved is not None
and new is not None
- and new["matching"] == 1.0
- and saved["matching"] == 1.0
- and new.get("effective", False) != saved.get("effective", False)
+ and new.accuracy == 1.0
+ and saved.accuracy == 1.0
+ and new.is_effective_match != saved.is_effective_match
}
get_diff_str = diff_json_display(show_both_addrs, is_plain)
diff --git a/reccmp/tools/aggregate.py b/reccmp/tools/aggregate.py
new file mode 100644
index 00000000..563d5cad
--- /dev/null
+++ b/reccmp/tools/aggregate.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+from typing import Sequence
+from pathlib import Path
+from reccmp.isledecomp.utils import diff_json
+from reccmp.isledecomp.compare.report import (
+ ReccmpStatusReport,
+ combine_reports,
+ ReccmpReportDeserializeError,
+ ReccmpReportSameSourceError,
+ deserialize_reccmp_report,
+ serialize_reccmp_report,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+def write_report_file(output_file: Path, report: ReccmpStatusReport):
+ """Convert the status report to JSON and write to a file."""
+ json_str = serialize_reccmp_report(report)
+
+ with open(output_file, "w+", encoding="utf-8") as f:
+ f.write(json_str)
+
+
+def load_report_file(report_path: Path) -> ReccmpStatusReport:
+ """Deserialize from JSON at the given filename and return the report."""
+
+ with report_path.open("r", encoding="utf-8") as f:
+ return deserialize_reccmp_report(f.read())
+
+
+def deserialize_sample_files(paths: list[Path]) -> list[ReccmpStatusReport]:
+ """Deserialize all sample files and return the list of reports.
+ Does not remove duplicates."""
+ samples = []
+
+ for path in paths:
+ if path.is_file():
+ try:
+ report = load_report_file(path)
+ samples.append(report)
+ except ReccmpReportDeserializeError:
+ logger.warning("Skipping '%s' due to import error", path)
+ elif not path.exists():
+ logger.warning("File not found: '%s'", path)
+
+ return samples
+
+
+class TwoOrMoreArgsAction(argparse.Action):
+ """Support nargs=2+"""
+
+ def __call__(
+ self, parser, namespace, values: Sequence[str] | None, option_string=None
+ ):
+ assert isinstance(values, Sequence)
+ if len(values) < 2:
+ raise argparse.ArgumentError(self, "expected two or more arguments")
+
+ setattr(namespace, self.dest, values)
+
+
+class TwoOrFewerArgsAction(argparse.Action):
+ """Support nargs=(1,2)"""
+
+ def __call__(
+ self, parser, namespace, values: Sequence[str] | None, option_string=None
+ ):
+ assert isinstance(values, Sequence)
+ if len(values) not in (1, 2):
+ raise argparse.ArgumentError(self, "expected one or two arguments")
+
+ setattr(namespace, self.dest, values)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ allow_abbrev=False,
+ description="Aggregate saved accuracy reports.",
+ )
+ parser.add_argument(
+ "--diff",
+ type=Path,
+ metavar="",
+ nargs="+",
+ action=TwoOrFewerArgsAction,
+ help="Report files to diff.",
+ )
+ parser.add_argument(
+ "--output",
+ "-o",
+ type=Path,
+ metavar="",
+ help="Where to save the aggregate file.",
+ )
+ parser.add_argument(
+ "--samples",
+ type=Path,
+ metavar="",
+ nargs="+",
+ action=TwoOrMoreArgsAction,
+ help="Report files to aggregate.",
+ )
+ parser.add_argument(
+ "--no-color", "-n", action="store_true", help="Do not color the output"
+ )
+
+ args = parser.parse_args()
+
+ if not (args.samples or args.diff):
+ parser.error(
+            "expected arguments for --samples or --diff. (No input files specified)"
+ )
+
+ if not (args.output or args.diff):
+ parser.error(
+ "expected arguments for --output or --diff. (No output action specified)"
+ )
+
+ agg_report: ReccmpStatusReport | None = None
+
+ if args.samples is not None:
+ samples = deserialize_sample_files(args.samples)
+
+ if len(samples) < 2:
+ logger.error("Not enough samples to aggregate!")
+ return 1
+
+ try:
+ agg_report = combine_reports(samples)
+ except ReccmpReportSameSourceError:
+ filename_list = sorted({s.filename for s in samples})
+ logger.error(
+ "Aggregate samples are not from the same source file!\nFilenames used: %s",
+ filename_list,
+ )
+ return 1
+
+ if args.output is not None:
+ write_report_file(args.output, agg_report)
+
+ # If --diff has at least one file and we aggregated some samples this run, diff the first file and the aggregate.
+ # If --diff has two files and we did not aggregate this run, diff the files in the list.
+ if args.diff is not None:
+ saved_data = load_report_file(args.diff[0])
+
+ if agg_report is None:
+ if len(args.diff) > 1:
+ agg_report = load_report_file(args.diff[1])
+ else:
+ logger.error("Not enough files to diff!")
+ return 1
+ elif len(args.diff) == 2:
+ logger.warning(
+ "Ignoring second --diff argument '%s'.\nDiff of '%s' and aggregate report follows.",
+ args.diff[1],
+ args.diff[0],
+ )
+
+ diff_json(saved_data, agg_report, show_both_addrs=False, is_plain=args.no_color)
+
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/reccmp/tools/asmcmp.py b/reccmp/tools/asmcmp.py
index 0e5eb16c..59c11781 100755
--- a/reccmp/tools/asmcmp.py
+++ b/reccmp/tools/asmcmp.py
@@ -2,11 +2,8 @@
import argparse
import base64
-import json
import logging
import os
-from datetime import datetime
-from pathlib import Path
from pystache import Renderer # type: ignore[import-untyped]
import colorama
@@ -18,6 +15,12 @@
)
from reccmp.isledecomp.compare import Compare as IsleCompare
+from reccmp.isledecomp.compare.report import (
+ ReccmpStatusReport,
+ ReccmpComparedEntity,
+ deserialize_reccmp_report,
+ serialize_reccmp_report,
+)
from reccmp.isledecomp.formats.detect import detect_image
from reccmp.isledecomp.formats.pe import PEImage
from reccmp.isledecomp.types import EntityType
@@ -34,39 +37,21 @@
colorama.just_fix_windows_console()
-def gen_json(json_file: str, orig_file: Path, data):
- """Create a JSON file that contains the comparison summary"""
-
- # If the structure of the JSON file ever changes, we would run into a problem
- # reading an older format file in the CI action. Mark which version we are
- # generating so we could potentially address this down the road.
- json_format_version = 1
-
- # Remove the diff field
- reduced_data = [
- {key: value for (key, value) in obj.items() if key != "diff"} for obj in data
- ]
+def gen_json(json_file: str, json_str: str):
+ """Convert the status report to JSON and write to a file."""
with open(json_file, "w", encoding="utf-8") as f:
- json.dump(
- {
- "file": orig_file.name.lower(),
- "format": json_format_version,
- "timestamp": datetime.now().timestamp(),
- "data": reduced_data,
- },
- f,
- )
+ f.write(json_str)
-def gen_html(html_file, data):
+def gen_html(html_file: str, report: str):
js_path = get_asset_file("../assets/reccmp.js")
with open(js_path, "r", encoding="utf-8") as f:
reccmp_js = f.read()
output_data = Renderer().render_path(
get_asset_file("../assets/template.html"),
- {"data": data, "reccmp_js": reccmp_js},
+ {"report": report, "reccmp_js": reccmp_js},
)
with open(html_file, "w", encoding="utf-8") as htmlfile:
@@ -267,7 +252,8 @@ def main():
function_count = 0
total_accuracy = 0.0
total_effective_accuracy = 0.0
- htmlinsert = []
+
+ report = ReccmpStatusReport(filename=target.original_path.name.lower())
for match in isle_compare.compare_all():
if not args.silent and args.diff is None:
@@ -287,44 +273,38 @@ def main():
total_effective_accuracy += match.effective_ratio
# If html, record the diffs to an HTML file
- html_obj = {
- "address": f"0x{match.orig_addr:x}",
- "recomp": f"0x{match.recomp_addr:x}",
- "name": match.name,
- "matching": match.effective_ratio,
- }
-
- if match.is_effective_match:
- html_obj["effective"] = True
-
- if match.udiff is not None:
- html_obj["diff"] = match.udiff
-
- if match.is_stub:
- html_obj["stub"] = True
-
- htmlinsert.append(html_obj)
+ orig_addr = f"0x{match.orig_addr:x}"
+ recomp_addr = f"0x{match.recomp_addr:x}"
+
+ report.entities[orig_addr] = ReccmpComparedEntity(
+ orig_addr=orig_addr,
+ name=match.name,
+ accuracy=match.effective_ratio,
+ recomp_addr=recomp_addr,
+ is_effective_match=match.is_effective_match,
+ is_stub=match.is_stub,
+ diff=match.udiff,
+ )
# Compare with saved diff report.
if args.diff is not None:
with open(args.diff, "r", encoding="utf-8") as f:
- saved_data = json.load(f)
-
- diff_json(
- saved_data,
- htmlinsert,
- target.original_path,
- show_both_addrs=args.print_rec_addr,
- is_plain=args.no_color,
- )
+ saved_data = deserialize_reccmp_report(f.read())
+
+ diff_json(
+ saved_data,
+ report,
+ show_both_addrs=args.print_rec_addr,
+ is_plain=args.no_color,
+ )
## Generate files and show summary.
if args.json is not None:
- gen_json(args.json, target.original_path, htmlinsert)
+ gen_json(args.json, serialize_reccmp_report(report))
if args.html is not None:
- gen_html(args.html, json.dumps(htmlinsert))
+ gen_html(args.html, serialize_reccmp_report(report, diff_included=True))
implemented_funcs = function_count
diff --git a/setup.cfg b/setup.cfg
index 6458a6f5..629c9862 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,6 +21,7 @@ exclude =
[options.entry_points]
console_scripts =
+ reccmp-aggregate = reccmp.tools.aggregate:main
reccmp-datacmp = reccmp.tools.datacmp:main
reccmp-decomplint = reccmp.tools.decomplint:main
reccmp-project = reccmp.tools.project:main
diff --git a/tests/test_report.py b/tests/test_report.py
new file mode 100644
index 00000000..11510d3c
--- /dev/null
+++ b/tests/test_report.py
@@ -0,0 +1,118 @@
+"""Reccmp reports: files that contain the comparison result from asmcmp."""
+import pytest
+from reccmp.isledecomp.compare.report import (
+ ReccmpStatusReport,
+ ReccmpComparedEntity,
+ combine_reports,
+ ReccmpReportSameSourceError,
+)
+
+
+def create_report(
+ entities: list[tuple[str, float]] | None = None
+) -> ReccmpStatusReport:
+ """Helper to quickly set up a report to be customized further for each test."""
+ report = ReccmpStatusReport(filename="test.exe")
+ if entities is not None:
+ for addr, accuracy in entities:
+ report.entities[addr] = ReccmpComparedEntity(addr, "test", accuracy)
+
+ return report
+
+
+def test_aggregate_identity():
+ """Combine a list of one report. Should get the same report back,
+ except for expected differences like the timestamp."""
+ report = create_report([("100", 1.0), ("200", 0.5)])
+ combined = combine_reports([report])
+
+ for (a_key, a_entity), (b_key, b_entity) in zip(
+ report.entities.items(), combined.entities.items()
+ ):
+ assert a_key == b_key
+ assert a_entity.orig_addr == b_entity.orig_addr
+ assert a_entity.accuracy == b_entity.accuracy
+
+
+def test_aggregate_simple():
+ """Should choose the best score from the sample reports."""
+ x = create_report([("100", 0.8), ("200", 0.2)])
+ y = create_report([("100", 0.2), ("200", 0.8)])
+
+ combined = combine_reports([x, y])
+ assert combined.entities["100"].accuracy == 0.8
+ assert combined.entities["200"].accuracy == 0.8
+
+
+def test_aggregate_union_all_addrs():
+ """Should combine all addresses from any report."""
+ x = create_report([("100", 0.8)])
+ y = create_report([("200", 0.8)])
+
+ combined = combine_reports([x, y])
+ assert "100" in combined.entities
+ assert "200" in combined.entities
+
+
+def test_aggregate_stubs():
+ """Stub functions (i.e. do not compare asm) are considered to have 0 percent accuracy."""
+ x = create_report([("100", 0.9)])
+ y = create_report([("100", 0.5)])
+
+ # In a real report, accuracy would be zero for a stub.
+ x.entities["100"].is_stub = True
+ y.entities["100"].is_stub = False
+
+ combined = combine_reports([x, y])
+ assert combined.entities["100"].is_stub is False
+
+ # Choose the lower non-stub value
+ assert combined.entities["100"].accuracy == 0.5
+
+
+def test_aggregate_all_stubs():
+ """If all samples are stubs, preserve that setting."""
+ x = create_report([("100", 1.0)])
+
+ x.entities["100"].is_stub = True
+
+ combined = combine_reports([x, x])
+ assert combined.entities["100"].is_stub is True
+
+
+def test_aggregate_100_over_effective():
+ """Prefer 100% match over effective."""
+ x = create_report([("100", 0.9)])
+ y = create_report([("100", 1.0)])
+ x.entities["100"].is_effective_match = True
+
+ combined = combine_reports([x, y])
+ assert combined.entities["100"].is_effective_match is False
+
+
+def test_aggregate_effective_over_any():
+ """Prefer effective match over any accuracy."""
+ x = create_report([("100", 0.5)])
+ y = create_report([("100", 0.6)])
+ x.entities["100"].is_effective_match = True
+ # Y has higher accuracy score, but we could not confirm an effective match.
+
+ combined = combine_reports([x, y])
+ assert combined.entities["100"].is_effective_match is True
+
+ # Should retain original accuracy for effective match.
+ assert combined.entities["100"].accuracy == 0.5
+
+
+def test_aggregate_different_files():
+ """Should raise an exception if we try to aggregate reports
+ where the orig filename does not match."""
+ x = create_report()
+ y = create_report()
+
+ # Make sure they are different, regardless of what is set by create_report().
+ x.filename = "test.exe"
+ y.filename = "hello.exe"
+
+ with pytest.raises(ReccmpReportSameSourceError):
+ combine_reports([x, y])