Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand line number match to a range of lines #58

Merged
merged 3 commits into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion reccmp/isledecomp/compare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ def orig_bin_checker(addr: int) -> bool:
# a lineref, we can match the nameref correctly because the lineref
# was already removed from consideration.
for fun in codebase.iter_line_functions():
recomp_addr = self._lines_db.search_line(fun.filename, fun.line_number)
recomp_addr = self._lines_db.search_line(
fun.filename, fun.line_number, fun.end_line
)
if recomp_addr is not None:
self._db.set_function_pair(fun.offset, recomp_addr)
if fun.should_skip():
Expand Down
52 changes: 38 additions & 14 deletions reccmp/isledecomp/compare/lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,18 @@
import sqlite3
import logging
from functools import cache
from typing import Optional
from pathlib import Path
from reccmp.isledecomp.dir import PathResolver


_SETUP_SQL = """
DROP TABLE IF EXISTS `lineref`;
CREATE TABLE `lineref` (
CREATE TABLE lineref (
path text not null,
filename text not null,
line int not null,
addr int not null
);
CREATE INDEX `file_line` ON `lineref` (filename, line);
CREATE INDEX file_line ON lineref (filename, line);
"""


Expand Down Expand Up @@ -46,25 +44,51 @@ def add_line(self, path: str, line_no: int, addr: int):
filename = my_basename_lower(sourcepath)

self._db.execute(
"INSERT INTO `lineref` (path, filename, line, addr) VALUES (?,?,?,?)",
"INSERT INTO lineref (path, filename, line, addr) VALUES (?,?,?,?)",
(sourcepath, filename, line_no, addr),
)

def search_line(self, path: str, line_no: int) -> Optional[int]:
"""Using path and line number from FUNCTION marker,
get the address of this function in the recomp."""
def search_line(
self, path: str, line_start: int, line_end: int | None = None
) -> int | None:
"""The database contains the first line of each function, as verified by
reducing the starting list of line-offset pairs using other information from the pdb.
We want to know if exactly one function exists between line start and line end
in the given file."""

# We might not capture the end line of a function. If not, search for the start line only.
if line_end is None:
line_end = line_start

# Search using the filename from the path to limit calls to Path.samefile.
# TODO: This should be refactored. Maybe sqlite isn't suited for this and we
# should store Path objects as dict keys instead.
filename = my_basename_lower(path)
cur = self._db.execute(
"SELECT path, addr FROM `lineref` WHERE filename = ? AND line = ?",
(filename, line_no),
"SELECT path, addr FROM lineref WHERE filename = ? AND line >= ? AND line <= ?",
(filename, line_start, line_end),
)
for source_path, addr in cur.fetchall():
if my_samefile(path, source_path):
return addr

possible_functions = [
addr for source_path, addr in cur if my_samefile(path, source_path)
]
if len(possible_functions) == 1:
return possible_functions[0]

# The file has been edited since the last compile.
if len(possible_functions) > 1:
logger.error(
"Debug data out of sync with function near: %s:%d",
path,
line_start,
)
return None

# No functions matched. This could mean the file is out of sync, or that
# the function was eliminated or inlined by compiler optimizations.
logger.error(
"Failed to find function symbol with filename and line: %s:%d",
path,
line_no,
line_start,
)
return None
6 changes: 3 additions & 3 deletions reccmp/isledecomp/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,17 +478,18 @@ def read_line(self, line: str):
# to help parsing.
self.function_sig = remove_trailing_comment(line_strip)

# The range of lines for this function begins when we see a non-blank line.
self._function_starts_here()

# Now check to see if the opening curly bracket is on the
# same line. clang-format should prevent this (BraceWrapping)
# but it is easy to detect.
# If the entire function is on one line, handle that too.
if self.function_sig.endswith("{"):
self._function_starts_here()
self.state = ReaderState.IN_FUNC
elif self.function_sig.endswith("}") or self.function_sig.endswith(
"};"
):
self._function_starts_here()
self._function_done()
elif self.function_sig.endswith(");"):
# Detect forward reference or declaration
Expand All @@ -499,7 +500,6 @@ def read_line(self, line: str):
elif self.state == ReaderState.WANT_CURLY:
if line_strip == "{":
self.curly_indent_stops = line.index("{")
self._function_starts_here()
self.state = ReaderState.IN_FUNC

elif self.state == ReaderState.IN_FUNC:
Expand Down
256 changes: 256 additions & 0 deletions tests/test_parser_line_number.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
"""Verify that the reported line number of each decomp item matches expectations."""
from textwrap import dedent # Indenting is important here.
import pytest
from reccmp.isledecomp.parser.parser import DecompParser


@pytest.fixture(name="parser")
def fixture_parser():
return DecompParser()


def test_function_one_liner(parser):
"""Entire function on one line"""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test() { hello(); world++; }
"""
)
)
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 2


def test_function_newline_curly(parser):
"""Allman style: curly braces always on their own line"""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test()
{
hello();
world++;
}
"""
)
)
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 6


def test_function_sameline_curly(parser):
"""K&R or 1TBS style: curly braces never on their own line"""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test() {
hello();
world++;
}
"""
)
)
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 5


@pytest.mark.xfail(reason="TODO")
def test_function_newline_curly_with_code(parser):
"""Pico/Lisp style. Same syntax as AFXWIN1.INL"""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test()
{ hello();
world++; }
"""
)
)
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 4


def test_function_with_other_markers(parser):
"""Correct reporting for function with STRING and GLOBAL markers"""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
const char* test()
{
// GLOBAL: TEST 0x2000
// STRING: TEST 0x3000
const char* msg = "Hello";

// GLOBAL: TEST 0x4000
static int g_count = 5;

// STRING: TEST 0x5000
return "Test";
}
"""
)
)

assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 13

# Variable and string on same line
assert parser.variables[0].line_number == 6
assert parser.strings[0].line_number == 6

# Variable by itself
assert parser.variables[1].line_number == 9

# String by itself
assert parser.strings[1].line_number == 12


def test_function_allman_unexpected_function_end(parser):
"""TODO: This should probably be a syntax error instead.
Documenting current behavior of ending the existing function gracefully
when a second FUNCTION marker is detected."""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test()
{
hello();
// FUNCTION: TEST 0x5555
}
"""
)
)

# Outer function ends at the line before the second FUNCTION mark
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 4


@pytest.mark.xfail(
reason="Missed function end because closing '}' did not match tab stops of opening '{'"
)
def test_function_unequal_curly(parser):
"""Similar to previous test except that we overshoot the range of the first function."""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test()
{
hello();
}

// FUNCTION: TEST 0x5555
void test2()
{
hello();
}
"""
)
)

assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 5
assert parser.functions[1].line_number == 8
assert parser.functions[1].end_line == 11


@pytest.mark.xfail(reason="Ends function too soon")
def test_function_no_tabbing(parser):
"""Should properly manage scope level even if curly brackets are not tabbed."""
parser.read(
dedent(
"""\
// FUNCTION: TEST 0x1234
void test()
{

if (1)
{
hello();
}
}
"""
)
)

assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 9


def test_synthetic(parser):
"""The SYNTHETIC marker can only be a nameref annotation."""
parser.read(
dedent(
"""\
// SYNTHETIC: TEST 0x1234
// SYNTHETIC: HELLO 0x5678
// Test
"""
)
)

# Reported line number is the comment with the name of the function
assert parser.functions[0].line_number == 3
assert parser.functions[0].end_line == 3


def test_string(parser):
parser.read(
dedent(
"""\
// STRING: TEST 0x1234
return "Hello";
"""
)
)

# Reported line number is the one with the string
assert parser.strings[0].line_number == 2
# TODO: enable when end_line is added
# assert parser.strings[0].end_line == 2


@pytest.mark.skip(
reason="No way to properly test this without end_line attribute for strings."
)
def test_string_mutiline_concat(parser):
"""Capture multiline strings with the line continuation character (backslash)"""
parser.read(
dedent(
"""\
// STRING: TEST 0x1234
const char* test = "Hello"
"World";
"""
)
)

assert parser.strings[0].line_number == 2
# TODO: enable when end_line is added
# assert parser.strings[0].end_line == 3


@pytest.mark.xfail(reason="Does not register as a string with our line-based parser.")
def test_string_line_continuation(parser):
"""Capture multiline strings with the line continuation character (backslash)"""
parser.read(
dedent(
"""\
// STRING: TEST 0x1234
return "Hello \\
World";
"""
)
)

assert parser.strings[0].line_number == 2
# TODO: enable when end_line is added
# assert parser.strings[0].end_line == 3
Loading
Loading