Skip to content

Commit 8a225d3

Browse files
committed
fix(benchmark): Update benchmark for new command
Refactor call to use cleaning inside method
1 parent cfd9f4b commit 8a225d3

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

benchmark/benchmark.py

+4 -8
Original file line number | Diff line number | Diff line change
@@ -52,19 +52,15 @@ def generate_branch_report(self, branch: str) -> None:
5252
or row["html_anon_2020"]
5353
or row["html"]
5454
)
55+
params = {"clean_steps": ["html", "inline_whitespace"]}
5556
if text:
5657
# Remove XML encodings from xml_harvard
5758
text = re.sub(r"^<\?xml.*?\?>", "", text, count=1)
58-
opinion_text_is_marked_up = True
59+
params['markup_text'] = text or ""
5960
else:
60-
text = row["plain_text"]
61-
opinion_text_is_marked_up = False
61+
params['markup_text'] = row['plain_text']
6262

63-
plain_text = clean_text(text, ["html", "inline_whitespace"])
64-
found_citations = get_citations(
65-
plain_text,
66-
markup_text=text if opinion_text_is_marked_up else "",
67-
)
63+
found_citations = get_citations(**params)
6864

6965
# Get the citation text string from the cite object
7066
cites = [cite.token.data for cite in found_citations if cite.token]

0 commit comments

Comments (0)