From 8e66ed1d06fb8fcad06b6b03247a17bc867820ba Mon Sep 17 00:00:00 2001
From: William Palin
Date: Wed, 19 Mar 2025 13:30:06 -0400
Subject: [PATCH 1/6] feat(eyecite): Init Document object for better citation
parsing
With the introduction of markup parsing,
handling multiple parameters became unwieldy.
To address this, I added a `Document` object that encapsulates:
- Plain and markup text
- Span updates for text mapping
- Tokenized words and extracted citation tokens
This refactor should enable:
- **More complex parsing**, using HTML
- **Simplified logic** by reducing the number of parameters passed around
Additionally, it simplifies the API call:
a user can now pass in either markup or plain text,
without needing to supply both.
---
eyecite/find.py | 111 ++++++++++++++++---------------------
eyecite/models.py | 40 +++++++++++++
tests/test_AnnotateTest.py | 15 +++--
tests/test_FindTest.py | 46 ++++++++-------
tests/test_ResolveTest.py | 8 ++-
5 files changed, 131 insertions(+), 89 deletions(-)
diff --git a/eyecite/find.py b/eyecite/find.py
index de322d60..04577753 100644
--- a/eyecite/find.py
+++ b/eyecite/find.py
@@ -1,8 +1,7 @@
import re
from bisect import bisect_left, bisect_right
-from typing import List, Optional, Type, cast
+from typing import Callable, Iterable, List, Optional, Type, Union, cast
-from eyecite.annotate import SpanUpdater
from eyecite.helpers import (
disambiguate_reporters,
extract_pin_cite,
@@ -14,6 +13,7 @@
CaseReferenceToken,
CitationBase,
CitationToken,
+ Document,
FullCaseCitation,
FullCitation,
FullJournalCitation,
@@ -35,15 +35,16 @@
def get_citations(
- plain_text: str,
+ plain_text: str = "",
remove_ambiguous: bool = False,
tokenizer: Tokenizer = default_tokenizer,
markup_text: str = "",
+ clean_steps: Optional[Iterable[Union[str, Callable[[str], str]]]] = None,
) -> List[CitationBase]:
"""This is eyecite's main workhorse function. Given a string of text
- (e.g., a judicial opinion or other legal document), return a list of
+ (e.g., a judicial opinion or other legal doc), return a list of
`eyecite.models.CitationBase` objects representing the citations found
- in the document.
+ in the doc.
Args:
plain_text: The text to parse. You may wish to use the
@@ -57,6 +58,7 @@ def get_citations(
markup_text: if the source text has markup (XML or HTML mostly), pass
it to extract ReferenceCitations that may be detectable via
markup style tags
+ clean_steps: Cleanup steps and methods
Returns:
A list of `eyecite.models.CitationBase` objects
@@ -64,16 +66,14 @@ def get_citations(
if plain_text == "eyecite":
return joke_cite
- words, citation_tokens = tokenizer.tokenize(plain_text)
+ document = Document(
+ plain_text=plain_text,
+ markup_text=markup_text,
+ clean_steps=clean_steps,
+ )
+ document.tokenize(tokenizer=tokenizer)
citations: list[CitationBase] = []
-
- if markup_text:
- plain_to_markup = SpanUpdater(plain_text, markup_text)
- markup_to_plain = SpanUpdater(markup_text, plain_text)
- else:
- plain_to_markup, markup_to_plain = None, None
-
- for i, token in citation_tokens:
+ for i, token in document.citation_tokens:
citation: CitationBase
token_type = type(token)
@@ -84,36 +84,30 @@ def get_citations(
if token_type is CitationToken:
citation_token = cast(CitationToken, token)
if citation_token.short:
- citation = _extract_shortform_citation(words, i)
+ citation = _extract_shortform_citation(document.words, i)
else:
- citation = _extract_full_citation(words, i)
+ citation = _extract_full_citation(document.words, i)
if citations and isinstance(citation, FullCitation):
citation.is_parallel_citation(citations[-1])
- # Check for reference citations that follow a full citation
- # Using the plaintiff or defendant
- references = extract_reference_citations(
- citation,
- plain_text,
- markup_text,
- plain_to_markup,
- markup_to_plain,
- )
- citations.extend(references)
+ # Check for reference citations that follow a full citation
+ # Using the plaintiff or defendant
+ references = extract_reference_citations(citation, document)
+ citations.extend(references)
# CASE 2: Token is an "Id." or "Ibid." reference.
# In this case, the citation should simply be to the item cited
# immediately prior, but for safety we will leave that resolution up
# to the user.
elif token_type is IdToken:
- citation = _extract_id_citation(words, i)
+ citation = _extract_id_citation(document.words, i)
# CASE 3: Token is a "supra" reference.
# In this case, we're not sure yet what the citation's antecedent is.
# It could be any of the previous citations above. Thus, like an Id.
# citation, for safety we won't resolve this reference yet.
elif token_type is SupraToken:
- citation = _extract_supra_citation(words, i)
+ citation = _extract_supra_citation(document.words, i)
# CASE 4: Token is a section marker.
# In this case, it's likely that this is a reference to a citation,
@@ -137,48 +131,36 @@ def get_citations(
citations = disambiguate_reporters(citations)
# Returns a list of citations ordered in the sequence that they appear in
- # the document. The ordering of this list is important for reconstructing
+ # the doc. The ordering of this list is important for reconstructing
# the references of the ShortCaseCitation, SupraCitation, and
# IdCitation and ReferenceCitation objects.
return citations
def extract_reference_citations(
- citation: FullCitation,
- plain_text: str,
- markup_text: str = "",
- plain_to_markup: Optional[SpanUpdater] = None,
- markup_to_plain: Optional[SpanUpdater] = None,
+ citation: ResourceCitation, document: Document
) -> List[ReferenceCitation]:
"""Extract reference citations that follow a full citation
:param citation: the full case citation found
- :param plain_text: the text
- :param markup_text: optional argument for source text with XML style tags
- that may help extracting name-only ReferenceCitations
- :param plain_to_markup: a SpanUpdater from plain or clean text to
- marked up text
- :param markup_to_plain: a SpanUpdater from marked up text to plain text
+ :param document: document object to parse
:return: Reference citations
"""
- if len(plain_text) <= citation.span()[-1]:
+ if len(document.plain_text) <= citation.span()[-1]:
return []
if not isinstance(citation, FullCaseCitation):
return []
reference_citations = extract_pincited_reference_citations(
- citation, plain_text
+ citation, document.plain_text
)
- if markup_text:
+ if document.markup_text:
reference_citations.extend(
find_reference_citations_from_markup(
- markup_text,
- plain_text,
+ document,
[citation],
- plain_to_markup,
- markup_to_plain,
)
)
@@ -392,11 +374,8 @@ def _extract_id_citation(
def find_reference_citations_from_markup(
- markup_text: str,
- plain_text: str,
+ document: Document,
citations: list,
- plain_to_markup: Optional[SpanUpdater] = None,
- markup_to_plain: Optional[SpanUpdater] = None,
) -> list[ReferenceCitation]:
"""Use HTML/XML style tags and parties names to find ReferenceCitations
@@ -420,11 +399,6 @@ def find_reference_citations_from_markup(
:return: a list of ReferenceCitations
"""
- if not markup_to_plain:
- markup_to_plain = SpanUpdater(markup_text, plain_text)
- if not plain_to_markup:
- plain_to_markup = SpanUpdater(plain_text, markup_text)
-
references = []
tags = "|".join(["em", "i"])
@@ -453,30 +427,39 @@ def find_reference_citations_from_markup(
# `utils.maybe_balance_style tags` for reference; it has some tolerance
# which may be enough for these citations
regex = rf"<(?:{tags})>\s*({'|'.join(regexes)})[:;.,\s]*(?:{tags})>"
- start_in_markup = plain_to_markup.update(
+
+ if (
+ not document.plain_to_markup
+ or not document.markup_to_plain
+ or not document.markup_text
+ ):
+ # ensure we have markup text
+ return []
+ start_in_markup = document.plain_to_markup.update(
citation.span()[0], bisect_right
)
- for match in re.finditer(regex, markup_text[start_in_markup:]):
- full_start_in_plain = markup_to_plain.update(
+ for match in re.finditer(
+ regex, document.markup_text[start_in_markup:]
+ ):
+ full_start_in_plain = document.markup_to_plain.update(
start_in_markup + match.start(), bisect_left
)
- full_end_in_plain = markup_to_plain.update(
+ full_end_in_plain = document.markup_to_plain.update(
start_in_markup + match.end(), bisect_right
)
# the first group [match.group(0)] is the whole match,
# with whitespace and punctuation. the second group, match.group(1)
# is the only capturing and named group
- start_in_plain = markup_to_plain.update(
+ start_in_plain = document.markup_to_plain.update(
start_in_markup + match.start(1), bisect_left
)
- end_in_plain = markup_to_plain.update(
+ end_in_plain = document.markup_to_plain.update(
start_in_markup + match.end(1), bisect_right
)
-
reference = ReferenceCitation(
token=CaseReferenceToken(
- data=plain_text[start_in_plain:end_in_plain],
+ data=document.plain_text[start_in_plain:end_in_plain],
start=start_in_plain,
end=end_in_plain,
),
diff --git a/eyecite/models.py b/eyecite/models.py
index 0090d584..5e78eb4e 100644
--- a/eyecite/models.py
+++ b/eyecite/models.py
@@ -7,6 +7,7 @@
Callable,
Dict,
Hashable,
+ Iterable,
List,
Optional,
Sequence,
@@ -15,6 +16,8 @@
cast,
)
+from eyecite import clean_text
+from eyecite.annotate import SpanUpdater
from eyecite.utils import REPORTERS_THAT_NEED_PAGE_CORRECTION, hash_sha256
ResourceType = Hashable
@@ -861,3 +864,40 @@ def __hash__(self):
def __eq__(self, other):
return self.__hash__() == other.__hash__()
+
+
+@dataclass(eq=False, unsafe_hash=False)
+class Document:
+    """Text to parse (plain and/or markup) plus its tokens and span maps."""
+
+    plain_text: str = ""
+    markup_text: Optional[str] = ""
+    citation_tokens: list[Tuple[int, Token]] = field(default_factory=list)
+    words: Tokens = field(default_factory=list)
+    plain_to_markup: Optional[SpanUpdater] = field(default=None, init=False)
+    markup_to_plain: Optional[SpanUpdater] = field(default=None, init=False)
+    clean_steps: Optional[Iterable[Union[str, Callable[[str], str]]]] = field(
+        default_factory=list
+    )
+
+    def __post_init__(self):
+        """Clean the input; raise ValueError if markup lacks an `html` step."""
+        # `None` (the `get_citations` default) and one-shot iterables are both
+        # turned into a list, since the steps are read more than once below.
+        self.clean_steps = steps = list(self.clean_steps or ())
+        if self.plain_text and steps:
+            self.plain_text = clean_text(self.plain_text, steps)
+        if not self.markup_text:
+            return
+        if "html" not in steps:
+            raise ValueError(
+                "`html` is a required cleanup step for markup text"
+            )
+        # Markup wins: derive the plain text from it, then map spans both ways.
+        self.plain_text = clean_text(self.markup_text, steps)
+        self.plain_to_markup = SpanUpdater(self.plain_text, self.markup_text)
+        self.markup_to_plain = SpanUpdater(self.markup_text, self.plain_text)
+
+    def tokenize(self, tokenizer):
+        """Tokenize `plain_text` and store the words and citation tokens."""
+        self.words, self.citation_tokens = tokenizer.tokenize(self.plain_text)
diff --git a/tests/test_AnnotateTest.py b/tests/test_AnnotateTest.py
index 46a49b2d..c4fa127f 100644
--- a/tests/test_AnnotateTest.py
+++ b/tests/test_AnnotateTest.py
@@ -3,6 +3,7 @@
from unittest import TestCase
from eyecite import annotate_citations, clean_text, get_citations
+from eyecite.models import Document
from eyecite.utils import maybe_balance_style_tags
@@ -207,12 +208,18 @@ def lower_annotator(before, text, after):
clean_steps=clean_steps,
annotate_args=annotate_kwargs,
):
- get_citations_args = {}
if annotate_kwargs.pop("use_markup", False):
get_citations_args = {"markup_text": source_text}
+ else:
+ get_citations_args = {"plain_text": source_text}
- plain_text = clean_text(source_text, clean_steps)
- cites = get_citations(plain_text, **get_citations_args)
+ document = Document(
+ **get_citations_args, clean_steps=clean_steps
+ )
+
+ cites = get_citations(
+ **get_citations_args, clean_steps=clean_steps
+ )
annotations = [
(c.span(), f"<{i}>", f"{i}>")
for i, c in enumerate(cites)
@@ -225,7 +232,7 @@ def lower_annotator(before, text, after):
]
annotated = annotate_citations(
- plain_text,
+ document.plain_text,
annotations,
source_text=source_text,
**annotate_kwargs,
diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py
index 51ba4ef7..64406e79 100644
--- a/tests/test_FindTest.py
+++ b/tests/test_FindTest.py
@@ -3,13 +3,14 @@
from datetime import datetime
from unittest import TestCase
-from eyecite import clean_text, get_citations
+from eyecite import get_citations
from eyecite.find import extract_reference_citations
from eyecite.helpers import filter_citations
# by default tests use a cache for speed
# call tests with `EYECITE_CACHE_DIR= python ...` to disable cache
from eyecite.models import (
+ Document,
FullCaseCitation,
ReferenceCitation,
ResourceCitation,
@@ -60,15 +61,17 @@ def get_comparison_attrs(cite):
tokenizers = tested_tokenizers
for q, expected_cites, *kwargs in test_pairs:
kwargs = kwargs[0] if kwargs else {}
- clean_steps = kwargs.pop("clean", [])
- clean_q = clean_text(q, clean_steps)
+ clean_steps = kwargs.get("clean_steps", [])
for tokenizer in tokenizers:
with self.subTest(
message, tokenizer=type(tokenizer).__name__, q=q
):
- cites_found = get_citations(
- clean_q, tokenizer=tokenizer, **kwargs
- )
+ if "html" in clean_steps:
+ kwargs["markup_text"] = q
+ else:
+ kwargs["plain_text"] = q
+
+ cites_found = get_citations(tokenizer=tokenizer, **kwargs)
self.assertEqual(
[type(i) for i in cites_found],
[type(i) for i in expected_cites],
@@ -93,11 +96,11 @@ def test_find_citations(self):
# Basic test with a line break
('1 U.S.\n1',
[case_citation()],
- {'clean': ['all_whitespace']}),
+ {'clean_steps': ['all_whitespace']}),
# Basic test with a line break within a reporter
('1 U.\nS. 1',
[case_citation(reporter_found='U. S.')],
- {'clean': ['all_whitespace']}),
+ {'clean_steps': ['all_whitespace']}),
# Basic test of non-case name before citation (should not be found)
('lissner test 1 U.S. 1',
[case_citation()]),
@@ -259,7 +262,7 @@ def test_find_citations(self):
[supra_citation("supra,",
metadata={'pin_cite': 'at 2',
'antecedent_guess': 'asdf'})],
- {'clean': ['all_whitespace']}),
+ {'clean_steps': ['all_whitespace']}),
# Test short form citation with a page range
('before asdf, 1 U. S., at 20-25',
[case_citation(page='20', reporter_found='U. S.', short=True,
@@ -383,7 +386,7 @@ def test_find_citations(self):
# Test italicized Ibid. citation
('before asdf. Ibid.
foo bar lorem
',
[id_citation('Ibid.')],
- {'clean': ['html', 'inline_whitespace']}),
+ {'clean_steps': ['html', 'inline_whitespace']}),
# Test Id. citation
('foo v. bar 1 U.S. 12, 347-348. asdf. Id., at 123. foo bar',
[case_citation(page='12',
@@ -399,15 +402,15 @@ def test_find_citations(self):
'defendant': 'bar',
'pin_cite': '347-348'}),
id_citation('Id.,', metadata={'pin_cite': 'at 123'})],
- {'clean': ['all_whitespace']}),
+ {'clean_steps': ['all_whitespace']}),
# Test italicized Id. citation
('before asdf. Id., at 123.
foo bar
',
[id_citation('Id.,', metadata={'pin_cite': 'at 123'})],
- {'clean': ['html', 'inline_whitespace']}),
+ {'clean_steps': ['html', 'inline_whitespace']}),
# Test italicized Id. citation with another HTML tag in the way
('before asdf. Id., at 123.
foo bar
',
[id_citation('Id.,', metadata={'pin_cite': 'at 123'})],
- {'clean': ['html', 'inline_whitespace']}),
+ {'clean_steps': ['html', 'inline_whitespace']}),
# Test weirder Id. citations (#1344)
('foo v. bar 1 U.S. 12, 347-348. asdf. Id. ΒΆ 34. foo bar',
[case_citation(page='12',
@@ -517,7 +520,7 @@ def test_find_citations(self):
metadata={'plaintiff': None,
'defendant': None,
'court': 'scotus'})],
- {'clean': ['html', 'inline_whitespace']}),
+ {'clean_steps': ['html', 'inline_whitespace']}),
# Test filtering overlapping citations - this finds four citations
# but should filter down to three
("Miles v. Smith 1 Ga. 1; asdfasdf asd Something v. Else, 1 Miles 3; 1 Miles at 10",
@@ -1012,7 +1015,10 @@ def test_reference_extraction_using_resolved_names(self):
citations = get_citations(plain_text)
found_cite = citations[0]
found_cite.metadata.resolved_case_name = "State v. Wingler"
- references = extract_reference_citations(found_cite, plain_text)
+ document = Document(plain_text=plain_text, markup_text="")
+ references = extract_reference_citations(
+ citation=found_cite, document=document
+ )
final_citations = filter_citations(citations + references)
self.assertEqual(
len(final_citations), 2, "There should only be 2 citations"
@@ -1043,8 +1049,9 @@ def test_reference_extraction_from_markup(self):
ex post facto scrutiny simply because it is consistent with
punitive goals as well.\" 44 F.3d at 493.
"""
- plain_text = clean_text(markup_text, ["html", "all_whitespace"])
- citations = get_citations(plain_text, markup_text=markup_text)
+ citations = get_citations(
+ markup_text=markup_text, clean_steps=["html", "all_whitespace"]
+ )
references = [c for c in citations if isinstance(c, ReferenceCitation)]
# Tests both for the order and exact counts. Note that there is one
# "Bae" in the text that should not be picked up: "Bae's argument"...
@@ -1083,8 +1090,9 @@ def test_reference_filtering(self):
""",
]
for markup_text in texts:
- plain_text = clean_text(markup_text, ["html", "all_whitespace"])
- citations = get_citations(plain_text, markup_text=markup_text)
+ citations = get_citations(
+ markup_text=markup_text, clean_steps=["html", "all_whitespace"]
+ )
self.assertFalse(
any(
[isinstance(cite, ReferenceCitation) for cite in citations]
diff --git a/tests/test_ResolveTest.py b/tests/test_ResolveTest.py
index 6c4539c4..d40fcb1b 100644
--- a/tests/test_ResolveTest.py
+++ b/tests/test_ResolveTest.py
@@ -5,7 +5,7 @@
from eyecite import get_citations
from eyecite.find import extract_reference_citations
from eyecite.helpers import filter_citations
-from eyecite.models import FullCitation, Resource
+from eyecite.models import Document, FullCitation, Resource
from eyecite.resolve import resolve_citations
@@ -52,6 +52,10 @@ def checkReferenceResolution(
Returns:
None
"""
+
+ document = Document(
+ plain_text=citation_text,
+ )
citations = get_citations(citation_text)
if resolved_case_name_short:
citations[0].metadata.resolved_case_name_short = (
@@ -59,7 +63,7 @@ def checkReferenceResolution(
)
citations.extend(
extract_reference_citations(
- citations[0], citation_text # type: ignore[arg-type]
+ citations[0], document # type: ignore[arg-type]
)
)
citations = filter_citations(citations)
From de4cfd9b944af6bfa0a9891f83d456161d5fa575 Mon Sep 17 00:00:00 2001
From: William Palin
Date: Wed, 19 Mar 2025 13:44:45 -0400
Subject: [PATCH 2/6] chore(changes): Update changes.md
---
CHANGES.md | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index a03c6f06..a30173a0 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -5,10 +5,13 @@
The following changes are not yet released, but are code complete:
Features:
-- None
+- Introduced a `Document` object to encapsulate plain text, markup text, span updaters, tokenized words, and citation tokens.
+- Simplifies citation processing by reducing parameter passing and improving maintainability.
+- Should enable more complex HTML parsing.
Changes:
-- None
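+- `get_citations` now cleans text itself via the new `clean_steps` argument, so markup can be passed directly without pre-cleaning
+- `extract_reference_citations` and `find_reference_citations_from_markup` now take a `Document` instead of separate text and `SpanUpdater` arguments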
Fixes:
- Prefer the other full citation on overlap with nominative reporter
From 5a3e813a06720861b978cbb2c07569e4af24f403 Mon Sep 17 00:00:00 2001
From: William Palin
Date: Thu, 20 Mar 2025 16:12:37 -0400
Subject: [PATCH 3/6] chore(find.py): Fix docstring
---
eyecite/find.py | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/eyecite/find.py b/eyecite/find.py
index 04577753..75b5ef6e 100644
--- a/eyecite/find.py
+++ b/eyecite/find.py
@@ -389,13 +389,9 @@ def find_reference_citations_from_markup(
Creating the SpanUpdaters for each full citation will be too slow,
re-use them if possible
- :param markup_text: HTML or XML source
- :param plain_text: cleaned text
+ :param document: Document object we are parsing
:param citations: list of citations found over plain text. The full cites
will be used to access parties names metadata
- :param plain_to_markup: a SpanUpdater from plain or clean text to
- marked up text
- :param markup_to_plain: a SpanUpdater from marked up text to plain text
:return: a list of ReferenceCitations
"""
From 8a225d37f4ffa5938996ffd359ceda5cb33af48f Mon Sep 17 00:00:00 2001
From: William Palin
Date: Fri, 21 Mar 2025 09:30:46 -0400
Subject: [PATCH 4/6] fix(benchmark): Update benchmark for new command
Refactor call to use cleaning inside method
---
benchmark/benchmark.py | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index f7e48f30..7414a2be 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -52,19 +52,15 @@ def generate_branch_report(self, branch: str) -> None:
or row["html_anon_2020"]
or row["html"]
)
+ params = {"clean_steps": ["html", "inline_whitespace"]}
if text:
# Remove XML encodings from xml_harvard
text = re.sub(r"^<\?xml.*?\?>", "", text, count=1)
- opinion_text_is_marked_up = True
+ params['markup_text'] = text or ""
else:
- text = row["plain_text"]
- opinion_text_is_marked_up = False
+ params['markup_text'] = row['plain_text']
- plain_text = clean_text(text, ["html", "inline_whitespace"])
- found_citations = get_citations(
- plain_text,
- markup_text=text if opinion_text_is_marked_up else "",
- )
+ found_citations = get_citations(**params)
# Get the citation text string from the cite object
cites = [cite.token.data for cite in found_citations if cite.token]
From e3415108c229890d7621d308c4f79d078ab2a661 Mon Sep 17 00:00:00 2001
From: William Palin
Date: Fri, 21 Mar 2025 09:42:15 -0400
Subject: [PATCH 5/6] fix(benchmark): Lint
---
benchmark/benchmark.py | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 7414a2be..a951b7c2 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -8,10 +8,11 @@
import sys
from io import StringIO
from pathlib import Path
+from typing import Any, Dict
from matplotlib import pyplot as plt # type: ignore
-from eyecite import clean_text, get_citations
+from eyecite import get_citations
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))
@@ -45,20 +46,22 @@ def generate_branch_report(self, branch: str) -> None:
now = datetime.datetime.now()
data = []
for row in csv_data:
- text = (
+ text: str = (
row["xml_harvard"]
or row["html_lawbox"]
or row["html_columbia"]
or row["html_anon_2020"]
or row["html"]
)
- params = {"clean_steps": ["html", "inline_whitespace"]}
+ params: Dict[str, Any] = {
+ "clean_steps": ["html", "inline_whitespace"]
+ }
if text:
# Remove XML encodings from xml_harvard
text = re.sub(r"^<\?xml.*?\?>", "", text, count=1)
- params['markup_text'] = text or ""
+ params["markup_text"] = text or ""
else:
- params['markup_text'] = row['plain_text']
+ params["markup_text"] = row["plain_text"]
found_citations = get_citations(**params)
From ba78e8e5857ef6a43e01984185359414b2da5b60 Mon Sep 17 00:00:00 2001
From: William Palin
Date: Fri, 21 Mar 2025 09:48:40 -0400
Subject: [PATCH 6/6] fix(find): Check only for references after full citation
---
eyecite/find.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/eyecite/find.py b/eyecite/find.py
index 43ab31de..cef72d1c 100644
--- a/eyecite/find.py
+++ b/eyecite/find.py
@@ -95,10 +95,10 @@ def get_citations(
pre = cast(FullCaseCitation, citations[-1]) # type: ignore
citation.is_parallel_citation(pre)
- # Check for reference citations that follow a full citation
- # Using the plaintiff or defendant
- references = extract_reference_citations(citation, document)
- citations.extend(references)
+ # Check for reference citations that follow a full citation
+ # Using the plaintiff or defendant
+ references = extract_reference_citations(citation, document)
+ citations.extend(references)
# CASE 2: Token is an "Id." or "Ibid." reference.
# In this case, the citation should simply be to the item cited