Skip to content

Commit 3563f9f

Browse files
committed
fix(helpers): Lint
Fix bug in index shift and text cleanup
1 parent 82f72b0 commit 3563f9f

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

eyecite/helpers.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -413,11 +413,12 @@ def find_case_name_in_html(
413413
defendant, _ = convert_html_to_plain_text_and_loc(
414414
document, defendant_tags
415415
)
416-
417416
clean_plaintiff = strip_stop_words(plaintiff)
418417

419-
citation.metadata.plaintiff = clean_plaintiff
420-
citation.metadata.defendant = strip_stop_words(defendant)
418+
citation.metadata.plaintiff = clean_plaintiff.strip().strip(",")
419+
citation.metadata.defendant = (
420+
strip_stop_words(defendant).strip().strip(",")
421+
)
421422

422423
# Update full span start accordingly
423424
if len(clean_plaintiff) != len(plaintiff):
@@ -431,9 +432,10 @@ def find_case_name_in_html(
431432
# stopped at a stop word, work forward to possible title
432433
# this should be at least two words (including whitespace)
433434
# but with html could be more.
434-
shift = index + 2
435+
# shift = index + 2
436+
shift = 3
435437
while True:
436-
if words[shift] == " ":
438+
if words[index + shift] == " ":
437439
shift += 1
438440
else:
439441
break
@@ -443,14 +445,16 @@ def find_case_name_in_html(
443445
loc = word.start + right_offset - 1
444446
# find a character in the word
445447
filtered_tags = find_html_tags_at_position(document, loc)
446-
447448
if len(filtered_tags) != 1:
448449
return None
449450

450451
defendant, start = convert_html_to_plain_text_and_loc(
451452
document, filtered_tags
452453
)
453-
citation.metadata.defendant = strip_stop_words(defendant)
454+
455+
citation.metadata.defendant = strip_stop_words(defendant).strip(
456+
", "
457+
)
454458
citation.full_span_start = start
455459
return
456460

@@ -484,6 +488,7 @@ def convert_html_to_plain_text_and_loc(
484488
Returns: The plain-text case name and the location where it starts
485489
"""
486490
markup_location = results[0]
491+
487492
start = document.markup_to_plain.update( # type: ignore
488493
markup_location[1],
489494
bisect_right,
@@ -493,6 +498,7 @@ def convert_html_to_plain_text_and_loc(
493498
bisect_right,
494499
)
495500
case_name = document.plain_text[start:end]
501+
# print("------->", case_name, start, markup_location)
496502
return (case_name, start)
497503

498504

0 commit comments

Comments
 (0)