@@ -11,7 +11,7 @@ def straighten_quotes(text):
11
11
12
12
def lower_annotator(before, text, after):
    """Join an annotation around ``text``, lower-casing only ``text``.

    This helper is supplied as the ``"annotator"`` option in one of the
    test pairs, so the expected output for that pair has the cited text
    in lower case while the opening/closing markup is left untouched.

    Args:
        before: Markup placed in front of the annotated text.
        text: The annotated span itself; it is lower-cased.
        after: Markup placed after the annotated text.

    Returns:
        The concatenation ``before + text.lower() + after``.
    """
    # Only the middle segment is transformed; the wrappers pass through
    # verbatim so tag names/attributes keep their original casing.
    return before + text.lower() + after
14
-
14
+ self . maxDiff = None
15
15
test_pairs = (
16
16
# single cite
17
17
("1 U.S. 1" , "<0>1 U.S. 1</0>" , []),
@@ -59,10 +59,10 @@ def lower_annotator(before, text, after):
59
59
"<body>foo <i><0>1 <b>U.S.</b></i> 1</0> bar</body>" ,
60
60
["html" , "inline_whitespace" ],
61
61
),
62
- # whitespace and html -- skip unbalanced tags
62
+ # whitespace and html -- unbalanced tags are repaired
63
63
(
64
64
"foo <i>1 U.S.</i> 1; 2 <i>U.S.</i> 2" ,
65
- "foo <i>1 U.S.</i> 1; <1>2 <i>U.S.</i> 2</1>" ,
65
+ "foo <0>< i>1 U.S.</i> 1</0> ; <1>2 <i>U.S.</i> 2</1>" ,
66
66
["html" , "inline_whitespace" ],
67
67
{"unbalanced_tags" : "skip" },
68
68
),
@@ -101,6 +101,98 @@ def lower_annotator(before, text, after):
101
101
[],
102
102
{"annotator" : lower_annotator },
103
103
),
104
+ # solvable unbalanced <em> tag. Need the FullCaseCitation first
105
+ # so the ReferenceCitation can be found
106
+ # from https://www.courtlistener.com/api/rest/v4/opinions/8496639/
107
+ # source: Opinion.xml_harvard
108
+ (
109
+ " partially secured by a debtor’s principal residence was not "
110
+ "con-firmable. <em>Nobelman v. Am. Sav. Bank, </em>"
111
+ "508 U.S. 324, 113 S.Ct. 2106, 124 L.Ed.2d 228 (1993). That "
112
+ "plan proposed to bifurcate the claim and... pay the unsecured"
113
+ "... only by a lien on the debtor’s principal residence.” "
114
+ "<em>Nobelman </em>at 332, 113 S.Ct. 2106. Section 1123(b)(5) "
115
+ "codifies the <em>Nobelman </em>decision in individual debtor "
116
+ "chapter 11 cases." ,
117
+ " partially secured by a debtor’s principal residence was not"
118
+ " con-firmable. <em>Nobelman v. Am. Sav. Bank, </em>"
119
+ "<a href='something'>508 U.S. 324</a>, <a href='something'>"
120
+ "113 S.Ct. 2106</a>, <a href='something'>124 L.Ed.2d 228</a>"
121
+ " (1993). That plan proposed to bifurcate the claim and..."
122
+ " pay the unsecured... only by a lien on the debtor’s"
123
+ " principal residence.” <a href='something'><em>Nobelman </em>"
124
+ "at 332</a>, <a href='something'>113 S.Ct. 2106</a>. Section"
125
+ " 1123(b)(5) codifies the <em>Nobelman </em>decision in"
126
+ " individual debtor chapter 11 cases." ,
127
+ ["html" , "all_whitespace" ],
128
+ {"annotate_anchors" : True , "unbalanced_tags" : "skip" }
129
+ ),
130
+ # solvable unbalanced <i> tag
131
+ # from https://www.courtlistener.com/api/rest/v4/opinions/2841253/
132
+ # source: Opinion.html
133
+ (
134
+ "he has not agreed so to submit.’” <i>Howsam v. Dean"
135
+ " Witter Reynolds, Inc.</i>, 537 U.S. 79, 83, 123 S. Ct."
136
+ " 588, 591 (2002) (combined mandamus and"
137
+ " interlocutory appeal) (citing <i>Howsam</i> at 84, 123"
138
+ " S. Ct. at 592)" ,
139
+
140
+ "he has not agreed so to submit.’” <i>Howsam v. Dean"
141
+ " Witter Reynolds, Inc.</i>, <a href='something'>537 U.S."
142
+ " 79</a>, 83, <a href='something'>123 S. Ct. 588</a>, 591"
143
+ " (2002) (combined mandamus and interlocutory appeal)"
144
+ " (citing <a href='something'><i>Howsam</i> at 84</a>, <a"
145
+ " href='something'>123 S. Ct. at 592</a>)" ,
146
+
147
+ ["html" , "all_whitespace" ],
148
+ {"annotate_anchors" : True , "unbalanced_tags" : "skip" }
149
+ ),
150
+ # The next 2 examples could be resolved if we increased the
151
+ # character tolerance or admitted the full case name instead of
152
+ # just one of the parties
153
+ (
154
+ # https://www.courtlistener.com/api/rest/v4/opinions/1535649/
155
+ # source: xml_harvard
156
+ "See also Styler v. Tall Oaks, Inc. (In re Hatch),"
157
+ " 93 B.R. 263, 267 (Bankr.D. Utah 1988),"
158
+ " <em> rev'd </em> 114 B.R. 747 (D.Utah 1989)."
159
+ "</p>... The court makes no"
160
+ " determination as to whe Fifth Amendment to the"
161
+ " constitution of the United States.” <em> Styler v."
162
+ " Tall Oaks, Inc. (In re Hatch), </em> at 748."
163
+ "</p>" ,
164
+ "See also Styler v. Tall Oaks, Inc. (In re Hatch),"
165
+ " <a href='something'>93 B.R. 263</a>, 267"
166
+ " (Bankr.D. Utah 1988), <em> rev'd </em> <a"
167
+ " href='something'>114 B.R. 747</a> (D.Utah 1989)."
168
+ "</p>... The court makes no"
169
+ " determination as to whe Fifth Amendment to the"
170
+ " constitution of the United States.” <em> Styler v."
171
+ " Tall Oaks, Inc. (In re Hatch), </em> at 748."
172
+ "</p>" ,
173
+ ["html" , "all_whitespace" ],
174
+ {"annotate_anchors" : True , "unbalanced_tags" : "skip" }
175
+ ),
176
+ (
177
+ # https://www.courtlistener.com/api/rest/v4/opinions/1985850/
178
+ # source: html_lawbox
179
+ "to act rationally. <i>See, e.g., </i><i>State v."
180
+ " Wingler,</i> 25 <i>N.J.</i> 161, 175, 135 <i>A.</i>2d"
181
+ " 468 (1957); <i>citing, ... have been applied.'"
182
+ " [<i>State v. Wingler</i> at 175, 135 <i>A.</i>2d"
183
+ " 468, <i>citing, </i><i>Minnesota ex rel.</i>" ,
184
+
185
+ "to act rationally. <i>See, e.g., </i><i>State v."
186
+ " Wingler,</i> <a href='something'>25 <i>N.J.</i>"
187
+ " 161</a>, 175, <a href='something'>135 <i>A.</i>2d"
188
+ " 468</a> (1957); <i>citing, ... have been applied.'"
189
+ " [<i>State v. Wingler</i> at 175, <a"
190
+ " href='something'>135 <i>A.</i>2d 468</a>, <i>citing,"
191
+ " </i><i>Minnesota ex rel.</i>" ,
192
+ ["html" , "all_whitespace" ],
193
+ {"annotate_anchors" : True , "unbalanced_tags" : "skip" },
194
+ )
195
+
104
196
)
105
197
for source_text , expected , clean_steps , * annotate_kwargs in test_pairs :
106
198
annotate_kwargs = annotate_kwargs [0 ] if annotate_kwargs else {}
@@ -115,6 +207,13 @@ def lower_annotator(before, text, after):
115
207
(c .span (), f"<{ i } >" , f"</{ i } >" )
116
208
for i , c in enumerate (cites )
117
209
]
210
+
211
+ if annotate_kwargs .pop ("annotate_anchors" , False ):
212
+ annotations = [
213
+ (c .span (), "<a href='something'>" , "</a>" )
214
+ for c in cites
215
+ ]
216
+
118
217
annotated = annotate_citations (
119
218
plain_text ,
120
219
annotations ,
0 commit comments