Skip to content

Commit 58e723b

Browse files
Merge pull request #9 from wellcometrust/fix/unlabelled_tokens_in_tsv
fix: Unlabelled tokens output to tsv
2 parents dcea5e4 + 21aac04 commit 58e723b

File tree

1 file changed

+1
-9
lines changed

1 file changed

+1
-9
lines changed

deep_reference_parser/prodigy/reference_to_token_annotations.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -281,28 +281,20 @@ def reference_to_token_annotations(
281281

282282
# Only run the tagger on annotated examples.
283283

284-
not_annotated_docs = [doc for doc in ref_annotated_docs if not doc.get("spans")]
285284
ref_annotated_docs = [doc for doc in ref_annotated_docs if doc.get("spans")]
286285

287286
logger.info(
288287
"Loaded %s documents with reference annotations", len(ref_annotated_docs)
289288
)
290-
logger.info(
291-
"Loaded %s documents with no reference annotations", len(not_annotated_docs)
292-
)
293289

294290
annotator = TokenTagger(task=task, lowercase=lowercase, text=text)
295291

296292
token_annotated_docs = annotator.run(ref_annotated_docs)
297-
all_docs = token_annotated_docs + not_annotated_docs
298293

299-
write_jsonl(all_docs, output_file=output_file)
294+
write_jsonl(token_annotated_docs, output_file=output_file)
300295

301296
logger.info(
302297
"Wrote %s docs with token annotations to %s",
303298
len(token_annotated_docs),
304299
output_file,
305300
)
306-
logger.info(
307-
"Wrote %s docs with no annotations to %s", len(not_annotated_docs), output_file
308-
)

0 commit comments

Comments (0)