Skip to content

Commit 19cdd90

Browse files
authored
Update the manual_extraction examples to add another cleanup step. #27 (#74)
Update the `manual_extraction` examples to add another cleanup step.
1 parent cfae277 commit 19cdd90

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

examples/manual_extraction/manual_extraction.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,21 @@ def __call__(self, _markdown: str) -> ManualInfo:
90 90
]
91 91
)
92 92

93+
class CleanUpManual(cocoindex.op.FunctionSpec):
94+
"""Clean up manual information."""
95+
96+
97+
98+
@cocoindex.op.executor_class()
99+
class CleanUpManualExecutor:
100+
"""Executor for CleanUpManual."""
101+
102+
spec: CleanUpManual
103+
104+
def __call__(self, manual_info: ManualInfo) -> ManualInfo | None:
105+
# TODO: Clean up
106+
return manual_info
107+
93 108
@cocoindex.flow_def(name="ManualExtraction")
94 109
def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
95 110
"""
@@ -101,7 +116,8 @@ def manual_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: coco
101 116

102 117
with data_scope["documents"].row() as doc:
103 118
doc["markdown"] = doc["content"].transform(PdfToMarkdown())
104-
doc["manual_info"] = doc["markdown"].transform(ExtractManual())
119+
doc["raw_manual_info"] = doc["markdown"].transform(ExtractManual())
120+
doc["manual_info"] = doc["raw_manual_info"].transform(CleanUpManual())
105 121
manual_infos.collect(filename=doc["filename"], manual_info=doc["manual_info"])
106 122

107 123
manual_infos.export(

0 commit comments

Comments
 (0)