Skip to content

Commit bac170a

Browse files
authored
fix: summary creation issue (#16)
1 parent 4038bea commit bac170a

File tree

1 file changed

+23
-16
lines changed

1 file changed

+23
-16
lines changed

admin-api-lib/src/admin_api_lib/impl/information_enhancer/page_summary_enhancer.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,6 @@ class PageSummaryEnhancer(SummaryEnhancer):
2727
BASE64_IMAGE_KEY = "base64_image"
2828
DEFAULT_PAGE_NR = 1
2929

30-
async def _acreate_summary(self, information: list[Document], config: Optional[RunnableConfig]) -> list[Document]:
31-
# group infos by page, defaulting to page 1 if no page metadata
32-
if self._chunker_settings:
33-
filtered_information = [
34-
info for info in information if len(info.page_content) > self._chunker_settings.max_size
35-
]
36-
else:
37-
filtered_information = information
38-
grouped = [
39-
[info for info in filtered_information if info.metadata.get("page", self.DEFAULT_PAGE_NR) == page]
40-
for page in {info_piece.metadata.get("page", self.DEFAULT_PAGE_NR) for info_piece in filtered_information}
41-
]
42-
43-
summary_tasks = [self._asummarize_page(info_group, config) for info_group in tqdm(grouped)]
44-
return await gather(*summary_tasks)
45-
4630
async def _asummarize_page(self, page_pieces: list[Document], config: Optional[RunnableConfig]) -> Document:
4731
full_page_content = " ".join([piece.page_content for piece in page_pieces])
4832
summary = await self._summarizer.ainvoke(full_page_content, config)
@@ -52,3 +36,26 @@ async def _asummarize_page(self, page_pieces: list[Document], config: Optional[R
5236
meta["type"] = ContentType.SUMMARY.value
5337

5438
return Document(metadata=meta, page_content=summary)
39+
40+
async def _acreate_summary(self, information: list[Document], config: Optional[RunnableConfig]) -> list[Document]:
    """
    Create one summary document per sufficiently large page.

    The information pieces are grouped by their ``page`` metadata entry; pieces without
    that entry are assigned to ``DEFAULT_PAGE_NR``. If chunker settings are configured,
    pages whose space-joined content is shorter than ``max_size`` are skipped, so no
    summary is produced for them.

    Parameters
    ----------
    information : list[Document]
        The information pieces to summarize.
    config : Optional[RunnableConfig]
        Configuration passed through to ``_asummarize_page`` for every page.

    Returns
    -------
    list[Document]
        One summary per remaining page, in order of each page's first appearance in
        ``information``. Empty if no page qualifies.
    """
    # Single-pass grouping. Dicts preserve insertion order, so pages stay in first-seen
    # order and pieces keep their input order within a page. Page values must be
    # hashable to be used as dictionary keys.
    pieces_by_page: dict = {}
    for info in information:
        page = info.metadata.get("page", self.DEFAULT_PAGE_NR)
        pieces_by_page.setdefault(page, []).append(info)

    grouped = []
    for group in pieces_by_page.values():
        if self._chunker_settings:
            # Measure the page the same way it is later joined for summarization.
            page_length = len(" ".join(item.page_content for item in group))
            if page_length < self._chunker_settings.max_size:
                continue
        grouped.append(group)

    # NOTE: tqdm advances while the coroutines are created, not while they complete;
    # the actual summarization runs concurrently inside gather() below.
    summary_tasks = [self._asummarize_page(info_group, config) for info_group in tqdm(grouped)]

    return await gather(*summary_tasks)

0 commit comments

Comments
 (0)