From 04ab142a908e1f88ade7fba5f812bf3b66d23cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 17 Mar 2025 19:03:06 +0100 Subject: [PATCH 01/31] rename providers module to parsers --- docs/api_reference/document_search/processing.md | 10 +++++----- docs/how-to/core/component_preferrences.md | 2 +- .../strategies/create_custom_execution_strategy.md | 4 ++-- docs/how-to/document_search/search_documents.md | 6 +++--- examples/document-search/multimodal_basic.py | 2 +- examples/document-search/multimodal_chroma.py | 2 +- examples/document-search/multimodal_qdrant.py | 2 +- .../config/pipeline/providers/unstructured.yaml | 4 ++-- .../providers/unstructured_optimization.yaml | 4 ++-- .../evaluation/document-search/basic/evaluate.py | 2 +- .../evaluation/document-search/basic/optimize.py | 2 +- .../document_search/ingestion/document_processor.py | 8 ++++---- .../ingestion/{providers => parsers}/__init__.py | 0 .../ingestion/{providers => parsers}/base.py | 4 ++-- .../ingestion/{providers => parsers}/dummy.py | 2 +- .../{providers => parsers}/unstructured/__init__.py | 0 .../{providers => parsers}/unstructured/default.py | 4 ++-- .../{providers => parsers}/unstructured/images.py | 4 ++-- .../{providers => parsers}/unstructured/pdf.py | 2 +- .../{providers => parsers}/unstructured/utils.py | 0 .../tests/integration/test_unstructured.py | 2 +- .../tests/unit/test_document_processor.py | 2 +- .../tests/unit/test_document_search.py | 4 ++-- .../tests/unit/test_ingest_strategies.py | 2 +- .../tests/unit/test_providers.py | 12 ++++++------ 25 files changed, 43 insertions(+), 43 deletions(-) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/__init__.py (100%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/base.py (94%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/dummy.py (96%) rename 
packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/unstructured/__init__.py (100%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/unstructured/default.py (97%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/unstructured/images.py (95%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/unstructured/pdf.py (93%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{providers => parsers}/unstructured/utils.py (100%) diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index 853aa7f02..a6ca1fbe8 100644 --- a/docs/api_reference/document_search/processing.md +++ b/docs/api_reference/document_search/processing.md @@ -3,22 +3,22 @@ ::: ragbits.document_search.ingestion.document_processor.DocumentProcessorRouter ## Providers -::: ragbits.document_search.ingestion.providers.base.BaseProvider +::: ragbits.document_search.ingestion.parsers.base.BaseProvider options: heading_level: 3 -::: ragbits.document_search.ingestion.providers.dummy.DummyProvider +::: ragbits.document_search.ingestion.parsers.dummy.DummyProvider options: heading_level: 3 -::: ragbits.document_search.ingestion.providers.unstructured.UnstructuredDefaultProvider +::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDefaultProvider options: heading_level: 3 -::: ragbits.document_search.ingestion.providers.unstructured.UnstructuredImageProvider +::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredImageProvider options: heading_level: 3 -::: ragbits.document_search.ingestion.providers.unstructured.UnstructuredPdfProvider +::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredPdfProvider options: heading_level: 3 \ No newline at end of file diff --git 
a/docs/how-to/core/component_preferrences.md b/docs/how-to/core/component_preferrences.md index e59dfe57e..a64a4839b 100644 --- a/docs/how-to/core/component_preferrences.md +++ b/docs/how-to/core/component_preferrences.md @@ -167,6 +167,6 @@ This is the list of component types for which you can set a preferred configurat | `vector_store` | `ragbits-core` | [`VectorStore`][ragbits.core.vector_stores.base.VectorStore]| | | `history_compressor` | `ragbits-conversations` | [`ConversationHistoryCompressor`][ragbits.conversations.history.compressors.base.ConversationHistoryCompressor]| | | `document_search` | `ragbits-document-search` | [`DocumentSearch`][ragbits.document_search.DocumentSearch]| Specifics: [Configuration](#ds-configuration)| -| `provider` | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.providers.base.BaseProvider]| | +| `provider` | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider]| | | `rephraser` | `ragbits-document-search` | [`QueryRephraser`][ragbits.document_search.retrieval.rephrasers.QueryRephraser]| | | `reranker` | `ragbits-document-search` | [`Reranker`][ragbits.document_search.retrieval.rerankers.base.Reranker]| | diff --git a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md index 92f59e409..1f1ee4ec6 100644 --- a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md +++ b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md @@ -21,7 +21,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta, S from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter from ragbits.document_search.ingestion.strategies import IngestStrategy -from 
ragbits.document_search.ingestion.providers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider class DelayedExecutionStrategy(IngestStrategy): async def process_documents( @@ -50,7 +50,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta, S from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter from ragbits.document_search.ingestion.strategies import IngestStrategy -from ragbits.document_search.ingestion.providers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider class DelayedExecutionStrategy(IngestStrategy): async def process_documents( diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 3c7d5de5f..09f7dc88d 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -78,7 +78,7 @@ from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.providers.unstructured.default import UnstructuredDefaultProvider +from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider embedder = LiteLLMEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) @@ -88,11 +88,11 @@ document_search = DocumentSearch( ) ``` -If you want to implement a new provider you should extend the [`BaseProvider`][ragbits.document_search.ingestion.providers.base.BaseProvider] class: +If you want to implement a new provider you should extend the [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider] class: ```python from 
ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.providers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider class CustomProvider(BaseProvider): diff --git a/examples/document-search/multimodal_basic.py b/examples/document-search/multimodal_basic.py index 11f0131f5..5831364d2 100644 --- a/examples/document-search/multimodal_basic.py +++ b/examples/document-search/multimodal_basic.py @@ -39,7 +39,7 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.providers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/examples/document-search/multimodal_chroma.py b/examples/document-search/multimodal_chroma.py index 837a1bf0f..2cfbdf5cc 100644 --- a/examples/document-search/multimodal_chroma.py +++ b/examples/document-search/multimodal_chroma.py @@ -42,7 +42,7 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.providers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/examples/document-search/multimodal_qdrant.py b/examples/document-search/multimodal_qdrant.py index f0e3731cc..29d58e324 100644 --- a/examples/document-search/multimodal_qdrant.py +++ b/examples/document-search/multimodal_qdrant.py @@ -42,7 
+42,7 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.providers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml index cd07c5a9a..3ce63e4ba 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml @@ -1,5 +1,5 @@ txt: - type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider config: use_api: false partition_kwargs: @@ -12,7 +12,7 @@ txt: overlap_all: 0 md: - type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider config: use_api: false partition_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml index 65e4e2809..cc10e47a1 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml @@ -1,5 +1,5 @@ txt: - type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider + type: 
ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider config: use_api: false partition_kwargs: @@ -16,7 +16,7 @@ txt: overlap_all: 0 md: - type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider config: use_api: false partition_kwargs: diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py index 78d8bed85..6a102eda2 100644 --- a/examples/evaluation/document-search/basic/evaluate.py +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -47,7 +47,7 @@ }, "providers": { "txt": { - "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider", + "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", }, }, "ingest_strategy": { diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py index 21ef800b2..e14dc4dc5 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/optimize.py @@ -55,7 +55,7 @@ }, "providers": { "txt": { - "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider", + "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", }, }, "source": { diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 91d62b9a6..736db2d9f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -4,10 +4,10 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig from 
ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.providers.base import BaseProvider -from ragbits.document_search.ingestion.providers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.providers.unstructured.images import UnstructuredImageProvider -from ragbits.document_search.ingestion.providers.unstructured.pdf import UnstructuredPdfProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider +from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider +from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider # TODO consider defining with some defined schema ProvidersConfig = Mapping[DocumentType, Callable[[], BaseProvider] | BaseProvider] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py similarity index 94% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index e8e6a3a72..a2c0cdd22 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py +++ 
b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -5,7 +5,7 @@ from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element, IntermediateElement -from ragbits.document_search.ingestion import providers +from ragbits.document_search.ingestion import parsers class DocumentTypeNotSupportedError(Exception): @@ -23,7 +23,7 @@ class BaseProvider(WithConstructionConfig, ABC): A base class for the document processing providers. """ - default_module: ClassVar = providers + default_module: ClassVar = parsers configuration_key: ClassVar = "provider" SUPPORTED_DOCUMENT_TYPES: set[DocumentType] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py similarity index 96% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py index bb8bfcec7..d68876e57 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py @@ -6,7 +6,7 @@ TextDocument, ) from ragbits.document_search.documents.element import Element, ImageElement, IntermediateElement, TextElement -from ragbits.document_search.ingestion.providers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider class DummyProvider(BaseProvider): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py similarity index 100% 
rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/__init__.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py similarity index 97% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py index 8a50b0362..3bb974b8a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py @@ -11,8 +11,8 @@ from ragbits.core.audit import trace from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element, IntermediateElement -from ragbits.document_search.ingestion.providers.base import BaseProvider -from ragbits.document_search.ingestion.providers.unstructured.utils import check_required_argument, to_text_element +from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.unstructured.utils import check_required_argument, to_text_element DEFAULT_PARTITION_KWARGS: dict = { "strategy": "hi_res", diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py similarity index 95% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py rename to 
packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py index 985b38f19..a7b3feb5b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py @@ -9,8 +9,8 @@ from ragbits.core.llms.base import LLM from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element, IntermediateElement, IntermediateImageElement -from ragbits.document_search.ingestion.providers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.providers.unstructured.utils import ( +from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider +from ragbits.document_search.ingestion.parsers.unstructured.utils import ( crop_and_convert_to_bytes, extract_image_coordinates, to_text_element, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py similarity index 93% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py index 345417499..8009809c7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py @@ -6,7 +6,7 @@ from unstructured.documents.elements import Element as UnstructuredElement from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.providers.unstructured.images import 
UnstructuredImageProvider +from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider class UnstructuredPdfProvider(UnstructuredImageProvider): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index e75de77a9..0637c0e7d 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -4,7 +4,7 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter, ProvidersConfig -from ragbits.document_search.ingestion.providers.unstructured.default import ( +from ragbits.document_search.ingestion.parsers.unstructured.default import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, UNSTRUCTURED_SERVER_URL_ENV, diff --git a/packages/ragbits-document-search/tests/unit/test_document_processor.py b/packages/ragbits-document-search/tests/unit/test_document_processor.py index 6e44d2494..cc68072d4 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_processor.py +++ b/packages/ragbits-document-search/tests/unit/test_document_processor.py @@ -2,7 +2,7 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from 
ragbits.document_search.ingestion.providers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyProvider async def test_document_processor_router(): diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index b84b285c4..350d48197 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -19,8 +19,8 @@ from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.providers import BaseProvider -from ragbits.document_search.ingestion.providers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers import BaseProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.batched import ( BatchedIngestStrategy, ) diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 8e968b2ef..228fa2f65 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -4,7 +4,7 @@ from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.providers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.base import IngestStrategy from 
ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_providers.py index 55e9c7811..bf327eed4 100644 --- a/packages/ragbits-document-search/tests/unit/test_providers.py +++ b/packages/ragbits-document-search/tests/unit/test_providers.py @@ -5,11 +5,11 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.providers.base import BaseProvider, DocumentTypeNotSupportedError -from ragbits.document_search.ingestion.providers.dummy import DummyProvider -from ragbits.document_search.ingestion.providers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.providers.unstructured.images import UnstructuredImageProvider -from ragbits.document_search.ingestion.providers.unstructured.pdf import UnstructuredPdfProvider +from ragbits.document_search.ingestion.parsers.base import BaseProvider, DocumentTypeNotSupportedError +from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider +from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider +from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider @pytest.mark.parametrize("document_type", UnstructuredDefaultProvider.SUPPORTED_DOCUMENT_TYPES) @@ -56,7 +56,7 @@ async def test_unstructured_provider_raises_value_error_when_server_url_not_set( def test_subclass_from_config(): config = ObjectContructionConfig.model_validate( - {"type": "ragbits.document_search.ingestion.providers:DummyProvider"} + {"type": 
"ragbits.document_search.ingestion.parsers:DummyProvider"} ) embedding = BaseProvider.subclass_from_config(config) assert isinstance(embedding, DummyProvider) From c6c935d5c4c5c205b42272c49be49ad856ed42db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 17 Mar 2025 19:24:35 +0100 Subject: [PATCH 02/31] rename intermediate_handlers module to enrichers --- .../src/ragbits/document_search/_main.py | 8 ++++---- .../{intermediate_handlers => enrichers}/__init__.py | 0 .../{intermediate_handlers => enrichers}/base.py | 0 .../{intermediate_handlers => enrichers}/images.py | 2 +- .../ragbits/document_search/ingestion/strategies/base.py | 2 +- .../document_search/ingestion/strategies/batched.py | 2 +- .../ragbits/document_search/ingestion/strategies/ray.py | 2 +- .../document_search/ingestion/strategies/sequential.py | 2 +- .../tests/unit/test_intermediate_handlers.py | 4 ++-- .../ragbits-document-search/tests/unit/test_providers.py | 4 +--- 10 files changed, 12 insertions(+), 14 deletions(-) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{intermediate_handlers => enrichers}/__init__.py (100%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{intermediate_handlers => enrichers}/base.py (100%) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{intermediate_handlers => enrichers}/images.py (97%) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 52357a63a..a1a9ebd79 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -25,10 +25,10 @@ from ragbits.document_search.documents.element import Element, IntermediateElement, IntermediateImageElement from ragbits.document_search.documents.sources import Source from 
ragbits.document_search.documents.sources.base import SourceResolver -from ragbits.document_search.ingestion import intermediate_handlers +from ragbits.document_search.ingestion import enrichers from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler -from ragbits.document_search.ingestion.intermediate_handlers.images import ImageIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler from ragbits.document_search.ingestion.strategies import ( IngestStrategy, SequentialIngestStrategy, @@ -131,7 +131,7 @@ def from_config(cls, config: dict) -> Self: parser_router = DocumentProcessorRouter.from_config(parser_config) enricher_router = { import_by_path(element_type, element): ( - import_by_path(handler_config["type"], intermediate_handlers).from_config(handler_config["config"]) + import_by_path(handler_config["type"], enrichers).from_config(handler_config["config"]) ) for element_type, handler_config in config.get("intermediate_handlers", {}).items() } diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/__init__.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py similarity index 100% rename from 
packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/base.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py similarity index 97% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/images.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py index cf8feb9f8..6096dc06d 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/intermediate_handlers/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py @@ -13,7 +13,7 @@ IntermediateElement, IntermediateImageElement, ) -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler class ImagePromptInput(BaseModel): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 1c821b2ec..3f1a4cf1f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -14,7 +14,7 @@ from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import 
BaseIntermediateHandler _CallP = ParamSpec("_CallP") _CallReturnT = TypeVar("_CallReturnT") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 6b56015f9..464c0ee48 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -8,7 +8,7 @@ from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, IngestExecutionResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index d37a0b113..f197dd5cd 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -7,7 +7,7 @@ from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, 
IngestExecutionResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index ca62037f9..ab57584e1 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -5,7 +5,7 @@ from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter -from ragbits.document_search.ingestion.intermediate_handlers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, IngestExecutionResult, diff --git a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py index e5303b7af..912d7cb05 100644 --- a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py +++ b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py @@ -5,7 +5,7 @@ from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions from ragbits.document_search.documents.document import DocumentMeta from ragbits.document_search.documents.element import ImageElement, IntermediateImageElement -from ragbits.document_search.ingestion.intermediate_handlers.images import ImageIntermediateHandler, _ImagePrompt +from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler, _ImagePrompt @pytest.fixture @@ -46,7 +46,7 @@ def test_from_config(): config = { "llm": { "type": "LiteLLM", - "prompt": 
"ragbits.document_search.ingestion.intermediate_handlers.images:_ImagePrompt", + "prompt": "ragbits.document_search.ingestion.enrichers.images:_ImagePrompt", } } diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_providers.py index bf327eed4..be9efbcec 100644 --- a/packages/ragbits-document-search/tests/unit/test_providers.py +++ b/packages/ragbits-document-search/tests/unit/test_providers.py @@ -55,9 +55,7 @@ async def test_unstructured_provider_raises_value_error_when_server_url_not_set( def test_subclass_from_config(): - config = ObjectContructionConfig.model_validate( - {"type": "ragbits.document_search.ingestion.parsers:DummyProvider"} - ) + config = ObjectContructionConfig.model_validate({"type": "ragbits.document_search.ingestion.parsers:DummyProvider"}) embedding = BaseProvider.subclass_from_config(config) assert isinstance(embedding, DummyProvider) From 72cfa4346a81f5277152590b6a8f6b7c753ac1da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 14:21:46 +0100 Subject: [PATCH 03/31] update changelog and fix document types --- packages/ragbits-document-search/CHANGELOG.md | 3 ++- .../ragbits/document_search/documents/document.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/ragbits-document-search/CHANGELOG.md b/packages/ragbits-document-search/CHANGELOG.md index 8c8744c46..7e98b5673 100644 --- a/packages/ragbits-document-search/CHANGELOG.md +++ b/packages/ragbits-document-search/CHANGELOG.md @@ -2,12 +2,13 @@ ## Unreleased +- BREAKING CHANGE: Providers and intermediate handlers refactored to parsers and enrichers (#419) + ## 0.10.0 (2025-03-17) ### Changed - ragbits-core updated to version v0.10.0 - - BREAKING CHANGE: Processing strategies refactored to ingest strategies (#394) - Compability with the new Vector Store interface from ragbits-core (#288) - Fix docstring formatting to resolve Griffe 
warnings diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index c196ee68c..411f2118a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -1,7 +1,7 @@ import tempfile from enum import Enum from pathlib import Path -from typing import Annotated +from typing import Annotated, Any from pydantic import BaseModel @@ -11,7 +11,7 @@ class DocumentType(str, Enum): """ - Types of documents that can be stored. + Document types that can be parsed. """ MD = "md" @@ -31,12 +31,20 @@ class DocumentType(str, Enum): RST = "rst" RTF = "rtf" TSV = "tsv" + JSON = "json" XML = "xml" JPG = "jpg" PNG = "png" UNKNOWN = "unknown" + @classmethod + def _missing_(cls, value: object) -> Any: + """ + Return UNKNOWN if the value is not found in the enum.
+ """ + return cls.UNKNOWN + class DocumentMeta(BaseModel): """ From e1a8d59693b588d7223e7bec52724f6909e51644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 14:54:44 +0100 Subject: [PATCH 04/31] move document processor router to parsers module --- docs/api_reference/document_search/processing.md | 2 +- .../document_search/ingest/strategies/async_processing.md | 2 +- .../ingest/strategies/create_custom_execution_strategy.md | 4 ++-- docs/how-to/document_search/search_documents.md | 2 +- examples/document-search/multimodal_basic.py | 2 +- examples/document-search/multimodal_chroma.py | 2 +- examples/document-search/multimodal_qdrant.py | 2 +- .../src/ragbits/document_search/_main.py | 2 +- .../ingestion/{document_processor.py => parsers/router.py} | 0 .../src/ragbits/document_search/ingestion/strategies/base.py | 2 +- .../ragbits/document_search/ingestion/strategies/batched.py | 2 +- .../src/ragbits/document_search/ingestion/strategies/ray.py | 2 +- .../document_search/ingestion/strategies/sequential.py | 2 +- .../tests/integration/test_unstructured.py | 2 +- .../tests/unit/test_document_processor.py | 2 +- .../tests/unit/test_document_search.py | 2 +- .../tests/unit/test_ingest_strategies.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/{document_processor.py => parsers/router.py} (100%) diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index a6ca1fbe8..877131a85 100644 --- a/docs/api_reference/document_search/processing.md +++ b/docs/api_reference/document_search/processing.md @@ -1,6 +1,6 @@ # Document Processing -::: ragbits.document_search.ingestion.document_processor.DocumentProcessorRouter +::: ragbits.document_search.ingestion.parsers.router.DocumentProcessorRouter ## Providers ::: ragbits.document_search.ingestion.parsers.base.BaseProvider diff --git 
a/docs/how-to/document_search/ingest/strategies/async_processing.md b/docs/how-to/document_search/ingest/strategies/async_processing.md index 10261ee60..6ad7043c1 100644 --- a/docs/how-to/document_search/ingest/strategies/async_processing.md +++ b/docs/how-to/document_search/ingest/strategies/async_processing.md @@ -3,7 +3,7 @@ In Ragbits, a component called "processing execution strategy" controls how document processing is executed during ingestion. There are multiple execution strategies available in Ragbits that can be easily interchanged. You can also [create new custom execution strategies](create_custom_execution_strategy.md) to meet your specific needs. !!! note - It's important to note that processing execution strategies are a separate concept from processors. While the former manage how the processing is executed, the latter deals with the actual processing of documents. Processors are managed by [DocumentProcessorRouter][ragbits.document_search.ingestion.document_processor.DocumentProcessorRouter]. + It's important to note that processing execution strategies are a separate concept from processors. While the former manage how the processing is executed, the latter deals with the actual processing of documents. Processors are managed by [DocumentProcessorRouter][ragbits.document_search.ingestion.parsers.router.DocumentProcessorRouter]. 
## The Synchronous Execution Strategy diff --git a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md index 1f1ee4ec6..12b9c1eae 100644 --- a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md +++ b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md @@ -19,7 +19,7 @@ from collections.abc import Sequence from ragbits.document_search.documents.document import Document, DocumentMeta, Source from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.strategies import IngestStrategy from ragbits.document_search.ingestion.parsers.base import BaseProvider @@ -48,7 +48,7 @@ from collections.abc import Sequence from ragbits.document_search.documents.document import Document, DocumentMeta, Source from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.strategies import IngestStrategy from ragbits.document_search.ingestion.parsers.base import BaseProvider diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 09f7dc88d..0004eaeb1 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -76,7 +76,7 @@ library that supports parsing and chunking of most common document types (i.e. 
p from ragbits.core.embeddings.litellm import LiteLLMEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.documents.document import DocumentType from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider diff --git a/examples/document-search/multimodal_basic.py b/examples/document-search/multimodal_basic.py index 5831364d2..1f4c48b93 100644 --- a/examples/document-search/multimodal_basic.py +++ b/examples/document-search/multimodal_basic.py @@ -38,7 +38,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/examples/document-search/multimodal_chroma.py b/examples/document-search/multimodal_chroma.py index 2cfbdf5cc..249600963 100644 --- a/examples/document-search/multimodal_chroma.py +++ b/examples/document-search/multimodal_chroma.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers.dummy import 
DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/examples/document-search/multimodal_qdrant.py b/examples/document-search/multimodal_qdrant.py index 29d58e324..1649de7a9 100644 --- a/examples/document-search/multimodal_qdrant.py +++ b/examples/document-search/multimodal_qdrant.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index a1a9ebd79..416ef3d98 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -26,7 +26,7 @@ from ragbits.document_search.documents.sources import Source from ragbits.document_search.documents.sources.base import SourceResolver from ragbits.document_search.ingestion import enrichers -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler from ragbits.document_search.ingestion.strategies import ( diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py similarity 
index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 3f1a4cf1f..5b34b72df 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -13,7 +13,7 @@ from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler _CallP = ParamSpec("_CallP") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 464c0ee48..08ab8877b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -7,7 +7,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from 
ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index f197dd5cd..998a6d9b6 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -6,7 +6,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index ab57584e1..2f07a4ffd 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -4,7 +4,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import 
DocumentProcessorRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index 0637c0e7d..b1524a84f 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -3,7 +3,7 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter, ProvidersConfig +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter, ProvidersConfig from ragbits.document_search.ingestion.parsers.unstructured.default import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, diff --git a/packages/ragbits-document-search/tests/unit/test_document_processor.py b/packages/ragbits-document-search/tests/unit/test_document_processor.py index cc68072d4..06f98d760 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_processor.py +++ b/packages/ragbits-document-search/tests/unit/test_document_processor.py @@ -1,7 +1,7 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 350d48197..e037c135c 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ 
b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -18,7 +18,7 @@ ) from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers import BaseProvider from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.batched import ( diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 228fa2f65..3e0a70cef 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -3,7 +3,7 @@ from ragbits.core.embeddings.noop import NoopEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.base import IngestStrategy from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy From 20ee6906047a797f5eb6166a1c922a8dd4c7cefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 15:54:41 +0100 Subject: [PATCH 05/31] rename document processor router to parser router --- docs/api_reference/document_search/processing.md | 2 +- .../ingest/strategies/async_processing.md | 2 +- .../strategies/create_custom_execution_strategy.md | 8 ++++---- 
docs/how-to/document_search/search_documents.md | 4 ++-- examples/document-search/multimodal_basic.py | 4 ++-- examples/document-search/multimodal_chroma.py | 4 ++-- examples/document-search/multimodal_qdrant.py | 4 ++-- .../src/ragbits/document_search/_main.py | 12 ++++++------ .../document_search/ingestion/parsers/router.py | 10 +++++----- .../document_search/ingestion/strategies/base.py | 6 +++--- .../document_search/ingestion/strategies/batched.py | 6 +++--- .../document_search/ingestion/strategies/ray.py | 4 ++-- .../ingestion/strategies/sequential.py | 4 ++-- .../tests/integration/test_unstructured.py | 8 ++++---- .../tests/unit/test_document_processor.py | 6 +++--- .../tests/unit/test_document_search.py | 10 +++++----- .../tests/unit/test_ingest_strategies.py | 4 ++-- 17 files changed, 49 insertions(+), 49 deletions(-) diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index 877131a85..18015cb27 100644 --- a/docs/api_reference/document_search/processing.md +++ b/docs/api_reference/document_search/processing.md @@ -1,6 +1,6 @@ # Document Processing -::: ragbits.document_search.ingestion.parsers.router.DocumentProcessorRouter +::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter ## Providers ::: ragbits.document_search.ingestion.parsers.base.BaseProvider diff --git a/docs/how-to/document_search/ingest/strategies/async_processing.md b/docs/how-to/document_search/ingest/strategies/async_processing.md index 6ad7043c1..2b2661b30 100644 --- a/docs/how-to/document_search/ingest/strategies/async_processing.md +++ b/docs/how-to/document_search/ingest/strategies/async_processing.md @@ -3,7 +3,7 @@ In Ragbits, a component called "processing execution strategy" controls how document processing is executed during ingestion. There are multiple execution strategies available in Ragbits that can be easily interchanged. 
You can also [create new custom execution strategies](create_custom_execution_strategy.md) to meet your specific needs. !!! note - It's important to note that processing execution strategies are a separate concept from processors. While the former manage how the processing is executed, the latter deals with the actual processing of documents. Processors are managed by [DocumentProcessorRouter][ragbits.document_search.ingestion.parsers.router.DocumentProcessorRouter]. + It's important to note that processing execution strategies are a separate concept from processors. While the former manage how the processing is executed, the latter deals with the actual processing of documents. Processors are managed by [DocumentParserRouter][ragbits.document_search.ingestion.parsers.router.DocumentParserRouter]. ## The Synchronous Execution Strategy diff --git a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md index 12b9c1eae..e70c6b480 100644 --- a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md +++ b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md @@ -19,7 +19,7 @@ from collections.abc import Sequence from ragbits.document_search.documents.document import Document, DocumentMeta, Source from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import IngestStrategy from ragbits.document_search.ingestion.parsers.base import BaseProvider @@ -27,7 +27,7 @@ class DelayedExecutionStrategy(IngestStrategy): async def process_documents( self, documents: Sequence[DocumentMeta | Document | Source], - processor_router: DocumentProcessorRouter, + processor_router: DocumentParserRouter, 
processor_overwrite: BaseProvider | None = None, ) -> list[Element]: elements = [] @@ -48,7 +48,7 @@ from collections.abc import Sequence from ragbits.document_search.documents.document import Document, DocumentMeta, Source from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import IngestStrategy from ragbits.document_search.ingestion.parsers.base import BaseProvider @@ -56,7 +56,7 @@ class DelayedExecutionStrategy(IngestStrategy): async def process_documents( self, documents: Sequence[DocumentMeta | Document | Source], - processor_router: DocumentProcessorRouter, + processor_router: DocumentParserRouter, processor_overwrite: BaseProvider | None = None, ) -> list[Element]: elements = [] diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 0004eaeb1..98cc23332 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -76,7 +76,7 @@ library that supports parsing and chunking of most common document types (i.e. 
p from ragbits.core.embeddings.litellm import LiteLLMEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.documents.document import DocumentType from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider @@ -84,7 +84,7 @@ embedder = LiteLLMEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) document_search = DocumentSearch( vector_store=vector_store, - parser_router=DocumentProcessorRouter({DocumentType.TXT: UnstructuredDefaultProvider()}) + parser_router=DocumentParserRouter({DocumentType.TXT: UnstructuredDefaultProvider()}) ) ``` diff --git a/examples/document-search/multimodal_basic.py b/examples/document-search/multimodal_basic.py index 1f4c48b93..c4b267877 100644 --- a/examples/document-search/multimodal_basic.py +++ b/examples/document-search/multimodal_basic.py @@ -38,7 +38,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" @@ -59,7 +59,7 @@ async def main() -> None: embedder = VertexAIMultimodelEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentProcessorRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_chroma.py b/examples/document-search/multimodal_chroma.py index 249600963..9dd47c23b 100644 --- a/examples/document-search/multimodal_chroma.py +++ b/examples/document-search/multimodal_chroma.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentProcessorRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_qdrant.py b/examples/document-search/multimodal_qdrant.py index 1649de7a9..9d9792098 100644 --- a/examples/document-search/multimodal_qdrant.py +++ b/examples/document-search/multimodal_qdrant.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentProcessorRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 416ef3d98..03ce62e63 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -26,7 +26,7 @@ from ragbits.document_search.documents.sources import Source from ragbits.document_search.documents.sources.base import SourceResolver from ragbits.document_search.ingestion import enrichers -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler from ragbits.document_search.ingestion.strategies import ( @@ -84,7 +84,7 @@ class DocumentSearch(WithConstructionConfig): reranker: Reranker ingest_strategy: IngestStrategy - parser_router: DocumentProcessorRouter + parser_router: DocumentParserRouter enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler] def __init__( @@ -93,14 +93,14 @@ def __init__( query_rephraser: QueryRephraser | None = None, reranker: Reranker | None = None, ingest_strategy: IngestStrategy | None = None, - parser_router: DocumentProcessorRouter | None = None, + parser_router: DocumentParserRouter | None = None, enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler] | None = None, ) -> None: self.vector_store = vector_store self.query_rephraser = query_rephraser or NoopQueryRephraser() self.reranker = reranker or NoopReranker() 
self.ingest_strategy = ingest_strategy or SequentialIngestStrategy() - self.parser_router = parser_router or DocumentProcessorRouter.from_config() + self.parser_router = parser_router or DocumentParserRouter.from_config() self.enricher_router = enricher_router or { IntermediateImageElement: ImageIntermediateHandler(llm=get_preferred_llm(llm_type=LLMType.VISION)), } @@ -127,8 +127,8 @@ def from_config(cls, config: dict) -> Self: vector_store: VectorStore = VectorStore.subclass_from_config(model.vector_store) ingest_strategy = IngestStrategy.subclass_from_config(model.ingest_strategy) - parser_config = DocumentProcessorRouter.from_dict_to_providers_config(model.providers) - parser_router = DocumentProcessorRouter.from_config(parser_config) + parser_config = DocumentParserRouter.from_dict_to_providers_config(model.providers) + parser_router = DocumentParserRouter.from_config(parser_config) enricher_router = { import_by_path(element_type, element): ( import_by_path(handler_config["type"], enrichers).from_config(handler_config["config"]) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 736db2d9f..1f8d705c7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -37,9 +37,9 @@ } -class DocumentProcessorRouter: +class DocumentParserRouter: """ - The DocumentProcessorRouter is responsible for routing the document to the correct provider based on the document + The DocumentParserRouter is responsible for routing the document to the correct provider based on the document metadata such as the document type. 
""" @@ -71,9 +71,9 @@ def from_dict_to_providers_config(dict_config: dict[str, ObjectContructionConfig return providers_config @classmethod - def from_config(cls, providers: ProvidersConfig | None = None) -> "DocumentProcessorRouter": + def from_config(cls, providers: ProvidersConfig | None = None) -> "DocumentParserRouter": """ - Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default + Create a DocumentParserRouter from a configuration. If the configuration is not provided, the default configuration will be used. If the configuration is provided, it will be merged with the default configuration, overriding the default values for the document types that are defined in the configuration. Example of the configuration: @@ -87,7 +87,7 @@ def from_config(cls, providers: ProvidersConfig | None = None) -> "DocumentProce provider class. Returns: - The DocumentProcessorRouter. + The DocumentParserRouter. """ config: MutableMapping[DocumentType, Callable[[], BaseProvider] | BaseProvider] = copy.deepcopy( DEFAULT_PROVIDERS_CONFIG diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 5b34b72df..06f2a1a7d 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -13,7 +13,7 @@ from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler _CallP = 
ParamSpec("_CallP") @@ -67,7 +67,7 @@ async def __call__( self, documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], ) -> IngestExecutionResult: """ @@ -120,7 +120,7 @@ async def _call_with_error_handling( @staticmethod async def _parse_document( document: DocumentMeta | Document | Source, - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, ) -> Sequence[Element | IntermediateElement]: """ Parse a single document and return the elements. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 08ab8877b..645744f25 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -7,7 +7,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -54,7 +54,7 @@ async def __call__( self, documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], ) -> IngestExecutionResult: """ @@ -121,7 +121,7 
@@ async def __call__( async def _parse_batch( self, batch: list[DocumentMeta | Document | Source], - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, ) -> list[IngestTaskResult | IngestDocumentResult]: """ Parse batch of documents. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index 998a6d9b6..752cc972e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -6,7 +6,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -53,7 +53,7 @@ async def __call__( self, documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], ) -> IngestExecutionResult: """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index 2f07a4ffd..d7d330617 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ 
b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -4,7 +4,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -22,7 +22,7 @@ async def __call__( self, documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, - parser_router: DocumentProcessorRouter, + parser_router: DocumentParserRouter, enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], ) -> IngestExecutionResult: """ diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index b1524a84f..505889fd7 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -3,7 +3,7 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter, ProvidersConfig +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter, ProvidersConfig from ragbits.document_search.ingestion.parsers.unstructured.default import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, @@ -29,7 +29,7 @@ ], ) async def test_document_processor_processes_text_document_with_unstructured_provider(config: ProvidersConfig): - document_processor = DocumentProcessorRouter.from_config(config) + document_processor = 
DocumentParserRouter.from_config(config) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") elements = await document_processor.get_provider(document_meta).process(document_meta) @@ -49,7 +49,7 @@ async def test_document_processor_processes_text_document_with_unstructured_prov reason="Unstructured API environment variables not set", ) async def test_document_processor_processes_md_document_with_unstructured_provider(): - document_processor = DocumentProcessorRouter.from_config() + document_processor = DocumentParserRouter.from_config() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / "test_file.md") elements = await document_processor.get_provider(document_meta).process(document_meta) @@ -68,7 +68,7 @@ async def test_document_processor_processes_md_document_with_unstructured_provid ) @pytest.mark.parametrize("file_name", ["transformers_paper_page.pdf", "transformers_paper_page.png"]) async def test_document_processor_processes_image_document_with_unstructured_provider(file_name: str): - document_processor = DocumentProcessorRouter.from_config() + document_processor = DocumentParserRouter.from_config() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / file_name) elements = await document_processor.get_provider(document_meta).process(document_meta) diff --git a/packages/ragbits-document-search/tests/unit/test_document_processor.py b/packages/ragbits-document-search/tests/unit/test_document_processor.py index 06f98d760..0b1fdda6b 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_processor.py +++ b/packages/ragbits-document-search/tests/unit/test_document_processor.py @@ -1,12 +1,12 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from 
ragbits.document_search.ingestion.parsers.dummy import DummyProvider async def test_document_processor_router(): - document_processor_router = DocumentProcessorRouter.from_config({DocumentType.TXT: DummyProvider()}) + document_processor_router = DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") @@ -16,7 +16,7 @@ async def test_document_processor_router(): async def test_document_processor_router_raises_when_no_provider_found(): - document_processor_router = DocumentProcessorRouter.from_config() + document_processor_router = DocumentParserRouter.from_config() document_processor_router._providers = {DocumentType.TXT: DummyProvider()} document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index e037c135c..c6dcef30e 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -18,7 +18,7 @@ ) from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers import BaseProvider from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.batched import ( @@ -70,7 +70,7 @@ async def test_document_search_ingest_from_source(): document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentProcessorRouter.from_config({DocumentType.TXT: DummyProvider()}), + 
parser_router=DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}), ) with tempfile.NamedTemporaryFile(suffix=".txt") as f: @@ -100,7 +100,7 @@ async def test_document_search_ingest(document: DocumentMeta | Document): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentProcessorRouter({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), ) await document_search.ingest([document]) @@ -123,7 +123,7 @@ async def test_document_search_with_search_config(): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentProcessorRouter({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), ) await document_search.ingest([DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George")]) @@ -461,7 +461,7 @@ def __iter__(self): ): document_search = DocumentSearch( vector_store=vector_store, - parser_router=DocumentProcessorRouter.from_config(providers), + parser_router=DocumentParserRouter.from_config(providers), ) await document_search.ingest("huggingface://dataset_name/train/0") diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 3e0a70cef..3b018a32e 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -3,7 +3,7 @@ from ragbits.core.embeddings.noop import NoopEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import 
DocumentProcessorRouter +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.strategies.base import IngestStrategy from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy @@ -37,7 +37,7 @@ def documents_fixture() -> list[DocumentMeta]: async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: list[DocumentMeta]) -> None: vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) - parser_router = DocumentProcessorRouter.from_config({DocumentType.TXT: DummyProvider()}) + parser_router = DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}) results = await ingest_strategy( documents=documents, From 4654701093eb6ed54059da60c5023768c4d88688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 20:08:51 +0100 Subject: [PATCH 06/31] refactor document parser router interface --- docs/how-to/core/component_preferrences.md | 2 +- .../create_custom_execution_strategy.md | 2 +- examples/document-search/multimodal_basic.py | 4 +- examples/document-search/multimodal_chroma.py | 4 +- examples/document-search/multimodal_qdrant.py | 4 +- .../config/pipeline/document_search.yaml | 2 +- .../document_search_optimization.yaml | 2 +- .../{providers => parsers}/unstructured.yaml | 0 .../unstructured_optimization.yaml | 0 .../document-search/basic/evaluate.py | 10 +- .../document-search/basic/optimize.py | 2 +- .../src/ragbits/document_search/_main.py | 9 +- .../document_search/documents/document.py | 2 +- .../ingestion/parsers/router.py | 146 ++++++++---------- .../ingestion/strategies/base.py | 7 +- .../ingestion/strategies/batched.py | 2 +- .../ingestion/strategies/ray.py | 2 +- .../ingestion/strategies/sequential.py | 2 +- .../tests/integration/test_unstructured.py | 29 ++-- ...ssor.py => test_document_parser_router.py} | 18 +-- 
.../tests/unit/test_document_search.py | 12 +- .../tests/unit/test_ingest_strategies.py | 4 +- 22 files changed, 123 insertions(+), 142 deletions(-) rename examples/evaluation/document-search/advanced/config/pipeline/{providers => parsers}/unstructured.yaml (100%) rename examples/evaluation/document-search/advanced/config/pipeline/{providers => parsers}/unstructured_optimization.yaml (100%) rename packages/ragbits-document-search/tests/unit/{test_document_processor.py => test_document_parser_router.py} (53%) diff --git a/docs/how-to/core/component_preferrences.md b/docs/how-to/core/component_preferrences.md index a64a4839b..90fb9f5e5 100644 --- a/docs/how-to/core/component_preferrences.md +++ b/docs/how-to/core/component_preferrences.md @@ -167,6 +167,6 @@ This is the list of component types for which you can set a preferred configurat | `vector_store` | `ragbits-core` | [`VectorStore`][ragbits.core.vector_stores.base.VectorStore]| | | `history_compressor` | `ragbits-conversations` | [`ConversationHistoryCompressor`][ragbits.conversations.history.compressors.base.ConversationHistoryCompressor]| | | `document_search` | `ragbits-document-search` | [`DocumentSearch`][ragbits.document_search.DocumentSearch]| Specifics: [Configuration](#ds-configuration)| -| `provider` | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider]| | +| `parser` | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider]| | | `rephraser` | `ragbits-document-search` | [`QueryRephraser`][ragbits.document_search.retrieval.rephrasers.QueryRephraser]| | | `reranker` | `ragbits-document-search` | [`Reranker`][ragbits.document_search.retrieval.rerankers.base.Reranker]| | diff --git a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md index e70c6b480..4cac9f8f1 100644 --- 
a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md +++ b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md @@ -65,7 +65,7 @@ class DelayedExecutionStrategy(IngestStrategy): document_meta = await self.to_document_meta(document) # Get the processor for the document - processor = processor_overwrite or processor_router.get_provider(document) + processor = processor_overwrite or processor_router.get(document) await asyncio.sleep(1) diff --git a/examples/document-search/multimodal_basic.py b/examples/document-search/multimodal_basic.py index c4b267877..6f00d1fcd 100644 --- a/examples/document-search/multimodal_basic.py +++ b/examples/document-search/multimodal_basic.py @@ -38,8 +38,8 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -59,7 +59,7 @@ async def main() -> None: embedder = VertexAIMultimodelEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_chroma.py b/examples/document-search/multimodal_chroma.py index 9dd47c23b..eac4b579a 100644 --- a/examples/document-search/multimodal_chroma.py +++ b/examples/document-search/multimodal_chroma.py @@ -41,8 +41,8 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_qdrant.py b/examples/document-search/multimodal_qdrant.py index 9d9792098..c9b4fda0e 100644 --- a/examples/document-search/multimodal_qdrant.py +++ b/examples/document-search/multimodal_qdrant.py @@ -41,8 +41,8 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter.from_config({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml index 3a36fa3fa..4180c8440 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml @@ -2,7 +2,7 @@ defaults: - vector_store@config.vector_store: chroma - rephraser@config.rephraser: noop - reranker@config.reranker: noop - - providers@config.providers: unstructured + - parsers@config.parsers: unstructured - source@config.source: hf - _self_ diff --git a/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml index a708b9671..2e690c0f2 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml @@ -2,7 +2,7 @@ defaults: - vector_store@config.vector_store: chroma_optimization - rephraser@config.rephraser: noop - reranker@config.reranker: noop - - providers@config.providers: unstructured_optimization + - parsers@config.parsers: unstructured_optimization - source@config.source: hf - _self_ diff --git a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml similarity index 100% rename from examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml rename to 
examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml diff --git a/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml similarity index 100% rename from examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_optimization.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py index 6a102eda2..74f6e5026 100644 --- a/examples/evaluation/document-search/basic/evaluate.py +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -45,17 +45,17 @@ }, }, }, - "providers": { - "txt": { - "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", - }, - }, "ingest_strategy": { "type": "ragbits.document_search.ingestion.strategies.batched:BatchedIngestStrategy", "config": { "batch_size": 10, }, }, + "parsers": { + "txt": { + "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", + }, + }, "source": { "type": "ragbits.document_search.documents.sources:HuggingFaceSource", "config": { diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py index e14dc4dc5..5662530eb 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/optimize.py @@ -53,7 +53,7 @@ }, }, }, - "providers": { + "parsers": { "txt": { "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", }, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 03ce62e63..74db2e0cd 100644 --- 
a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -26,9 +26,9 @@ from ragbits.document_search.documents.sources import Source from ragbits.document_search.documents.sources.base import SourceResolver from ragbits.document_search.ingestion import enrichers -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import ( IngestStrategy, SequentialIngestStrategy, @@ -59,7 +59,7 @@ class DocumentSearchConfig(BaseModel): rephraser: ObjectContructionConfig = ObjectContructionConfig(type="NoopQueryRephraser") reranker: ObjectContructionConfig = ObjectContructionConfig(type="NoopReranker") ingest_strategy: ObjectContructionConfig = ObjectContructionConfig(type="SequentialIngestStrategy") - providers: dict[str, ObjectContructionConfig] = {} + parsers: dict[str, ObjectContructionConfig] = {} intermediate_element_handlers: dict[str, ObjectContructionConfig] = {} @@ -100,7 +100,7 @@ def __init__( self.query_rephraser = query_rephraser or NoopQueryRephraser() self.reranker = reranker or NoopReranker() self.ingest_strategy = ingest_strategy or SequentialIngestStrategy() - self.parser_router = parser_router or DocumentParserRouter.from_config() + self.parser_router = parser_router or DocumentParserRouter() self.enricher_router = enricher_router or { IntermediateImageElement: ImageIntermediateHandler(llm=get_preferred_llm(llm_type=LLMType.VISION)), } @@ -127,8 +127,7 @@ def from_config(cls, config: dict) -> Self: vector_store: VectorStore = VectorStore.subclass_from_config(model.vector_store) ingest_strategy = 
IngestStrategy.subclass_from_config(model.ingest_strategy) - parser_config = DocumentParserRouter.from_dict_to_providers_config(model.providers) - parser_router = DocumentParserRouter.from_config(parser_config) + parser_router = DocumentParserRouter.from_config(model.parsers) enricher_router = { import_by_path(element_type, element): ( import_by_path(handler_config["type"], enrichers).from_config(handler_config["config"]) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index 411f2118a..a277e6de7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -39,7 +39,7 @@ class DocumentType(str, Enum): UNKNOWN = "unknown" @classmethod - def _missing_(cls, value: object) -> Any: + def _missing_(cls, value: object) -> Any: # noqa: ANN401 """ Return WILDCARD if the value is not found in the enum. 
""" diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 1f8d705c7..85b3c6ece 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -1,119 +1,103 @@ -import copy -from collections.abc import Callable, Mapping, MutableMapping -from typing import cast +from collections.abc import Mapping +from typing import ClassVar -from ragbits.core.utils.config_handling import ObjectContructionConfig +from typing_extensions import Self + +from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.base import BaseProvider from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider -# TODO consider defining with some defined schema -ProvidersConfig = Mapping[DocumentType, Callable[[], BaseProvider] | BaseProvider] - - -DEFAULT_PROVIDERS_CONFIG: MutableMapping[DocumentType, Callable[[], BaseProvider] | BaseProvider] = { - DocumentType.TXT: UnstructuredDefaultProvider, - DocumentType.MD: UnstructuredDefaultProvider, - DocumentType.PDF: UnstructuredPdfProvider, - DocumentType.DOCX: UnstructuredDefaultProvider, - DocumentType.DOC: UnstructuredDefaultProvider, - DocumentType.PPTX: UnstructuredDefaultProvider, - DocumentType.PPT: UnstructuredDefaultProvider, - DocumentType.XLSX: UnstructuredDefaultProvider, - DocumentType.XLS: UnstructuredDefaultProvider, - DocumentType.CSV: UnstructuredDefaultProvider, - 
DocumentType.HTML: UnstructuredDefaultProvider, - DocumentType.EPUB: UnstructuredDefaultProvider, - DocumentType.ORG: UnstructuredDefaultProvider, - DocumentType.ODT: UnstructuredDefaultProvider, - DocumentType.RST: UnstructuredDefaultProvider, - DocumentType.RTF: UnstructuredDefaultProvider, - DocumentType.TSV: UnstructuredDefaultProvider, - DocumentType.XML: UnstructuredDefaultProvider, - DocumentType.JPG: UnstructuredImageProvider, - DocumentType.PNG: UnstructuredImageProvider, +_default_parser = UnstructuredDefaultProvider() +_default_img_parser = UnstructuredImageProvider() +_default_pdf_parser = UnstructuredPdfProvider() + +_DEFAULT_PARSERS: dict[DocumentType, BaseProvider] = { + DocumentType.TXT: _default_parser, + DocumentType.MD: _default_parser, + DocumentType.PDF: _default_pdf_parser, + DocumentType.DOCX: _default_parser, + DocumentType.DOC: _default_parser, + DocumentType.PPTX: _default_parser, + DocumentType.PPT: _default_parser, + DocumentType.XLSX: _default_parser, + DocumentType.XLS: _default_parser, + DocumentType.CSV: _default_parser, + DocumentType.HTML: _default_parser, + DocumentType.EPUB: _default_parser, + DocumentType.ORG: _default_parser, + DocumentType.ODT: _default_parser, + DocumentType.RST: _default_parser, + DocumentType.RTF: _default_parser, + DocumentType.TSV: _default_parser, + DocumentType.XML: _default_parser, + DocumentType.JPG: _default_img_parser, + DocumentType.PNG: _default_img_parser, } -class DocumentParserRouter: +class DocumentParserRouter(WithConstructionConfig): """ - The DocumentParserRouter is responsible for routing the document to the correct provider based on the document - metadata such as the document type. + The class responsible for routing the document to the correct parser based on the document type. 
""" - def __init__(self, providers: ProvidersConfig): - self._providers = providers + configuration_key: ClassVar[str] = "parsers" + + _parsers: Mapping[DocumentType, BaseProvider] - @staticmethod - def from_dict_to_providers_config(dict_config: dict[str, ObjectContructionConfig]) -> ProvidersConfig: + def __init__(self, parsers: Mapping[DocumentType, BaseProvider] | None = None) -> None: """ - Creates ProvidersConfig from dictionary that maps document types to the provider configuration. + Initialize the DocumentParserRouter instance. Args: - dict_config: The dictionary with configuration. - - Returns: - ProvidersConfig object. + parsers: The mapping of document types and their parsers. To override default Unstructured parsers. - Raises: - InvalidConfigError: If a provider class can't be found or is not the correct type. + Example: + { + DocumentType.PDF: CustomPDFParser(), + DocumentType.TXT: CustomTextParser(), + } """ - providers_config = {} - - for document_type, config in dict_config.items(): - providers_config[DocumentType(document_type)] = cast( - Callable[[], BaseProvider] | BaseProvider, - BaseProvider.subclass_from_config(config), - ) - - return providers_config + self._parsers = {**_DEFAULT_PARSERS, **parsers} if parsers else _DEFAULT_PARSERS @classmethod - def from_config(cls, providers: ProvidersConfig | None = None) -> "DocumentParserRouter": + def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: """ - Create a DocumentParserRouter from a configuration. If the configuration is not provided, the default - configuration will be used. If the configuration is provided, it will be merged with the default configuration, - overriding the default values for the document types that are defined in the configuration. - Example of the configuration: - { - DocumentType.TXT: YourCustomProviderClass(), - DocumentType.PDF: UnstructuredProvider(), - } + Initialize the class with the provided configuration. 
Args: - providers: The dictionary with the providers configuration, mapping the document types to the - provider class. + config: A dictionary containing configuration details for the class. Returns: The DocumentParserRouter. - """ - config: MutableMapping[DocumentType, Callable[[], BaseProvider] | BaseProvider] = copy.deepcopy( - DEFAULT_PROVIDERS_CONFIG - ) - config.update(providers if providers is not None else {}) - return cls(providers=config) + Raises: + InvalidConfigError: If any of the provided parsers cannot be initialized. + """ + parsers = { + DocumentType(document_type): BaseProvider.subclass_from_config(parser_config) + for document_type, parser_config in config.items() + } + return cls(parsers=parsers) - def get_provider(self, document_meta: DocumentMeta) -> BaseProvider: + def get(self, document_meta: DocumentMeta) -> BaseProvider: """ - Get the provider for the document. + Get the parser for the document. Args: document_meta: The document metadata. Returns: - The provider for processing the document. + The parser for processing the document. Raises: - ValueError: If no provider is found for the document type. + ValueError: If no parser is found for the document type. 
""" - provider_class_or_provider = self._providers.get(document_meta.document_type) - if provider_class_or_provider is None: - raise ValueError(f"No provider found for the document type {document_meta.document_type}") - elif isinstance(provider_class_or_provider, BaseProvider): - provider = provider_class_or_provider - else: - provider = provider_class_or_provider() - return provider + parser = self._parsers.get(document_meta.document_type) + + if isinstance(parser, BaseProvider): + return parser + + raise ValueError(f"No parser found for the document type {document_meta.document_type}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 06f2a1a7d..e7e9b00ac 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -13,8 +13,8 @@ from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter _CallP = ParamSpec("_CallP") _CallReturnT = TypeVar("_CallReturnT") @@ -131,6 +131,9 @@ async def _parse_document( Returns: The list of elements. + + Raises: + ValueError: If no parser is found for the document type. 
""" document_meta = ( await DocumentMeta.from_source(document) @@ -139,7 +142,7 @@ async def _parse_document( if isinstance(document, DocumentMeta) else document.metadata ) - parser = parser_router.get_provider(document_meta) + parser = parser_router.get(document_meta) return await parser.process(document_meta) @staticmethod diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 645744f25..73b8bbea2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -7,8 +7,8 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, IngestExecutionResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index 752cc972e..6f5a94dda 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -6,8 +6,8 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import 
Source -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, IngestExecutionResult, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index d7d330617..d753f729b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -4,8 +4,8 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, IngestExecutionResult, diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index 505889fd7..39d280c68 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -3,7 +3,7 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter, ProvidersConfig +from 
ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.unstructured.default import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, @@ -15,7 +15,7 @@ @pytest.mark.parametrize( - "config", + "parsers", [ {}, pytest.param({DocumentType.TXT: UnstructuredDefaultProvider()}), @@ -28,18 +28,13 @@ ), ], ) -async def test_document_processor_processes_text_document_with_unstructured_provider(config: ProvidersConfig): - document_processor = DocumentParserRouter.from_config(config) +async def test_parser_router_processes_text_document_with_unstructured_provider(parsers: dict): + parser_router = DocumentParserRouter(parsers) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - elements = await document_processor.get_provider(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).process(document_meta) - expected_provider_type = ( - UnstructuredDefaultProvider - if isinstance(config.get(DocumentType.TXT), UnstructuredDefaultProvider) - else type(UnstructuredDefaultProvider) - ) - assert isinstance(document_processor._providers[DocumentType.TXT], expected_provider_type) + assert isinstance(parser_router._parsers[DocumentType.TXT], UnstructuredDefaultProvider) assert len(elements) == 1 assert elements[0].content == "Name of Peppa's brother is George." 
# type: ignore @@ -48,11 +43,11 @@ async def test_document_processor_processes_text_document_with_unstructured_prov env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), reason="Unstructured API environment variables not set", ) -async def test_document_processor_processes_md_document_with_unstructured_provider(): - document_processor = DocumentParserRouter.from_config() +async def test_parser_router_processes_md_document_with_unstructured_provider(): + parser_router = DocumentParserRouter() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / "test_file.md") - elements = await document_processor.get_provider(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).process(document_meta) assert len(elements) == 1 assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework." # type: ignore @@ -67,11 +62,11 @@ async def test_document_processor_processes_md_document_with_unstructured_provid reason="OpenAI API environment variables not set", ) @pytest.mark.parametrize("file_name", ["transformers_paper_page.pdf", "transformers_paper_page.png"]) -async def test_document_processor_processes_image_document_with_unstructured_provider(file_name: str): - document_processor = DocumentParserRouter.from_config() +async def test_parser_router_processes_image_document_with_unstructured_provider(file_name: str): + parser_router = DocumentParserRouter() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / file_name) - elements = await document_processor.get_provider(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).process(document_meta) assert len(elements) == 7 assert elements[-1].description != "" # type: ignore diff --git a/packages/ragbits-document-search/tests/unit/test_document_processor.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py similarity index 53% rename from 
packages/ragbits-document-search/tests/unit/test_document_processor.py rename to packages/ragbits-document-search/tests/unit/test_document_parser_router.py index 0b1fdda6b..2c053595f 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_processor.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -1,29 +1,29 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter -async def test_document_processor_router(): - document_processor_router = DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}) +async def test_parser_router(): + parser_router = DocumentParserRouter({DocumentType.TXT: DummyProvider()}) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") - document_processor = document_processor_router.get_provider(document_meta) + parser = parser_router.get(document_meta) - assert isinstance(document_processor, DummyProvider) + assert isinstance(parser, DummyProvider) -async def test_document_processor_router_raises_when_no_provider_found(): - document_processor_router = DocumentParserRouter.from_config() - document_processor_router._providers = {DocumentType.TXT: DummyProvider()} +async def test_parser_router_raises_when_no_provider_found(): + parser_router = DocumentParserRouter() + parser_router._parsers = {DocumentType.TXT: DummyProvider()} document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") document_meta.document_type = DocumentType.PDF with pytest.raises(ValueError) as err: - _ = document_processor_router.get_provider(document_meta) + _ = parser_router.get(document_meta) assert str(err.value) == f"No provider found for the 
document type {DocumentType.PDF}" diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index c6dcef30e..7bb3658c1 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -18,9 +18,9 @@ ) from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers import BaseProvider from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.batched import ( BatchedIngestStrategy, ) @@ -33,7 +33,7 @@ }, }, "reranker": {"type": "NoopReranker"}, - "providers": {"txt": {"type": "DummyProvider"}}, + "parsers": {"txt": {"type": "DummyProvider"}}, "ingest_strategy": {"type": "SequentialIngestStrategy"}, } @@ -70,7 +70,7 @@ async def test_document_search_ingest_from_source(): document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), ) with tempfile.NamedTemporaryFile(suffix=".txt") as f: @@ -434,8 +434,8 @@ def __iter__(self): embeddings_mock = AsyncMock() embeddings_mock.embed_text.return_value = [[0.1, 0.1]] # Non-zero embeddings - # Create providers dict with actual provider instance - providers: Mapping[DocumentType, BaseProvider] = {DocumentType.TXT: DummyProvider()} + # Create parsers dict with actual provider instance + parsers: Mapping[DocumentType, BaseProvider] = {DocumentType.TXT: DummyProvider()} # Mock vector store to track 
operations vector_store = InMemoryVectorStore(embedder=embeddings_mock) @@ -461,7 +461,7 @@ def __iter__(self): ): document_search = DocumentSearch( vector_store=vector_store, - parser_router=DocumentParserRouter.from_config(providers), + parser_router=DocumentParserRouter(parsers), ) await document_search.ingest("huggingface://dataset_name/train/0") diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 3b018a32e..6ce47e213 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -3,8 +3,8 @@ from ragbits.core.embeddings.noop import NoopEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import IngestStrategy from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy @@ -37,7 +37,7 @@ def documents_fixture() -> list[DocumentMeta]: async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: list[DocumentMeta]) -> None: vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) - parser_router = DocumentParserRouter.from_config({DocumentType.TXT: DummyProvider()}) + parser_router = DocumentParserRouter({DocumentType.TXT: DummyProvider()}) results = await ingest_strategy( documents=documents, From 6afcd80b5e4fb90ada7e18f82a5c2c4d71a542b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 
20:40:49 +0100 Subject: [PATCH 07/31] fix tests --- .../tests/unit/test_document_parser_router.py | 4 ++-- .../unit/{test_providers.py => test_document_parsers.py} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename packages/ragbits-document-search/tests/unit/{test_providers.py => test_document_parsers.py} (100%) diff --git a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py index 2c053595f..87369be74 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -15,7 +15,7 @@ async def test_parser_router(): assert isinstance(parser, DummyProvider) -async def test_parser_router_raises_when_no_provider_found(): +async def test_parser_router_raises_when_no_parser_found(): parser_router = DocumentParserRouter() parser_router._parsers = {DocumentType.TXT: DummyProvider()} @@ -26,4 +26,4 @@ async def test_parser_router_raises_when_no_provider_found(): with pytest.raises(ValueError) as err: _ = parser_router.get(document_meta) - assert str(err.value) == f"No provider found for the document type {DocumentType.PDF}" + assert str(err.value) == f"No parser found for the document type {DocumentType.PDF}" diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py similarity index 100% rename from packages/ragbits-document-search/tests/unit/test_providers.py rename to packages/ragbits-document-search/tests/unit/test_document_parsers.py From 8b0dcd0722797d718c235eeb20106f67833e4ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 18 Mar 2025 21:27:53 +0100 Subject: [PATCH 08/31] add enricher router --- .../src/ragbits/document_search/_main.py | 27 ++---- .../ingestion/enrichers/base.py | 3 +- .../ingestion/enrichers/images.py | 2 +- 
.../ingestion/enrichers/router.py | 82 +++++++++++++++++++ .../ingestion/parsers/router.py | 8 +- .../ingestion/strategies/base.py | 11 ++- .../ingestion/strategies/batched.py | 6 +- .../ingestion/strategies/ray.py | 4 +- .../ingestion/strategies/sequential.py | 4 +- .../tests/unit/test_ingest_strategies.py | 4 +- 10 files changed, 113 insertions(+), 38 deletions(-) create mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 74db2e0cd..1af265d20 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -9,25 +9,19 @@ from ragbits import document_search from ragbits.core.audit import trace, traceable from ragbits.core.config import CoreConfig -from ragbits.core.llms.base import LLMType -from ragbits.core.llms.factory import get_preferred_llm from ragbits.core.utils._pyproject import get_config_from_yaml from ragbits.core.utils.config_handling import ( NoPreferredConfigError, ObjectContructionConfig, WithConstructionConfig, - import_by_path, ) from ragbits.core.vector_stores import VectorStore from ragbits.core.vector_stores.base import VectorStoreOptions -from ragbits.document_search.documents import element from ragbits.document_search.documents.document import Document, DocumentMeta -from ragbits.document_search.documents.element import Element, IntermediateElement, IntermediateImageElement +from ragbits.document_search.documents.element import Element from ragbits.document_search.documents.sources import Source from ragbits.document_search.documents.sources.base import SourceResolver -from ragbits.document_search.ingestion import enrichers -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler -from 
ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import ( IngestStrategy, @@ -60,7 +54,7 @@ class DocumentSearchConfig(BaseModel): reranker: ObjectContructionConfig = ObjectContructionConfig(type="NoopReranker") ingest_strategy: ObjectContructionConfig = ObjectContructionConfig(type="SequentialIngestStrategy") parsers: dict[str, ObjectContructionConfig] = {} - intermediate_element_handlers: dict[str, ObjectContructionConfig] = {} + enrichers: dict[str, ObjectContructionConfig] = {} class DocumentSearch(WithConstructionConfig): @@ -85,7 +79,7 @@ class DocumentSearch(WithConstructionConfig): ingest_strategy: IngestStrategy parser_router: DocumentParserRouter - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler] + enricher_router: ElementEnricherRouter def __init__( self, @@ -94,16 +88,14 @@ def __init__( reranker: Reranker | None = None, ingest_strategy: IngestStrategy | None = None, parser_router: DocumentParserRouter | None = None, - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler] | None = None, + enricher_router: ElementEnricherRouter | None = None, ) -> None: self.vector_store = vector_store self.query_rephraser = query_rephraser or NoopQueryRephraser() self.reranker = reranker or NoopReranker() self.ingest_strategy = ingest_strategy or SequentialIngestStrategy() self.parser_router = parser_router or DocumentParserRouter() - self.enricher_router = enricher_router or { - IntermediateImageElement: ImageIntermediateHandler(llm=get_preferred_llm(llm_type=LLMType.VISION)), - } + self.enricher_router = enricher_router or ElementEnricherRouter() @classmethod def from_config(cls, config: dict) -> Self: @@ -128,12 +120,7 @@ def from_config(cls, config: dict) -> Self: 
ingest_strategy = IngestStrategy.subclass_from_config(model.ingest_strategy) parser_router = DocumentParserRouter.from_config(model.parsers) - enricher_router = { - import_by_path(element_type, element): ( - import_by_path(handler_config["type"], enrichers).from_config(handler_config["config"]) - ) - for element_type, handler_config in config.get("intermediate_handlers", {}).items() - } + enricher_router = ElementEnricherRouter.from_config(model.enrichers) return cls( vector_store=vector_store, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index 66a4d8f9a..f2bacfbff 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -1,9 +1,10 @@ from abc import ABC, abstractmethod +from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.document_search.documents.element import Element, IntermediateElement -class BaseIntermediateHandler(ABC): +class BaseIntermediateHandler(WithConstructionConfig, ABC): """ Base class for handling `IntermediateElement` processing. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py index 6096dc06d..3f017cd6b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py @@ -38,7 +38,7 @@ class ImageIntermediateHandler(BaseIntermediateHandler): Provides image processing capabilities using an LLM. 
""" - def __init__(self, llm: LLM, prompt: type[Prompt[ImagePromptInput, Any]] | None = None): + def __init__(self, llm: LLM | None = None, prompt: type[Prompt[ImagePromptInput, Any]] | None = None) -> None: """ Initializes the ImageProvider. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py new file mode 100644 index 000000000..bc2d84436 --- /dev/null +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -0,0 +1,82 @@ +from collections.abc import Mapping +from typing import ClassVar + +from typing_extensions import Self + +from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig, import_by_path +from ragbits.document_search.documents import element +from ragbits.document_search.documents.element import IntermediateElement, IntermediateImageElement +from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler + +_DEFAULT_ENRICHERS: dict[type[IntermediateElement], ImageIntermediateHandler] = { + IntermediateImageElement: ImageIntermediateHandler(), +} + + +class ElementEnricherRouter(WithConstructionConfig): + """ + The class responsible for routing the element to the correct enricher based on the element type. + """ + + configuration_key: ClassVar[str] = "enrichers" + + _enrichers: Mapping[type[IntermediateElement], ImageIntermediateHandler] + + def __init__( + self, + enrichers: Mapping[type[IntermediateElement], ImageIntermediateHandler] | None = None, + ) -> None: + """ + Initialize the ElementEnricherRouter instance. + + Args: + enrichers: The mapping of element types and their enrichers. To override default enrichers. 
+ + Example: + { + IntermediateImageElement: ImageIntermediateHandler(), + IntermediateTextElement: TextIntermediateHandler(), + } + """ + self._enrichers = {**_DEFAULT_ENRICHERS, **enrichers} if enrichers else _DEFAULT_ENRICHERS + + @classmethod + def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: + """ + Initialize the class with the provided configuration. + + Args: + config: A dictionary containing configuration details for the class. + + Returns: + The ElementEnricherRouter. + + Raises: + InvalidConfigError: If any of the provided parsers cannot be initialized. + """ + enrichers = { + import_by_path(element_type, element): BaseIntermediateHandler.subclass_from_config(enricher_config) + for element_type, enricher_config in config.items() + } + return cls(enrichers=enrichers) # type: ignore + + def get(self, element_type: type[IntermediateElement]) -> BaseIntermediateHandler: + """ + Get the enricher for the element. + + Args: + element_type: The element type. + + Returns: + The enricher for processing the element. + + Raises: + ValueError: If no enricher is found for the element type. + """ + enricher = self._enrichers.get(element_type) + + if isinstance(enricher, BaseIntermediateHandler): + return enricher + + raise ValueError(f"No enricher found for the element type {element_type}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 85b3c6ece..19ab6c901 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -55,10 +55,10 @@ def __init__(self, parsers: Mapping[DocumentType, BaseProvider] | None = None) - parsers: The mapping of document types and their parsers. To override default Unstructured parsers. 
Example: - { - DocumentType.PDF: CustomPDFParser(), - DocumentType.TXT: CustomTextParser(), - } + { + DocumentType.PDF: CustomPDFParser(), + DocumentType.TXT: CustomTextParser(), + } """ self._parsers = {**_DEFAULT_PARSERS, **parsers} if parsers else _DEFAULT_PARSERS diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index e7e9b00ac..5a0cc57af 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -13,7 +13,7 @@ from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter _CallP = ParamSpec("_CallP") @@ -68,7 +68,7 @@ async def __call__( documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, parser_router: DocumentParserRouter, - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> IngestExecutionResult: """ Ingest documents. @@ -148,7 +148,7 @@ async def _parse_document( @staticmethod async def _enrich_elements( elements: Iterable[IntermediateElement], - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> list[Element]: """ Enrich intermediate elements. @@ -159,8 +159,11 @@ async def _enrich_elements( Returns: The list of enriched elements. + + Raises: + ValueError: If no enricher found for the element type. 
""" - grouped_intermediate_elements: dict[type, list[IntermediateElement]] = defaultdict(list) + grouped_intermediate_elements: dict[type[IntermediateElement], list[IntermediateElement]] = defaultdict(list) for element in elements: if isinstance(element, IntermediateElement): grouped_intermediate_elements[type(element)].append(element) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 73b8bbea2..79e376087 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -7,7 +7,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import Element, IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -55,7 +55,7 @@ async def __call__( documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, parser_router: DocumentParserRouter, - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> IngestExecutionResult: """ Ingest documents sequentially in batches. 
@@ -161,7 +161,7 @@ async def _parse_batch( async def _enrich_batch( self, batch: list[IngestTaskResult], - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> list[IngestTaskResult | IngestDocumentResult]: """ Enrich batch of documents. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index 6f5a94dda..96b24a187 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -6,7 +6,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -54,7 +54,7 @@ async def __call__( documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, parser_router: DocumentParserRouter, - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> IngestExecutionResult: """ Ingest documents in parallel in batches. 
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index d753f729b..55ad54b74 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -4,7 +4,7 @@ from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import ( IngestDocumentResult, @@ -23,7 +23,7 @@ async def __call__( documents: Iterable[DocumentMeta | Document | Source], vector_store: VectorStore, parser_router: DocumentParserRouter, - enricher_router: dict[type[IntermediateElement], BaseIntermediateHandler], + enricher_router: ElementEnricherRouter, ) -> IngestExecutionResult: """ Ingest documents sequentially one by one. 
diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 6ce47e213..b65599ec2 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -3,6 +3,7 @@ from ragbits.core.embeddings.noop import NoopEmbedder from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import IngestStrategy @@ -38,12 +39,13 @@ def documents_fixture() -> list[DocumentMeta]: async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: list[DocumentMeta]) -> None: vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) parser_router = DocumentParserRouter({DocumentType.TXT: DummyProvider()}) + enricher_router = ElementEnricherRouter() results = await ingest_strategy( documents=documents, vector_store=vector_store, parser_router=parser_router, - enricher_router={}, + enricher_router=enricher_router, ) assert len(results.successful) == len(documents) From 3b5a83423a42e404b25e6cdf45d1f145ee20131b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 17:37:14 +0100 Subject: [PATCH 09/31] remove intermediate element abstraction --- .../document_search/documents/element.py | 29 ++++--------------- .../ingestion/enrichers/base.py | 8 ++--- .../ingestion/enrichers/images.py | 26 +++++++---------- .../ingestion/enrichers/router.py | 28 +++++++++++++----- .../document_search/ingestion/parsers/base.py | 5 ++-- .../ingestion/parsers/dummy.py | 10 ++----- 
.../ingestion/parsers/unstructured/default.py | 7 ++--- .../ingestion/parsers/unstructured/images.py | 13 ++++----- .../ingestion/strategies/base.py | 21 +++++++------- .../ingestion/strategies/batched.py | 26 ++++++++--------- .../ingestion/strategies/ray.py | 13 ++++----- .../ingestion/strategies/sequential.py | 5 ++-- .../tests/unit/test_intermediate_handlers.py | 8 ++--- 13 files changed, 87 insertions(+), 112 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 26bad58ad..00e9b2262 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -19,26 +19,6 @@ class ElementLocation(BaseModel): coordinates: dict | None = None -class IntermediateElement(BaseModel, ABC): - """ - Represents an intermediate element extracted from a document before final processing. - """ - - element_type: str - document_meta: DocumentMeta - location: ElementLocation | None = None - - -class IntermediateImageElement(IntermediateElement): - """ - Represents an intermediate image element extracted from a document before final processing. - """ - - element_type: str = "image" - image_bytes: bytes - ocr_extracted_text: str - - class Element(BaseModel, ABC): """ An object representing an element in a document. @@ -50,7 +30,7 @@ class Element(BaseModel, ABC): _elements_registry: ClassVar[dict[str, type["Element"]]] = {} - # type: ignore[prop-decorator] + @computed_field # type: ignore[prop-decorator] @property def id(self) -> str: """ @@ -100,6 +80,7 @@ def text_representation(self) -> str | None: The text representation. 
""" + @computed_field # type: ignore[prop-decorator] @property def image_representation(self) -> bytes | None: """ @@ -179,9 +160,9 @@ class ImageElement(Element): """ element_type: str = "image" - description: str - ocr_extracted_text: str image_bytes: SerializableBytes + description: str | None = None + ocr_extracted_text: str | None = None @computed_field # type: ignore[prop-decorator] @property @@ -194,6 +175,7 @@ def text_representation(self) -> str | None: """ if not self.description and not self.ocr_extracted_text: return None + repr = "" if self.description: repr += f"Description: {self.description}\n" @@ -201,6 +183,7 @@ def text_representation(self) -> str | None: repr += f"Extracted text: {self.ocr_extracted_text}" return repr + @computed_field # type: ignore[prop-decorator] @property def image_representation(self) -> bytes: """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index f2bacfbff..fd4bbdb6c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from ragbits.core.utils.config_handling import WithConstructionConfig -from ragbits.document_search.documents.element import Element, IntermediateElement +from ragbits.document_search.documents.element import Element class BaseIntermediateHandler(WithConstructionConfig, ABC): @@ -13,13 +13,13 @@ class BaseIntermediateHandler(WithConstructionConfig, ABC): """ @abstractmethod - async def process(self, intermediate_elements: list[IntermediateElement]) -> list[Element]: + async def process(self, elements: list[Element]) -> list[Element]: """ Process an `IntermediateElement` and return a corresponding `Element`. 
Args: - intermediate_elements: The intermediate elements to be processed. + elements: The elements to be enriched. Returns: - The list of processed elements. + The list of enriched elements. """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py index 3f017cd6b..0d08307b2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py @@ -10,8 +10,6 @@ from ragbits.document_search.documents.element import ( Element, ImageElement, - IntermediateElement, - IntermediateImageElement, ) from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler @@ -50,38 +48,34 @@ def __init__(self, llm: LLM | None = None, prompt: type[Prompt[ImagePromptInput, self._llm = llm or get_preferred_llm(llm_type=LLMType.VISION) self._prompt = prompt or _ImagePrompt - async def process(self, intermediate_elements: list[IntermediateElement]) -> list[Element]: + async def process(self, elements: list[Element]) -> list[Element]: """ Processes a list of intermediate image elements concurrently and generates corresponding ImageElements. Args: - intermediate_elements: List of intermediate image elements to process. + elements: The elements to be enriched. Returns: - List of processed image elements with generated descriptions. + The list of enriched elements. 
""" - tasks = [ - self._process_single(element) - for element in intermediate_elements - if isinstance(element, IntermediateImageElement) - ] - skipped_count = len(intermediate_elements) - len(tasks) + tasks = [self._process_single(element) for element in elements if isinstance(element, ImageElement)] + skipped_count = len(elements) - len(tasks) if skipped_count > 0: print(f"Warning: {skipped_count} elements were skipped due to incorrect type.") return await asyncio.gather(*tasks) - async def _process_single(self, intermediate_element: IntermediateImageElement) -> Element: - input_data = self._prompt.input_type(image=intermediate_element.image_bytes) # type: ignore + async def _process_single(self, element: ImageElement) -> ImageElement: + input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore prompt = self._prompt(input_data) response = await self._llm.generate(prompt) return ImageElement( - document_meta=intermediate_element.document_meta, + document_meta=element.document_meta, description=response, - ocr_extracted_text=intermediate_element.ocr_extracted_text, - image_bytes=intermediate_element.image_bytes, + ocr_extracted_text=element.ocr_extracted_text, + image_bytes=element.image_bytes, ) @classmethod diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index bc2d84436..fcf4d74ef 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -5,12 +5,12 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig, import_by_path from ragbits.document_search.documents import element -from ragbits.document_search.documents.element import IntermediateElement, IntermediateImageElement +from 
ragbits.document_search.documents.element import Element, ImageElement from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler -_DEFAULT_ENRICHERS: dict[type[IntermediateElement], ImageIntermediateHandler] = { - IntermediateImageElement: ImageIntermediateHandler(), +_DEFAULT_ENRICHERS: dict[type[Element], ImageIntermediateHandler] = { + ImageElement: ImageIntermediateHandler(), } @@ -21,11 +21,11 @@ class ElementEnricherRouter(WithConstructionConfig): configuration_key: ClassVar[str] = "enrichers" - _enrichers: Mapping[type[IntermediateElement], ImageIntermediateHandler] + _enrichers: Mapping[type[Element], ImageIntermediateHandler] def __init__( self, - enrichers: Mapping[type[IntermediateElement], ImageIntermediateHandler] | None = None, + enrichers: Mapping[type[Element], ImageIntermediateHandler] | None = None, ) -> None: """ Initialize the ElementEnricherRouter instance. @@ -35,12 +35,24 @@ def __init__( Example: { - IntermediateImageElement: ImageIntermediateHandler(), - IntermediateTextElement: TextIntermediateHandler(), + ImageElement: ImageIntermediateHandler(), + CustomTextElement: TextIntermediateHandler(), } """ self._enrichers = {**_DEFAULT_ENRICHERS, **enrichers} if enrichers else _DEFAULT_ENRICHERS + def __contains__(self, element_type: type[Element]) -> bool: + """ + Check if there is an enricher defined of the given element type. + + Args: + element_type: The element type. + + Returns: + True if the enricher is defined for the element, otherwise False. 
+ """ + return element_type in self._enrichers + @classmethod def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: """ @@ -61,7 +73,7 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: } return cls(enrichers=enrichers) # type: ignore - def get(self, element_type: type[IntermediateElement]) -> BaseIntermediateHandler: + def get(self, element_type: type[Element]) -> BaseIntermediateHandler: """ Get the enricher for the element. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index a2c0cdd22..22e902f8f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -1,10 +1,9 @@ from abc import ABC, abstractmethod -from collections.abc import Sequence from typing import ClassVar from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element, IntermediateElement +from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion import parsers @@ -29,7 +28,7 @@ class BaseProvider(WithConstructionConfig, ABC): SUPPORTED_DOCUMENT_TYPES: set[DocumentType] @abstractmethod - async def process(self, document_meta: DocumentMeta) -> Sequence[Element | IntermediateElement]: + async def process(self, document_meta: DocumentMeta) -> list[Element]: """ Process the document. 
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py index d68876e57..c0f1d24cc 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py @@ -1,11 +1,9 @@ -from collections.abc import Sequence - from ragbits.document_search.documents.document import ( DocumentMeta, DocumentType, TextDocument, ) -from ragbits.document_search.documents.element import Element, ImageElement, IntermediateElement, TextElement +from ragbits.document_search.documents.element import Element, ImageElement, TextElement from ragbits.document_search.ingestion.parsers.base import BaseProvider @@ -17,7 +15,7 @@ class DummyProvider(BaseProvider): SUPPORTED_DOCUMENT_TYPES = {DocumentType.TXT, DocumentType.MD} - async def process(self, document_meta: DocumentMeta) -> list[Element | IntermediateElement]: + async def process(self, document_meta: DocumentMeta) -> list[Element]: """ Process the text document. @@ -43,7 +41,7 @@ class DummyImageProvider(BaseProvider): SUPPORTED_DOCUMENT_TYPES = {DocumentType.JPG, DocumentType.PNG} - async def process(self, document_meta: DocumentMeta) -> Sequence[Element | IntermediateElement]: + async def process(self, document_meta: DocumentMeta) -> list[Element]: """ Process the image document. 
@@ -61,8 +59,6 @@ async def process(self, document_meta: DocumentMeta) -> Sequence[Element | Inter image_bytes = f.read() return [ ImageElement( - description="", - ocr_extracted_text="", image_bytes=image_bytes, document_meta=document_meta, ) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py index 3bb974b8a..1baefda9b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py @@ -1,4 +1,3 @@ -from collections.abc import Sequence from io import BytesIO from pathlib import Path @@ -10,7 +9,7 @@ from ragbits.core.audit import trace from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element, IntermediateElement +from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.parsers.base import BaseProvider from ragbits.document_search.ingestion.parsers.unstructured.utils import check_required_argument, to_text_element @@ -104,7 +103,7 @@ def client(self) -> UnstructuredClient: self._client = UnstructuredClient(api_key_auth=api_key, server_url=api_server) return self._client - async def process(self, document_meta: DocumentMeta) -> Sequence[Element | IntermediateElement]: + async def process(self, document_meta: DocumentMeta) -> list[Element]: """ Process the document using the Unstructured API. 
@@ -158,6 +157,6 @@ async def _chunk_and_convert( elements: list[UnstructuredElement], document_meta: DocumentMeta, document_path: Path, - ) -> Sequence[Element | IntermediateElement]: + ) -> list[Element]: chunked_elements = chunk_elements(elements, **self.chunking_kwargs) return [to_text_element(element, document_meta) for element in chunked_elements] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py index a7b3feb5b..cd278513e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py @@ -1,4 +1,3 @@ -from collections.abc import Sequence from pathlib import Path from PIL import Image @@ -8,7 +7,7 @@ from ragbits.core.llms.base import LLM from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element, IntermediateElement, IntermediateImageElement +from ragbits.document_search.documents.element import Element, ImageElement from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider from ragbits.document_search.ingestion.parsers.unstructured.utils import ( crop_and_convert_to_bytes, @@ -54,14 +53,12 @@ def __init__( async def _chunk_and_convert( self, elements: list[UnstructuredElement], document_meta: DocumentMeta, document_path: Path - ) -> Sequence[Element | IntermediateElement]: + ) -> list[Element]: image_elements = [e for e in elements if e.category == ElementType.IMAGE] other_elements = [e for e in elements if e.category != ElementType.IMAGE] chunked_other_elements = chunk_elements(other_elements, **self.chunking_kwargs) - text_elements: list[Element | IntermediateImageElement] = [ - to_text_element(element, 
document_meta) for element in chunked_other_elements - ] + text_elements: list[Element] = [to_text_element(element, document_meta) for element in chunked_other_elements] if self.ignore_images: return text_elements return text_elements + [ @@ -70,7 +67,7 @@ async def _chunk_and_convert( async def _to_image_element( self, element: UnstructuredElement, document_meta: DocumentMeta, document_path: Path - ) -> IntermediateImageElement: + ) -> ImageElement: top_x, top_y, bottom_x, bottom_y = extract_image_coordinates(element) image = self._load_document_as_image(document_path) top_x, top_y, bottom_x, bottom_y = self._convert_coordinates( @@ -78,7 +75,7 @@ async def _to_image_element( ) img_bytes = crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) - return IntermediateImageElement( + return ImageElement( ocr_extracted_text=element.text, image_bytes=img_bytes, document_meta=document_meta, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 5a0cc57af..2b2287da7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -2,7 +2,7 @@ import random from abc import ABC, abstractmethod from collections import defaultdict -from collections.abc import Awaitable, Callable, Iterable, Sequence +from collections.abc import Awaitable, Callable, Iterable from dataclasses import dataclass, field from types import ModuleType from typing import ClassVar, ParamSpec, TypeVar @@ -10,7 +10,7 @@ from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.core.vector_stores.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta -from ragbits.document_search.documents.element import Element, IntermediateElement +from 
ragbits.document_search.documents.element import Element from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion import strategies from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter @@ -121,7 +121,7 @@ async def _call_with_error_handling( async def _parse_document( document: DocumentMeta | Document | Source, parser_router: DocumentParserRouter, - ) -> Sequence[Element | IntermediateElement]: + ) -> list[Element]: """ Parse a single document and return the elements. @@ -147,15 +147,15 @@ async def _parse_document( @staticmethod async def _enrich_elements( - elements: Iterable[IntermediateElement], + elements: Iterable[Element], enricher_router: ElementEnricherRouter, ) -> list[Element]: """ - Enrich intermediate elements. + Enrich elements for a single document. Args: elements: The document elements to enrich. - enricher_router: The intermediate element enricher router to use. + enricher_router: The element enricher router to use. Returns: The list of enriched elements. @@ -163,15 +163,14 @@ async def _enrich_elements( Raises: ValueError: If no enricher found for the element type. 
""" - grouped_intermediate_elements: dict[type[IntermediateElement], list[IntermediateElement]] = defaultdict(list) + grouped_elements = defaultdict(list) for element in elements: - if isinstance(element, IntermediateElement): - grouped_intermediate_elements[type(element)].append(element) + grouped_elements[type(element)].append(element) grouped_enriched_elements = await asyncio.gather( *[ - enricher.process(intermediate_elements) - for element_type, intermediate_elements in grouped_intermediate_elements.items() + enricher.process(elements) + for element_type, elements in grouped_elements.items() if (enricher := enricher_router.get(element_type)) ] ) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py index 79e376087..17deb69a6 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/batched.py @@ -1,11 +1,11 @@ import asyncio -from collections.abc import Iterable, Sequence +from collections.abc import Iterable from dataclasses import dataclass from itertools import islice from ragbits.core.vector_stores.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta -from ragbits.document_search.documents.element import Element, IntermediateElement +from ragbits.document_search.documents.element import Element from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter @@ -23,7 +23,7 @@ class IngestTaskResult: """ document_uri: str - elements: Sequence[Element | IntermediateElement] + elements: list[Element] class BatchedIngestStrategy(IngestStrategy): @@ -80,20 +80,20 @@ async def 
__call__( successfully_parsed = [result for result in parse_results if isinstance(result, IngestTaskResult)] failed_parsed = [result for result in parse_results if isinstance(result, IngestDocumentResult)] - # Further split successful documents into intermediate and ready - intermediate_parsed = [ + # Further split successful documents into to enrich and ready + to_enrich = [ result for result in successfully_parsed - if any(isinstance(element, IntermediateElement) for element in result.elements) + if any(type(element) in enricher_router for element in result.elements) ] ready_parsed = [ result for result in successfully_parsed - if not any(isinstance(element, IntermediateElement) for element in result.elements) + if not any(type(element) in enricher_router for element in result.elements) ] - # Enrich intermediate documents - enrich_results = await self._enrich_batch(intermediate_parsed, enricher_router) + # Enrich documents + enrich_results = await self._enrich_batch(to_enrich, enricher_router) # Split enriched documents into successful and failed successfully_enriched = [result for result in enrich_results if isinstance(result, IngestTaskResult)] @@ -177,7 +177,7 @@ async def _enrich_batch( *[ self._call_with_error_handling( self._enrich_elements, - elements=[element for element in result.elements if isinstance(element, IntermediateElement)], + elements=[element for element in result.elements if type(element) in enricher_router], enricher_router=enricher_router, ) for result in batch @@ -192,7 +192,7 @@ async def _enrich_batch( if isinstance(response, BaseException) else IngestTaskResult( document_uri=result.document_uri, - elements=[element for element in result.elements if isinstance(element, Element)] + response, + elements=[element for element in result.elements if type(element) not in enricher_router] + response, ) for result, response in zip(batch, responses, strict=True) ] @@ -212,9 +212,7 @@ async def _index_batch( Returns: The task results. 
""" - elements = [ - element for result in batch for element in result.elements if not isinstance(element, IntermediateElement) - ] + elements = [element for result in batch for element in result.elements] try: await self._call_with_error_handling( self._remove_elements, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py index 96b24a187..811927633 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/ray.py @@ -4,7 +4,6 @@ from ragbits.core.utils.decorators import requires_dependencies from ragbits.core.vector_stores.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta -from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter @@ -83,16 +82,16 @@ async def __call__( successfully_parsed = parse_results.filter(lambda data: isinstance(data["results"], IngestTaskResult)) failed_parsed = parse_results.filter(lambda data: isinstance(data["results"], IngestDocumentResult)) - # Further split valid documents into intermediate and ready - intermediate_parsed = successfully_parsed.filter( - lambda data: any(isinstance(element, IntermediateElement) for element in data["results"].elements) + # Further split valid documents into to enrich and ready + to_enrich = successfully_parsed.filter( + lambda data: any(type(element) in enricher_router for element in data["results"].elements) ) ready_parsed = successfully_parsed.filter( - lambda data: not any(isinstance(element, IntermediateElement) for element in 
data["results"].elements) + lambda data: not any(type(element) in enricher_router for element in data["results"].elements) ) - # Enrich intermediate documents - enrich_results = intermediate_parsed.map_batches( + # Enrich documents + enrich_results = to_enrich.map_batches( fn=lambda batch: {"results": asyncio.run(self._enrich_batch(batch["results"], enricher_router))}, batch_size=self.io_batch_size, num_cpus=0, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py index 55ad54b74..58db6679e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py @@ -2,7 +2,6 @@ from ragbits.core.vector_stores.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta -from ragbits.document_search.documents.element import IntermediateElement from ragbits.document_search.documents.sources import Source from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter @@ -49,11 +48,11 @@ async def __call__( ) enriched_elements = await self._call_with_error_handling( self._enrich_elements, - elements=[element for element in parsed_elements if isinstance(element, IntermediateElement)], + elements=[element for element in parsed_elements if type(element) in enricher_router], enricher_router=enricher_router, ) elements = [ - element for element in parsed_elements if not isinstance(element, IntermediateElement) + element for element in parsed_elements if type(element) not in enricher_router ] + enriched_elements await self._call_with_error_handling( diff --git a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py 
b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py index 912d7cb05..73a6c108c 100644 --- a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py +++ b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py @@ -4,7 +4,7 @@ from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.documents.element import ImageElement, IntermediateImageElement +from ragbits.document_search.documents.element import ImageElement from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler, _ImagePrompt @@ -22,8 +22,8 @@ def image_bytes() -> bytes: @pytest.fixture -def intermediate_image_element(image_bytes: bytes) -> IntermediateImageElement: - return IntermediateImageElement( +def intermediate_image_element(image_bytes: bytes) -> ImageElement: + return ImageElement( document_meta=DocumentMeta.create_text_document_from_literal(""), image_bytes=image_bytes, ocr_extracted_text="ocr text", @@ -31,7 +31,7 @@ def intermediate_image_element(image_bytes: bytes) -> IntermediateImageElement: @pytest.mark.asyncio -async def test_process(llm: LiteLLM, intermediate_image_element: IntermediateImageElement): +async def test_process(llm: LiteLLM, intermediate_image_element: ImageElement): handler = ImageIntermediateHandler(llm=llm) results = await handler.process([intermediate_image_element]) From 0d03f8e9a763740c129640df20d90a4766a9d2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 18:15:46 +0100 Subject: [PATCH 10/31] fix tests --- .../src/ragbits/document_search/documents/element.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 00e9b2262..06b9173da 100644 --- 
a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -30,7 +30,6 @@ class Element(BaseModel, ABC): _elements_registry: ClassVar[dict[str, type["Element"]]] = {} - @computed_field # type: ignore[prop-decorator] @property def id(self) -> str: """ @@ -80,7 +79,6 @@ def text_representation(self) -> str | None: The text representation. """ - @computed_field # type: ignore[prop-decorator] @property def image_representation(self) -> bytes | None: """ @@ -183,7 +181,6 @@ def text_representation(self) -> str | None: repr += f"Extracted text: {self.ocr_extracted_text}" return repr - @computed_field # type: ignore[prop-decorator] @property def image_representation(self) -> bytes: """ From fee490e2deaf503495d9d79a8a5b4113c9cc688e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 19:46:10 +0100 Subject: [PATCH 11/31] refactor parser and enricher interfaces --- .../document_search/processing.md | 2 +- docs/how-to/core/component_preferrences.md | 2 +- .../create_custom_execution_strategy.md | 8 +++---- .../document_search/search_documents.md | 6 ++--- .../src/ragbits/core/vector_stores/qdrant.py | 2 -- .../ingestion/enrichers/__init__.py | 6 ++--- .../ingestion/enrichers/base.py | 11 ++++----- .../ingestion/enrichers/images.py | 24 +++++++++---------- .../ingestion/enrichers/router.py | 20 ++++++++-------- .../ingestion/parsers/__init__.py | 4 ++-- .../document_search/ingestion/parsers/base.py | 12 +++++----- .../ingestion/parsers/dummy.py | 10 ++++---- .../ingestion/parsers/router.py | 14 +++++------ .../ingestion/parsers/unstructured/default.py | 6 ++--- .../ingestion/strategies/base.py | 4 ++-- .../tests/integration/test_unstructured.py | 10 ++++---- .../tests/unit/test_document_parsers.py | 14 +++++------ .../tests/unit/test_document_search.py | 4 ++-- .../tests/unit/test_intermediate_handlers.py | 20 
++++++++-------- 19 files changed, 86 insertions(+), 93 deletions(-) diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index 18015cb27..6cf45fc25 100644 --- a/docs/api_reference/document_search/processing.md +++ b/docs/api_reference/document_search/processing.md @@ -3,7 +3,7 @@ ::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter ## Providers -::: ragbits.document_search.ingestion.parsers.base.BaseProvider +::: ragbits.document_search.ingestion.parsers.base.DocumentParser options: heading_level: 3 diff --git a/docs/how-to/core/component_preferrences.md b/docs/how-to/core/component_preferrences.md index 90fb9f5e5..ef7d598ec 100644 --- a/docs/how-to/core/component_preferrences.md +++ b/docs/how-to/core/component_preferrences.md @@ -167,6 +167,6 @@ This is the list of component types for which you can set a preferred configurat | `vector_store` | `ragbits-core` | [`VectorStore`][ragbits.core.vector_stores.base.VectorStore]| | | `history_compressor` | `ragbits-conversations` | [`ConversationHistoryCompressor`][ragbits.conversations.history.compressors.base.ConversationHistoryCompressor]| | | `document_search` | `ragbits-document-search` | [`DocumentSearch`][ragbits.document_search.DocumentSearch]| Specifics: [Configuration](#ds-configuration)| -| `parser` | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider]| | +| `parser` | `ragbits-document-search` | [`DocumentParser`][ragbits.document_search.ingestion.parsers.base.DocumentParser]| | | `rephraser` | `ragbits-document-search` | [`QueryRephraser`][ragbits.document_search.retrieval.rephrasers.QueryRephraser]| | | `reranker` | `ragbits-document-search` | [`Reranker`][ragbits.document_search.retrieval.rerankers.base.Reranker]| | diff --git a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md 
b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md index 4cac9f8f1..b04c03430 100644 --- a/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md +++ b/docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md @@ -21,14 +21,14 @@ from ragbits.document_search.documents.document import Document, DocumentMeta, S from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import IngestStrategy -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser class DelayedExecutionStrategy(IngestStrategy): async def process_documents( self, documents: Sequence[DocumentMeta | Document | Source], processor_router: DocumentParserRouter, - processor_overwrite: BaseProvider | None = None, + processor_overwrite: DocumentParser | None = None, ) -> list[Element]: elements = [] for document in documents: @@ -50,14 +50,14 @@ from ragbits.document_search.documents.document import Document, DocumentMeta, S from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies import IngestStrategy -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser class DelayedExecutionStrategy(IngestStrategy): async def process_documents( self, documents: Sequence[DocumentMeta | Document | Source], processor_router: DocumentParserRouter, - processor_overwrite: BaseProvider | None = None, + processor_overwrite: DocumentParser | None = None, ) -> list[Element]: elements = [] for document in documents: diff --git a/docs/how-to/document_search/search_documents.md 
b/docs/how-to/document_search/search_documents.md index 98cc23332..5835327b3 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -88,14 +88,14 @@ document_search = DocumentSearch( ) ``` -If you want to implement a new provider you should extend the [`BaseProvider`][ragbits.document_search.ingestion.parsers.base.BaseProvider] class: +If you want to implement a new provider you should extend the [`DocumentParser`][ragbits.document_search.ingestion.parsers.base.DocumentParser] class: ```python from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser -class CustomProvider(BaseProvider): +class CustomProvider(DocumentParser): SUPPORTED_DOCUMENT_TYPES = { DocumentType.TXT } # provide supported document types async def process(self, document_meta: DocumentMeta) -> list[Element]: diff --git a/packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py b/packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py index d3a51a329..438db00af 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py +++ b/packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py @@ -71,8 +71,6 @@ def _reconstruct( embedder: Embedder, distance_method: Distance, default_options: VectorStoreOptions, - embedding_name_text: str, - embedding_name_image: str, ) -> QdrantVectorStore: return QdrantVectorStore( client=AsyncQdrantClient(**client_params), diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py index 94620ff30..e89747179 100644 --- 
a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py @@ -1,4 +1,4 @@ -from .base import BaseIntermediateHandler -from .images import ImageIntermediateHandler +from .base import ElementEnricher +from .images import ImageElementEnricher -__all__ = ["BaseIntermediateHandler", "ImageIntermediateHandler"] +__all__ = ["ElementEnricher", "ImageElementEnricher"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index fd4bbdb6c..58f6e2fae 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -4,18 +4,15 @@ from ragbits.document_search.documents.element import Element -class BaseIntermediateHandler(WithConstructionConfig, ABC): +class ElementEnricher(WithConstructionConfig, ABC): """ - Base class for handling `IntermediateElement` processing. - - Implementations of this class should define how to transform an `IntermediateElement` - into a fully processed `Element` using the `process` method. + Base class for element enrichers, responsible for providing additional information about elements. """ @abstractmethod - async def process(self, elements: list[Element]) -> list[Element]: + async def enrich(self, elements: list[Element]) -> list[Element]: """ - Process an `IntermediateElement` and return a corresponding `Element`. + Enrich elements. Args: elements: The elements to be enriched. 
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py index 0d08307b2..bd38be1da 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py @@ -11,7 +11,7 @@ Element, ImageElement, ) -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import ElementEnricher class ImagePromptInput(BaseModel): @@ -27,11 +27,11 @@ class _ImagePrompt(Prompt[ImagePromptInput]): Defines a prompt for processing image elements using an LLM. """ - user_prompt: str = "Describe the content of the image." - image_input_fields: list[str] = ["image"] + user_prompt = "Describe the content of the image." + image_input_fields = ["image"] -class ImageIntermediateHandler(BaseIntermediateHandler): +class ImageElementEnricher(ElementEnricher): """ Provides image processing capabilities using an LLM. """ @@ -48,9 +48,9 @@ def __init__(self, llm: LLM | None = None, prompt: type[Prompt[ImagePromptInput, self._llm = llm or get_preferred_llm(llm_type=LLMType.VISION) self._prompt = prompt or _ImagePrompt - async def process(self, elements: list[Element]) -> list[Element]: + async def enrich(self, elements: list[Element]) -> list[Element]: """ - Processes a list of intermediate image elements concurrently and generates corresponding ImageElements. + Enrich image elements with additinal description of the image. Args: elements: The elements to be enriched. 
@@ -79,18 +79,16 @@ async def _process_single(self, element: ImageElement) -> ImageElement: ) @classmethod - def from_config(cls, config: dict) -> "ImageIntermediateHandler": + def from_config(cls, config: dict) -> "ImageElementEnricher": """ - Create an `ImageIntermediateHandler` instance from a configuration dictionary. + Create an `ImageElementEnricher` instance from a configuration dictionary. Args: config: A dictionary containing the configuration settings. Returns: - An initialized instance of `ImageIntermediateHandler`. + An initialized instance of `ImageElementEnricher`. """ llm: LLM = LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) - prompt_cls = None - if "prompt" in config: - prompt_cls = import_by_path(config["prompt"]) - return cls(llm=llm, prompt=prompt_cls) + prompt = import_by_path(config["prompt"]) if "prompt" in config else None + return cls(llm=llm, prompt=prompt) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index fcf4d74ef..4865b7b20 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -6,11 +6,11 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig, import_by_path from ragbits.document_search.documents import element from ragbits.document_search.documents.element import Element, ImageElement -from ragbits.document_search.ingestion.enrichers.base import BaseIntermediateHandler -from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler +from ragbits.document_search.ingestion.enrichers.base import ElementEnricher +from ragbits.document_search.ingestion.enrichers.images import ImageElementEnricher -_DEFAULT_ENRICHERS: dict[type[Element], 
ImageIntermediateHandler] = { - ImageElement: ImageIntermediateHandler(), +_DEFAULT_ENRICHERS: dict[type[Element], ImageElementEnricher] = { + ImageElement: ImageElementEnricher(), } @@ -21,11 +21,11 @@ class ElementEnricherRouter(WithConstructionConfig): configuration_key: ClassVar[str] = "enrichers" - _enrichers: Mapping[type[Element], ImageIntermediateHandler] + _enrichers: Mapping[type[Element], ImageElementEnricher] def __init__( self, - enrichers: Mapping[type[Element], ImageIntermediateHandler] | None = None, + enrichers: Mapping[type[Element], ImageElementEnricher] | None = None, ) -> None: """ Initialize the ElementEnricherRouter instance. @@ -35,7 +35,7 @@ def __init__( Example: { - ImageElement: ImageIntermediateHandler(), + ImageElement: ImageElementEnricher(), CustomTextElement: TextIntermediateHandler(), } """ @@ -68,12 +68,12 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: InvalidConfigError: If any of the provided parsers cannot be initialized. """ enrichers = { - import_by_path(element_type, element): BaseIntermediateHandler.subclass_from_config(enricher_config) + import_by_path(element_type, element): ElementEnricher.subclass_from_config(enricher_config) for element_type, enricher_config in config.items() } return cls(enrichers=enrichers) # type: ignore - def get(self, element_type: type[Element]) -> BaseIntermediateHandler: + def get(self, element_type: type[Element]) -> ElementEnricher: """ Get the enricher for the element. 
@@ -88,7 +88,7 @@ def get(self, element_type: type[Element]) -> BaseIntermediateHandler: """ enricher = self._enrichers.get(element_type) - if isinstance(enricher, BaseIntermediateHandler): + if isinstance(enricher, ElementEnricher): return enricher raise ValueError(f"No enricher found for the element type {element_type}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py index 930d9bda6..c0ecc00cc 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py @@ -1,4 +1,4 @@ -from .base import BaseProvider +from .base import DocumentParser from .dummy import DummyProvider -__all__ = ["BaseProvider", "DummyProvider"] +__all__ = ["DocumentParser", "DummyProvider"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index 22e902f8f..dfec8c603 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -17,23 +17,23 @@ def __init__(self, provider_name: str, document_type: DocumentType) -> None: super().__init__(message) -class BaseProvider(WithConstructionConfig, ABC): +class DocumentParser(WithConstructionConfig, ABC): """ - A base class for the document processing providers. + Base class for document parsers, responsible for converting the document into a list of elements. 
""" default_module: ClassVar = parsers - configuration_key: ClassVar = "provider" + configuration_key: ClassVar = "parser" SUPPORTED_DOCUMENT_TYPES: set[DocumentType] @abstractmethod - async def process(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ - Process the document. + Parse the document. Args: - document_meta: The document to process. + document_meta: The document to parse. Returns: The list of elements extracted from the document. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py index c0f1d24cc..ee7e2dcaa 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py @@ -4,10 +4,10 @@ TextDocument, ) from ragbits.document_search.documents.element import Element, ImageElement, TextElement -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser -class DummyProvider(BaseProvider): +class DummyProvider(DocumentParser): """ This is a mock provider that returns a TextElement with the content of the document. It should be used for testing purposes only. @@ -15,7 +15,7 @@ class DummyProvider(BaseProvider): SUPPORTED_DOCUMENT_TYPES = {DocumentType.TXT, DocumentType.MD} - async def process(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ Process the text document. 
@@ -33,7 +33,7 @@ async def process(self, document_meta: DocumentMeta) -> list[Element]: return [] -class DummyImageProvider(BaseProvider): +class DummyImageProvider(DocumentParser): """ This is a simple provider that returns an ImageElement with the content of the image and empty text metadata. @@ -41,7 +41,7 @@ class DummyImageProvider(BaseProvider): SUPPORTED_DOCUMENT_TYPES = {DocumentType.JPG, DocumentType.PNG} - async def process(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ Process the image document. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 19ab6c901..26217cb67 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -5,7 +5,7 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider @@ -14,7 +14,7 @@ _default_img_parser = UnstructuredImageProvider() _default_pdf_parser = UnstructuredPdfProvider() -_DEFAULT_PARSERS: dict[DocumentType, BaseProvider] = { +_DEFAULT_PARSERS: dict[DocumentType, DocumentParser] = { DocumentType.TXT: _default_parser, DocumentType.MD: _default_parser, DocumentType.PDF: _default_pdf_parser, @@ -45,9 
+45,9 @@ class DocumentParserRouter(WithConstructionConfig): configuration_key: ClassVar[str] = "parsers" - _parsers: Mapping[DocumentType, BaseProvider] + _parsers: Mapping[DocumentType, DocumentParser] - def __init__(self, parsers: Mapping[DocumentType, BaseProvider] | None = None) -> None: + def __init__(self, parsers: Mapping[DocumentType, DocumentParser] | None = None) -> None: """ Initialize the DocumentParserRouter instance. @@ -77,12 +77,12 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: InvalidConfigError: If any of the provided parsers cannot be initialized. """ parsers = { - DocumentType(document_type): BaseProvider.subclass_from_config(parser_config) + DocumentType(document_type): DocumentParser.subclass_from_config(parser_config) for document_type, parser_config in config.items() } return cls(parsers=parsers) - def get(self, document_meta: DocumentMeta) -> BaseProvider: + def get(self, document_meta: DocumentMeta) -> DocumentParser: """ Get the parser for the document. 
@@ -97,7 +97,7 @@ def get(self, document_meta: DocumentMeta) -> BaseProvider: """ parser = self._parsers.get(document_meta.document_type) - if isinstance(parser, BaseProvider): + if isinstance(parser, DocumentParser): return parser raise ValueError(f"No parser found for the document type {document_meta.document_type}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py index 1baefda9b..37337b7bb 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py @@ -10,7 +10,7 @@ from ragbits.core.audit import trace from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.parsers.base import BaseProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser from ragbits.document_search.ingestion.parsers.unstructured.utils import check_required_argument, to_text_element DEFAULT_PARTITION_KWARGS: dict = { @@ -27,7 +27,7 @@ UNSTRUCTURED_SERVER_URL_ENV = "UNSTRUCTURED_SERVER_URL" -class UnstructuredDefaultProvider(BaseProvider): +class UnstructuredDefaultProvider(DocumentParser): """ A provider that uses the Unstructured API or local SDK to process the documents. """ @@ -103,7 +103,7 @@ def client(self) -> UnstructuredClient: self._client = UnstructuredClient(api_key_auth=api_key, server_url=api_server) return self._client - async def process(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ Process the document using the Unstructured API. 
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 2b2287da7..02254da85 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -143,7 +143,7 @@ async def _parse_document( else document.metadata ) parser = parser_router.get(document_meta) - return await parser.process(document_meta) + return await parser.parse(document_meta) @staticmethod async def _enrich_elements( @@ -169,7 +169,7 @@ async def _enrich_elements( grouped_enriched_elements = await asyncio.gather( *[ - enricher.process(elements) + enricher.enrich(elements) for element_type, elements in grouped_elements.items() if (enricher := enricher_router.get(element_type)) ] diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index 39d280c68..c3ec8fe34 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -32,7 +32,7 @@ async def test_parser_router_processes_text_document_with_unstructured_provider( parser_router = DocumentParserRouter(parsers) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - elements = await parser_router.get(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).parse(document_meta) assert isinstance(parser_router._parsers[DocumentType.TXT], UnstructuredDefaultProvider) assert len(elements) == 1 @@ -47,7 +47,7 @@ async def test_parser_router_processes_md_document_with_unstructured_provider(): parser_router = DocumentParserRouter() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / 
"test_file.md") - elements = await parser_router.get(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).parse(document_meta) assert len(elements) == 1 assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework." # type: ignore @@ -66,7 +66,7 @@ async def test_parser_router_processes_image_document_with_unstructured_provider parser_router = DocumentParserRouter() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / file_name) - elements = await parser_router.get(document_meta).process(document_meta) + elements = await parser_router.get(document_meta).parse(document_meta) assert len(elements) == 7 assert elements[-1].description != "" # type: ignore @@ -88,7 +88,7 @@ async def test_parser_router_processes_image_document_with_unstructured_provider async def test_unstructured_provider_document_with_default_partition_kwargs(use_api: bool): document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") unstructured_provider = UnstructuredDefaultProvider(use_api=use_api) - elements = await unstructured_provider.process(document_meta) + elements = await unstructured_provider.parse(document_meta) assert unstructured_provider.partition_kwargs == DEFAULT_PARTITION_KWARGS assert len(elements) == 1 @@ -112,7 +112,7 @@ async def test_unstructured_provider_document_with_custom_partition_kwargs(use_a document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") partition_kwargs = {"languages": ["pl"], "strategy": "fast"} unstructured_provider = UnstructuredDefaultProvider(use_api=use_api, partition_kwargs=partition_kwargs) - elements = await unstructured_provider.process(document_meta) + elements = await unstructured_provider.parse(document_meta) assert unstructured_provider.partition_kwargs == partition_kwargs assert len(elements) == 1 diff --git 
a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index be9efbcec..ad56e67e8 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -5,7 +5,7 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.base import BaseProvider, DocumentTypeNotSupportedError +from ragbits.document_search.ingestion.parsers.base import DocumentParser, DocumentTypeNotSupportedError from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider @@ -37,7 +37,7 @@ def test_unsupported_provider_validates_supported_document_types_fails(): @patch.dict(os.environ, {}, clear=True) async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): with pytest.raises(ValueError) as err: - await UnstructuredDefaultProvider(use_api=True).process( + await UnstructuredDefaultProvider(use_api=True).parse( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) @@ -47,7 +47,7 @@ async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): @patch.dict(os.environ, {}, clear=True) async def test_unstructured_provider_raises_value_error_when_server_url_not_set(): with pytest.raises(ValueError) as err: - await UnstructuredDefaultProvider(api_key="api_key", use_api=True).process( + await UnstructuredDefaultProvider(api_key="api_key", use_api=True).parse( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) @@ -56,11 +56,11 @@ async def 
test_unstructured_provider_raises_value_error_when_server_url_not_set( def test_subclass_from_config(): config = ObjectContructionConfig.model_validate({"type": "ragbits.document_search.ingestion.parsers:DummyProvider"}) - embedding = BaseProvider.subclass_from_config(config) - assert isinstance(embedding, DummyProvider) + parser = DocumentParser.subclass_from_config(config) + assert isinstance(parser, DummyProvider) def test_subclass_from_config_default_path(): config = ObjectContructionConfig.model_validate({"type": "DummyProvider"}) - embedding = BaseProvider.subclass_from_config(config) - assert isinstance(embedding, DummyProvider) + parser = DocumentParser.subclass_from_config(config) + assert isinstance(parser, DummyProvider) diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 7bb3658c1..8007e3d26 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -18,7 +18,7 @@ ) from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource -from ragbits.document_search.ingestion.parsers import BaseProvider +from ragbits.document_search.ingestion.parsers import DocumentParser from ragbits.document_search.ingestion.parsers.dummy import DummyProvider from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.batched import ( @@ -435,7 +435,7 @@ def __iter__(self): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] # Non-zero embeddings # Create parsers dict with actual provider instance - parsers: Mapping[DocumentType, BaseProvider] = {DocumentType.TXT: DummyProvider()} + parsers: Mapping[DocumentType, DocumentParser] = {DocumentType.TXT: DummyProvider()} # Mock vector store to track operations vector_store = 
InMemoryVectorStore(embedder=embeddings_mock) diff --git a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py index 73a6c108c..2eb112c44 100644 --- a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py +++ b/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py @@ -5,7 +5,7 @@ from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions from ragbits.document_search.documents.document import DocumentMeta from ragbits.document_search.documents.element import ImageElement -from ragbits.document_search.ingestion.enrichers.images import ImageIntermediateHandler, _ImagePrompt +from ragbits.document_search.ingestion.enrichers.images import ImageElementEnricher, _ImagePrompt @pytest.fixture @@ -31,15 +31,15 @@ def intermediate_image_element(image_bytes: bytes) -> ImageElement: @pytest.mark.asyncio -async def test_process(llm: LiteLLM, intermediate_image_element: ImageElement): - handler = ImageIntermediateHandler(llm=llm) - results = await handler.process([intermediate_image_element]) +async def test_process(llm: LiteLLM, image_element: ImageElement): + enricher = ImageElementEnricher(llm=llm) + results = await enricher.enrich([image_element]) assert len(results) == 1 assert isinstance(results[0], ImageElement) assert results[0].description == "response" - assert results[0].image_bytes == intermediate_image_element.image_bytes - assert results[0].ocr_extracted_text == intermediate_image_element.ocr_extracted_text + assert results[0].image_bytes == image_element.image_bytes + assert results[0].ocr_extracted_text == image_element.ocr_extracted_text def test_from_config(): @@ -50,8 +50,8 @@ def test_from_config(): } } - handler = ImageIntermediateHandler.from_config(config) + enricher = ImageElementEnricher.from_config(config) - assert isinstance(handler, ImageIntermediateHandler) - assert isinstance(handler._llm, LiteLLM) - assert 
handler._prompt == _ImagePrompt + assert isinstance(enricher, ImageElementEnricher) + assert isinstance(enricher._llm, LiteLLM) + assert enricher._prompt == _ImagePrompt From fd1ce0c4a4027a381bee9078add038839a02f5ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 19:58:54 +0100 Subject: [PATCH 12/31] fix test --- ...intermediate_handlers.py => test_element_enrichers.py} | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) rename packages/ragbits-document-search/tests/unit/{test_intermediate_handlers.py => test_element_enrichers.py} (86%) diff --git a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py similarity index 86% rename from packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py rename to packages/ragbits-document-search/tests/unit/test_element_enrichers.py index 2eb112c44..ef7fab95c 100644 --- a/packages/ragbits-document-search/tests/unit/test_intermediate_handlers.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py @@ -10,9 +10,8 @@ @pytest.fixture def llm() -> LiteLLM: - options = LiteLLMOptions(mock_response="response") - llm = LiteLLM(model_name="gpt-4o", api_key="key", default_options=options) - return llm + default_options = LiteLLMOptions(mock_response="response") + return LiteLLM(model_name="gpt-4o", default_options=default_options) @pytest.fixture @@ -22,7 +21,7 @@ def image_bytes() -> bytes: @pytest.fixture -def intermediate_image_element(image_bytes: bytes) -> ImageElement: +def image_element(image_bytes: bytes) -> ImageElement: return ImageElement( document_meta=DocumentMeta.create_text_document_from_literal(""), image_bytes=image_bytes, @@ -30,7 +29,6 @@ def intermediate_image_element(image_bytes: bytes) -> ImageElement: ) -@pytest.mark.asyncio async def test_process(llm: LiteLLM, image_element: ImageElement): enricher = ImageElementEnricher(llm=llm) 
results = await enricher.enrich([image_element]) From 12a00aacb9ba1c6148b66cec1be1491ebda5384e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 20:42:01 +0100 Subject: [PATCH 13/31] update CHANGELOG --- packages/ragbits-core/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/ragbits-core/CHANGELOG.md b/packages/ragbits-core/CHANGELOG.md index 3927ccde1..ecc3f9bfe 100644 --- a/packages/ragbits-core/CHANGELOG.md +++ b/packages/ragbits-core/CHANGELOG.md @@ -3,6 +3,8 @@ ## Unreleased ## 0.10.1 (2025-03-19) + +- Fix Qdrant vector store serialization - Better handling of cases when text and image embeddings are mixed in VectorStore ## 0.10.0 (2025-03-17) From 52802072cf55c5ef8e82fec8ca4354c9c32738bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Wed, 19 Mar 2025 20:42:29 +0100 Subject: [PATCH 14/31] rename image enricher module --- .../src/ragbits/document_search/ingestion/enrichers/__init__.py | 2 +- .../document_search/ingestion/enrichers/{images.py => image.py} | 0 .../src/ragbits/document_search/ingestion/enrichers/router.py | 2 +- .../tests/unit/test_element_enrichers.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/{images.py => image.py} (100%) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py index e89747179..5515a38d9 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py @@ -1,4 +1,4 @@ from .base import ElementEnricher -from .images import ImageElementEnricher +from .image import ImageElementEnricher __all__ = ["ElementEnricher", "ImageElementEnricher"] diff --git 
a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/images.py rename to packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index 4865b7b20..ec531be21 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -7,7 +7,7 @@ from ragbits.document_search.documents import element from ragbits.document_search.documents.element import Element, ImageElement from ragbits.document_search.ingestion.enrichers.base import ElementEnricher -from ragbits.document_search.ingestion.enrichers.images import ImageElementEnricher +from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher _DEFAULT_ENRICHERS: dict[type[Element], ImageElementEnricher] = { ImageElement: ImageElementEnricher(), diff --git a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py index ef7fab95c..0f2741dcf 100644 --- a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py @@ -5,7 +5,7 @@ from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions from ragbits.document_search.documents.document import DocumentMeta from ragbits.document_search.documents.element import ImageElement -from ragbits.document_search.ingestion.enrichers.images import ImageElementEnricher, _ImagePrompt +from 
ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher, _ImagePrompt @pytest.fixture From 5c298102329f63f0251e78e37989e35402dc8f14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 21 Mar 2025 01:33:55 +0100 Subject: [PATCH 15/31] refactor unstructured parser --- .../document_search/processing.md | 2 +- .../document_search/search_documents.md | 4 +- .../config/pipeline/parsers/unstructured.yaml | 4 +- .../parsers/unstructured_optimization.yaml | 4 +- .../document-search/basic/evaluate.py | 2 +- .../document-search/basic/optimize.py | 2 +- .../document_search/ingestion/parsers/base.py | 4 +- .../ingestion/parsers/dummy.py | 4 +- .../ingestion/parsers/router.py | 14 +- .../ingestion/parsers/unstructured.py | 274 ++++++++++++++++++ .../parsers/unstructured/__init__.py | 5 - .../ingestion/parsers/unstructured/default.py | 162 ----------- .../ingestion/parsers/unstructured/images.py | 102 ------- .../ingestion/parsers/unstructured/pdf.py | 43 --- .../ingestion/parsers/unstructured/utils.py | 123 -------- .../tests/integration/test_unstructured.py | 14 +- .../tests/unit/test_document_parsers.py | 26 +- 17 files changed, 306 insertions(+), 483 deletions(-) create mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py diff --git 
a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index 6cf45fc25..399c3a5db 100644 --- a/docs/api_reference/document_search/processing.md +++ b/docs/api_reference/document_search/processing.md @@ -11,7 +11,7 @@ options: heading_level: 3 -::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDefaultProvider +::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser options: heading_level: 3 diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 5835327b3..90523fbcb 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -78,13 +78,13 @@ from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider +from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser embedder = LiteLLMEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) document_search = DocumentSearch( vector_store=vector_store, - parser_router=DocumentParserRouter({DocumentType.TXT: UnstructuredDefaultProvider()}) + parser_router=DocumentParserRouter({DocumentType.TXT: UnstructuredDocumentParser()}) ) ``` diff --git a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml index 3ce63e4ba..38eb96aeb 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml @@ -1,5 +1,5 @@ txt: -
type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser config: use_api: false partition_kwargs: @@ -12,7 +12,7 @@ txt: overlap_all: 0 md: - type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser config: use_api: false partition_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml index cc10e47a1..594f6328f 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml @@ -1,5 +1,5 @@ txt: - type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser config: use_api: false partition_kwargs: @@ -16,7 +16,7 @@ txt: overlap_all: 0 md: - type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider + type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser config: use_api: false partition_kwargs: diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py index 74f6e5026..f90bf397d 100644 --- a/examples/evaluation/document-search/basic/evaluate.py +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -53,7 +53,7 @@ }, "parsers": { "txt": { - "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", + "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser", }, }, "source": { diff --git 
a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py index 5662530eb..9108a303e 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/optimize.py @@ -55,7 +55,7 @@ }, "parsers": { "txt": { - "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDefaultProvider", + "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser", }, }, "source": { diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index dfec8c603..7b175ce5a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -25,7 +25,7 @@ class DocumentParser(WithConstructionConfig, ABC): default_module: ClassVar = parsers configuration_key: ClassVar = "parser" - SUPPORTED_DOCUMENT_TYPES: set[DocumentType] + supported_document_types: set[DocumentType] @abstractmethod async def parse(self, document_meta: DocumentMeta) -> list[Element]: @@ -49,5 +49,5 @@ def validate_document_type(self, document_type: DocumentType) -> None: Raises: DocumentTypeNotSupportedError: If the document type is not supported. 
""" - if document_type not in self.SUPPORTED_DOCUMENT_TYPES: + if document_type not in self.supported_document_types: raise DocumentTypeNotSupportedError(provider_name=self.__class__.__name__, document_type=document_type) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py index ee7e2dcaa..a5e731670 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py @@ -13,7 +13,7 @@ class DummyProvider(DocumentParser): It should be used for testing purposes only. """ - SUPPORTED_DOCUMENT_TYPES = {DocumentType.TXT, DocumentType.MD} + supported_document_types = {DocumentType.TXT, DocumentType.MD} async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ @@ -39,7 +39,7 @@ class DummyImageProvider(DocumentParser): and empty text metadata. 
""" - SUPPORTED_DOCUMENT_TYPES = {DocumentType.JPG, DocumentType.PNG} + supported_document_types = {DocumentType.JPG, DocumentType.PNG} async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 26217cb67..6f4e5de03 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -6,18 +6,14 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.base import DocumentParser -from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider -from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider +from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser -_default_parser = UnstructuredDefaultProvider() -_default_img_parser = UnstructuredImageProvider() -_default_pdf_parser = UnstructuredPdfProvider() +_default_parser = UnstructuredDocumentParser() _DEFAULT_PARSERS: dict[DocumentType, DocumentParser] = { DocumentType.TXT: _default_parser, DocumentType.MD: _default_parser, - DocumentType.PDF: _default_pdf_parser, + DocumentType.PDF: _default_parser, DocumentType.DOCX: _default_parser, DocumentType.DOC: _default_parser, DocumentType.PPTX: _default_parser, @@ -33,8 +29,8 @@ DocumentType.RTF: _default_parser, DocumentType.TSV: _default_parser, DocumentType.XML: _default_parser, - DocumentType.JPG: _default_img_parser, - DocumentType.PNG: 
_default_img_parser, + DocumentType.JPG: _default_parser, + DocumentType.PNG: _default_parser, } diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py new file mode 100644 index 000000000..106609d7b --- /dev/null +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -0,0 +1,274 @@ +import io +import os +from io import BytesIO +from pathlib import Path + +from pdf2image import convert_from_path +from PIL import Image +from unstructured.chunking.basic import chunk_elements +from unstructured.documents.elements import Element as UnstructuredElement +from unstructured.documents.elements import ElementType +from unstructured.partition.auto import partition +from unstructured.staging.base import elements_from_dicts +from unstructured_client import UnstructuredClient + +from ragbits.core.audit import traceable +from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.element import Element, ElementLocation, ImageElement, TextElement +from ragbits.document_search.ingestion.parsers.base import DocumentParser + +DEFAULT_PARTITION_KWARGS: dict = { + "strategy": "hi_res", + "languages": ["eng"], + "split_pdf_page": True, + "split_pdf_allow_failed": True, + "split_pdf_concurrency_level": 15, +} + +DEFAULT_CHUNKING_KWARGS: dict = {} + +UNSTRUCTURED_API_KEY_ENV = "UNSTRUCTURED_API_KEY" +UNSTRUCTURED_SERVER_URL_ENV = "UNSTRUCTURED_SERVER_URL" + + +class UnstructuredDocumentParser(DocumentParser): + """ + A provider that uses the Unstructured API or local SDK to process the documents. 
+ """ + + supported_document_types = { + DocumentType.TXT, + DocumentType.MD, + DocumentType.PDF, + DocumentType.DOCX, + DocumentType.DOC, + DocumentType.PPTX, + DocumentType.PPT, + DocumentType.XLSX, + DocumentType.XLS, + DocumentType.CSV, + DocumentType.HTML, + DocumentType.EPUB, + DocumentType.ORG, + DocumentType.ODT, + DocumentType.RST, + DocumentType.RTF, + DocumentType.TSV, + DocumentType.JSON, + DocumentType.XML, + DocumentType.JPG, + DocumentType.PNG, + } + + def __init__( + self, + partition_kwargs: dict | None = None, + chunking_kwargs: dict | None = None, + api_key: str | None = None, + api_server: str | None = None, + use_api: bool = False, + ignore_images: bool = False, + ) -> None: + """Initialize the UnstructuredDocumentParser. + + Args: + partition_kwargs: The additional arguments for the partitioning. Refer to the Unstructured API documentation + for the available options: https://docs.unstructured.io/api-reference/api-services/api-parameters + chunking_kwargs: The additional arguments for the chunking. + api_key: The API key to use for the Unstructured API. If not specified, the UNSTRUCTURED_API_KEY environment + variable will be used. + api_server: The API server URL to use for the Unstructured API. If not specified, the + UNSTRUCTURED_SERVER_URL environment variable will be used. + use_api: whether to use Unstructured API, otherwise use local version of Unstructured library + ignore_images: if True images will be skipped + """ + self.partition_kwargs = partition_kwargs or DEFAULT_PARTITION_KWARGS + self.chunking_kwargs = chunking_kwargs or DEFAULT_CHUNKING_KWARGS + self.api_key = api_key + self.api_server = api_server + self.use_api = use_api + self._client: UnstructuredClient | None = None + self.ignore_images = ignore_images + + @property + def client(self) -> UnstructuredClient: + """ + Get the UnstructuredClient instance. If the client is not initialized, it will be created. + + Returns: + The UnstructuredClient instance. 
+ + Raises: + ValueError: If the UNSTRUCTURED_API_KEY_ENV environment variable is not set. + ValueError: If the UNSTRUCTURED_SERVER_URL_ENV environment variable is not set. + """ + if self._client is not None: + return self._client + api_key = check_required_argument(arg_name="api_key", value=self.api_key, fallback_env=UNSTRUCTURED_API_KEY_ENV) + api_server = check_required_argument( + arg_name="api_server", value=self.api_server, fallback_env=UNSTRUCTURED_SERVER_URL_ENV + ) + self._client = UnstructuredClient(api_key_auth=api_key, server_url=api_server) + return self._client + + @traceable + async def parse(self, document_meta: DocumentMeta) -> list[Element]: + """ + Process the document using the Unstructured API. + + Args: + document_meta: The document to process. + + Returns: + The list of elements extracted from the document. + + Raises: + DocumentTypeNotSupportedError: If the document type is not supported. + + """ + self.validate_document_type(document_meta.document_type) + document = await document_meta.fetch() + + if self.use_api: + res = await self.client.general.partition_async( + request={ + "partition_parameters": { + "files": { + "content": document.local_path.read_bytes(), + "file_name": document.local_path.name, + }, + "coordinates": True, + **self.partition_kwargs, + } + } + ) + elements = elements_from_dicts(res.elements) # type: ignore + else: + elements = partition( + file=BytesIO(document.local_path.read_bytes()), + metadata_filename=document.local_path.name, + **self.partition_kwargs, + ) + return await self._chunk_and_convert(elements, document_meta, document.local_path) + + async def _chunk_and_convert( + self, elements: list[UnstructuredElement], document_meta: DocumentMeta, document_path: Path + ) -> list[Element]: + image_elements = [e for e in elements if e.category == ElementType.IMAGE] + other_elements = [e for e in elements if e.category != ElementType.IMAGE] + chunked_other_elements = chunk_elements(other_elements, 
**self.chunking_kwargs) + + text_elements: list[Element] = [to_text_element(element, document_meta) for element in chunked_other_elements] + if self.ignore_images: + return text_elements + return text_elements + [ + await _to_image_element(element, document_meta, document_path) for element in image_elements + ] + + +async def _to_image_element( + element: UnstructuredElement, document_meta: DocumentMeta, document_path: Path +) -> ImageElement: + top_x, top_y, bottom_x, bottom_y = extract_image_coordinates(element) + + image = ( + convert_from_path(document_path)[0] + if document_meta.document_type == DocumentType.PDF + else Image.open(document_path).convert("RGB") + ) + img_bytes = crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) + return ImageElement( + ocr_extracted_text=element.text, + image_bytes=img_bytes, + document_meta=document_meta, + ) + + +def to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) -> TextElement: + """ + Converts unstructured element to ragbits text element + + Args: + element: element from unstructured + document_meta: metadata of the document + + Returns: + text element + """ + location = to_element_location(element) + return TextElement( + document_meta=document_meta, + content=element.text, + location=location, + ) + + +def to_element_location(element: UnstructuredElement) -> ElementLocation: + """ + Converts unstructured element to element location. 
+ + Args: + element: element from unstructured + + Returns: + element location + """ + metadata = element.metadata.to_dict() + page_number = metadata.get("page_number") + coordinates = metadata.get("coordinates") + return ElementLocation( + page_number=page_number, + coordinates=coordinates, + ) + + +def check_required_argument(value: str | None, arg_name: str, fallback_env: str) -> str: + """ + Checks if given environment variable is set and returns it or raises an error + + Args: + arg_name: name of the variable + value: optional default value + fallback_env: name of the environment variable to get + + Raises: + ValueError: if environment variable is not set + + Returns: + environment variable value + """ + if value is not None: + return value + if (env_value := os.getenv(fallback_env)) is None: + raise ValueError(f"Either pass {arg_name} argument or set the {fallback_env} environment variable") + return env_value + + +def extract_image_coordinates(element: UnstructuredElement) -> tuple[float, float, float, float]: + """ + Extracts image coordinates from unstructured element + Args: + element: element from unstructured + Returns: + x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner + """ + p1, p2, p3, p4 = element.metadata.coordinates.points # type: ignore + return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) + + +def crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: + """ + Crops the image and converts to bytes + Args: + image: PIL image + x0: x of top left corner + y0: y of top left corner + x1: x of bottom right corner + y1: y of bottom right corner + Returns: + bytes of the cropped image + """ + image = image.crop((x0, y0, x1, y1)) + buffered = io.BytesIO() + image.save(buffered, format="JPEG") + return buffered.getvalue() diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py 
b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py deleted file mode 100644 index 6e0588856..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .default import UnstructuredDefaultProvider -from .images import UnstructuredImageProvider -from .pdf import UnstructuredPdfProvider - -__all__ = ["UnstructuredDefaultProvider", "UnstructuredImageProvider", "UnstructuredPdfProvider"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py deleted file mode 100644 index 37337b7bb..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/default.py +++ /dev/null @@ -1,162 +0,0 @@ -from io import BytesIO -from pathlib import Path - -from unstructured.chunking.basic import chunk_elements -from unstructured.documents.elements import Element as UnstructuredElement -from unstructured.partition.auto import partition -from unstructured.staging.base import elements_from_dicts -from unstructured_client import UnstructuredClient - -from ragbits.core.audit import trace -from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element -from ragbits.document_search.ingestion.parsers.base import DocumentParser -from ragbits.document_search.ingestion.parsers.unstructured.utils import check_required_argument, to_text_element - -DEFAULT_PARTITION_KWARGS: dict = { - "strategy": "hi_res", - "languages": ["eng"], - "split_pdf_page": True, - "split_pdf_allow_failed": True, - "split_pdf_concurrency_level": 15, -} - -DEFAULT_CHUNKING_KWARGS: dict = {} - -UNSTRUCTURED_API_KEY_ENV = "UNSTRUCTURED_API_KEY" -UNSTRUCTURED_SERVER_URL_ENV = 
"UNSTRUCTURED_SERVER_URL" - - -class UnstructuredDefaultProvider(DocumentParser): - """ - A provider that uses the Unstructured API or local SDK to process the documents. - """ - - SUPPORTED_DOCUMENT_TYPES = { - DocumentType.TXT, - DocumentType.MD, - DocumentType.DOCX, - DocumentType.DOC, - DocumentType.PPTX, - DocumentType.PPT, - DocumentType.XLSX, - DocumentType.XLS, - DocumentType.CSV, - DocumentType.HTML, - DocumentType.EPUB, - DocumentType.ORG, - DocumentType.ODT, - DocumentType.RST, - DocumentType.RTF, - DocumentType.TSV, - DocumentType.XML, - } - - def __init__( - self, - partition_kwargs: dict | None = None, - chunking_kwargs: dict | None = None, - api_key: str | None = None, - api_server: str | None = None, - use_api: bool = False, - ignore_images: bool = False, - ) -> None: - """Initialize the UnstructuredDefaultProvider. - - Args: - partition_kwargs: The additional arguments for the partitioning. Refer to the Unstructured API documentation - for the available options: https://docs.unstructured.io/api-reference/api-services/api-parameters - chunking_kwargs: The additional arguments for the chunking. - api_key: The API key to use for the Unstructured API. If not specified, the UNSTRUCTURED_API_KEY environment - variable will be used. - api_server: The API server URL to use for the Unstructured API. If not specified, the - UNSTRUCTURED_SERVER_URL environment variable will be used. - use_api: whether to use Unstructured API, otherwise use local version of Unstructured library - ignore_images: if True images will be skipped - """ - self.partition_kwargs = partition_kwargs or DEFAULT_PARTITION_KWARGS - self.chunking_kwargs = chunking_kwargs or DEFAULT_CHUNKING_KWARGS - self.api_key = api_key - self.api_server = api_server - self.use_api = use_api - self._client: UnstructuredClient | None = None - self.ignore_images = ignore_images - - @property - def client(self) -> UnstructuredClient: - """ - Get the UnstructuredClient instance. 
If the client is not initialized, it will be created. - - Returns: - The UnstructuredClient instance. - - Raises: - ValueError: If the UNSTRUCTURED_API_KEY_ENV environment variable is not set. - ValueError: If the UNSTRUCTURED_SERVER_URL_ENV environment variable is not set. - """ - if self._client is not None: - return self._client - api_key = check_required_argument(arg_name="api_key", value=self.api_key, fallback_env=UNSTRUCTURED_API_KEY_ENV) - api_server = check_required_argument( - arg_name="api_server", value=self.api_server, fallback_env=UNSTRUCTURED_SERVER_URL_ENV - ) - self._client = UnstructuredClient(api_key_auth=api_key, server_url=api_server) - return self._client - - async def parse(self, document_meta: DocumentMeta) -> list[Element]: - """ - Process the document using the Unstructured API. - - Args: - document_meta: The document to process. - - Returns: - The list of elements extracted from the document. - - Raises: - DocumentTypeNotSupportedError: If the document type is not supported. 
- - """ - with trace( - partition_arg=self.partition_kwargs, - chunking_arg=self.chunking_kwargs, - api_server=self.api_server, - api_key=self.api_key, - ignore_images=self.ignore_images, - ) as outputs: - self.validate_document_type(document_meta.document_type) - document = await document_meta.fetch() - - if self.use_api: - res = await self.client.general.partition_async( - request={ - "partition_parameters": { - "files": { - "content": document.local_path.read_bytes(), - "file_name": document.local_path.name, - }, - "coordinates": True, - **self.partition_kwargs, - } - } - ) - elements = elements_from_dicts(res.elements) # type: ignore - else: - elements = partition( - file=BytesIO(document.local_path.read_bytes()), - metadata_filename=document.local_path.name, - **self.partition_kwargs, - ) - - outputs.results = await self._chunk_and_convert(elements, document_meta, document.local_path) - return outputs.results - - async def _chunk_and_convert( - # pylint: disable=unused-argument - self, - elements: list[UnstructuredElement], - document_meta: DocumentMeta, - document_path: Path, - ) -> list[Element]: - chunked_elements = chunk_elements(elements, **self.chunking_kwargs) - return [to_text_element(element, document_meta) for element in chunked_elements] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py deleted file mode 100644 index cd278513e..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/images.py +++ /dev/null @@ -1,102 +0,0 @@ -from pathlib import Path - -from PIL import Image -from unstructured.chunking.basic import chunk_elements -from unstructured.documents.elements import Element as UnstructuredElement -from unstructured.documents.elements import ElementType - -from ragbits.core.llms.base import LLM -from 
ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element, ImageElement -from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.parsers.unstructured.utils import ( - crop_and_convert_to_bytes, - extract_image_coordinates, - to_text_element, -) - - -class UnstructuredImageProvider(UnstructuredDefaultProvider): - """ - A specialized provider that handles pngs and jpgs using the Unstructured - """ - - SUPPORTED_DOCUMENT_TYPES = { - DocumentType.JPG, - DocumentType.PNG, - } - - def __init__( - self, - partition_kwargs: dict | None = None, - chunking_kwargs: dict | None = None, - api_key: str | None = None, - api_server: str | None = None, - use_api: bool = False, - llm: LLM | None = None, - ) -> None: - """Initialize the UnstructuredPdfProvider. - - Args: - partition_kwargs: The additional arguments for the partitioning. Refer to the Unstructured API documentation - for the available options: https://docs.unstructured.io/api-reference/api-services/api-parameters - chunking_kwargs: The additional arguments for the chunking. - api_key: The API key to use for the Unstructured API. If not specified, the UNSTRUCTURED_API_KEY environment - variable will be used. - api_server: The API server URL to use for the Unstructured API. If not specified, the - UNSTRUCTURED_SERVER_URL environment variable will be used. - use_api: Whether to use the Unstructured API. If False, the provider will only use the local processing. 
- llm: llm to use - """ - super().__init__(partition_kwargs, chunking_kwargs, api_key, api_server, use_api) - self._llm = llm - - async def _chunk_and_convert( - self, elements: list[UnstructuredElement], document_meta: DocumentMeta, document_path: Path - ) -> list[Element]: - image_elements = [e for e in elements if e.category == ElementType.IMAGE] - other_elements = [e for e in elements if e.category != ElementType.IMAGE] - chunked_other_elements = chunk_elements(other_elements, **self.chunking_kwargs) - - text_elements: list[Element] = [to_text_element(element, document_meta) for element in chunked_other_elements] - if self.ignore_images: - return text_elements - return text_elements + [ - await self._to_image_element(element, document_meta, document_path) for element in image_elements - ] - - async def _to_image_element( - self, element: UnstructuredElement, document_meta: DocumentMeta, document_path: Path - ) -> ImageElement: - top_x, top_y, bottom_x, bottom_y = extract_image_coordinates(element) - image = self._load_document_as_image(document_path) - top_x, top_y, bottom_x, bottom_y = self._convert_coordinates( - top_x, top_y, bottom_x, bottom_y, image.width, image.height, element - ) - - img_bytes = crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) - return ImageElement( - ocr_extracted_text=element.text, - image_bytes=img_bytes, - document_meta=document_meta, - ) - - @staticmethod - def _load_document_as_image( - document_path: Path, - page: int | None = None, # pylint: disable=unused-argument - ) -> Image.Image: - return Image.open(document_path).convert("RGB") - - @staticmethod - def _convert_coordinates( - # pylint: disable=unused-argument - top_x: float, - top_y: float, - bottom_x: float, - bottom_y: float, - image_width: int, - image_height: int, - element: UnstructuredElement, - ) -> tuple[float, float, float, float]: - return top_x, top_y, bottom_x, bottom_y diff --git 
a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py deleted file mode 100644 index 8009809c7..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/pdf.py +++ /dev/null @@ -1,43 +0,0 @@ -from pathlib import Path - -from pdf2image import convert_from_path -from PIL import Image -from unstructured.documents.coordinates import CoordinateSystem, Orientation -from unstructured.documents.elements import Element as UnstructuredElement - -from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider - - -class UnstructuredPdfProvider(UnstructuredImageProvider): - """ - A specialized provider that handles pdfs using the Unstructured - """ - - SUPPORTED_DOCUMENT_TYPES = { - DocumentType.PDF, - } - - @staticmethod - def _load_document_as_image(document_path: Path, page: int | None = None) -> Image.Image: - return convert_from_path(document_path, first_page=page, last_page=page)[0] # type: ignore - - @staticmethod - def _convert_coordinates( - top_x: float, - top_y: float, - bottom_x: float, - bottom_y: float, - image_width: int, - image_height: int, - element: UnstructuredElement, - ) -> tuple[float, float, float, float]: - new_system = CoordinateSystem(image_width, image_height) - new_system.orientation = Orientation.SCREEN - new_top_x, new_top_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( # type: ignore - new_system, top_x, top_y - ) - new_bottom_x, new_bottom_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( # type: ignore - new_system, bottom_x, bottom_y - ) - return new_top_x, new_top_y, new_bottom_x, new_bottom_y diff --git 
a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py deleted file mode 100644 index cdd00b215..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured/utils.py +++ /dev/null @@ -1,123 +0,0 @@ -import io -import os -import warnings as wrngs - -from PIL import Image -from unstructured.documents.elements import Element as UnstructuredElement - -from ragbits.core.llms.base import LLM -from ragbits.core.prompt.base import BasePrompt -from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.documents.element import ElementLocation, TextElement - - -def to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) -> TextElement: - """ - Converts unstructured element to ragbits text element - - Args: - element: element from unstructured - document_meta: metadata of the document - - Returns: - text element - """ - location = to_element_location(element) - return TextElement( - document_meta=document_meta, - content=element.text, - location=location, - ) - - -def to_element_location(element: UnstructuredElement) -> ElementLocation: - """ - Converts unstructured element to element location. 
- - Args: - element: element from unstructured - - Returns: - element location - """ - metadata = element.metadata.to_dict() - page_number = metadata.get("page_number") - coordinates = metadata.get("coordinates") - return ElementLocation( - page_number=page_number, - coordinates=coordinates, - ) - - -def check_required_argument(value: str | None, arg_name: str, fallback_env: str) -> str: - """ - Checks if given environment variable is set and returns it or raises an error - - Args: - arg_name: name of the variable - value: optional default value - fallback_env: name of the environment variable to get - - Raises: - ValueError: if environment variable is not set - - Returns: - environment variable value - """ - if value is not None: - return value - if (env_value := os.getenv(fallback_env)) is None: - raise ValueError(f"Either pass {arg_name} argument or set the {fallback_env} environment variable") - return env_value - - -def extract_image_coordinates(element: UnstructuredElement) -> tuple[float, float, float, float]: - """ - Extracts image coordinates from unstructured element - Args: - element: element from unstructured - Returns: - x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner - """ - p1, p2, p3, p4 = element.metadata.coordinates.points # type: ignore - return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) - - -def crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: - """ - Crops the image and converts to bytes - Args: - image: PIL image - x0: x of top left corner - y0: y of top left corner - x1: x of bottom right corner - y1: y of bottom right corner - Returns: - bytes of the cropped image - """ - image = image.crop((x0, y0, x1, y1)) - buffered = io.BytesIO() - image.save(buffered, format="JPEG") - return buffered.getvalue() - - -class ImageDescriber: - """ - Describes images content using an LLM - """ - - def __init__(self, llm: LLM): - 
self.llm = llm - - async def get_image_description(self, prompt: BasePrompt) -> str: - """ - Provides summary of the image passed with prompt - - Args: - prompt: BasePrompt an instance of a prompt - Returns: - summary of the image - """ - if not prompt.list_images(): - wrngs.warn(message="Image data not provided", category=UserWarning) - return await self.llm.generate(prompt=prompt) diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index c3ec8fe34..5e3eeef0a 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -4,11 +4,11 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter -from ragbits.document_search.ingestion.parsers.unstructured.default import ( +from ragbits.document_search.ingestion.parsers.unstructured import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, UNSTRUCTURED_SERVER_URL_ENV, - UnstructuredDefaultProvider, + UnstructuredDocumentParser, ) from ..helpers import env_vars_not_set @@ -18,9 +18,9 @@ "parsers", [ {}, - pytest.param({DocumentType.TXT: UnstructuredDefaultProvider()}), + pytest.param({DocumentType.TXT: UnstructuredDocumentParser()}), pytest.param( - {DocumentType.TXT: UnstructuredDefaultProvider(use_api=True)}, + {DocumentType.TXT: UnstructuredDocumentParser(use_api=True)}, marks=pytest.mark.skipif( env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), reason="Unstructured API environment variables not set", @@ -34,7 +34,7 @@ async def test_parser_router_processes_text_document_with_unstructured_provider( elements = await parser_router.get(document_meta).parse(document_meta) - assert isinstance(parser_router._parsers[DocumentType.TXT], UnstructuredDefaultProvider) + assert 
isinstance(parser_router._parsers[DocumentType.TXT], UnstructuredDocumentParser) assert len(elements) == 1 assert elements[0].content == "Name of Peppa's brother is George." # type: ignore @@ -87,7 +87,7 @@ async def test_parser_router_processes_image_document_with_unstructured_provider ) async def test_unstructured_provider_document_with_default_partition_kwargs(use_api: bool): document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - unstructured_provider = UnstructuredDefaultProvider(use_api=use_api) + unstructured_provider = UnstructuredDocumentParser(use_api=use_api) elements = await unstructured_provider.parse(document_meta) assert unstructured_provider.partition_kwargs == DEFAULT_PARTITION_KWARGS @@ -111,7 +111,7 @@ async def test_unstructured_provider_document_with_default_partition_kwargs(use_ async def test_unstructured_provider_document_with_custom_partition_kwargs(use_api: bool): document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") partition_kwargs = {"languages": ["pl"], "strategy": "fast"} - unstructured_provider = UnstructuredDefaultProvider(use_api=use_api, partition_kwargs=partition_kwargs) + unstructured_provider = UnstructuredDocumentParser(use_api=use_api, partition_kwargs=partition_kwargs) elements = await unstructured_provider.parse(document_meta) assert unstructured_provider.partition_kwargs == partition_kwargs diff --git a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index ad56e67e8..0357193e1 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -7,37 +7,25 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.base import DocumentParser, DocumentTypeNotSupportedError from 
ragbits.document_search.ingestion.parsers.dummy import DummyProvider -from ragbits.document_search.ingestion.parsers.unstructured.default import UnstructuredDefaultProvider -from ragbits.document_search.ingestion.parsers.unstructured.images import UnstructuredImageProvider -from ragbits.document_search.ingestion.parsers.unstructured.pdf import UnstructuredPdfProvider +from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser -@pytest.mark.parametrize("document_type", UnstructuredDefaultProvider.SUPPORTED_DOCUMENT_TYPES) +@pytest.mark.parametrize("document_type", UnstructuredDocumentParser.supported_document_types) def test_unsupported_provider_validates_supported_document_types_passes(document_type: DocumentType): - UnstructuredDefaultProvider().validate_document_type(document_type) - - -@pytest.mark.parametrize("document_type", UnstructuredPdfProvider.SUPPORTED_DOCUMENT_TYPES) -def test_unsupported_pdf_provider_validates_supported_document_types_passes(document_type: DocumentType): - UnstructuredPdfProvider().validate_document_type(document_type) - - -@pytest.mark.parametrize("document_type", UnstructuredImageProvider.SUPPORTED_DOCUMENT_TYPES) -def test_unsupported_images_provider_validates_supported_document_types_passes(document_type: DocumentType): - UnstructuredImageProvider().validate_document_type(document_type) + UnstructuredDocumentParser().validate_document_type(document_type) def test_unsupported_provider_validates_supported_document_types_fails(): with pytest.raises(DocumentTypeNotSupportedError) as err: - UnstructuredDefaultProvider().validate_document_type(DocumentType.UNKNOWN) + UnstructuredDocumentParser().validate_document_type(DocumentType.UNKNOWN) - assert "Document type unknown is not supported by the UnstructuredDefaultProvider" in str(err.value) + assert "Document type unknown is not supported by the UnstructuredDocumentParser" in str(err.value) @patch.dict(os.environ, {}, clear=True) async def 
test_unstructured_provider_raises_value_error_when_api_key_not_set(): with pytest.raises(ValueError) as err: - await UnstructuredDefaultProvider(use_api=True).parse( + await UnstructuredDocumentParser(use_api=True).parse( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) @@ -47,7 +35,7 @@ async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): @patch.dict(os.environ, {}, clear=True) async def test_unstructured_provider_raises_value_error_when_server_url_not_set(): with pytest.raises(ValueError) as err: - await UnstructuredDefaultProvider(api_key="api_key", use_api=True).parse( + await UnstructuredDocumentParser(api_key="api_key", use_api=True).parse( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) From da82c37a1e6c3153bc7a01abd961080b00f29bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Fri, 21 Mar 2025 22:27:00 +0100 Subject: [PATCH 16/31] add better typing + clean up code for unstructured parser --- .../ingestion/parsers/unstructured.py | 309 +++++++++--------- .../tests/integration/test_unstructured.py | 6 - 2 files changed, 146 insertions(+), 169 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py index 106609d7b..848c99ae1 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -1,32 +1,22 @@ -import io +import base64 import os from io import BytesIO -from pathlib import Path -from pdf2image import convert_from_path from PIL import Image from unstructured.chunking.basic import chunk_elements from unstructured.documents.elements import Element as UnstructuredElement -from unstructured.documents.elements import 
ElementType +from unstructured.documents.elements import ElementType, Points from unstructured.partition.auto import partition from unstructured.staging.base import elements_from_dicts from unstructured_client import UnstructuredClient +from unstructured_client.models.operations import PartitionRequestTypedDict +from unstructured_client.models.shared import FilesTypedDict, PartitionParametersTypedDict, Strategy from ragbits.core.audit import traceable -from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.document import Document, DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element, ElementLocation, ImageElement, TextElement from ragbits.document_search.ingestion.parsers.base import DocumentParser -DEFAULT_PARTITION_KWARGS: dict = { - "strategy": "hi_res", - "languages": ["eng"], - "split_pdf_page": True, - "split_pdf_allow_failed": True, - "split_pdf_concurrency_level": 15, -} - -DEFAULT_CHUNKING_KWARGS: dict = {} - UNSTRUCTURED_API_KEY_ENV = "UNSTRUCTURED_API_KEY" UNSTRUCTURED_SERVER_URL_ENV = "UNSTRUCTURED_SERVER_URL" @@ -69,7 +59,8 @@ def __init__( use_api: bool = False, ignore_images: bool = False, ) -> None: - """Initialize the UnstructuredDocumentParser. + """ + Initialize the UnstructuredDocumentParser instance. Args: partition_kwargs: The additional arguments for the partitioning. 
Refer to the Unstructured API documentation @@ -82,193 +73,185 @@ def __init__( use_api: whether to use Unstructured API, otherwise use local version of Unstructured library ignore_images: if True images will be skipped """ - self.partition_kwargs = partition_kwargs or DEFAULT_PARTITION_KWARGS - self.chunking_kwargs = chunking_kwargs or DEFAULT_CHUNKING_KWARGS - self.api_key = api_key - self.api_server = api_server + self.partition_kwargs = partition_kwargs or {} + self.chunking_kwargs = chunking_kwargs or {} + self.api_key = api_key or os.getenv(UNSTRUCTURED_API_KEY_ENV) + self.api_server = api_server or os.getenv(UNSTRUCTURED_SERVER_URL_ENV) self.use_api = use_api - self._client: UnstructuredClient | None = None self.ignore_images = ignore_images - @property - def client(self) -> UnstructuredClient: - """ - Get the UnstructuredClient instance. If the client is not initialized, it will be created. - - Returns: - The UnstructuredClient instance. - - Raises: - ValueError: If the UNSTRUCTURED_API_KEY_ENV environment variable is not set. - ValueError: If the UNSTRUCTURED_SERVER_URL_ENV environment variable is not set. - """ - if self._client is not None: - return self._client - api_key = check_required_argument(arg_name="api_key", value=self.api_key, fallback_env=UNSTRUCTURED_API_KEY_ENV) - api_server = check_required_argument( - arg_name="api_server", value=self.api_server, fallback_env=UNSTRUCTURED_SERVER_URL_ENV + self._client = UnstructuredClient( + api_key_auth=self.api_key, + server_url=self.api_server, ) - self._client = UnstructuredClient(api_key_auth=api_key, server_url=api_server) - return self._client @traceable async def parse(self, document_meta: DocumentMeta) -> list[Element]: """ - Process the document using the Unstructured API. + Parse the document using the Unstructured API. Args: - document_meta: The document to process. + document_meta: The document to parse. Returns: The list of elements extracted from the document. 
Raises: DocumentTypeNotSupportedError: If the document type is not supported. - """ self.validate_document_type(document_meta.document_type) document = await document_meta.fetch() + elements = await self._partition(document) + return self._chunk(elements, document) + + async def _partition(self, document: Document) -> list[UnstructuredElement]: + """ + Partition the document. + + Args: + document: The document to parse. + + Returns: + The list of extracted elements. + """ if self.use_api: - res = await self.client.general.partition_async( - request={ - "partition_parameters": { - "files": { - "content": document.local_path.read_bytes(), - "file_name": document.local_path.name, - }, - "coordinates": True, - **self.partition_kwargs, - } - } - ) - elements = elements_from_dicts(res.elements) # type: ignore - else: - elements = partition( - file=BytesIO(document.local_path.read_bytes()), - metadata_filename=document.local_path.name, - **self.partition_kwargs, + request = PartitionRequestTypedDict( + partition_parameters=PartitionParametersTypedDict( + files=FilesTypedDict( + content=document.local_path.read_bytes(), + file_name=document.local_path.name, + ), + coordinates=True, + strategy=Strategy.HI_RES, + languages=["eng"], + extract_image_block_types=["Image", "Table"], + split_pdf_allow_failed=True, + split_pdf_concurrency_level=15, + split_pdf_page=True, + include_orig_elements=True, + ), ) - return await self._chunk_and_convert(elements, document_meta, document.local_path) - - async def _chunk_and_convert( - self, elements: list[UnstructuredElement], document_meta: DocumentMeta, document_path: Path - ) -> list[Element]: - image_elements = [e for e in elements if e.category == ElementType.IMAGE] - other_elements = [e for e in elements if e.category != ElementType.IMAGE] - chunked_other_elements = chunk_elements(other_elements, **self.chunking_kwargs) + request["partition_parameters"].update(**self.partition_kwargs) # type: ignore + response = await 
self._client.general.partition_async(request=request) + return elements_from_dicts(response.elements) if response.elements else [] + + return partition( + filename=str(document.local_path), + metadata_filename=document.local_path.name, + extract_image_block_types=["Image", "Table"], + extract_image_block_to_payload=True, + include_orig_elements=True, + **self.partition_kwargs, + ) - text_elements: list[Element] = [to_text_element(element, document_meta) for element in chunked_other_elements] - if self.ignore_images: - return text_elements - return text_elements + [ - await _to_image_element(element, document_meta, document_path) for element in image_elements - ] + def _chunk(self, elements: list[UnstructuredElement], document: Document) -> list[Element]: + """ + Chunk the list of elements. + Args: + elements: The list of unstructured elements. + document: The document to parse. -async def _to_image_element( - element: UnstructuredElement, document_meta: DocumentMeta, document_path: Path -) -> ImageElement: - top_x, top_y, bottom_x, bottom_y = extract_image_coordinates(element) + Returns: + The list of chunked elements. 
+ """ + nonimage_elements = [element for element in elements if element.category != ElementType.IMAGE] - image = ( - convert_from_path(document_path)[0] - if document_meta.document_type == DocumentType.PDF - else Image.open(document_path).convert("RGB") - ) - img_bytes = crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) - return ImageElement( - ocr_extracted_text=element.text, - image_bytes=img_bytes, - document_meta=document_meta, - ) + text_elements: list[Element] = [ + TextElement( + document_meta=document.metadata, + location=self._extract_element_location(element), + content=element.text, + ) + for element in chunk_elements(nonimage_elements, **self.chunking_kwargs) + ] + if self.ignore_images: + return text_elements -def to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) -> TextElement: - """ - Converts unstructured element to ragbits text element + return text_elements + [ + ImageElement( + document_meta=document.metadata, + location=self._extract_element_location(element), + image_bytes=self._extract_image_bytes(element, document), + ocr_extracted_text=element.text, + ) + for element in elements + if element.category == ElementType.IMAGE + ] - Args: - element: element from unstructured - document_meta: metadata of the document + @staticmethod + def _extract_element_location(element: UnstructuredElement) -> ElementLocation: + """ + Convert unstructured element to element location. - Returns: - text element - """ - location = to_element_location(element) - return TextElement( - document_meta=document_meta, - content=element.text, - location=location, - ) + Args: + element: The element from unstructured. + Returns: + The element location. 
+ """ + metadata = element.metadata.to_dict() + return ElementLocation( + page_number=metadata.get("page_number"), + coordinates=metadata.get("coordinates"), + ) -def to_element_location(element: UnstructuredElement) -> ElementLocation: - """ - Converts unstructured element to element location. + def _extract_image_bytes(self, element: UnstructuredElement, document: Document) -> bytes: + """ + Extract image data using alternative methods when element.metadata.image_base64 is empty. - Args: - element: element from unstructured + This handles cases where the Unstructured doesn't properly extract image data, + requiring additional processing. - Returns: - element location - """ - metadata = element.metadata.to_dict() - page_number = metadata.get("page_number") - coordinates = metadata.get("coordinates") - return ElementLocation( - page_number=page_number, - coordinates=coordinates, - ) + Args: + element: The Unstructured image element. + document: The Document to parse. + Return: + The raw image data. + """ + if element.metadata.image_base64: + return base64.b64decode(element.metadata.image_base64) -def check_required_argument(value: str | None, arg_name: str, fallback_env: str) -> str: - """ - Checks if given environment variable is set and returns it or raises an error + elif element.metadata.coordinates and element.metadata.coordinates.points: + image = Image.open(document.local_path).convert("RGB") + top_x, top_y, bottom_x, bottom_y = self._extract_image_coordinates(element.metadata.coordinates.points) + return self._crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) - Args: - arg_name: name of the variable - value: optional default value - fallback_env: name of the environment variable to get + return b"" - Raises: - ValueError: if environment variable is not set + @staticmethod + def _extract_image_coordinates(points: Points) -> tuple[float, float, float, float]: + """ + Extract image coordinates from unstructured element points. 
- Returns: - environment variable value - """ - if value is not None: - return value - if (env_value := os.getenv(fallback_env)) is None: - raise ValueError(f"Either pass {arg_name} argument or set the {fallback_env} environment variable") - return env_value + Args: + points: The Unstructured element points. + Returns: + x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner. + """ + p1, p2, p3, p4 = points + return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) -def extract_image_coordinates(element: UnstructuredElement) -> tuple[float, float, float, float]: - """ - Extracts image coordinates from unstructured element - Args: - element: element from unstructured - Returns: - x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner - """ - p1, p2, p3, p4 = element.metadata.coordinates.points # type: ignore - return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) + @staticmethod + def _crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: + """ + Crop the image and converts to bytes. + Args: + image: The image to crop. + x0: x of top left corner. + y0: y of top left corner. + x1: x of bottom right corner. + y1: y of bottom right corner. -def crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: - """ - Crops the image and converts to bytes - Args: - image: PIL image - x0: x of top left corner - y0: y of top left corner - x1: x of bottom right corner - y1: y of bottom right corner - Returns: - bytes of the cropped image - """ - image = image.crop((x0, y0, x1, y1)) - buffered = io.BytesIO() - image.save(buffered, format="JPEG") - return buffered.getvalue() + Returns: + The bytes of the cropped image. 
+ """ + image = image.crop((x0, y0, x1, y1)) + buffered = BytesIO() + image.save(buffered, format="JPEG") + return buffered.getvalue() diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index 5e3eeef0a..3d9f19f28 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -5,7 +5,6 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.parsers.unstructured import ( - DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, UNSTRUCTURED_SERVER_URL_ENV, UnstructuredDocumentParser, @@ -57,10 +56,6 @@ async def test_parser_router_processes_md_document_with_unstructured_provider(): env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), reason="Unstructured API environment variables not set", ) -@pytest.mark.skipif( - env_vars_not_set(["OPENAI_API_KEY"]), - reason="OpenAI API environment variables not set", -) @pytest.mark.parametrize("file_name", ["transformers_paper_page.pdf", "transformers_paper_page.png"]) async def test_parser_router_processes_image_document_with_unstructured_provider(file_name: str): parser_router = DocumentParserRouter() @@ -90,7 +85,6 @@ async def test_unstructured_provider_document_with_default_partition_kwargs(use_ unstructured_provider = UnstructuredDocumentParser(use_api=use_api) elements = await unstructured_provider.parse(document_meta) - assert unstructured_provider.partition_kwargs == DEFAULT_PARTITION_KWARGS assert len(elements) == 1 assert elements[0].content == "Name of Peppa's brother is George." 
# type: ignore From 2843cf4f9c20fdba714913af9ca1ad8f401a99d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sat, 22 Mar 2025 01:15:56 +0100 Subject: [PATCH 17/31] remove redundant helper methods from parser --- .../ingestion/parsers/unstructured.py | 62 +++++-------------- 1 file changed, 16 insertions(+), 46 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py index 848c99ae1..a747f6ea2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -5,7 +5,7 @@ from PIL import Image from unstructured.chunking.basic import chunk_elements from unstructured.documents.elements import Element as UnstructuredElement -from unstructured.documents.elements import ElementType, Points +from unstructured.documents.elements import ElementType from unstructured.partition.auto import partition from unstructured.staging.base import elements_from_dicts from unstructured_client import UnstructuredClient @@ -79,11 +79,7 @@ def __init__( self.api_server = api_server or os.getenv(UNSTRUCTURED_SERVER_URL_ENV) self.use_api = use_api self.ignore_images = ignore_images - - self._client = UnstructuredClient( - api_key_auth=self.api_key, - server_url=self.api_server, - ) + self._client = UnstructuredClient(api_key_auth=self.api_key, server_url=self.api_server) @traceable async def parse(self, document_meta: DocumentMeta) -> list[Element]: @@ -174,7 +170,7 @@ def _chunk(self, elements: list[UnstructuredElement], document: Document) -> lis ImageElement( document_meta=document.metadata, location=self._extract_element_location(element), - image_bytes=self._extract_image_bytes(element, document), + image_bytes=self._extract_image_element_bytes(element, 
document), ocr_extracted_text=element.text, ) for element in elements @@ -198,7 +194,8 @@ def _extract_element_location(element: UnstructuredElement) -> ElementLocation: coordinates=metadata.get("coordinates"), ) - def _extract_image_bytes(self, element: UnstructuredElement, document: Document) -> bytes: + @staticmethod + def _extract_image_element_bytes(element: UnstructuredElement, document: Document) -> bytes: """ Extract image data using alternative methods when element.metadata.image_base64 is empty. @@ -215,43 +212,16 @@ def _extract_image_bytes(self, element: UnstructuredElement, document: Document) if element.metadata.image_base64: return base64.b64decode(element.metadata.image_base64) - elif element.metadata.coordinates and element.metadata.coordinates.points: - image = Image.open(document.local_path).convert("RGB") - top_x, top_y, bottom_x, bottom_y = self._extract_image_coordinates(element.metadata.coordinates.points) - return self._crop_and_convert_to_bytes(image, top_x, top_y, bottom_x, bottom_y) + if element.metadata.coordinates and element.metadata.coordinates.points: + buffered = BytesIO() + Image.open(document.local_path).convert("RGB").crop( + ( + min(element.metadata.coordinates.points[0][0], element.metadata.coordinates.points[1][0]), + min(element.metadata.coordinates.points[0][1], element.metadata.coordinates.points[3][1]), + max(element.metadata.coordinates.points[2][0], element.metadata.coordinates.points[3][0]), + max(element.metadata.coordinates.points[1][1], element.metadata.coordinates.points[2][1]), + ) + ).save(buffered, format="JPEG") + return buffered.getvalue() return b"" - - @staticmethod - def _extract_image_coordinates(points: Points) -> tuple[float, float, float, float]: - """ - Extract image coordinates from unstructured element points. - - Args: - points: The Unstructured element points. - - Returns: - x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner. 
- """ - p1, p2, p3, p4 = points - return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) - - @staticmethod - def _crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: - """ - Crop the image and converts to bytes. - - Args: - image: The image to crop. - x0: x of top left corner. - y0: y of top left corner. - x1: x of bottom right corner. - y1: y of bottom right corner. - - Returns: - The bytes of the cropped image. - """ - image = image.crop((x0, y0, x1, y1)) - buffered = BytesIO() - image.save(buffered, format="JPEG") - return buffered.getvalue() From 94da0088d2b509cd53a199f02ba21a7de0feeadc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sat, 22 Mar 2025 01:30:10 +0100 Subject: [PATCH 18/31] fix examples --- docs/how-to/document_search/search_documents.md | 2 +- examples/document-search/configurable.py | 2 +- .../advanced/config/experiments/chunking-1000.yaml | 2 +- .../advanced/config/experiments/chunking-250.yaml | 2 +- .../advanced/config/experiments/chunking-500.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 90523fbcb..4453f4db4 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -112,7 +112,7 @@ There is an additional functionality of [`DocumentSearch`][ragbits.document_sear config = { "vector_store": {...}, "reranker": {...}, - "providers": {...}, + "parsers": {...}, "rephraser": {...}, } diff --git a/examples/document-search/configurable.py b/examples/document-search/configurable.py index 542e863c1..e934f3089 100644 --- a/examples/document-search/configurable.py +++ b/examples/document-search/configurable.py @@ -90,7 +90,7 @@ class to rephrase the query. 
}, }, }, - "providers": {"txt": {"type": "DummyProvider"}}, + "parsers": {"txt": {"type": "DummyProvider"}}, "rephraser": { "type": "LLMQueryRephraser", "config": { diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml index 8a47f0fb0..82fe67a09 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml @@ -5,7 +5,7 @@ task: pipeline: config: - providers: + parsers: txt: config: chunking_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml index c01e26dfd..7276e2593 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml @@ -5,7 +5,7 @@ task: pipeline: config: - providers: + parsers: txt: config: chunking_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml index abb95ccb8..2902f3ce5 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml @@ -5,7 +5,7 @@ task: pipeline: config: - providers: + parsers: txt: config: chunking_kwargs: From 8b7c8753eb2b81eeed805708ba485d3e3ed63ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sat, 22 Mar 2025 15:17:36 +0100 Subject: [PATCH 19/31] fix tests --- .gitignore | 2 + .../document_search/processing.md | 2 +- examples/document-search/configurable.py | 2 +- examples/document-search/distributed copy.py | 261 ++++++++++++++++++ 
examples/document-search/multimodal_basic.py | 4 +- examples/document-search/multimodal_chroma.py | 4 +- examples/document-search/multimodal_qdrant.py | 4 +- .../ingestion/enrichers/image.py | 2 +- .../ingestion/enrichers/router.py | 6 - .../ingestion/parsers/__init__.py | 5 +- .../document_search/ingestion/parsers/base.py | 78 ++++-- .../ingestion/parsers/dummy.py | 65 ----- .../ingestion/parsers/exceptions.py | 29 ++ .../ingestion/parsers/router.py | 11 +- .../ingestion/parsers/unstructured.py | 12 +- .../ingestion/strategies/base.py | 9 +- .../img}/transformers_paper_page.png | Bin .../{integration => assets/md}/test_file.md | 0 .../pdf}/transformers_paper_page.pdf | Bin .../tests/integration/test_unstructured.py | 115 +++----- .../tests/unit/test_document_parser_router.py | 8 +- .../tests/unit/test_document_parsers.py | 53 ++-- .../tests/unit/test_document_search.py | 12 +- .../tests/unit/test_ingest_strategies.py | 4 +- 24 files changed, 441 insertions(+), 247 deletions(-) create mode 100644 examples/document-search/distributed copy.py delete mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py create mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py rename packages/ragbits-document-search/tests/{integration => assets/img}/transformers_paper_page.png (100%) rename packages/ragbits-document-search/tests/{integration => assets/md}/test_file.md (100%) rename packages/ragbits-document-search/tests/{integration => assets/pdf}/transformers_paper_page.pdf (100%) diff --git a/.gitignore b/.gitignore index 088ec593e..69de54164 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,5 @@ chroma/ qdrant/ .aider* + +.DS_Store diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md index 399c3a5db..0286911ed 100644 --- a/docs/api_reference/document_search/processing.md +++ 
b/docs/api_reference/document_search/processing.md @@ -7,7 +7,7 @@ options: heading_level: 3 -::: ragbits.document_search.ingestion.parsers.dummy.DummyProvider +::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser options: heading_level: 3 diff --git a/examples/document-search/configurable.py b/examples/document-search/configurable.py index e934f3089..3f7881937 100644 --- a/examples/document-search/configurable.py +++ b/examples/document-search/configurable.py @@ -90,7 +90,7 @@ class to rephrase the query. }, }, }, - "parsers": {"txt": {"type": "DummyProvider"}}, + "parsers": {"txt": {"type": "TextDocumentParser"}}, "rephraser": { "type": "LLMQueryRephraser", "config": { diff --git a/examples/document-search/distributed copy.py b/examples/document-search/distributed copy.py new file mode 100644 index 000000000..c82f9426e --- /dev/null +++ b/examples/document-search/distributed copy.py @@ -0,0 +1,261 @@ +""" +Ragbits Document Search Example: Distributed Ingest + +This example is based on the "Basic" example, but it demonstrates how to ingest documents in a distributed manner. +The distributed ingestion is provided by "RayDistributedIngestStrategy" which uses Ray to parallelize the ingestion process. + +The script performs the following steps: + + 1. Create a list of documents. + 2. Initialize the `LiteLLMEmbedder` class with the OpenAI `text-embedding-3-small` embedding model. + 3. Initialize the `InMemoryVectorStore` class. + 4. Initialize the `DocumentSearch` class with the embedder and the vector store. + 5. Ingest the documents into the `DocumentSearch` instance in a distributed manner. + 6. Search for documents using a query. + 7. Print the search results. 
+ + + docker run -p 6333:6333 qdrant/qdrant + +To run the script, execute the following command: + + ```bash + uv run examples/document-search/distributed.py + ``` +""" + +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "ragbits-document-search[ray]", +# "ragbits-core", +# ] +# /// + +import asyncio + +from qdrant_client import AsyncQdrantClient + +from ragbits.core import audit +from ragbits.core.embeddings.litellm import LiteLLMEmbedder +from ragbits.core.vector_stores.qdrant import QdrantVectorStore +from ragbits.document_search import DocumentSearch +from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.element import Element, TextElement +from ragbits.document_search.ingestion.parsers.base import DocumentParser +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter +from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy + +# ray.init() + +documents = [ + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. 
+ """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. 
+ """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + RIP boiled water. You will be mist. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why doesn't James Bond fart in bed? Because it would blow his cover. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + Why programmers don't like to swim? Because they're scared of the floating points. + """ + ), + DocumentMeta.create_text_document_from_literal( + """ + This one is completely unrelated. + """ + ), +] + + +class Test(DocumentParser): + """ + A provider handling html files from Hexagon Community forum. + """ + + @audit.traceable + async def parse(self, document_meta: DocumentMeta) -> list[Element]: + """ + Processes the Hexagon Community post. + + Args: + document_meta: The document to process. + + Returns: + The list of elements extracted from the document. + + Raises: + DocumentTypeNotSupportedError: If the document type is not supported. + """ + document = await document_meta.fetch() + content = document.local_path.read_text() + # await asyncio.sleep(random.choice([1, 2, 3])) + # if random.choice([True, False]): + # raise ValueError("Dupa") + return [TextElement(content=content, document_meta=document_meta) for _ in range(10)] + # return [ImageElement(image_bytes=b"addad", ocr_extracted_text="xd", document_meta=document_meta) for _ in range(10)] + + +async def main() -> None: + """ + Run the example. 
+ """ + embedder = LiteLLMEmbedder( + model="text-embedding-3-small", + ) + vector_store = QdrantVectorStore( + embedder=embedder, + client=AsyncQdrantClient( + host="localhost", + port=6333, + ), + index_name="jokes", + ) + ingest_strategy = RayDistributedIngestStrategy(3) + # ingest_strategy = BatchedIngestStrategy(3) + parser_router = DocumentParserRouter( + { + # Change to JSON after ragbits bump: + DocumentType.TXT: Test(), + } + ) + document_search = DocumentSearch( + vector_store=vector_store, + parser_router=parser_router, + ingest_strategy=ingest_strategy, + ) + import rich + + # audit.set_trace_handlers("cli") + rich.print(await document_search.ingest(documents)) + + # results = await document_search.search("I'm boiling my water and I need a joke") + # print(results) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/document-search/multimodal_basic.py b/examples/document-search/multimodal_basic.py index 6f00d1fcd..4cea5b4ea 100644 --- a/examples/document-search/multimodal_basic.py +++ b/examples/document-search/multimodal_basic.py @@ -38,7 +38,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.base import ImageDocumentParser from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -59,7 +59,7 @@ async def main() -> None: embedder = VertexAIMultimodelEmbedder() vector_store = InMemoryVectorStore(embedder=embedder) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: ImageDocumentParser()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_chroma.py b/examples/document-search/multimodal_chroma.py index eac4b579a..c732c2eb9 100644 --- a/examples/document-search/multimodal_chroma.py +++ b/examples/document-search/multimodal_chroma.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.base import ImageDocumentParser from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: ImageDocumentParser()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/examples/document-search/multimodal_qdrant.py b/examples/document-search/multimodal_qdrant.py index c9b4fda0e..780e539f8 100644 --- a/examples/document-search/multimodal_qdrant.py +++ b/examples/document-search/multimodal_qdrant.py @@ -41,7 +41,7 @@ from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.ingestion.parsers.dummy import DummyImageProvider +from ragbits.document_search.ingestion.parsers.base import ImageDocumentParser from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter IMAGES_PATH = Path(__file__).parent / "images" @@ -66,7 +66,7 @@ async def main() -> None: embedder=embedder, ) # For this example, we want to skip OCR and make sure that we test direct image embeddings. 
- parser_router = DocumentParserRouter({DocumentType.JPG: DummyImageProvider()}) + parser_router = DocumentParserRouter({DocumentType.JPG: ImageDocumentParser()}) document_search = DocumentSearch( vector_store=vector_store, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index bd38be1da..274a095d2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -74,8 +74,8 @@ async def _process_single(self, element: ImageElement) -> ImageElement: return ImageElement( document_meta=element.document_meta, description=response, - ocr_extracted_text=element.ocr_extracted_text, image_bytes=element.image_bytes, + ocr_extracted_text=element.ocr_extracted_text, ) @classmethod diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index ec531be21..38d0bd414 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -32,12 +32,6 @@ def __init__( Args: enrichers: The mapping of element types and their enrichers. To override default enrichers. 
- - Example: - { - ImageElement: ImageElementEnricher(), - CustomTextElement: TextIntermediateHandler(), - } """ self._enrichers = {**_DEFAULT_ENRICHERS, **enrichers} if enrichers else _DEFAULT_ENRICHERS diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py index c0ecc00cc..1782e7dbf 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py @@ -1,4 +1,3 @@ -from .base import DocumentParser -from .dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.base import DocumentParser, ImageDocumentParser, TextDocumentParser -__all__ = ["DocumentParser", "DummyProvider"] +__all__ = ["DocumentParser", "ImageDocumentParser", "TextDocumentParser"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index 7b175ce5a..e8a529088 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -2,19 +2,10 @@ from typing import ClassVar from ragbits.core.utils.config_handling import WithConstructionConfig -from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element +from ragbits.document_search.documents.document import Document, DocumentType +from ragbits.document_search.documents.element import Element, ImageElement, TextElement from ragbits.document_search.ingestion import parsers - - -class DocumentTypeNotSupportedError(Exception): - """ - Raised when the document type is not supported by the provider. 
- """ - - def __init__(self, provider_name: str, document_type: DocumentType) -> None: - message = f"Document type {document_type.value} is not supported by the {provider_name}" - super().__init__(message) +from ragbits.document_search.ingestion.parsers.exceptions import ParserDocumentNotSupportedError class DocumentParser(WithConstructionConfig, ABC): @@ -28,26 +19,77 @@ class DocumentParser(WithConstructionConfig, ABC): supported_document_types: set[DocumentType] @abstractmethod - async def parse(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document: Document) -> list[Element]: """ Parse the document. Args: - document_meta: The document to parse. + document: The document to parse. Returns: The list of elements extracted from the document. + + Raises: + ParserError: If the parsing of the document failed. """ def validate_document_type(self, document_type: DocumentType) -> None: """ - Check if the provider supports the document type. + Check if the parser supports the document type. Args: - document_type: The document type. + document_type: The document type to validate against the parser. Raises: - DocumentTypeNotSupportedError: If the document type is not supported. + ParserDocumentNotSupportedError: If the document type is not supported. """ if document_type not in self.supported_document_types: - raise DocumentTypeNotSupportedError(provider_name=self.__class__.__name__, document_type=document_type) + raise ParserDocumentNotSupportedError(parser_name=self.__class__.__name__, document_type=document_type) + + +class TextDocumentParser(DocumentParser): + """ + Simple parser that maps a text to the text element. + """ + + supported_document_types = {DocumentType.TXT, DocumentType.MD} + + async def parse(self, document: Document) -> list[Element]: + """ + Parse the document. + + Args: + document: The document to parse. + + Returns: + List with an text element with the text content. 
+ + Raises: + ParserDocumentNotSupportedError: If the document type is not supported by the parser. + """ + self.validate_document_type(document.metadata.document_type) + return [TextElement(content=document.local_path.read_text(), document_meta=document.metadata)] + + +class ImageDocumentParser(DocumentParser): + """ + Simple parser that maps an image to the image element. + """ + + supported_document_types = {DocumentType.JPG, DocumentType.PNG} + + async def parse(self, document: Document) -> list[Element]: + """ + Parse the document. + + Args: + document: The document to parse. + + Returns: + List with an image element with the image content. + + Raises: + ParserDocumentNotSupportedError: If the document type is not supported by the parser. + """ + self.validate_document_type(document.metadata.document_type) + return [ImageElement(image_bytes=document.local_path.read_bytes(), document_meta=document.metadata)] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py deleted file mode 100644 index a5e731670..000000000 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/dummy.py +++ /dev/null @@ -1,65 +0,0 @@ -from ragbits.document_search.documents.document import ( - DocumentMeta, - DocumentType, - TextDocument, -) -from ragbits.document_search.documents.element import Element, ImageElement, TextElement -from ragbits.document_search.ingestion.parsers.base import DocumentParser - - -class DummyProvider(DocumentParser): - """ - This is a mock provider that returns a TextElement with the content of the document. - It should be used for testing purposes only. - """ - - supported_document_types = {DocumentType.TXT, DocumentType.MD} - - async def parse(self, document_meta: DocumentMeta) -> list[Element]: - """ - Process the text document. - - Args: - document_meta: The document to process. 
- - Returns: - List with a single TextElement containing the content of the document. - """ - self.validate_document_type(document_meta.document_type) - - document = await document_meta.fetch() - if isinstance(document, TextDocument): - return [TextElement(content=document.content, document_meta=document_meta)] - return [] - - -class DummyImageProvider(DocumentParser): - """ - This is a simple provider that returns an ImageElement with the content of the image - and empty text metadata. - """ - - supported_document_types = {DocumentType.JPG, DocumentType.PNG} - - async def parse(self, document_meta: DocumentMeta) -> list[Element]: - """ - Process the image document. - - Args: - document_meta: The document to process. - - Returns: - List with a single ImageElement containing the content of the image. - """ - self.validate_document_type(document_meta.document_type) - - document = await document_meta.fetch() - image_path = document.local_path - with open(image_path, "rb") as f: - image_bytes = f.read() - return [ - ImageElement( - image_bytes=image_bytes, - document_meta=document_meta, - ) - ] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py new file mode 100644 index 000000000..0c06bec93 --- /dev/null +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py @@ -0,0 +1,29 @@ +from ragbits.document_search.documents.document import DocumentType + + +class ParserError(Exception): + """ + Class for all exceptions raised by the document parser and router. + """ + + def __init__(self, message: str) -> None: + super().__init__(message) + self.message = message + + +class ParserNotFoundError(ParserError): + """ + Raised when no parser was found for the document type. 
+ """ + + def __init__(self, document_type: DocumentType) -> None: + super().__init__(f"No parser found for the document type {document_type}") + + +class ParserDocumentNotSupportedError(ParserError): + """ + Raised when the document type is not supported by the parser. + """ + + def __init__(self, parser_name: str, document_type: DocumentType) -> None: + super().__init__(f"Document type {document_type.value} is not supported by the {parser_name}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 6f4e5de03..76df52b48 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -6,6 +6,7 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.base import DocumentParser +from ragbits.document_search.ingestion.parsers.exceptions import ParserNotFoundError from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser _default_parser = UnstructuredDocumentParser() @@ -49,12 +50,6 @@ def __init__(self, parsers: Mapping[DocumentType, DocumentParser] | None = None) Args: parsers: The mapping of document types and their parsers. To override default Unstructured parsers. - - Example: - { - DocumentType.PDF: CustomPDFParser(), - DocumentType.TXT: CustomTextParser(), - } """ self._parsers = {**_DEFAULT_PARSERS, **parsers} if parsers else _DEFAULT_PARSERS @@ -89,11 +84,11 @@ def get(self, document_meta: DocumentMeta) -> DocumentParser: The parser for processing the document. Raises: - ValueError: If no parser is found for the document type. 
+ ParserNotFoundError: If no parser is found for the document type. """ parser = self._parsers.get(document_meta.document_type) if isinstance(parser, DocumentParser): return parser - raise ValueError(f"No parser found for the document type {document_meta.document_type}") + raise ParserNotFoundError(document_meta.document_type) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py index a747f6ea2..e2bd72521 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -13,7 +13,7 @@ from unstructured_client.models.shared import FilesTypedDict, PartitionParametersTypedDict, Strategy from ragbits.core.audit import traceable -from ragbits.document_search.documents.document import Document, DocumentMeta, DocumentType +from ragbits.document_search.documents.document import Document, DocumentType from ragbits.document_search.documents.element import Element, ElementLocation, ImageElement, TextElement from ragbits.document_search.ingestion.parsers.base import DocumentParser @@ -82,22 +82,20 @@ def __init__( self._client = UnstructuredClient(api_key_auth=self.api_key, server_url=self.api_server) @traceable - async def parse(self, document_meta: DocumentMeta) -> list[Element]: + async def parse(self, document: Document) -> list[Element]: """ Parse the document using the Unstructured API. Args: - document_meta: The document to parse. + document: The document to parse. Returns: The list of elements extracted from the document. Raises: - DocumentTypeNotSupportedError: If the document type is not supported. + ParserDocumentNotSupportedError: If the document type is not supported by the parser. 
""" - self.validate_document_type(document_meta.document_type) - document = await document_meta.fetch() - + self.validate_document_type(document.metadata.document_type) elements = await self._partition(document) return self._chunk(elements, document) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 02254da85..012a0c65b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -133,7 +133,10 @@ async def _parse_document( The list of elements. Raises: - ValueError: If no parser is found for the document type. + ParserError: If the parsing of the document failed. + ParserDocumentNotSupportedError: If the document type is not supported. + ParserNotFoundError: If no parser is found for the document type. + SourceError: If the download of the document failed. 
""" document_meta = ( await DocumentMeta.from_source(document) @@ -143,7 +146,9 @@ async def _parse_document( else document.metadata ) parser = parser_router.get(document_meta) - return await parser.parse(document_meta) + parser.validate_document_type(document_meta.document_type) + document = await document_meta.fetch() + return await parser.parse(document) @staticmethod async def _enrich_elements( diff --git a/packages/ragbits-document-search/tests/integration/transformers_paper_page.png b/packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png similarity index 100% rename from packages/ragbits-document-search/tests/integration/transformers_paper_page.png rename to packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png diff --git a/packages/ragbits-document-search/tests/integration/test_file.md b/packages/ragbits-document-search/tests/assets/md/test_file.md similarity index 100% rename from packages/ragbits-document-search/tests/integration/test_file.md rename to packages/ragbits-document-search/tests/assets/md/test_file.md diff --git a/packages/ragbits-document-search/tests/integration/transformers_paper_page.pdf b/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf similarity index 100% rename from packages/ragbits-document-search/tests/integration/transformers_paper_page.pdf rename to packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index 3d9f19f28..ea7371dfe 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -2,112 +2,65 @@ import pytest -from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.router import 
DocumentParserRouter +from ragbits.document_search.documents.document import DocumentMeta from ragbits.document_search.ingestion.parsers.unstructured import ( UNSTRUCTURED_API_KEY_ENV, UNSTRUCTURED_SERVER_URL_ENV, UnstructuredDocumentParser, ) - -from ..helpers import env_vars_not_set +from tests.helpers import env_vars_not_set @pytest.mark.parametrize( - "parsers", + "use_api", [ - {}, - pytest.param({DocumentType.TXT: UnstructuredDocumentParser()}), pytest.param( - {DocumentType.TXT: UnstructuredDocumentParser(use_api=True)}, - marks=pytest.mark.skipif( - env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), - reason="Unstructured API environment variables not set", - ), + False, + id="local", ), - ], -) -async def test_parser_router_processes_text_document_with_unstructured_provider(parsers: dict): - parser_router = DocumentParserRouter(parsers) - document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - - elements = await parser_router.get(document_meta).parse(document_meta) - - assert isinstance(parser_router._parsers[DocumentType.TXT], UnstructuredDocumentParser) - assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." # type: ignore - - -@pytest.mark.skipif( - env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), - reason="Unstructured API environment variables not set", -) -async def test_parser_router_processes_md_document_with_unstructured_provider(): - parser_router = DocumentParserRouter() - document_meta = DocumentMeta.from_local_path(Path(__file__).parent / "test_file.md") - - elements = await parser_router.get(document_meta).parse(document_meta) - - assert len(elements) == 1 - assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework." 
# type: ignore - - -@pytest.mark.skipif( - env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), - reason="Unstructured API environment variables not set", -) -@pytest.mark.parametrize("file_name", ["transformers_paper_page.pdf", "transformers_paper_page.png"]) -async def test_parser_router_processes_image_document_with_unstructured_provider(file_name: str): - parser_router = DocumentParserRouter() - document_meta = DocumentMeta.from_local_path(Path(__file__).parent / file_name) - - elements = await parser_router.get(document_meta).parse(document_meta) - - assert len(elements) == 7 - assert elements[-1].description != "" # type: ignore - - -@pytest.mark.parametrize( - "use_api", - [ - False, pytest.param( True, marks=pytest.mark.skipif( env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), reason="Unstructured API environment variables not set", ), + id="api", ), ], ) -async def test_unstructured_provider_document_with_default_partition_kwargs(use_api: bool): - document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - unstructured_provider = UnstructuredDocumentParser(use_api=use_api) - elements = await unstructured_provider.parse(document_meta) - - assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." 
# type: ignore - - @pytest.mark.parametrize( - "use_api", + ("document_metadata", "expected_num_elements"), [ - False, pytest.param( - True, - marks=pytest.mark.skipif( - env_vars_not_set([UNSTRUCTURED_SERVER_URL_ENV, UNSTRUCTURED_API_KEY_ENV]), - reason="Unstructured API environment variables not set", + DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George."), + 1, + id="TextDocument", + ), + pytest.param( + DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md"), + 1, + id="MarkdownDocument", + ), + pytest.param( + DocumentMeta.from_local_path( + Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png" ), + 7, + id="ImageDocument", + ), + pytest.param( + DocumentMeta.from_local_path( + Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf" + ), + 7, + id="PDFDocument", ), ], ) -async def test_unstructured_provider_document_with_custom_partition_kwargs(use_api: bool): - document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - partition_kwargs = {"languages": ["pl"], "strategy": "fast"} - unstructured_provider = UnstructuredDocumentParser(use_api=use_api, partition_kwargs=partition_kwargs) - elements = await unstructured_provider.parse(document_meta) +async def test_unstructured_parser(use_api: bool, document_metadata: DocumentMeta, expected_num_elements: int) -> None: + document = await document_metadata.fetch() + parser = UnstructuredDocumentParser(use_api=use_api) + + elements = await parser.parse(document) - assert unstructured_provider.partition_kwargs == partition_kwargs - assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." 
# type: ignore + assert len(elements) == expected_num_elements diff --git a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py index 87369be74..ea5c1677e 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -1,23 +1,23 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.base import TextDocumentParser from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter async def test_parser_router(): - parser_router = DocumentParserRouter({DocumentType.TXT: DummyProvider()}) + parser_router = DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") parser = parser_router.get(document_meta) - assert isinstance(parser, DummyProvider) + assert isinstance(parser, TextDocumentParser) async def test_parser_router_raises_when_no_parser_found(): parser_router = DocumentParserRouter() - parser_router._parsers = {DocumentType.TXT: DummyProvider()} + parser_router._parsers = {DocumentType.TXT: TextDocumentParser()} document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") diff --git a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index 0357193e1..150c25b41 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -1,54 +1,35 @@ -import os -from unittest.mock import patch - import pytest from ragbits.core.utils.config_handling import 
ObjectContructionConfig -from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.parsers.base import DocumentParser, DocumentTypeNotSupportedError -from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.documents.document import DocumentType +from ragbits.document_search.ingestion.parsers.base import ( + DocumentParser, + TextDocumentParser, +) +from ragbits.document_search.ingestion.parsers.exceptions import ParserDocumentNotSupportedError from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser @pytest.mark.parametrize("document_type", UnstructuredDocumentParser.supported_document_types) -def test_unsupported_provider_validates_supported_document_types_passes(document_type: DocumentType): +def test_parser_validates_supported_document_types_passes(document_type: DocumentType) -> None: UnstructuredDocumentParser().validate_document_type(document_type) -def test_unsupported_provider_validates_supported_document_types_fails(): - with pytest.raises(DocumentTypeNotSupportedError) as err: +def test_parser_validates_supported_document_types_fails() -> None: + with pytest.raises(ParserDocumentNotSupportedError) as err: UnstructuredDocumentParser().validate_document_type(DocumentType.UNKNOWN) - assert "Document type unknown is not supported by the UnstructuredDocumentParser" in str(err.value) -@patch.dict(os.environ, {}, clear=True) -async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): - with pytest.raises(ValueError) as err: - await UnstructuredDocumentParser(use_api=True).parse( - DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - ) - - assert str(err.value) == "Either pass api_key argument or set the UNSTRUCTURED_API_KEY environment variable" - - -@patch.dict(os.environ, {}, clear=True) -async def 
test_unstructured_provider_raises_value_error_when_server_url_not_set(): - with pytest.raises(ValueError) as err: - await UnstructuredDocumentParser(api_key="api_key", use_api=True).parse( - DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") - ) - - assert str(err.value) == "Either pass api_server argument or set the UNSTRUCTURED_SERVER_URL environment variable" - - -def test_subclass_from_config(): - config = ObjectContructionConfig.model_validate({"type": "ragbits.document_search.ingestion.parsers:DummyProvider"}) +def test_subclass_from_config() -> None: + config = ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.parsers:TextDocumentParser"} + ) parser = DocumentParser.subclass_from_config(config) - assert isinstance(parser, DummyProvider) + assert isinstance(parser, TextDocumentParser) -def test_subclass_from_config_default_path(): - config = ObjectContructionConfig.model_validate({"type": "DummyProvider"}) +def test_subclass_from_config_default_path() -> None: + config = ObjectContructionConfig.model_validate({"type": "TextDocumentParser"}) parser = DocumentParser.subclass_from_config(config) - assert isinstance(parser, DummyProvider) + assert isinstance(parser, TextDocumentParser) diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 8007e3d26..871385be5 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -19,7 +19,7 @@ from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import GCSSource, LocalFileSource from ragbits.document_search.ingestion.parsers import DocumentParser -from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.base import TextDocumentParser 
from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.batched import ( BatchedIngestStrategy, @@ -33,7 +33,7 @@ }, }, "reranker": {"type": "NoopReranker"}, - "parsers": {"txt": {"type": "DummyProvider"}}, + "parsers": {"txt": {"type": "TextDocumentParser"}}, "ingest_strategy": {"type": "SequentialIngestStrategy"}, } @@ -70,7 +70,7 @@ async def test_document_search_ingest_from_source(): document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}), ) with tempfile.NamedTemporaryFile(suffix=".txt") as f: @@ -100,7 +100,7 @@ async def test_document_search_ingest(document: DocumentMeta | Document): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}), ) await document_search.ingest([document]) @@ -123,7 +123,7 @@ async def test_document_search_with_search_config(): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] document_search = DocumentSearch( vector_store=InMemoryVectorStore(embedder=embeddings_mock), - parser_router=DocumentParserRouter({DocumentType.TXT: DummyProvider()}), + parser_router=DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}), ) await document_search.ingest([DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George")]) @@ -435,7 +435,7 @@ def __iter__(self): embeddings_mock.embed_text.return_value = [[0.1, 0.1]] # Non-zero embeddings # Create parsers dict with actual provider instance - parsers: Mapping[DocumentType, DocumentParser] = {DocumentType.TXT: DummyProvider()} + parsers: Mapping[DocumentType, 
DocumentParser] = {DocumentType.TXT: TextDocumentParser()} # Mock vector store to track operations vector_store = InMemoryVectorStore(embedder=embeddings_mock) diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index b65599ec2..99b2bc5fb 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -4,7 +4,7 @@ from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter -from ragbits.document_search.ingestion.parsers.dummy import DummyProvider +from ragbits.document_search.ingestion.parsers.base import TextDocumentParser from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter from ragbits.document_search.ingestion.strategies.base import IngestStrategy from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy @@ -38,7 +38,7 @@ def documents_fixture() -> list[DocumentMeta]: async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: list[DocumentMeta]) -> None: vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) - parser_router = DocumentParserRouter({DocumentType.TXT: DummyProvider()}) + parser_router = DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}) enricher_router = ElementEnricherRouter() results = await ingest_strategy( From ef23cd8dc31b6eb8492959ca95d864bab7b2eb19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sat, 22 Mar 2025 15:48:29 +0100 Subject: [PATCH 20/31] fix tests --- examples/document-search/distributed copy.py | 261 ------------------ .../ingestion/parsers/unstructured.py | 14 + .../tests/unit/test_document_parser_router.py | 10 +- 3 files changed, 19 
insertions(+), 266 deletions(-) delete mode 100644 examples/document-search/distributed copy.py diff --git a/examples/document-search/distributed copy.py b/examples/document-search/distributed copy.py deleted file mode 100644 index c82f9426e..000000000 --- a/examples/document-search/distributed copy.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Ragbits Document Search Example: Distributed Ingest - -This example is based on the "Basic" example, but it demonstrates how to ingest documents in a distributed manner. -The distributed ingestion is provided by "RayDistributedIngestStrategy" which uses Ray to parallelize the ingestion process. - -The script performs the following steps: - - 1. Create a list of documents. - 2. Initialize the `LiteLLMEmbedder` class with the OpenAI `text-embedding-3-small` embedding model. - 3. Initialize the `InMemoryVectorStore` class. - 4. Initialize the `DocumentSearch` class with the embedder and the vector store. - 5. Ingest the documents into the `DocumentSearch` instance in a distributed manner. - 6. Search for documents using a query. - 7. Print the search results. 
- - - docker run -p 6333:6333 qdrant/qdrant - -To run the script, execute the following command: - - ```bash - uv run examples/document-search/distributed.py - ``` -""" - -# /// script -# requires-python = ">=3.10" -# dependencies = [ -# "ragbits-document-search[ray]", -# "ragbits-core", -# ] -# /// - -import asyncio - -from qdrant_client import AsyncQdrantClient - -from ragbits.core import audit -from ragbits.core.embeddings.litellm import LiteLLMEmbedder -from ragbits.core.vector_stores.qdrant import QdrantVectorStore -from ragbits.document_search import DocumentSearch -from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.documents.element import Element, TextElement -from ragbits.document_search.ingestion.parsers.base import DocumentParser -from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter -from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy - -# ray.init() - -documents = [ - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. 
- """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. 
- """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - RIP boiled water. You will be mist. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why doesn't James Bond fart in bed? Because it would blow his cover. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - Why programmers don't like to swim? Because they're scared of the floating points. - """ - ), - DocumentMeta.create_text_document_from_literal( - """ - This one is completely unrelated. - """ - ), -] - - -class Test(DocumentParser): - """ - A provider handling html files from Hexagon Community forum. - """ - - @audit.traceable - async def parse(self, document_meta: DocumentMeta) -> list[Element]: - """ - Processes the Hexagon Community post. - - Args: - document_meta: The document to process. - - Returns: - The list of elements extracted from the document. - - Raises: - DocumentTypeNotSupportedError: If the document type is not supported. - """ - document = await document_meta.fetch() - content = document.local_path.read_text() - # await asyncio.sleep(random.choice([1, 2, 3])) - # if random.choice([True, False]): - # raise ValueError("Dupa") - return [TextElement(content=content, document_meta=document_meta) for _ in range(10)] - # return [ImageElement(image_bytes=b"addad", ocr_extracted_text="xd", document_meta=document_meta) for _ in range(10)] - - -async def main() -> None: - """ - Run the example. 
- """ - embedder = LiteLLMEmbedder( - model="text-embedding-3-small", - ) - vector_store = QdrantVectorStore( - embedder=embedder, - client=AsyncQdrantClient( - host="localhost", - port=6333, - ), - index_name="jokes", - ) - ingest_strategy = RayDistributedIngestStrategy(3) - # ingest_strategy = BatchedIngestStrategy(3) - parser_router = DocumentParserRouter( - { - # Change to JSON after ragbits bump: - DocumentType.TXT: Test(), - } - ) - document_search = DocumentSearch( - vector_store=vector_store, - parser_router=parser_router, - ingest_strategy=ingest_strategy, - ) - import rich - - # audit.set_trace_handlers("cli") - rich.print(await document_search.ingest(documents)) - - # results = await document_search.search("I'm boiling my water and I need a joke") - # print(results) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py index e2bd72521..00a992561 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -1,8 +1,10 @@ import base64 +import inspect import os from io import BytesIO from PIL import Image +from typing_extensions import Self from unstructured.chunking.basic import chunk_elements from unstructured.documents.elements import Element as UnstructuredElement from unstructured.documents.elements import ElementType @@ -81,6 +83,18 @@ def __init__( self.ignore_images = ignore_images self._client = UnstructuredClient(api_key_auth=self.api_key, server_url=self.api_server) + def __reduce__(self) -> tuple[type[Self], tuple]: + """ + Enables the UnstructuredDocumentParser to be pickled and unpickled. + + Returns: + The tuple of class and its arguments that allows object reconstruction. 
+ """ + return self.__class__, tuple( + self.__getattribute__(param_name) + for param_name in list(inspect.signature(self.__class__.__init__).parameters)[1:] + ) + @traceable async def parse(self, document: Document) -> list[Element]: """ diff --git a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py index ea5c1677e..69d806156 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -2,10 +2,11 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.parsers.base import TextDocumentParser +from ragbits.document_search.ingestion.parsers.exceptions import ParserNotFoundError from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter -async def test_parser_router(): +async def test_parser_router() -> None: parser_router = DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") @@ -15,15 +16,14 @@ async def test_parser_router(): assert isinstance(parser, TextDocumentParser) -async def test_parser_router_raises_when_no_parser_found(): +async def test_parser_router_raises_when_no_parser_found() -> None: parser_router = DocumentParserRouter() parser_router._parsers = {DocumentType.TXT: TextDocumentParser()} document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") - document_meta.document_type = DocumentType.PDF - with pytest.raises(ValueError) as err: - _ = parser_router.get(document_meta) + with pytest.raises(ParserNotFoundError) as err: + parser_router.get(document_meta) assert str(err.value) == f"No parser found for the document type {DocumentType.PDF}" From 5aa84e01d61aeab86e3befed45939cc991ec6d51 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 14:33:36 +0100 Subject: [PATCH 21/31] unify interfaces --- .../ingestion/enrichers/__init__.py | 7 +- .../ingestion/enrichers/base.py | 25 +++++- .../ingestion/enrichers/exceptions.py | 29 +++++++ .../ingestion/enrichers/image.py | 87 ++++++++++--------- .../ingestion/enrichers/router.py | 15 ++-- .../ingestion/parsers/__init__.py | 3 +- .../document_search/ingestion/parsers/base.py | 7 +- .../ingestion/parsers/unstructured.py | 2 +- .../ingestion/strategies/base.py | 16 ++-- .../tests/unit/test_document_parsers.py | 12 +-- .../tests/unit/test_element_enrichers.py | 23 +++-- 11 files changed, 150 insertions(+), 76 deletions(-) create mode 100644 packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py index 5515a38d9..24ce095f9 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py @@ -1,4 +1,5 @@ -from .base import ElementEnricher -from .image import ImageElementEnricher +from ragbits.document_search.ingestion.enrichers.base import ElementEnricher +from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter -__all__ = ["ElementEnricher", "ImageElementEnricher"] +__all__ = ["ElementEnricher", "ElementEnricherRouter", "ImageElementEnricher"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index 58f6e2fae..13ed2fbf9 100644 --- 
a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -1,16 +1,20 @@ from abc import ABC, abstractmethod +from typing import Generic, TypeVar from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.document_search.documents.element import Element +from ragbits.document_search.ingestion.enrichers.exceptions import EnricherElementNotSupportedError +ElementT = TypeVar("ElementT", bound=Element) -class ElementEnricher(WithConstructionConfig, ABC): + +class ElementEnricher(Generic[ElementT], WithConstructionConfig, ABC): """ Base class for element enrichers, responsible for providing additional information about elements. """ @abstractmethod - async def enrich(self, elements: list[Element]) -> list[Element]: + async def enrich(self, elements: list[ElementT]) -> list[ElementT]: """ Enrich elements. @@ -19,4 +23,21 @@ async def enrich(self, elements: list[Element]) -> list[Element]: Returns: The list of enriched elements. + + Raises: + EnricherError: If the enrichment of the elements failed. + """ + + @classmethod + def validate_element_type(cls, element_type: type[Element]) -> None: + """ + Check if the enricher supports the enricher type. + + Args: + element_type: The element type to validate against the enricher. + + Raises: + EnricherElementNotSupportedError: If the element type is not supported. 
""" + if element_type != cls.__orig_bases__[0].__args__[0]: # type: ignore + raise EnricherElementNotSupportedError(enricher_name=cls.__name__, element_type=element_type) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py new file mode 100644 index 000000000..227a4ec82 --- /dev/null +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py @@ -0,0 +1,29 @@ +from ragbits.document_search.documents.element import Element + + +class EnricherError(Exception): + """ + Class for all exceptions raised by the element enricher and router. + """ + + def __init__(self, message: str) -> None: + super().__init__(message) + self.message = message + + +class EnricherNotFoundError(EnricherError): + """ + Raised when no enricher was found for the element type. + """ + + def __init__(self, element_type: type[Element]) -> None: + super().__init__(f"No enricher found for the element type {element_type}") + + +class EnricherElementNotSupportedError(EnricherError): + """ + Raised when the element type is not supported by the enricher. 
+ """ + + def __init__(self, enricher_name: str, element_type: type[Element]) -> None: + super().__init__(f"Element type {element_type} is not supported by the {enricher_name}") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index 274a095d2..81074dc7e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -1,54 +1,59 @@ -import asyncio -from typing import Any - from pydantic import BaseModel from ragbits.core.llms.base import LLM, LLMType from ragbits.core.llms.factory import get_preferred_llm from ragbits.core.prompt import Prompt from ragbits.core.utils.config_handling import ObjectContructionConfig, import_by_path -from ragbits.document_search.documents.element import ( - Element, - ImageElement, -) +from ragbits.document_search.documents.element import ImageElement from ragbits.document_search.ingestion.enrichers.base import ElementEnricher -class ImagePromptInput(BaseModel): +class ImageDescriberInput(BaseModel): """ - Represents the input for an image processing prompt. + Input data for an image describer prompt. """ image: bytes -class _ImagePrompt(Prompt[ImagePromptInput]): +class ImageDescriberOutput(BaseModel): + """ + Output data for an image describer prompt. + """ + + description: str + + +class ImageDescriberPrompt(Prompt[ImageDescriberInput, ImageDescriberOutput]): """ - Defines a prompt for processing image elements using an LLM. + Prompt for describing image elements using LLM. """ user_prompt = "Describe the content of the image." image_input_fields = ["image"] -class ImageElementEnricher(ElementEnricher): +class ImageElementEnricher(ElementEnricher[ImageElement]): """ - Provides image processing capabilities using an LLM. 
+ Enricher that describes image elements using LLM. """ - def __init__(self, llm: LLM | None = None, prompt: type[Prompt[ImagePromptInput, Any]] | None = None) -> None: + def __init__( + self, + llm: LLM | None = None, + prompt: type[Prompt[ImageDescriberInput, ImageDescriberOutput]] | None = None, + ) -> None: """ - Initializes the ImageProvider. + Initialize the ImageElementEnricher instance. Args: - llm: The language model to use for processing images. + llm: The language model to use for describing images. prompt: The prompt class to use. - Defaults to `_ImagePrompt` if not provided. """ self._llm = llm or get_preferred_llm(llm_type=LLMType.VISION) - self._prompt = prompt or _ImagePrompt + self._prompt = prompt or ImageDescriberPrompt - async def enrich(self, elements: list[Element]) -> list[Element]: + async def enrich(self, elements: list[ImageElement]) -> list[ImageElement]: """ Enrich image elements with additinal description of the image. @@ -57,26 +62,28 @@ async def enrich(self, elements: list[Element]) -> list[Element]: Returns: The list of enriched elements. - """ - tasks = [self._process_single(element) for element in elements if isinstance(element, ImageElement)] - skipped_count = len(elements) - len(tasks) - - if skipped_count > 0: - print(f"Warning: {skipped_count} elements were skipped due to incorrect type.") - - return await asyncio.gather(*tasks) - async def _process_single(self, element: ImageElement) -> ImageElement: - input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore - prompt = self._prompt(input_data) - response = await self._llm.generate(prompt) - - return ImageElement( - document_meta=element.document_meta, - description=response, - image_bytes=element.image_bytes, - ocr_extracted_text=element.ocr_extracted_text, - ) + Raises: + EnricherElementNotSupportedError: If the element type is not supported. + LLMError: If LLM generation fails. 
+ """ + responses: list[ImageDescriberOutput] = [] + for element in elements: + self.validate_element_type(type(element)) + if isinstance(element, ImageElement): + input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore + prompt = self._prompt(input_data) + responses.append(await self._llm.generate(prompt)) + + return [ + ImageElement( + document_meta=element.document_meta, + description=response.description, + image_bytes=element.image_bytes, + ocr_extracted_text=element.ocr_extracted_text, + ) + for element, response in zip(elements, responses, strict=True) + ] @classmethod def from_config(cls, config: dict) -> "ImageElementEnricher": @@ -84,10 +91,10 @@ def from_config(cls, config: dict) -> "ImageElementEnricher": Create an `ImageElementEnricher` instance from a configuration dictionary. Args: - config: A dictionary containing the configuration settings. + config: The dictionary containing the configuration settings. Returns: - An initialized instance of `ImageElementEnricher`. + The initialized instance of `ImageElementEnricher`. 
""" llm: LLM = LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) prompt = import_by_path(config["prompt"]) if "prompt" in config else None diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index 38d0bd414..fb5c4c6b2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -7,9 +7,10 @@ from ragbits.document_search.documents import element from ragbits.document_search.documents.element import Element, ImageElement from ragbits.document_search.ingestion.enrichers.base import ElementEnricher +from ragbits.document_search.ingestion.enrichers.exceptions import EnricherNotFoundError from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher -_DEFAULT_ENRICHERS: dict[type[Element], ImageElementEnricher] = { +_DEFAULT_ENRICHERS: dict[type[Element], ElementEnricher] = { ImageElement: ImageElementEnricher(), } @@ -21,11 +22,11 @@ class ElementEnricherRouter(WithConstructionConfig): configuration_key: ClassVar[str] = "enrichers" - _enrichers: Mapping[type[Element], ImageElementEnricher] + _enrichers: Mapping[type[Element], ElementEnricher] def __init__( self, - enrichers: Mapping[type[Element], ImageElementEnricher] | None = None, + enrichers: Mapping[type[Element], ElementEnricher] | None = None, ) -> None: """ Initialize the ElementEnricherRouter instance. @@ -61,11 +62,11 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: Raises: InvalidConfigError: If any of the provided parsers cannot be initialized. 
""" - enrichers = { + enrichers: dict[type[Element], ElementEnricher] = { import_by_path(element_type, element): ElementEnricher.subclass_from_config(enricher_config) for element_type, enricher_config in config.items() } - return cls(enrichers=enrichers) # type: ignore + return cls(enrichers=enrichers) def get(self, element_type: type[Element]) -> ElementEnricher: """ @@ -78,11 +79,11 @@ def get(self, element_type: type[Element]) -> ElementEnricher: The enricher for processing the element. Raises: - ValueError: If no enricher is found for the element type. + EnricherNotFoundError: If no enricher is found for the element type. """ enricher = self._enrichers.get(element_type) if isinstance(enricher, ElementEnricher): return enricher - raise ValueError(f"No enricher found for the element type {element_type}") + raise EnricherNotFoundError(element_type) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py index 1782e7dbf..e2a5ef78d 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py @@ -1,3 +1,4 @@ from ragbits.document_search.ingestion.parsers.base import DocumentParser, ImageDocumentParser, TextDocumentParser +from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter -__all__ = ["DocumentParser", "ImageDocumentParser", "TextDocumentParser"] +__all__ = ["DocumentParser", "DocumentParserRouter", "ImageDocumentParser", "TextDocumentParser"] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index e8a529088..9945be208 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ 
b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -33,7 +33,8 @@ async def parse(self, document: Document) -> list[Element]: ParserError: If the parsing of the document failed. """ - def validate_document_type(self, document_type: DocumentType) -> None: + @classmethod + def validate_document_type(cls, document_type: DocumentType) -> None: """ Check if the parser supports the document type. @@ -43,8 +44,8 @@ def validate_document_type(self, document_type: DocumentType) -> None: Raises: ParserDocumentNotSupportedError: If the document type is not supported. """ - if document_type not in self.supported_document_types: - raise ParserDocumentNotSupportedError(parser_name=self.__class__.__name__, document_type=document_type) + if document_type not in cls.supported_document_types: + raise ParserDocumentNotSupportedError(parser_name=cls.__name__, document_type=document_type) class TextDocumentParser(DocumentParser): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py index 00a992561..049300fc0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/unstructured.py @@ -25,7 +25,7 @@ class UnstructuredDocumentParser(DocumentParser): """ - A provider that uses the Unstructured API or local SDK to process the documents. + Parser that uses the Unstructured API or local SDK to process the documents. 
""" supported_document_types = { diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 012a0c65b..249ecd504 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -145,9 +145,11 @@ async def _parse_document( if isinstance(document, DocumentMeta) else document.metadata ) + parser = parser_router.get(document_meta) parser.validate_document_type(document_meta.document_type) document = await document_meta.fetch() + return await parser.parse(document) @staticmethod @@ -166,18 +168,20 @@ async def _enrich_elements( The list of enriched elements. Raises: - ValueError: If no enricher found for the element type. + EnricherError: If the enrichment of the elements failed. + EnricherElementNotSupportedError: If the element type is not supported. + EnricherNotFoundError: If no enricher is found for the element type. 
""" grouped_elements = defaultdict(list) for element in elements: grouped_elements[type(element)].append(element) + for element_type in grouped_elements: + enricher = enricher_router.get(element_type) + enricher.validate_element_type(element_type) + grouped_enriched_elements = await asyncio.gather( - *[ - enricher.enrich(elements) - for element_type, elements in grouped_elements.items() - if (enricher := enricher_router.get(element_type)) - ] + *[enricher_router.get(element_type).enrich(elements) for element_type, elements in grouped_elements.items()] ) return [element for enriched_elements in grouped_enriched_elements for element in enriched_elements] diff --git a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index 150c25b41..5e65194f1 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -2,23 +2,19 @@ from ragbits.core.utils.config_handling import ObjectContructionConfig from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.parsers.base import ( - DocumentParser, - TextDocumentParser, -) +from ragbits.document_search.ingestion.parsers.base import DocumentParser, TextDocumentParser from ragbits.document_search.ingestion.parsers.exceptions import ParserDocumentNotSupportedError from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser @pytest.mark.parametrize("document_type", UnstructuredDocumentParser.supported_document_types) def test_parser_validates_supported_document_types_passes(document_type: DocumentType) -> None: - UnstructuredDocumentParser().validate_document_type(document_type) + UnstructuredDocumentParser.validate_document_type(document_type) def test_parser_validates_supported_document_types_fails() -> None: - with pytest.raises(ParserDocumentNotSupportedError) as 
err: - UnstructuredDocumentParser().validate_document_type(DocumentType.UNKNOWN) - assert "Document type unknown is not supported by the UnstructuredDocumentParser" in str(err.value) + with pytest.raises(ParserDocumentNotSupportedError): + UnstructuredDocumentParser.validate_document_type(DocumentType.UNKNOWN) def test_subclass_from_config() -> None: diff --git a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py index 0f2741dcf..40ecb1321 100644 --- a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py @@ -4,13 +4,14 @@ from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.documents.element import ImageElement -from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher, _ImagePrompt +from ragbits.document_search.documents.element import Element, ImageElement +from ragbits.document_search.ingestion.enrichers.exceptions import EnricherElementNotSupportedError +from ragbits.document_search.ingestion.enrichers.image import ImageDescriberPrompt, ImageElementEnricher @pytest.fixture def llm() -> LiteLLM: - default_options = LiteLLMOptions(mock_response="response") + default_options = LiteLLMOptions(mock_response="{\"description\": \"response\"}") return LiteLLM(model_name="gpt-4o", default_options=default_options) @@ -29,6 +30,18 @@ def image_element(image_bytes: bytes) -> ImageElement: ) +def test_enricher_validates_supported_element_types_passes() -> None: + ImageElementEnricher.validate_element_type(ImageElement) + + +def test_enricher_validates_supported_document_types_fails() -> None: + class CustomElement(Element): + pass + + with pytest.raises(EnricherElementNotSupportedError): + ImageElementEnricher.validate_element_type(CustomElement) # type: ignore + 
+ async def test_process(llm: LiteLLM, image_element: ImageElement): enricher = ImageElementEnricher(llm=llm) results = await enricher.enrich([image_element]) @@ -44,7 +57,7 @@ def test_from_config(): config = { "llm": { "type": "LiteLLM", - "prompt": "ragbits.document_search.ingestion.enrichers.images:_ImagePrompt", + "prompt": "ragbits.document_search.ingestion.enrichers.image:ImageDescriberPrompt", } } @@ -52,4 +65,4 @@ def test_from_config(): assert isinstance(enricher, ImageElementEnricher) assert isinstance(enricher._llm, LiteLLM) - assert enricher._prompt == _ImagePrompt + assert enricher._prompt == ImageDescriberPrompt From 30bd88637bfc799c0879c5ca612d9d6f2065cb4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 14:46:18 +0100 Subject: [PATCH 22/31] update api ref --- .../document_search/ingest/enrichers.md | 7 ++++++ .../document_search/ingest/parsers.md | 11 +++++++++ .../document_search/processing.md | 24 ------------------- mkdocs.yml | 3 ++- .../tests/unit/test_element_enrichers.py | 2 +- 5 files changed, 21 insertions(+), 26 deletions(-) create mode 100644 docs/api_reference/document_search/ingest/enrichers.md create mode 100644 docs/api_reference/document_search/ingest/parsers.md delete mode 100644 docs/api_reference/document_search/processing.md diff --git a/docs/api_reference/document_search/ingest/enrichers.md b/docs/api_reference/document_search/ingest/enrichers.md new file mode 100644 index 000000000..bebb7c06e --- /dev/null +++ b/docs/api_reference/document_search/ingest/enrichers.md @@ -0,0 +1,7 @@ +# Element Enrichers + +::: ragbits.document_search.ingestion.enrichers.router.ElementEnricherRouter + +::: ragbits.document_search.ingestion.enrichers.base.ElementEnricher + +::: ragbits.document_search.ingestion.enrichers.image.ImageElementEnricher diff --git a/docs/api_reference/document_search/ingest/parsers.md b/docs/api_reference/document_search/ingest/parsers.md new file mode 100644 index 
000000000..a12a3181d --- /dev/null +++ b/docs/api_reference/document_search/ingest/parsers.md @@ -0,0 +1,11 @@ +# Document Parsers + +::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter + +::: ragbits.document_search.ingestion.parsers.base.DocumentParser + +::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser + +::: ragbits.document_search.ingestion.parsers.base.ImageDocumentParser + +::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser diff --git a/docs/api_reference/document_search/processing.md b/docs/api_reference/document_search/processing.md deleted file mode 100644 index 0286911ed..000000000 --- a/docs/api_reference/document_search/processing.md +++ /dev/null @@ -1,24 +0,0 @@ -# Document Processing - -::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter - -## Providers -::: ragbits.document_search.ingestion.parsers.base.DocumentParser - options: - heading_level: 3 - -::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser - options: - heading_level: 3 - -::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser - options: - heading_level: 3 - -::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredImageProvider - options: - heading_level: 3 - -::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredPdfProvider - options: - heading_level: 3 \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 9f5ee8078..2204e1497 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,7 +53,8 @@ nav: - api_reference/document_search/index.md - api_reference/document_search/documents.md - Ingest: - - api_reference/document_search/processing.md + - api_reference/document_search/ingest/parsers.md + - api_reference/document_search/ingest/enrichers.md - api_reference/document_search/ingest/strategies.md - Retrieval: - api_reference/document_search/retrieval/rephrasers.md diff --git 
a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py index 40ecb1321..a4f36425a 100644 --- a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py @@ -11,7 +11,7 @@ @pytest.fixture def llm() -> LiteLLM: - default_options = LiteLLMOptions(mock_response="{\"description\": \"response\"}") + default_options = LiteLLMOptions(mock_response='{"description": "response"}') return LiteLLM(model_name="gpt-4o", default_options=default_options) From 384461c543ca1edb70097df733f1ace5bc10a29c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 14:47:48 +0100 Subject: [PATCH 23/31] fix changelog --- packages/ragbits-core/CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/ragbits-core/CHANGELOG.md b/packages/ragbits-core/CHANGELOG.md index 251707ec0..d38032309 100644 --- a/packages/ragbits-core/CHANGELOG.md +++ b/packages/ragbits-core/CHANGELOG.md @@ -2,11 +2,12 @@ ## Unreleased +- Fix Qdrant vector store serialization + ## 0.10.2 (2025-03-21) ## 0.10.1 (2025-03-19) -- Fix Qdrant vector store serialization - Better handling of cases when text and image embeddings are mixed in VectorStore ## 0.10.0 (2025-03-17) From 801397642a28a8c6a06a221c2ffc8beaa2ebb862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 19:36:26 +0100 Subject: [PATCH 24/31] update deps --- .../ragbits-document-search/pyproject.toml | 11 ++- .../ingestion/enrichers/exceptions.py | 13 ++++ .../ingestion/enrichers/image.py | 11 +-- .../document_search/ingestion/parsers/base.py | 2 +- .../ingestion/parsers/exceptions.py | 13 ++++ .../tests/integration/test_unstructured.py | 1 + uv.lock | 70 ++++++++----------- 7 files changed, 73 insertions(+), 48 deletions(-) diff --git a/packages/ragbits-document-search/pyproject.toml 
b/packages/ragbits-document-search/pyproject.toml index 943d26398..89ee32d11 100644 --- a/packages/ragbits-document-search/pyproject.toml +++ b/packages/ragbits-document-search/pyproject.toml @@ -31,8 +31,13 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development :: Libraries :: Python Modules", ] - -dependencies = ["unstructured>=0.16.9", "unstructured-client>=0.26.0", "pdf2image>=1.17.0", "rerankers>=0.6.1", "aiohttp>=3.10.8", "ragbits-core==0.10.2"] +dependencies = [ + "unstructured>=0.16.9", + "unstructured-client>=0.26.0", + "rerankers>=0.6.1", + "aiohttp>=3.10.8", + "ragbits-core==0.10.2", +] [project.urls] "Homepage" = "https://github.com/deepsense-ai/ragbits" @@ -66,7 +71,7 @@ dev-dependencies = [ "pytest-cov~=5.0.0", "pytest-asyncio~=0.24.0", "pip-licenses>=4.0.0,<5.0.0", - "ragbits[local]" + "ragbits[local]", ] [tool.uv.sources] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py index 227a4ec82..d72c462ea 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py @@ -1,3 +1,7 @@ +import inspect + +from typing_extensions import Self + from ragbits.document_search.documents.element import Element @@ -10,6 +14,12 @@ def __init__(self, message: str) -> None: super().__init__(message) self.message = message + def __reduce__(self) -> tuple[type[Self], tuple]: + return self.__class__, tuple( + self.__getattribute__(param_name) + for param_name in list(inspect.signature(self.__class__.__init__).parameters)[1:] + ) + class EnricherNotFoundError(EnricherError): """ @@ -18,6 +28,7 @@ class EnricherNotFoundError(EnricherError): def __init__(self, element_type: type[Element]) -> None: super().__init__(f"No enricher 
found for the element type {element_type}") + self.element_type = element_type class EnricherElementNotSupportedError(EnricherError): @@ -27,3 +38,5 @@ class EnricherElementNotSupportedError(EnricherError): def __init__(self, enricher_name: str, element_type: type[Element]) -> None: super().__init__(f"Element type {element_type} is not supported by the {enricher_name}") + self.enricher_name = enricher_name + self.element_type = element_type diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index 81074dc7e..6d57b24d0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -70,10 +70,9 @@ async def enrich(self, elements: list[ImageElement]) -> list[ImageElement]: responses: list[ImageDescriberOutput] = [] for element in elements: self.validate_element_type(type(element)) - if isinstance(element, ImageElement): - input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore - prompt = self._prompt(input_data) - responses.append(await self._llm.generate(prompt)) + input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore + prompt = self._prompt(input_data) + responses.append(await self._llm.generate(prompt)) return [ ImageElement( @@ -95,6 +94,10 @@ def from_config(cls, config: dict) -> "ImageElementEnricher": Returns: The initialized instance of `ImageElementEnricher`. + + Raises: + ValidationError: If the configuration doesn't follow the expected format. + InvalidConfigError: If llm or prompt can't be found or are not the correct type. 
""" llm: LLM = LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) prompt = import_by_path(config["prompt"]) if "prompt" in config else None diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index 9945be208..bf7c46608 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -16,7 +16,7 @@ class DocumentParser(WithConstructionConfig, ABC): default_module: ClassVar = parsers configuration_key: ClassVar = "parser" - supported_document_types: set[DocumentType] + supported_document_types: set[DocumentType] = set() @abstractmethod async def parse(self, document: Document) -> list[Element]: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py index 0c06bec93..4c626cb74 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py @@ -1,3 +1,7 @@ +import inspect + +from typing_extensions import Self + from ragbits.document_search.documents.document import DocumentType @@ -10,6 +14,12 @@ def __init__(self, message: str) -> None: super().__init__(message) self.message = message + def __reduce__(self) -> tuple[type[Self], tuple]: + return self.__class__, tuple( + self.__getattribute__(param_name) + for param_name in list(inspect.signature(self.__class__.__init__).parameters)[1:] + ) + class ParserNotFoundError(ParserError): """ @@ -18,6 +28,7 @@ class ParserNotFoundError(ParserError): def __init__(self, document_type: DocumentType) -> None: super().__init__(f"No parser found for the 
document type {document_type}") + self.document_type = document_type class ParserDocumentNotSupportedError(ParserError): @@ -27,3 +38,5 @@ class ParserDocumentNotSupportedError(ParserError): def __init__(self, parser_name: str, document_type: DocumentType) -> None: super().__init__(f"Document type {document_type.value} is not supported by the {parser_name}") + self.parser_name = parser_name + self.document_type = document_type diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index ea7371dfe..05e16903e 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -16,6 +16,7 @@ [ pytest.param( False, + marks=pytest.mark.skipif(True, reason="No dependencies installed"), id="local", ), pytest.param( diff --git a/uv.lock b/uv.lock index 5eb10e0a6..0383f1471 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "python_full_version < '3.11' and platform_machine == 'x86_64' and sys_platform == 'darwin'", @@ -460,7 +461,7 @@ name = "build" version = "1.2.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "os_name == 'nt'" }, + { name = "colorama", marker = "(os_name == 'nt' and platform_machine != 'x86_64') or (os_name == 'nt' and sys_platform != 'darwin')" }, { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, { name = "packaging" }, { name = "pyproject-hooks" }, @@ -683,7 +684,7 @@ name = "click" version = "8.1.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } wheels = [ @@ -2432,7 +2433,7 @@ version = "1.6.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "ghp-import" }, { name = "jinja2" }, { name = "markdown" }, @@ -2794,6 +2795,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 }, { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 }, { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 }, + { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 }, + { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 }, ] [[package]] @@ -3008,7 +3011,7 @@ name = "nvidia-cudnn-cu12" version = "8.9.2.26" source = { registry = 
"https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ff/74/a2e2be7fb83aaedec84f391f082cf765dfb635e7caa9b49065f73e4835d8/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9", size = 731725872 }, @@ -3035,9 +3038,9 @@ name = "nvidia-cusolver-cu12" version = "11.4.5.107" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/bc/1d/8de1e5c67099015c834315e333911273a8c6aaba78923dd1d1e25fc5f217/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd", size = 124161928 }, @@ -3048,7 +3051,7 @@ name = "nvidia-cusparse-cu12" version = "12.1.0.106" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/65/5b/cfaeebf25cd9fdec14338ccb16f6b2c4c7fa9163aefcf057d86b9cc248bb/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c", size = 195958278 }, @@ -3067,7 +3070,6 @@ name = "nvidia-nvjitlink-cu12" version = "12.6.77" 
source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/8c/386018fdffdce2ff8d43fedf192ef7d14cab7501cbf78a106dd2e9f1fc1f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d", size = 19270432 }, { url = "https://files.pythonhosted.org/packages/fe/e4/486de766851d58699bcfeb3ba6a3beb4d89c3809f75b9d423b9508a8760f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142", size = 19745114 }, ] @@ -3528,18 +3530,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, ] -[[package]] -name = "pdf2image" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pillow" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/00/d8/b280f01045555dc257b8153c00dee3bc75830f91a744cd5f84ef3a0a64b1/pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57", size = 12811 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/33/61766ae033518957f877ab246f87ca30a85b778ebaad65b7f74fa7e52988/pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2", size = 11618 }, -] - [[package]] name = "pillow" version = "10.4.0" @@ -3648,7 +3638,7 @@ name = "portalocker" version = "2.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywin32", marker = "platform_system == 'Windows'" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } wheels = [ @@ -3719,8 +3709,6 @@ version = "6.0.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/18/c7/8c6872f7372eb6a6b2e4708b88419fb46b857f7a2e1892966b851cc79fc9/psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2", size = 508067 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/66/78c9c3020f573c58101dc43a44f6855d01bbbd747e24da2f0c4491200ea3/psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35", size = 249766 }, - { url = "https://files.pythonhosted.org/packages/e1/3f/2403aa9558bea4d3854b0e5e567bc3dd8e9fbc1fc4453c0aa9aafeb75467/psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1", size = 253024 }, { url = "https://files.pythonhosted.org/packages/0b/37/f8da2fbd29690b3557cca414c1949f92162981920699cd62095a984983bf/psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0", size = 250961 }, { url = "https://files.pythonhosted.org/packages/35/56/72f86175e81c656a01c4401cd3b1c923f891b31fbcebe98985894176d7c9/psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0", size = 287478 }, { url = "https://files.pythonhosted.org/packages/19/74/f59e7e0d392bc1070e9a70e2f9190d652487ac115bb16e2eff6b22ad1d24/psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd", size 
= 290455 }, @@ -4375,6 +4363,7 @@ requires-dist = [ { name = "transformers", marker = "extra == 'local'", specifier = "~=4.44.2" }, { name = "typer", specifier = "~=0.12.5" }, ] +provides-extras = ["chroma", "local", "fastembed", "lab", "promptfoo", "otel", "qdrant", "pgvector", "fastembed-gpu"] [package.metadata.requires-dev] dev = [ @@ -4391,7 +4380,6 @@ version = "0.10.2" source = { editable = "packages/ragbits-document-search" } dependencies = [ { name = "aiohttp" }, - { name = "pdf2image" }, { name = "ragbits-core" }, { name = "rerankers" }, { name = "unstructured" }, @@ -4436,13 +4424,13 @@ requires-dist = [ { name = "boto3", marker = "extra == 's3'", specifier = "~=1.35.42" }, { name = "datasets", marker = "extra == 'huggingface'", specifier = "~=3.0.1" }, { name = "gcloud-aio-storage", marker = "extra == 'gcs'", specifier = "~=9.3.0" }, - { name = "pdf2image", specifier = ">=1.17.0" }, { name = "ragbits-core", editable = "packages/ragbits-core" }, { name = "ray", extras = ["data"], marker = "extra == 'ray'", specifier = "~=2.43.0" }, { name = "rerankers", specifier = ">=0.6.1" }, { name = "unstructured", specifier = ">=0.16.9" }, { name = "unstructured-client", specifier = ">=0.26.0" }, ] +provides-extras = ["azure", "gcs", "huggingface", "ray", "s3"] [package.metadata.requires-dev] dev = [ @@ -4489,6 +4477,7 @@ requires-dist = [ { name = "optuna", specifier = "==4.0.0" }, { name = "ragbits-core", editable = "packages/ragbits-core" }, ] +provides-extras = ["relari"] [package.metadata.requires-dev] dev = [ @@ -4526,6 +4515,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'openai'", specifier = "~=1.57.3" }, { name = "ragbits-core", editable = "packages/ragbits-core" }, ] +provides-extras = ["openai"] [package.metadata.requires-dev] dev = [ @@ -5551,19 +5541,19 @@ dependencies = [ { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" 
}, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 
'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "sympy" }, - { name = "triton", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "triton", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ @@ -5589,7 +5579,7 @@ name = "tqdm" version = "4.66.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 169504 } wheels = [ @@ -5622,7 +5612,7 @@ name = "triton" version = "2.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "python_full_version < '3.12'" }, + { name = "filelock", marker = "(python_full_version < '3.12' and platform_machine != 'x86_64') or (python_full_version < '3.12' and sys_platform != 'darwin')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/95/05/ed974ce87fe8c8843855daa2136b3409ee1c126707ab54a8b72815c08b49/triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5", size = 167900779 }, From a354196b057307901d4db7118036685cef86b746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 20:29:08 +0100 Subject: [PATCH 25/31] add more tests for routers --- .../ingestion/parsers/router.py | 10 ++++---- 
.../ingestion/strategies/base.py | 2 +- .../tests/unit/test_document_parser_router.py | 19 ++++++-------- .../unit/test_element_enricher_router.py | 25 +++++++++++++++++++ 4 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 packages/ragbits-document-search/tests/unit/test_element_enricher_router.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 76df52b48..4554b5e1e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -4,7 +4,7 @@ from typing_extensions import Self from ragbits.core.utils.config_handling import ObjectContructionConfig, WithConstructionConfig -from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.document import DocumentType from ragbits.document_search.ingestion.parsers.base import DocumentParser from ragbits.document_search.ingestion.parsers.exceptions import ParserNotFoundError from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser @@ -73,12 +73,12 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: } return cls(parsers=parsers) - def get(self, document_meta: DocumentMeta) -> DocumentParser: + def get(self, document_type: DocumentType) -> DocumentParser: """ Get the parser for the document. Args: - document_meta: The document metadata. + document_type: The document type. Returns: The parser for processing the document. @@ -86,9 +86,9 @@ def get(self, document_meta: DocumentMeta) -> DocumentParser: Raises: ParserNotFoundError: If no parser is found for the document type. 
""" - parser = self._parsers.get(document_meta.document_type) + parser = self._parsers.get(document_type) if isinstance(parser, DocumentParser): return parser - raise ParserNotFoundError(document_meta.document_type) + raise ParserNotFoundError(document_type) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py index 249ecd504..9529510b4 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/base.py @@ -146,7 +146,7 @@ async def _parse_document( else document.metadata ) - parser = parser_router.get(document_meta) + parser = parser_router.get(document_meta.document_type) parser.validate_document_type(document_meta.document_type) document = await document_meta.fetch() diff --git a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py index 69d806156..98994f28a 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -1,29 +1,24 @@ import pytest -from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.document import DocumentType from ragbits.document_search.ingestion.parsers.base import TextDocumentParser from ragbits.document_search.ingestion.parsers.exceptions import ParserNotFoundError from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter async def test_parser_router() -> None: - parser_router = DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}) + parser = TextDocumentParser() + parser_router = DocumentParserRouter({DocumentType.TXT: parser}) - document_meta = 
DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") - - parser = parser_router.get(document_meta) - - assert isinstance(parser, TextDocumentParser) + assert parser_router.get(DocumentType.TXT) is parser async def test_parser_router_raises_when_no_parser_found() -> None: parser_router = DocumentParserRouter() parser_router._parsers = {DocumentType.TXT: TextDocumentParser()} - document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George") - document_meta.document_type = DocumentType.PDF - with pytest.raises(ParserNotFoundError) as err: - parser_router.get(document_meta) + parser_router.get(DocumentType.PDF) - assert str(err.value) == f"No parser found for the document type {DocumentType.PDF}" + assert err.value.message == f"No parser found for the document type {DocumentType.PDF}" + assert err.value.document_type == DocumentType.PDF diff --git a/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py b/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py new file mode 100644 index 000000000..200e4e279 --- /dev/null +++ b/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py @@ -0,0 +1,25 @@ +import pytest + +from ragbits.document_search.documents.element import ImageElement, TextElement +from ragbits.document_search.ingestion.enrichers.exceptions import EnricherNotFoundError +from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher +from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter + + +async def test_enricher_router() -> None: + enricher = ImageElementEnricher() + enricher_router = ElementEnricherRouter({ImageElement: enricher}) + + assert enricher_router.get(ImageElement) is enricher + + +async def test_enricher_router_raises_when_no_enricher_found() -> None: + enricher = ImageElementEnricher() + enricher_router = ElementEnricherRouter() + enricher_router._enrichers = 
{ImageElement: enricher} + + with pytest.raises(EnricherNotFoundError) as err: + enricher_router.get(TextElement) + + assert err.value.message == f"No enricher found for the element type {TextElement}" + assert err.value.element_type == TextElement From b4e896d04c8b315055070b953a614f3c88e4cfed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Sun, 23 Mar 2025 23:50:09 +0100 Subject: [PATCH 26/31] add more tests --- .../ingestion/enrichers/base.py | 7 +- .../ingestion/enrichers/image.py | 4 +- .../document_search/ingestion/parsers/base.py | 5 +- .../ragbits-document-search/tests/test.png | Bin 132907 -> 0 bytes .../tests/unit/test_document_parser_router.py | 33 +++++- .../tests/unit/test_document_parsers.py | 81 +++++++++++-- .../unit/test_element_enricher_router.py | 26 ++++- .../tests/unit/test_element_enrichers.py | 109 +++++++++++------- .../tests/unit/test_ingest_strategies.py | 36 ++++-- 9 files changed, 228 insertions(+), 73 deletions(-) delete mode 100644 packages/ragbits-document-search/tests/test.png diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index 13ed2fbf9..1698589a0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -1,8 +1,10 @@ from abc import ABC, abstractmethod -from typing import Generic, TypeVar +from types import ModuleType +from typing import ClassVar, Generic, TypeVar from ragbits.core.utils.config_handling import WithConstructionConfig from ragbits.document_search.documents.element import Element +from ragbits.document_search.ingestion import enrichers from ragbits.document_search.ingestion.enrichers.exceptions import EnricherElementNotSupportedError ElementT = TypeVar("ElementT", bound=Element) @@ -13,6 +15,9 @@ class 
ElementEnricher(Generic[ElementT], WithConstructionConfig, ABC): Base class for element enrichers, responsible for providing additional information about elements. """ + default_module: ClassVar[ModuleType | None] = enrichers + configuration_key: ClassVar[str] = "enricher" + @abstractmethod async def enrich(self, elements: list[ElementT]) -> list[ElementT]: """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index 6d57b24d0..ec14d3064 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -99,6 +99,8 @@ def from_config(cls, config: dict) -> "ImageElementEnricher": ValidationError: If the configuration doesn't follow the expected format. InvalidConfigError: If llm or prompt can't be found or are not the correct type. 
""" - llm: LLM = LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) + llm: LLM | None = ( + LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) if "llm" in config else None + ) prompt = import_by_path(config["prompt"]) if "prompt" in config else None return cls(llm=llm, prompt=prompt) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py index bf7c46608..b35f6d9df 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/base.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from types import ModuleType from typing import ClassVar from ragbits.core.utils.config_handling import WithConstructionConfig @@ -13,8 +14,8 @@ class DocumentParser(WithConstructionConfig, ABC): Base class for document parsers, responsible for converting the document into a list of elements. """ - default_module: ClassVar = parsers - configuration_key: ClassVar = "parser" + default_module: ClassVar[ModuleType | None] = parsers + configuration_key: ClassVar[str] = "parser" supported_document_types: set[DocumentType] = set() diff --git a/packages/ragbits-document-search/tests/test.png b/packages/ragbits-document-search/tests/test.png deleted file mode 100644 index bdb84a59910b0f297ed97d6d84446221fc4b5c57..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 132907 zcmV(rK<>YZP)^Ib200IW`Nkl3}aF|?4-+g|9{`+b!fLp9#B0sXqN&-K%gY+%i&F0MWnk6jcB+f=4I9GZiuUz zs#Z^?2vP)7^uSIuWvBrrQ!T`>LgsrCDNXrSEUAE!ATqqirKAn%O*@N6cbQp)aZjZ@ zrWj>axKoi~H3zAcvVu{JsGMD0g?!D&VB`cAdMg`ae>a6SllIxV`hS! 
z4RJS>^YIATb$!g)$N~W(V5lytjZyYg2`IX%t5F8DYIpyromQcu72*H=`{%l@KY#oh zKEhdkb5#S1iyw~T_2ub$z2FBm@DIy44FCD-cDZjbxNZHIS*5dT&fKaJ*0m=ZQ5K`D zD%=mTEW?t^Wy+`lB8Z+2<8oae+uXJp5j-VAOIle~(QzZdV%_3+SdfDwP_;0lW+lev zHX?583gP1t-F<&-;8nlF)T#@a&GXchX=2bfqXIHr&PBQmQ8}W#EzU0(M<_hnopOYYIz63_k1cQa8xPCyIrF=q7np&M zIOdYV`n91`=^n7->Im#xpq8IME^og*!z6M~r(-@IEyn7NuXzOV^mOi?-b%x*%d)(@ zSoVdQF1A@^Z)xEs+Z3o*4)y(Mr8J1gvAjN?Yt9vm(qTRpWHaN_r!$Dx>kX~jP_+^E z@k)y6ymzX$=>J4mg<|Bm612s#hr-9q`2Roi7!CtN21%Z)YAt93+IZ63Rh7{UY1Mb< zVZdW_L@0@E8ynqvC^nJkgv0BPikql2~Rl#+=S zsN%Q?l&jwPRqucg=wix?cfRjD{ElP71|VR7a)@>v1BNC#f4$qHSkL?C({R4s?NYh8 zX}Y_*4c@yDx105L6;c{83pZE0IZyLEuGgzea~g-S>q6hLUw!PiyB*NvJRC#tC;_pt z6!m73St8P*a|RehqFd55l@P3w9GM2wHRN?`1S;jQ6x=QzLs)~M!n4R2DWOk*J53sG z1Rr7VT+~p$aexUuXo_e7_8v#U0^x*bf2&k@STx!ZP&0YSFeK_jOUMk` ziu4eF+5zT8p(H}5u6E_C93)f8b`j_zG}3~MTHzN_E|qT^Ilu@`oz@ZxA8C;X=rMmL zKeW7DoCoQl-`FMA%NdJjOp@57t0$G~h$>8`e`(-A9OtTXkY)s#O53y&MBS|a@NfzTaj9!oVdF;sE^ zwuaW6&I16jX>!X#M8XRd?7QPN%~MAAb1s_~+x(xsP!eo=_H` z)dDl;>E`bC;rjmN@ciHZ|KEF$h61&N56E;tGD|`UWs$MH8s{{eN=bMMh^w0hMwDp= zVfe~og_NpG!>1xE1z<$sFWesu)x)xjc;_Z-*0^L6D#&@^l(9AVR{2r(JXbX56~^Qj z-)Id;rwb=hB|Kvtvdkb=Er78uBW48qj%=K1MF%N-#mw0junutKFk3CeuQgel9jf5l z%0E`YH|VxPsF;`~S>gl53b%m}OOM=w8Wj~Gs%BUsW2D`=S>O`JEy58j1frupg5TVE zOcKVuh7Qs}HeBh%{mfMoiBQ3EX(q_(R9Rl(p6-w;RjBqDO4%YUve{`OKt>3O8t9;hp1fAf)olcXu#;^VYKIA@(ZXj z&COi(5+Xz%b$Z^&MS&-UgC1!^=!Es_t4-g9!|}X7VXFo2=<$-0I2DgDg%80&TEhe) zJ-n*AfN|$o6|(O2cC~KD)6n$+Bi)Eb2Szw&gCSVtJmVXBAp)>LHMebhJYA9@eWGmp z!?|AtJ*P!V&`^<(E#ROFZoBQARr>`Avhji9N@;>i+eJ60dA0IrB!-B*A%aIYXcoDC zo+)z1j3;c%!~GS;nIL+e|ZBBEx=&#f84COMX?455# zepkMqCra!PT=e>7KU!c`AkBb#ywlO?BJvp+^z?MVb+Zod-0kWD-{0J<*F7tIA+PlP z=KHU{+xF|nm)}w3Z@&7RvP9|FcZIK5g7bHGx0}uCcsTUlcAtLq=i?uF8gH-fo?f1| zrCe`z^E6_^9=?4)#`w>F{2i_T^3%_(4BL_mBo%$i*O^zDC{d&| zj&`wS87nOS#NK1?h-Ib7Vy1!NjMkF6 z$xT?CFiw5v4#VJrnkF?)sevRC>046}@5&kEg$`K+=Nv-{9tR<|*oxh54HqxRLz>|! 
z3*=p<8Rkjsn3F`cl?9z>Svbcna-;$#MvrC#>(+gQZo#ZZ?<*7H5$+Tl5fs{sP-kvp z#4aM%rYX0gRKXm`^f*ozPG>Cy#|+vbc7C(&@PCgin23h2hJXunus6Hi{_t!XQY6J7 zQ{W|f2P23iMw)apTe0rf@4kCKr(x9_BzXk*<_kIlKwtGe;O2Vg`aTZhoYx&p#bCCZ z70~m~#}`0z^gW;l1vw81zyn9ok$%=tXQu%I3YqksM>~TjAC1E_qD_78z1#0k@80j; zd*^<)-A0EdSacjkcP?P0?yes`c>iOg>^z-UyFMlK{Jh&;&3QbZ27vi?v&F%@oKI(J z?RMEe{c+qs-Q3)wZD54&zWo{UWS$4-1Il|m9=`kfD|8K);}MA-FNF}}+#0eXr#xz9 zodyblYGUUUjtnYMn5|_cMPz6+M`+(xKE{S(Zlz1F-fCRfEV39zPjI5T!4_K?eJ`#lf3&;{|e#!PJr+J)0Vu%UGWn?KEw`ea82Rh7q_~q z7}i=X_nKm) z(DhG}S!lbC!%%eY>uqADxR|6o%*^xue>2SVww$ai*Kt)#s?Rx=Tg{!LEzu@oA_%J? zGhqxR*HRz^3qZT1&yyS+3~S3&qGZdNm6gS85jZ-{MUPh$om2=E$SfNvmeJvv2|I5L z)epuzaX}AU(;*RRD3~uRr9@?@9oYca9m@WMcGc|J@|{&V25MHLoSQcUR;y+2v z>KHvW<9aQMgFB?w8kK@4FaXRWxj-1L*PBhboo4bS8^n=nk2nDj_PF5oT9*`Z^x$_0 z={lXy<4!4lmj0cRYdaJqhA{Xldc5G>u$pEwWD;TfsY9y81B_iQDH?!_{COAlyTc!U z{*goY@a?zXe7yeZ?XfL2r&Llt&$q|@alJjJY4!piV5EVMB{8L`DO7JZyYX-sd8h4& zs7C)U`tjFaFm!u3jq`20A8(I0kJH(@5PW7;mZLY)_4Cg^f(RrWcS+7}v6KRI5Z`vx z{*$%UN}=*(q;=1VE7buFo)x~8vc_-=9*kj-GBe6zl(epA>qYdn70z1ULlP+EEQvzZ z4%lHy)TG5Yt$|K$6u)pw8>S{cZ0ITiZD=PlTD`b~IFh!``T&wCm zx?|n5b1UmtF07qbB-^5rmYLWm=qc4MP~?m6V8%j!>Tn2?%v?UZO%@ zn8$la%cF|kN9R!!TH>b#bxB5i^i7O}QP4@unJ9)x$F@mR^y0$tthr3PLJ0Iph5@TI zp%G&L+!>9QBkO(J_Sdk2BL&9+5~1RtX5etjqrBTA{7+;`Q6^uL63xv~#h`=JCA*v) zZSeSVG1@3l*Yp?Ni80n80JOGki&!tTl+H>h$}bdBRKLIlClaYEwd;&i)S$r&df^)7 zZmk2m6^RmqD2qWXydaEQYi0(O;-!~80Pk#HxX6VSqzxe&>TtHzZvBe)eI5KX78Ew; zn3Ly-8WRHHVzA5r@-#W77`*2^BwkqV2IT{v3=Y90ccBL^Tsob`ci(;kA!93D4)YKY zCZQK8#pCv{9X8uK91jO*I*+Hv?e%S~zy0|$VoMI^adegw>#s;WD}LPW2ZT`RaX-$7 zd0RJ^(`7vD>rnY@&W=ii^5#6x@4opVm^e-6Z@>EPqYppZ?~iODA(e5yjPvy{Px56V z5M>)u;@H-H`I4W`a@`JfKMZ>~&Izr9iAu0fJSp!|*-n>Nr|UhIL4-n^R01KHb&CN zjBaBiTS&8nHd;myCqTjkZIFj&I7P=>%`2-TYh;CcOauIox!^JB5L?T*!Gl^T2Kk#L zaUnNa$=oZ*?>r3Yp_hX(Xiy-Sbwd|SSI+eSDFnQAn0cofuY0Czyw*;3I1~VSh%yor zw~`kOXw?E2ZG>^8iTCyvFOsH^bLls6Bmk83>HYfz_b7bU9N{|{BJ5G}(!V6iEVLXp zmLO`-JToo7Fw_74QE+)}izQ3BRhlH@J)*T|THQSjRl=wZ2~inDmdjX7-H6lch4Mr+ 
zUcfVLEzd{McwD(9q-Qa!l?1Lx`J&hp!ZE9;&qGoc;D#Vz(wzkpE#ub9XbT?(_vX~} zR5BnJ>vHE@rBB-!qm?Xf!XooUwri@oLQI&BrX7m5fDceH($FAa zcWm+U@%r-q!uyIlBhw|5x8wQ#_5p3uM$k?oKK8^SI=R0eUa)+GFDp{EW za3Drr6=2MtQbc_EMP8gg-d<=I%Q}C5I~v2{E99rmVSBe;YxK!tRVw(ZCH*6<7s>p$=U%MXN!_+hKrmoq|ri8j-P8^J>%#5i^X{urpB9(_Q?q|i)8ZH>C z&$T;)0S$~42qb>a(&BNA>#JjTH}x-v&1IyuET{Qr5sQggFtO_91YU_b4A4SsHP~${QBRv-gG`%?@5MA60j1q<$CWiV2HYBUT1u!0=>B90)T}Bk~sirL=0#wO3*`2 zVocArc0lw)TgeB(9?^=uJ85jdA%rF&4xz(I0~UpsbfKNhT2>Eb9rzKOjV6}SZpCyXEX>sVSP;GDK(jbZbip~wn*|wGI1U#WePojvH2Dv!A-cdeA)X1*e$mf|=hlPZ z67mYSF_gG%83gHe2hvNO?2$lzS{-4A*44Q|{Ch z7q>xF^Tb_|2f;^<;%~X+vORcz{m3CkdXM2IQVZl0%^tUt7e_&eW!WqaO0R3ooU}MG zEK-ZJD2}Mnu?&#Z`EiY(=mCdatNQDd>7yO$!IJ+9Z~XLc_BmnFtWbG@DUT9}wwye8(cj0>wz8 z7PM=Icm?F&ZFYzUl(F62pftX?+-@)>bME%nRB7q?6=whX=5Ddvc4`tg2+urpsw*Hv zlhas*NJ`klj2HCftrJsT|3SoHcO79FO4tXPSC|Rv1oMImn zSxf4u70OZ>vN?D+_Oy{ve4C0Hes@=e7Bko~?C7&4nn=qUHUSG3q|3e2@!r@)-0UCS zN9kZNHV8P5q1PAmS%N+&BLK%>O#t6ui9lE8f_q!QgwTzNyA4rb;*M#+1sz(}5%YHi z9u$BSv5iEh3Vm5H6{8JhMH6d{QC}h$QNR#WWfkoC_Gb;ad2xQB24YV*t zfl1mAOJhdq&&HVCqQ=YomM~UZ_!7t6&W!?CxB-NqijL>T#<7eLyq9F~UYdk7;U+<2%GmIro-(d;FvN$G2=YXBBq$FPPb+g8xIoc>{B%a8qWpMB<)!|@>~09U~&-~dNqHq#nN-9E{J&^{mw?h1Cr zSuNFG@ubClzuoK~J|wgR_ml(K`vx}`6y7|Jm*I)pATejz3wA&A<|ZU9@~R=61fK^* zm?*h}K5}qkBf}j01UGZxFN`2@X=ND}F+w)8&K;9o%$b(%_Uv<+;(8X-j%=JgXK`eT z_eUlytCWG*BkXiO9Fp$?lPA!Fbig2Q!280Epbs*DE`GP(s5N8oR^vh4i+;@yOqpw& z>}?u44Rf9BEJ;9j3XzR&#C7@2vuXE|$pb_}>dYYG~Hm^mnweC`gChiS}Y%@i5@EGzTw;eHSFFpuoA z2Xw~3mjN~be6->nq6J4jOXIMKj6RhB4&Lsk8Sil1Eg?oIhk9a`qf(j}WEZnw9iRb? 
zvPiJtaxhtTr6L!TmW)BBJJQakP-oB#$0{xoeE3Zj3r*P|BiAtmmTjx3ZcvQzV4V(+ z+!P1$*ptujDGnNS6u!(p!~%fYD?y!Gi(*``<=hgq>*8;Ch%c0i@LOqZwuMWP*3%wQ zb=MN{Lluvy4#*@k;XgwuoTgR2N`r%}ash-k^Kq}K+mi#KVsRPrk}!0NNP!>^AbgpW zG#1)V$QJfRxCq99wd|Y@l(Oqwo|ec25eJu7T^uhMEWO3)nS-Ql$mqj+)o1cdv#;Lm zxha;Blk#L^mC*t2)W%W&nG1P;`7%x08oNuoAAr#scSf|zTp~7d0|%Vqs|&V zXf>izhUsh6HQ77=%rjC;<8o?1v4Q3j_06hq%UF~IqDG9hk=e>$9!n)-xkf z{9=!SMiEL5)RGSv#S_$1s1xXXb4=xYj#x;}+M$vrn3TWUGd&rS_bhH)1@a-r$l9>x z6TzxXNXk9`E6gjg-JZwEExe)RHk!Js=E-k}lAnJH(4e2NH6VbzH5N&KR3Y|A7;)*X z;6xs8q=P&I(j}3$^>;@uRZs4D=43#nED7ktkYrMjMIR>@A}7Pqk2P?Fu1S?X92yng zD&p7+0>C8N!LBw>(Au*~2#Fwpd0HGMzd^(QS{%$d@H8*WE6Ne_6z5hg8s`Ef$Wyji zC~ee48WPHSD=1 z8B3p&Mh6EpPuAk^eRPtcD(HFRt0S&tK{w2-vJ+l!$h084L_A29YE0rjCm_E~;z>v5 zp|$MdY1*o4n!2VaHzf8j{{K{w195l4O1AJyOVPTyU`Ipvm>+~){Iee+F=Yvv*#nGMO!3N%X*RzfHY{Z4U-Ym zCi5?fqc{Rb(p^(gHDE_fS4mY5oeqxp`Ky;*&IGk|NCqx*;P=8y#rv$S&d!7cmaTkK zMP@rtQp)$cT`Vs+0V2r{Q!Vvr#fX7?XkA@ao}8nM3 zUZ$SyGB*zsX2bQJ4wYfpZKjwu*vf_}?n`5^6&IFXlz%QkJsP?|omKP*{=#Q4DX7J) z6^AoCBF77Kng&Z3#h=1-F&~xZR32sSfKY}TigF#S>N`W^3C!vAq2dWlbB-ld@fkN_ zkYXF(@RC{FbZ{pWXZ|oe+%DCY^3o94A%1s2Sv2$nRw<>@?VMh^Zi*(Cw6cB^yw+dt z)bdP9aP7zM-(MpQ*H3W^5QWT*w!el2!%`TZ+ofSoQQ4a?`GXRZwI16spr!l zeqn@WZ}l~r!mN|KMXebJu$4$K*x23DcS4&=>uTy+uB6RVrKO@*Pv5ZCn4y-c9jB&~ zo@<(&N{@7?(@J*a`dw4T@P{J&t(Dn9=9JB7QC@l=K{szt9Rw6wARA9Mix;TAk_$VM z;o#%*VaL@Lw2Ws{00U$}`>-UHyOhd;ttss+DVg62d5N+NHs(r}C1avLjq9Rh1s~Q_ zPQDh$sDHgHJYxb&T+fdE^@r6%$&u^PEqiG#6HqKRc98jY2q<=QWGgbRYnGw?RQR5r z=6M^fD0M7pERnN3dMdUwy(t}Wei7`2RWhav2s%vnqL4kUfxbFgwE&*~L{ICIwZA|? 
zj2%c6XW{TRj8urpZRWcUY|$vRT9qM#Q02~xY~C#Txd;AhI;yfY`Kf3!J-(Q?m zq^06HM?VTWs*y3>xx~~sn0SGkui8x}iC8jiK;6fB^ne;%Ubh^b8mnt{GRuAs)1XWT129Be;u6B+m^>t_HZH@$wECX9aAZtGG(uYUF&+o1MWc^pwbpy)iQ5< zFi);$x`capK~Kqxq1-Ap0M51);zXhwDsk!+(+IkzIJf-a6wd3DqIsetg9_>oUIf1F zd7^#bPyqSaRAW+!0PxkNsX7(KfdiMv@dAahv&#_9=An2%=JFdl6l!xKT`l!wEftMc zcR@|oCK9nbwmIbr1@*_v{}&(UnIy@NTX6zp)?eOx?@u6mIbB8evkssG=p^#H<7c|N zk|eNM_yrM+TEm#BW|_=nCc(#t2awrYyYwrRR73NqrMcv{)St$1I#7I`h>%`YkjJDp zTmN#iz(jygo*DZ{5|M6*6q)l(l*Gw$8m3xIq}FVIp-6lv0wPnP{)D_&XS$tZyc8cH z!NijO>H!L<%`PjP!jY_g7hnD$r(Hp@&{^_OEky}kbL5jN8n3n}r}13Tf&+)leORDR zfG?v)$L#PB#C@XGRSz1N+()G$hD4k^OH@I?NM%a(Dmzw*3K!$W1Ad5&Q?M(%Hi}l2 zfBV_|$-AH+)p{;d2n6UrgAh_0^H^tg6~nj(Ank2P1L8t)TIu zddqmNNK|+ND=ibQZi}JhsCX=VH`6F{KKwF`7_mfw_0-3DxC1pr+@w}qCJ~);LXu-w zBSb+TXJI&g#je>BV+K4!$&E-2;+Ptk>(DcF|JTiYQWL@r4~0)_EBN^xNx#vZqIe! zZX6l(#3v6e+CcN1+{QR0Es;9>Q3Ex&P0XOGG#ad{8G;Rnroz3WA%uwT&JU@liU$gF z)L!BVmus=&FA2MDv>O}@?J&}is;rWo?E;??Hy6>JFq;{_%%%#ym7q$a9g}S#iADSL z%ah#^;YmdBB~tE~DrMbaAPSbm6^#=z&P_-et&J&jSO}N0^gnjP5jh0(ktVUrRV0$6 z_o2ZJHQf!`sj-;=%ac%SCH*W>lQhRlSBLDR&P#+ z_<|I}W)WE8OK^W`OsZBuem-A(AwI?d+WKq#2{eVJ#xOBpH(?3_vQtZ_7LNb2@*w}U zL1t|Z8bAA3u4|ZSkUpCxRjrSpLNg+#LXvIx?Pu`^Ftc-&&6sWl>s2tJtU}kf~3|c8BNxrs# zNfKVE<`z9;N zbv>LaR2+~E>BOTI)oXS@=58o*tV#^bAI^z>C=}_-+%W#DfQZW>%5;YFJIm3ANSG zP7!9#kVW=E02*@h)$A>@0WD`d^|X_n`ykf9w&dBFKy0b$&$5DK7sZ~k1V|=Jc^hI* zAPmEEvlBrqd_YsLpaJAP3cFTGn1Ifb9GEe<%kUam>1x(DOkn1{4@=>lny9L$K~bx9vWWfyDqC$-q%o za}t&<7~*_L$qVr|D@uDrWB4pm>TWVrxq8qDVKIwO?_4F|U&4>qDu=m)KC<+=%tlc; z(hG=VlN%GASTpw4PG2dikV>45-cODqZ#^1kq?JN_&~fulYmnr~*zPv&qo3zvsnQj# zKQ9bA=M?ik?d`mO`1JX|{`Y@BeEfLD^nc-(e)Y@e4=-Q8Tt8L3yNZ-ND7&Y9NpT$J zO12@uLTwIw50YcuCnnO*6FTD-;m@dtak1>H1>G63JZH8?-nO@+Wx(4>g}NP*^qWlc zdJtapnrXnoM72aEfOWuwi%${r0*0n+lwPVXlimwAV!Pk~t>zUfkL)X(9u%LY<>aKS@?$l$3TQ z0EImq8k#rQy-PpOBq%FeOk|_SfE14d{K7}cqy{`&{4t1_MMaA*6==C&bPQ+Pp-rAl zd=<@}Q&2KY9|ff>^m)8hq65XPmYd5A##F_N$|0q(SDl2b6?WVNzNfSJGcVmnS@7KT zK3!jKAcxZeanUVVPLM4E6T?an#e)TuV!`7^bdQ;j{@Tyi`-M8Muf31+?60ri+~oq1 
zPhUQN@7?=rZO8XNyuaViTRW~qo-dz2Nh!bb%hz6jzhE;qxu~ZUk(;<^A^bEa9uQq) zo~&D4(lL(zfhFs8$s%B}YT3id{4tUcNAzY@ZaNGho(Jf$G4&t*ql9JI_u;;lGeF7Isjj7p8k*5XZGdd-dz zcw;AA+y$W7y*QLk6gf$D;Z?FR*^{fbQ!q+`08!4A0x4UHX$oI?frkrbHW9du@TdVgd;?@I_E$fN(rqLC72RT^5^x?zDm-Bvob%c!f z?aRlidTnj;>%aP|O{CvnyN0+`YxK~R5d7Fb*D#cjUy#p4o0JHks!C=6I{Bx@>K?3u z*&QTFiH?Nylq6Myq-!-B@h=j_V#y-i8gC`z@x=`1e2xwa=4>T#$4#P)-h~Sg5*WzO zI)f0sefv^dyC&NrrN=ggtkJ1{%--EvEq=beT!SIn9&0ZfsYxz!i&p6zLvmIwrrKo1 zvC0hVRS|7saB57w23be|a*ob$vd0ieTY4_w#~uZf_=J%ar(Icppd=Dx>eB?j^JvL6 zv4W8^^cK$Sz{8+f;Wka6+(dLw{b;2}cp^*`F2>3efWKoXBQ*+<828YG(8OBezX;F$# z=rHFT{T}PmUa#d8-shNaENbKTQsi2B6uz5ZyA+h-ElMBfxQ@KPm`SULTkJ`AKlPZZ zdad*5Hf-9p7fnb`k6loI>DqGrr+(_|myd`3f*m&prlW}a9AlG&osI`qiggxG_!u!W z`C51-s**d9b4ZycD?S-toteM0V$YImcWuty#k=Y;WdvGn#J090a(op8&qDwyYs~`jYcVAHd7u4$C0i7?5j&8c*K@A*e0$Y)i>$6w z<-l(-o3%9`Yyw{|*EQs!8egu(_p#L8r)g2P`zSU855~U2W1$a)0j;~rM&b)Su-1!+ zYk+VD&*JMvGJIneY(<#n7haW3samGc5gB%L=jwrO*f672Gx*<=%ToH@Zs}LELXXh;|TT;BUxc=!koW^~|D^Ao9(G-TV zBw!}8#4KotYl)?MiXC)=g<1j9%m%eZ$E)79puT53m(K#iCYk9hd+{<+e*$&}2b5aY z2&_3Ru-%Kq(9y`+V2XzS@CM-~(vd4$L$hANyo`QlVOliU=NN%mkKJcJe|rDz^ZS7Q z8rk96$6&?dMRVNBTAMN>GFf}S69yJ(ENWo@UryPzh(=V;uW#3WZx`rzDe@w*EdIMO z&fag+GF`}VhCE7G%h&?sl@ZQrDgLP*T!_4q2>mH44g&xc8S9*J`^+}b>OFbY5z&|B zMc($@6Q*NJ3b_<>r`RZ>qjwu!Tf=iK#R0N@e~aLeoUd;ZZBV9P)eCFPMy%I&i8wtA zYa?bh#x$)u&NrV!O!c^xwP;}W(_3RDdWp`GC`RCKcAGJ;1ufAQpyR^K0m*0Kn@8ET z==9=Ma0~XaQ*%T_e1aZYpuVQ=z3}|g7vnK6U2eip7sP4&92!Eh=?b9;HlRz!o(xrV zA5IQFBq!Q&V2c+S7eh!|&0XP$RRp#a$2MV)){;NqV&eSd$NO0+RR09GFwVw#6oDkk zTapVx^6ASkuY&e&5z$;!f@q()r>ib>XUPy-O=gwJ@J|p%%Ho#YMjk{u^|pk7)#2C@ zf~6U!DJVO}9Z6=>+8lKE9DVlF1{N zql)Nxp4W!=k(I>Kd%f1<>*x^)daaRi8@{u#G1w(VU)%I(`;KJoU;X7@zLwN~{Pbzg z*|d2Oj)SFTw}Tsxk+K$-NA4n%JiL;{Cn?4O1FZA9)CQ2_O&6# zScv?nMj8PbaC;g(Y2^efwH-dk+L~)#tS{|l0zhNh>~2$QE1{PG@fSwNRyOTHYO30_ zE5c=CAd~dBH%yJFna#J@$fu0^E38=JSnvunmbDpqMQsj`L6VHvBs|D2Iq9dH?@0-Y zsbGGK_=7lEk|x2bcuTGjWm$LBcC|Q@Ggk&kdIC%*^Z+rgS_l`5=JHkAxEkBgexK4% z6D6r7=Hc1gf;9sd*DRFXcQuql48+&s=O5ogbb2mmA9 
zIUm)8lGS;V>3or?fqe@3R65VP(x|XYD% za`S^%?|Xc1K@=E%_5-%QLQQUDSz1z637W?CGF;}L`H6n!*SyS3XL(IS-GV~hi43Ve zxz{>pcY8GLw$xIkG9x1*gSTTZ!N>RGc^n6fa~WVW$a4%nakN&`v=C?}@BnO!Y9y@?RT6TV!k`$WqY`V#=#$=Ezy-=82}0%@GJdvXT2moLH*9RQ zBSr+5f}LX`haSLirZPxEgB6mBQdgokw}8M9L?XKNtmbqLws%B;0B(?ym=Kl>KujLU6#&HOUF%=wZ%+Hpeo{AiC0vS zX$J{SlvP1q${~g#+lj33V1Z9w(dB}Am+RO=a^M2lF#6;mlyQ~KQk0iDhrtJI);{x7 zF0pljLW9QpWg=Lu=1XF1j5(#n4~ zKO$3puH?vD^ar;^4m=qVC*N<)P?$rob|9>nRRFo~Sn^^C*T6fJ6>ncZ*&N@V@9!^< z=Q)=9ZnxjVlKbZL{CK*N;SdJl4M7V>B!|T@7eY}LeMKoo#cNL?Mn04>?Lh7wp7$mg zDI!_CFa#(Vsrig&>xZvrA3V&a`%8bmfBfm{?9f>y&mm?X5o)SC?cYU;OEg7P&O>Vi1yum|-UAIv}N$|t$ z7F=AZMpCO(RMRpMv`FsKms-zJ*w2z{r|1evy7V&EEiNN$IJ3=r##1!TYS2%{{v4z= zHue{-V8JqugX??TRHVXbm5Kn-o_EP5rL^!O9eDCV2T3jE9^+$4hMN`?m9jy;Ocf=S zJcopJdT>s3_@xo0sknRND74DQe#%tXdX-%7r;#fnIDfgo6xkn=yxi+VMu{lor$rAr zB5`A=Vu~WX^T*ScvWJ%|ik0g!a-pjv>rYF4&Y(*1>O0v1MWok^8~O6MZQ-7(vH ztLJr%jwD&>?1k-jCoPcBJkPKe7f{9z!{Y^3LDm(|PO)nnJ}?E0t29D@@7k^(dLc}Q zVr|4Qn3&=?sn&IF~?yi1lz* z3gitgfDXeDYd7#GQ^M0C25KpU5|4XEN|Va_iO9W5JQ&(qhV0F%HI5GEO1_lH<{1Qy z+~Uz&glKP33^GLpJ3*tsq?jxw%pHs%7IFBNsS5yeZW7yw=$tKgyDF;SY_a9LQ(x)* zWUVrWwWQo&vk<#ufeg;9E0*wXh8h&t!&)j;e9sc2xsqAvQ~f0TwNLD!q? 
z%rLY3KYyTPBXgBoW{c2w&bY;LB=5GmxD2!Bd`Z1EZ9?SWGy4%0T@I1kXI4&Vw)nD8 zI&z*RbeFQ!ltC_Ggh4u=@pWjFH^`1F7t5;5(K_=2l!Ufz+e;*NSprPmU|g>OVsqfz z>@fc9@BaP|fBz4E_V?fRL$Ab|?Q}RGM-35=GOyh@Sq%kNuy7R0gs~W162~dcx$&lk ze9JVYXxvJXVwOT;&1~j5ggN7J;$0LMs-(oLRoju61}@h3o-X+^ju6krzJL7whwuO5 zum1Y)e)h8;U*1ns7)CdZHq5iw*&AK-N-K;c>FPP5mSO*50HL{Vnku49D0Uq}s+D4) ztuf5R;KvaXgAgwL@b>Q9553kyO!MgwkH-Tv1kP1ubvm77mU*wmPK(CKkSkYY>X8eEBhiDNK%pF&VvWLBOev z@Tee;V{Yz-v4;N63oo$-Sz#PwN{3UEQk0P7r@n_hrgdT-tLMZTF2Kn|@&y!n^c#qA zbx?E_b<~;@jG13x3V){CP*%FGn)%%@A@7jrKWB;%xpG*~8|me{83-$qEw>{>78|>z zjPm9lKNqq@FA+JBe^U`e3DsEgVxt|4+--e;Y zNGiuED~P8+y{i`2f=HNl>BMDWYmv5bXZa41Wo#0>4HqEST7V_{AOG3I{i9Oqa5y%*V@hae*vSvYP#bNhv8hdp3Gk}; z4HeQz7OR8+Tdt<=+MP6!FqIXfhIdEZv`V)NdNpVHQWklsR7NJ=45-9h07!^8DonhX z-}_LiDso-HT=jf=_x-ouefu|m|IIhwy?J~4IEwA6m#atsK7!LWUxpzBx8LvZ;W7nW z23nM}FtP;~@+XWN9 zo9=hJ!_D#JECxF7cDv*LkUfCrG>hrPQb9AtpmmOp2n52=b(d=G` znw(U72fJUTQ`A@p^7%>>zNpFgmIx%A#vloQQdeSmFea$~8VkV%X|Ci2TGQev9HLjjkb(yELyYLeQ@$x7O-9fPyjZ;~6mLU@X9aB?S@c^>AXK}cfhs!BFWlr}4%yD&fAK}9#F zXxOthVR!>nEdMtR!TjQVcyJzouJW()Qq8MV4uHKHmfLt6MMX^Q-!(IkxR8Mxegt;= zd<6V)IUmGa`WA6HGXkRPn&IcfMrzhVrkQ{*UywSyL2pp1E@}4Gm@*fH8O~lFO+3wa z+2zcA=67Kui973fjyeCiw0vYr)HEYP$Kvt!{O0w`VH`q?Fh4FeTq-nX40afSqA9Sl z!x**lIJ^{`OTpuI_aj5H(yE5LY-^fEX;dM4oa)^{w>zWTg?bm}tV2#^Nf9Yc#maYN zZq)cF4qRea8+p@=tbQ6U{o~!MAHM(LhnIKvkC#htt>+xnNQWpN=7bQ&Z^MP4Foot& zVdJ7s!!Tgls@B6WK0ZE{W!W^1uzp}9)V2pZ+5Z0057Ri=%QyfPN`cx%NUG9dgpua)QJ#y}xEK`W1`O%Cm+{sClC9q$epE6uViwWVLAh} zEr*{hXUWm$s%Imw*e}}>6t8;W+Xc1u83j&BS$oLfk~5pk-&yW7?#=q`s}9wi!ga7K z;~K5DTh>%m`6I60mQsG5lQkZ2nct&x^x5|I0A4_$zbV@0x##o03d7s#j3>s=-N4-p zp4m(cIZ8(HGc#gd0vKMi4;-8?Q*qqiJUcw^Y7lvknEcr7YPePJF@Ut@xDMxR5Wq2s z8@$vg!Qqv-KnnI;!-`#VPHnq8wl+3<-*y3%R&BD0!P<|HWmFkS$0KRQGXN~CRygFk zki0vM!^7?En>Vk&|KX``eo1NWy3Uw3h(n9>&N{#1W^CC(lp`MJFu@fgXB%oALWx;` z(l7VkpU-D&?Ox{g7+AGp)@D^b^yBE`90m1mnnn(!(pBxz^0x-Q%^2Y{^!@Sl;@5uV zH^2Y>>!+4{T{nw4gDMT!Mi{mwDg=~Q5;hWXmSqkli9ABIK~(br&wt4wgmeD1$s`8v 
zi>eGhz^}DH{SutzouQ2(PK@`cicLj?66aJ=Mo)?`G9z*Bwq=R*1XorNxNOl+dqKVTu5P6-Nkiwwe$Z%Y+?H;ADj_ z02@$dW+qS7I&Tn$mIf%Qu9UPfIIkH=V?^?07%?jGa+u+f$oid4E}|hDfHp^- zcSm%v1mmK}JGUOS`QHpNd7}mG`u{S&^U6yoFYxV(&d&TmcMrz&JV9% zfB&;@zyIdPJLl8sa4bq?Cm$o?_ynA4!i`Fgm^C$x#XJmSStfWm=xUsvUZcfhm2FH5 z{K7dIC1#KNr(){3>-J?)?C$Po8Kbz806@VFK8g*rzVr`OsTh^KyPO~IfA!aXD@E7H zbj7lm^Xw7iaZ_KFf&oRppd{-hNpm!nz||r00<8jwNG6LH0Vlrr$oyaY&x1-n3TEUb zRq>MXB@~~2Y4eL>t6D-K>DNorHP0$UY4~Yl%2*YWyg-*HW!2Kg5=&;@{nId@>Xa+u zrYQ!gh!KcJc_?f70897}efJQO;4(1;U6V&TQv^?|AU|0;`C-tYsd2W!RZ7XM6qE3k z!f=D8a9*%XQtB06v-=iHU8dNWc3vs4s$`Rgb0g?4ya^~}u|+dP$#LytyM3{(yf*M| zT5_OFgUpFU-@ZNs5Mr|xLPK12(4y?eli!}8lPTD+)b=aMGm)DdafLm$jv7Jj^`a} zj7#$I+2QzepMCuC&B;5D;iR92cP} zZEeGBeG(ULH8X2M3%?fbZxPUpL-gh0p9Q!O=PPzv&-h6td)oF87lee)0B ze0zI0R+S#D%zhqi@JY+2q*iU)wkVR5O{ucchO9lK0c=f^W3uTzij`f9`KvJ^8arnN zaYCK-^Iv@aczGC|dpJMxdu#WHx;E@LwN1xs!EV>9SKoc{<U(hi3|Ifp{JYjgVgy5zLm#dDdmO)7D@9+YZ<;qkFI7VKK1*cz5#JWkI z!-W>dIZyi5JY=HH{Pw zaRM-;qfi-?Y_&FBQ+qo_zjI|-Yx9d=d^TC%zCi^#48x&oQ(AbTyWN3?(7=z{=G@=E z{^8r-{LOz}8LTmpL||YWtrW{tg#=BRsdqR#zZf}VfQd|!lkpEu#8AkHSH*@eBykBs zz9s{(t}5mx_!Svb0y+E935B>$BGrpRz*+b^F%A8rc>V?%9H8c(R5FDLD(DiGNRy0QtR<(!cykv0M_G$lovOpM0W3By~1m{)y79TIE{v8g-U{{oe4r`3A2? z7)1VM*z&)bz_JgE+&end8+?I@y)qw0%|&gDAgX976@{Hebufe6#{s8)a{lGraN9dK z7hQMQb%(a?p8jvN(YlVwjpGHwt+3JhfI=UGSSI6}ldx07eyq%8?%K79)F$MGk%{0q z+8EE5^X>ip-Q(l!-Q(TE6y?Y>d%~bqR68ppY4o1)OJs*DiW z9MGb+c6nx?8Oo4P}K zI1ZnD^y%0?s%o9log@l`0_P5EuC7e8KtD#hW9!4jd%BkdZ?&omX<-Q3A2?H_m>1C? 
z-ih>8RO9UWCBbQ>I#ycX5?KREYv*mH&C}Ljc$>g@04}-7d%4idm}3N_z*4B~I`15^ zU1cS|t0gRg$3TO0nF6YWX)e;!x{*PT9N(?P_tOlQNBK!gpjF0BU+}F|GD#nv&bf@* z%lNZJ`8Ze7{C7Trg(!^Z!V3vNwv5KAMS&HnO1-z9y0SdK{#6zvNp1C>56Me;H5y>V z#1W8v#hnq|{j7XVi&ldpRhJ!`>SfNeRo|!X%!X=;JL%h2WbJK~COXHYr^VZ9BkNiw zICEpEi%XMFbzZ%+Hwss2OGyZ2n9YKcpGN3x{X0|6E?YL(V+_hdu~i8?gEj^T!UpSb zABV^LF6`dCm^6cKtR+mF-Z7{9-_y`8#U2XnP`1VsRfIYw!j2>mrY zo_nM8jp(X`kZnYEv|!kBSiDq*X#)t ze=p~zhw`v*1UN@X5k?yxu+uck1?e=Dkc$zD7|nPa{H~;dbdnKdLiL8W%RK?zA*4?} z{NNY9{POnxZop^_d_Uf!<1YBCX_{sjV%zS!ZePm1*zFI&jtK7cB7v9MZd#fqGR3jb z6~~hZavWrWGlzeU1xQr2j2KceA$g1`Xk&=<+i4P8yFwD8*~>wg(nBQ`$rsO36Kzgp7IfA=*)tgqz9SsS2V$MQWX# zh!BQ=&HKpzgnkPD_~!AkcUXa!!N+8bl7t4BFTInW9Z$R6?qGIRA+v%_O>%@7o>t3J z2_zGf1dQJ{;!Oo52evcLJIJkm6FV>HjOr%tXla9Df~Egb&VR+IuV!v&)n@gYjQUEv``s8NFyAW)#47l!O=tT>H%&>J zN4w)`;q?TMhz2xU4d?r^tlF-n%Nplt9-lpn^XxA$uH}&4w%f@x>WNv2cfua5q}=na zX)v#Ck>-l?N!xbwTzK%pJo(tTUg+FAFnZj`*)d3|j7Bz$IofFOHw83q3;=E=G2x8j zs@Y8uVbpu}Vx@S?-Xas{Ou*Tyn)1xG)p$6M4B4tE*q_N9ep&CE zNqqcVi93r)&Q;r*ADBdmlm-0cf8;k@6*t$mpPNlsUTkbH;doVXu0n1cOSThK(dXJQ zvm95~@HFf4-B`=Hv}~AfATdg!Dh8RHF=gwFd?xsH63~0~73FHov0peH)69aR?~$4K zASzAI^XCQf-hu+Rj1oj@4!BWn*~vnP5S)K|IsbS+J@k|F$;(Xi^QEs=nl>{zZK^_8 z_8-6a;Pa0^x;Y(hZcc&+mrO&%-y=%SGCMaL)6(~oou-YiGi|*AYp)6?Y_c|Vx9eU! 
zJDxB8;f!hSEBj-~B~?|?_^K^T*L7NeNkJK+^pe0|2|lDc7-JGPEhMK@*LBzJc)w!e zg~vzjcKgFLo%{3aw?FtWoQ}_r`;*qq`SLhNw+>^8ZP#k!L3d0uRTQS(r7({S4AsXFrt4J-Kt6d%z+XF)tRK z2PHesPf8h(Fh%C6l8M2KS8Ewnplmoxm#!VJb3BqU78`OeHYpIva%7owU91VOJu##d zoADvr*g<}u#HWk|C4$~tf&2T#rCU}cAm}WT$g`vw1abkyhB0uzttq&iRi3PZ(_-GF zd3X$hImKe8H%s8i)TZ1o=U;S1j#|!~g(6;TK%yK(#iMhUj;6Dt?fdA0c;*D=z^%~ZX5?|-6RCzH<#(V92cB(|Ei5u9z2EPjJv&}{=K|0vdD;b4 zRc+gHZj^j`%gDXPw5QWiX>~rI`3&?DZbJx%4&x79;j}IH`yJ~zPGdE|J3=KV22p#2 z!r**Eb0N{Jn_aWlWhHc#G+pj*KK!`a@3dC?(+A!0 zWSWiy5b^pVBNrR*QXs%+GKQwE$vmEh0WZcO2G4dZ42G&|>M!y*@iKFT= z=#_$$lxR0)@on3cvL(50h=6U!drb7bq0m|=UF4S%d&cWRsl1+Z<`QL7vI(yux@j_X z6Xmz8eb%ZI@iWLBDT+jO1p~c1?gk5)5r=!$6~{+7f1KYlX)AY}WzSWQi2b^iyOTAX zTag2!Se{~Xy!!VHWLgOwOeH#Sh z8K&vt+~lS{cqf1hHoOSo)x+h-TYGyRC+i6ucrVEkysXmV3sXXbb=d6=hr{#J%~O(c zx{;=}4*)VmLCzjF0-Uq*cTCxQ%SX?-eL1`)Ia!7t`5SRqlb!l$8b@ngIA12`(Z?lc zNGo-7IyAKryb?yA3b{<&4lOEW03__noGoe9JR=yH8WY*uy6M_hiH&t4ab8eV!8-yO z5>H>``FwZj?~cc3pMU<#hy4NKx8!$PeR9};u-omsww`Cxwl^Pt*0sP>z!C<7G(2Va|}T2TW~p8Z51nYbyDcxIsMBsflN_k36?WyC7L zr#UP)pg=G|Sn`65dr%x!vuf$pvb&#HcHt684ARDA+nFd^RhHIHbB01_O21eLfzc<^ zfGeSqRoS7mQrNdhzvMb@ds}?&E~{yNhi91k?u={KG~msEwIUXI+wy%#;^XHKpIWVN zuKWeBLr+$9WLUaf<*12zj%9mSgO!$FuozsNNy!%2M2R&6!+!Pu{>OjKCT!bwAHS?j zI^gR(2`kSTq^%!Sru-{ta+lQ=xy1;S&JFi(UjOjJci(*T_1E8g{p$6b%jGhR(_~Rz z&72~O{M=bA=+TXXod$2MyamE>mOc;X{)hYV$G4Be2&)PKI`cRL+$F~7wgL0`!>3_1 zH_vX4$B&-E&tYe3U6r^+ct?6S3_}l8*f}p7F{Tjkaiqi~K~)*8z>^68Y%>5fd8i`J z!*u(2h5*AMV-uoEMJ%E=`gqutvhi*oCoD6$EiyM6e7a@*2-#9vNU5#su4&o^bU^}J zF$AI{DTVbuE4#M4e|S5Nm&eCf)f|8R(=R`MD$M-&JdDS(JZeDGtyn0DEVQ@xZ__j+ z+Xvf6Kh41OkMLE>xXlUK1VSx8-Jv`b19i@j7w9z zX}YKXYpr6C3^yk79$7W`-~{Z`(gO`l%SjrXrinatI!tn)jh3RA*IH0EhsLM~HN|ml zQA=^UQAHwdTAMR@PE4h!IwupA$l!JBz1+yt3;rrU+TY)I&M(9uNw2o%^Tx`3J4J%) zmEFH(=LQ$&o^4cGg{axjmC+|H@66GFTW;$wKW4@HHsEnm)|PX`S%}-zj$xN>RQddg zY3J@wo=7C}Ig@8#Ww(tav4k*JhJL&ofBgQdC$0Fm-+X^Q_dcWwroV01)~&%x`|$a3 zZdbNlz`={YPtKs6VgQ^OhUU(oAD^4 z3fizrs)%Gw?4xTGHCli2@t4j`Ph)qUqEA=Ga+;?8v2s&Us!(XiBSTPbDu+vj7@{O= 
z89|3>Oxf!vOh8tTeuTwCvfO?8LhJPjCQkoW64#4rWe71n0Q^ntz* zTEj%;^4Qawnsd#pwPJaqkH^zo%<_Q2y&-u5Vm;lIP&5Vfur$rUB*fG0iSuTaTx}xa z;46`T3I#G}*eBH2OiR}(o8r$Ojj0x#I7DAKQwv&DDzFe$k%t9kx6$0I+C&wupW7yz2k__y5AsvoSnm`Z!;K;aPiz+;s(luI6*)6pF@US?TJXee>hjfBZ+k z_a}e)*I$4Aq}aSnQwVWp>pBFeCX7Tr`m~Fu!ZZy1IE}+NTIWeW9<6&E{X_4@kPv{d zRYsd#*X%k}TV2<-((!KV`mo<0o@$Vrn-|ZI$3v%$D(4dXxM}cyq9KEGF5~a0>Y1)b zY-J(zeL2$*1Lc0~z6&?kKVI%$-`&2seR%oiDGCk3sj1l%;N|03dd3#gnguBBv^2b! zLazl~u!yga8gXzLB2J}F$NlkeDCvwb2c=4R7<-}ZR3E(f^z+YtX}>>et^f7E_)ow7 zbH8+~>oi<+G26%6DkfnFHMk+f_7Hr#ah&Z`$+(sxSa<)*-o2XcU(aacSQLNb7^Vvl zWn?892z{{P;qHr6a{2>&7{v|8N$4eM16sdZOsX z$fuD)DR^^aVj@{*5+fE(LshLzh|Y>pVN79ph)ZbLAR}Ut5~3uDRnvAsI%2?Mv|7K( zd5J`F@G&jsuz5P7FqTXRam(?Gf?0%!our%d%?evKgoF`2nNYKkc*VVp9)q$;I++(1 z4U4&zD?xD^8LpS{)vTIKU1rHb@AVwt#JfVC%pBmxG;A?BDayRfInUaD_;nk8nMt<% zn8mJ}DMMvGMrxk(M`aL(0+kIRvQ+=~fBd(M?kszAwsReZ6CZ&>058zC^1W7?0he0}D_5O5{MqU)E=vd%Uglre088;%B zp$#v_spC*ciUD=IL+dR_|A>Q87^c^;VzZR4U_#?P5U;bJU?n4A=?$-FR<$Cb527l_ zGkztvm6;{vPlR=HNstVVaz8{p*{rohyJ`6Cz*9ro%caE*CT;u?S0re0*}R!~9sjbW`+wYRtfv6(Mi)|zpC9inpdag;ABO4D z+lRqkY`X2I^Wa=e+US@-3&fId4NCVkE1|VWM4RE=(bUJ??g{%j93ZWd%vq3vsuYxa zq1&^_XJp~k6(ppfmPd)7`R_sO#6Em_@E_(VG+>brd~b!t+=OobFiw(hggd$Hgz}zpz$#_SJ+His%O(0O zo;>oalqXivnvp+picXFR+y4EP;dHrr$>T*X%$N4xHcImI3!W@z?{QxiW|7~$s_a)Z zDG%b4kg|5`hsJ2> zn6O2}FodE7jAu7GH~4rCvr8BS6OgJ@ZPU%g-1oh9RbAH$r^bz-5ei-k)S$+HR}KJf zYN_M`PXXaA1nF=m=RGyC8V3ihsN~biF9N7 zpVdWmc=qvUzwjH!)3dTDy4@c9<2m}rTX*-$K0bg(1Qf}Z0jZwtFO`shv_}IPZM8NQ zb6Z4TMT->CbZ&gK!(hWSqa3#3?XB~1|KX=pKUZ3#4jq^2kFg$0RTr=ptnpx#wYT5z z<(rwk3;WY^^p89ei|WM(!8-39BH1{4HvzvC`K4(jm`;7ub}}bYuakUyS(S`FTn&sk zna;|pN-0M9Fj;F7%8#Q50(kQ zDMuW<|CQxew*yv)O1uJt^PoHrvSOR8r*n$5u-`((8q=1c+GO! 
z)^U_=PPG+;#%QjFXLFq0fNi8|KL;w%sP{``%ID;C&c|5vhYf33#q5 zuXg2b*Ov4=%Z7)@^P%u2qV3_1Ts9HWFwNy?{hUH=jB^1NFl7}Y2H-F`m*I9UBV{1b zS*(FHr^!uKsoJ&~Cuj$%5>b!57@_SY#I#!D@#P(@GrX))Jhwx_p4wE1fTJC-iR4Bf%rMak^2Dl0P<8lXs#WlgNM3q2=LiA~|E37>V8Pn~J^mI!L6|MoS!;Y=2 zZMNHU-La+&w27U`_MBBBkd#6sS4h&F3OAT(Q}SrLNRyf*AL=DD+L}L~t-%WTwb{ z=rn-)i{jsTkHmN`tjLs{3zLN`^L*)1I@Ko0r-k5r@PcQDbAR3)T3Vo@C%(3mEsjCP zGsHCEbUju(wqkVt?y)b5^y0Kjzh2ljHKJKzab3t3!aCNas8}Ad z4PQvG4j~i#QYBkdGI3k_kF&2L>J!LTDD1onK$-pcuHsT+b4G}`Ur7YwV%F6RWq;f# z-E&w>iC-p@~BAd>E$IE63n_PQw3 zO0y*CQmCPNN!{L!LHsh_tx>bi{hH{VPnWutZ4Amix#!P5_~l>tC%XRn&5kR{7rkw% zm|1A*wws3Tp_$7JKVN2MhX3;A_netC)8sY@OT`Q-`F+8aFP)+q?ntuu zv*#B}^YcrKGg7s`di`c~b(z*j;}f?zb{o_J8P^Vq(_%In^wZD%!^1hMl*;xn;cE4$ zvfLHriX*iAD#Wb*I`TQ^?#4Zw`VCHt7eS3bqU)g>pYYpJc)WX8#%>3ls z{5uKTo;(m0!%1OgVWm{Nr)Q+94;v?I2S)c>don#UGc`N^!3ReZe5q%;X1tEY?X4kC zv&fD4DFwL2{(dGtiub1O>&4#o3zz+bz!`LWi>(z=J4&?Xn6JXFWbMMSIte_*!ZNh@ zXidfaM)y3)xbABC7?6eQVNlRIzT|iv=sjrANZA+ClN<1D*x2U!imTEQklKXrVrVh^ znlg#Euu)5y0%YaMRTJUac|ZBxAk6@$h)#%RK5Ge3>%oHEC0KdI`%Fq?mUyav5clGc z$RVI~KDCkcDx;1uM&+DD9#P+E@1=u;6+Xl?J$~?T^4|S} z{r#H#cKNL_v@@6`Nwi3N8(t;h9!93yxvEm>ZMgNoi2Cqf{KDhWdU$eTG|P+gE@(ss z#ihjsL@woE5@DDHc3d|TAGpbq!)-(Clse0h6<`s(?`-W!HM)91Ygb-#nfVcOZrAs&e) z87H{3vSQ)!OS^C0eDmw??6|6@&d%L@cnKfwO<%mlHcreySd?8Fy7-VU9S z=?$koqBhYUON4x6u+|41j*WFl5AT_u2GK<5`h2hpId_^X?`}g?qXK_B$`eMO$w}NO zA_t}I%yX#xkl_YxDu#Cib`1G%7`UUixt&{Uo?9Ytbt5KWux$Aod`zG7bp1d6$NwgR z+lae}AEOU_s;TJn1a02+AAe8gaxywtD;XkDxL--eXgZlN;}?(b;k*loH9;5Eca`9U zRLHh|K>#Zj<2>nmJc3Oi&z#c%wOTli7^zQphfRZHy+611na&CIU0+*STAG){-jUKn zdLs}(<97#w72`V_R}{B7Y3g%-u;>ZH2?#}>U(xC+eu46~=AnVC|Fq2UOKP8($}vbM zjkp-E{`#7^G9_q`6#2M7EE;|;c;^3ZV08cF$=1L8&-_#W>c8|a{p9DrSl@auGdGLs zRav1%k*o9Li=&;(gB?b1BLKc7AL%gk!wWH`qu@%i9v++=>|c@(Nowc*JN?~P&-M?_ zFFJxlP70qDntEMa``qTnJOZ@wV0vkJc5!8TVF^z(y|6I7xTI_=%qCrkev4L_RZi9Bwh z64^T{8yiA03OW)f8~H23wFN?A>IP>Sd&(TOYCWk-a_&b@6uqdEKlCSlKM_w(-bE8S zM3dYS;*$u7{_Ec6BsG|u3y$&Jt4Hun?vG_@l>bj_qr%s?Gw|=%sITujH;?Sak;3jW 
zDN!N4r()07A){$rEZBdija!Q2v2HRi%H6kI(P%3t<8x@j^O@La=S;%3#4Z|f1~n4D zSc#2Vmc?$Gyb3HQ?O#&hm@-wstFvFhPvson5+i zHfjPiXr`q~W~{jxCsj|zXd+ajWrmjBAeq1^D#FmwQ#8E@_%{{`+@}(wlwsbb4X-!_qO&QW}$Bpgu*Cj!(?a&u?zc zuB==ae|K+TacO>ag_2dTO_(v_ziiHY8lfQBB?Vh}aaF8C^YBS?pN*6ZONvYy+^(m3 zv#_|V&*tLf_`pQE7D8jLM)_IC2m~Ef8$;A`c*Dr08}>Q2dG`lMbC(Cx_;9~y#4V5W zTj;%?o1=l&lo=Wp?k;MB)>jlh9@$uOXPf}s03KsO8xa`(**Y4Ts*{b&#UQwB28v2# zXu;=J-ltRH^b>uUuL)wf8)l)Yz9H1d%{PM*rNm}`O(R3gjJ_3v`G5Y;|80ELLzv&o0vUhFLG}jz zyq{O;u4r@{PgJ@^g1y}g+hH^58B9n)V8wFXCJ*yb$&~HYRvn&Qwc6P^s)R#sK141- z2Y<8L@iYeS+rHT_s_(66h;RH*!@$pXNh66rY$&>_r_ma6PAc7hW_|EFOWQaWd z66uaS1;!m<$W!3ecWs(Kt$M7<&IF&qwC77Il_}8fPD0u0@)9lr2{k3LXwJ?SuhwAm zuJ6D9iywdTZ~Ytp>c8;M{j)#$`Oh{VK3!N@t+X;)$qbuQWcuvrN@3LPmuLhdS99+T z_DEZs`HPO99HKr+n(=05_t=yczf&9gi)~(G}DZ4H~x1O<29OzM-%1uX7S#i!A2ROyyKWmha`&$=jZs8#efnP;CexFh&1nX^r&BIoN(6}XY67x%h#RS2Jr>LzW z)UAa(&4wuSS~ZRCX&Eyg((`^<`db43!vwM+Lpeej(9IN`8G3W6rnNb4s{jc zVpvqZT#cBEdJ{u&rcI)5=J1G6_2$DTNR;m31VGx;5PQ5vjL5*mg|xP4o-l|2Mx8`I zN-`sY#bKJo>8C&bT+CMpCrbEzS!``9L*>@+`@BB7R=dSwU`UGwjt{76#IzJGxhb;cemxY?Pn?Y+_TGe(uCSIB zZ%S#2V%tvXWE_e99v|$Uog8bnW~`HNhkwZgItrAAR&$`@NQjf8kt9sf(4^#-*dIcS z>e*17P%d=Aks2^vmnC)YJrj*~qMfau~a)!nf?MuD+ zv6ZDd{q+&$On2bsR^pS6%d%Afz;usM(V;7+Cr&yQd+3N-w4KZJ;5Qo1DA5k9Xm9+U?y*#T@4=%3&uzKEHG^ zCRKcfURhP-JGB!+&9j~sDB0B6!2Ab@z z-RDv!qms^I3-eN>$E+h{DSXTw(GZM7fUSrLdVX_mh-lm)?<6S6JND^iJ@h(pKwJLIpb_NRMi zr)HL3NgXk?8_RQidUbgA_TcR8Ia~X}494sp7wY-hh2n%@r7ID@!?*D z@4bGBk78r3m0niXZ&!#A=r;PfpcsT6naZO0k>gY*Q5iTV?Fv+(SiK>U8 z0trKiKKQQQWEgbvqEv3_dt4bVg~L+As#Z)G0mtMZ zC($*;BEzi-XBILo49yKvH5=yI(&_`e4Bp|x`wO%4PaZsevhjFrX<>bBS#FTEKCJ|ko)F)#6b=gD(DyL`W3n7zXy0;ut zXB0iA&RO1)4g{ZDSQ24xA&-s@Oxp3$p)A*6V5Ato%k0E%={yv<1BqejKH8qW^|A8t_m&HkEv5CfU3&??yE6Clst*FH5lMD{u2`HVl750n~J= zd7qJH!=y(5Y#|Z()dOsU<%fiDgiwO2#a3;b?_%B!= z0+fVRL*X8enjAy8iy+st1D*m>SMQwA0mZGkP0--gmUec+a8v~<{J~yCXt2{33%5J^ z-TP8>**8%(dJ{Z26|GQZ`HF;1l#;io11XzH4a{{Ca)lj=<3M2i`PZIFZ{Zk4rSjrG-s8|xoEc=*NVUp#z({w#d-X#K~ZK6$vc0TK2O4rLpZ;mE-WaBJ%^2I+pv 
zxr#8NUpYBKl_>?3S)URtKZyahGrFlh4a@o$dK z4^A)J{`I{nn^{~CnY#IKb#--dZF9pctZb~4(GtCFbaV6bW`TYl*I`iHhUOL*r>5sz ztYdcihI;Slz@5^^rY5{5Lhuv7vCMrqNd+^@9qblY(1)6i6F1ZyD9F{&^OQiQk!KeO3H1dGYyE;5g( zk;ojxZEtR->1N<|$m38XkJ1l=MH(Vxh)vVqSp6b*-D~iP?@&{p`n#7U@EKe1AK0Vv zdB>W3mqHB=F52%<)}ed8tJ@-i{g=iP1N^N#X*$N3<82c%*t}~vkl5$^4O>~~InEfd zL@9GC3J6>sQHt)o+1`Hh=FRr&H?I#5j<{JEVOVI5gkc$J2A!nSBSa7!z$H0{ZR$aF zH`N{O>E`LlBzv&FsqxjUcr=KF1JunmsW~FIdkW==`wz*8kmU|6T7bzh6q%la`9TR6 zB}LbGATrEX!~_eyw6siTluUXuchLf+IO4;t&CRvdM-Mkw*EZHRA3pr(vj>kpT3L}J zWMh48YkheUBJ3R=9UZIserzm1|KgAQ!p_YQ2SAv*%5m9EtzO24NpVEdLEV|F++*Gq zoGI_|<0E2aad1@>^;kFBo532CID1|$I~6;k>$6MqBuf+Zr+@b2we{6XcPMqz{JgXR zwTX~kOF*6WAcsTaoSAL!Dw#L@;bPzewHn8}4%NGKTwir5ZD|woN+;PY#!guRgGSF0oW%%=4wCy!nlVHswupeJ5yTNJkNYX?*hti8?5X1r9XS9R`Gw6c@-q@~=wkF= zc~Nvxu`EST6j#fEz;{cS_Dw^W3Na|`@EIq>h*fb}9=QXD-_0QF(b9TTAEDdcI+K_fWG_g`SVxbK7aXYTlo0y*?Fmyvk8^5rpN4! zm3F=lRLHuAx>v{g@QUloX>+SzL>O90=t+3n(b(3n`Rd!+gM%^FQP85rnGU^AzKhE? zxVWdmr3p%NTY<1l?${XZU>*Z=0q+!R_iG3c5(mWP@e?QEZBSBlE$8pm)s;t&A8u_u znx9)*UfqDnvr9{JOG^tY>x(Pv3rnl>a%;>i-v6MYCDzu~{{BDl_kQtbe{X4H0}E#g zWhC(j1--MVV4HrCIf~m(F2u{BXe(88MRlSP)IF6$5-mCc%sMMJjih^cBC(ss4zsu#yI8M!krea!Hq+T?m|*G8Q_} zgv7)MqGQ%OOd`Pbo~gM7D`|F;oi@8xo$%qOw~pb;oHxR?hNvpmH;qE1isU9uUq75( zI*QZpvtC2y3S%Gb$9pa(aqNme*L%yFUjy;O|2VZwhO=+-fV+$!hZ_p3m0yQHQT)8a z5rZR>7CT)CALh}J5VW`gg%i^Ci zBNjD0TKoZhwD~x5RnIi7F_467p^Tc(730FlTaD?~k-WRhj)j2#VZNQ>7JWK=hDV8n z>o1~M03*>7>7mYacO@kIpZ}v?g&UCO!J$hJRT@hA_7})By^Cf%W6p^3S<+@^pi}CJ z@ZQ^g^~+!V`mcZW+poSueDa3{{RU(N`oSpp#kwA-p&3 zG5{7VXC$?@+`qM-8$0R(F-1@yIg!sv^%XgB8LQfj&4@t?sJM+vs5jVh#RfDBtLt)H zWPwLsxc>B(#0p$%EF`ixC>o=gIl6Ep2-8KR^e%)T$epd*;Fh2{0AhZNN?d)`R7B8pY7OtP0z zR++Hrxdn)Ja=1q!C7%;kNQYq^9tYcIeD9g#a7iQJgIC##qjcwms%!PiAy8hX@R zWnVE_aN)0i^_Tyj|Nmco_iTG_|B(I%S6^_@I8To^H%$U6`w@J+_329Gzig`sIcesj zKkX$J+Ab44#YlS(6Z{#Q1!whY_?QSFrk|0@80P0Fq_f?2CK*QyQpMb&7wx|`ts{Uf z(+35#ouCl-U`4ah(Y!!K4#zq*zqA6#iJEd4F@6(X@Ik~qiKsU=H`D>5-bQC=Rm4;D zQ0mAE)u3iL*~~q7yt%fy9ysm{iI+%^Oh|88)0>+_TufRAlwyC10$Ck!3~E6h^zXt& 
z7t-;_V0O_SlHL$W=GB3E`&FM0i}v}?KKlHNCqMe^(ZXB_Vp|U$YUJ=>&xm*@|MIW? z>YFdWwwm*c^N2FHLocVgyWcq`lll|;YN@kFLCV9OZBXOom>_&!@2^#BI%nL#DfWKp-n!~LDzSIW)WH^5WSF@$FLqrZm?DFie)ph(a| z78^WE2-aX;axs$B_;uh9`h$DXa?;3JIPFD0;BYd01+tFG`QxHdzM5z4|fL_Ua} zEB-;2(p7NRe4n0|b}ZPGf(Xg{bqB-!C0^73Q9!Q0nDb*Y$0dvT&=gIig}&Q@gznV! zc34K0x|IZu!InFU{UB+8hKo~9eC8xCD#F@*NcWO_Eo9%hJ|KL9H}#ix znt@9k#P<`;Ug+y&UK0yf2If2xS<;N<{>1Km%%b1oxZ{to3{)Mjmec%+LV}@J0s@iyVxq=@@J1%N78lmCn)QLw+2(oAx zlSmf89h*~%{En4}is~lYDg~ylu5PWau1)o}N&o6S@rVyx4ut~Zmsj{G=DhQPAw779 zwp(9#ou8PuaUvAU@yTi@+exrX^5RDWC)5T4i3q(CgW}pNldU04Pp>V_a~F8Icj6f# zu`A2IPCj~4+HU>SWqKv_VQ1&fiI}g-1XxTc#gMRdFTv^YWq*D7{DTjltgg+F9OF9T zEzCtT^NUA&+g)sW@^EjjMY9R}xa+jqWV0rhXWD6vl>xVvy0hBYii(qM-ZrXzM6}T> zUWgX-4@l1tMk?|1?coY>!GsQ`b)?h?N8@X5i`pe$)2w&J7eChVA)U1rvZUDFZtB@= z$7wCWUwz_+`>C{1h&>UaocOO%R4zIB*d8ZTSDW|{@E4GGF{sHjTrSoVmW~DI9I*~E zzoUokVEdk``@RN&{g_Muap)g0cTpiZb$4r4hj4d)WRL4dZ&RMc4Z3cs@@3c`1W=YL zTXQTzAsOj^5uMPwKRg<#vk7!hhTbYfvw4}Qy>f&yCUhe5E>apKM}p0N8)Q`wwLauaT{L@^~Kg%$Dutg$~M+e=)c+guuV)T9Ug zFidQ$5|T|^V==BSrzS2^vzV5{6C+mE%RI%kh?)h~J1P{meaTP`Eez#cjlZQ=SV8^+ z_Yj!Bk&MZtBc3mfn^jR2EYVOO9q#L)MpAkuSrj2ym~Cw?EG$u=WNTesUY8NQRYLfT zIa87CE}79o(V~wW3z^jdrls-ey@RykGaEk1?Y#G1NRC2MKYJcmf_9i-P2s{$1%I6! 
z;X32hiM5+sm@lP%X_uDe;GQ_Tc>G9dMk3)l0wna`{YWpZ?#kvCDu%YQh*?Yg8y$}} zb&6j#$&{zE$zXItO^y!suzG_xSn9a+m^fh`!FtjP9u?}(xi>B z)!2H~nw;jAXxXvr&dqW_!)w+fdHQD`K@rxnoNqC=;5ou)aZ02B~TUVe6wMZV+JF2%l8zJcr zeTWJT{G9UOwS9BVrwBH^TF%Rc&fH^>-$Lfyoxr?M$H=LWHq|y-khw|7Oc*S)Yvb+Q z-B-t zTxm}$AzsVv(30W<4$jTbtSl@oQu0o(6qAAQ&CLJ|v!zSM8zUw1VGvDZZ&HYvh;a&k z?#W@a+&vXlzqmS4-i&1|>sVI(A-(E}zQ|9=Jr9Hsw3e1PD0Yn0Fuf!UYy3XKs0VX& z{ypl`pSLk|otAh?48*A-T&GARtuy)4pI_OQa2YpeM@SWIi>Nuu7tnxA+?*V(tZukj zh`GtWygWcFr0lmnu-}<3Z?yjN3ybI9JQEqp z83#M_`17A%oE%bwZ+`sI?u+N9?Bwj&gqS%~jqT#G4QR!$qENVKvjcB;cXsDjm!V@K zzszT8-;9Dv=j7NhDL7FgCN>Mn!92w+%8GeYSZoy#N2o5H08VGjQe+u3pNf(x)tiWA z=Q>Y1H|dhW0h<_ALrR7!dI(~1Mzk=zMO-cQvR=hWr}J3a>)rx~#Ex-mdS?Zv5cA&8 zOOS~Ne+*unFv1pn*ent#kG`p4-ksMY)fw6vwTKnwa^dCvg7 zRYhD{#Ec0zwii8=R3$Dls6kZDss2U1Fh&}DjVd<;CD;7uJ`<5lBT-2;;f1`#--Y#y zsVrthZy$PRJq}M3QsqrHl99M_Fi;~LBy5qNA~>ZakF)##<0_ucFL{q2`8c27>ujh|G6)+{a%6x0Y@d9Y!-UBw_! z+$L*kEJ|g;>K;e?;{Z94yZz**FfXO;oh$(~*}THtj3R=V(d!Nt)lGsAI`m+jJI*3s zWMJXf4AvWIdbJE7!itAZN4Y5gglW8ks@~RIqBj>HntX* z7L5q=1@%$~V(@)#4r27*?_+01TJlzfIK_&0AU*q6k>l&OeEYFhj@*}R4f3EoCql{F z$x}@EWD8UW2M0VG4gB=v6qyiEW};Znb>L6__@``un_HWfO}Ei*Z{2zQnxgmB%V#wA zytKAozt+AtdiLV{6dAP{%Q(C9;)S3(=J~iR7?2(&uxn;%kyL8F_<5Psz1{uPp2bH( z1Q3`MOkFt86p}>e+KSrN9<$!#ii0xl$<4_~r-i#^)H~^qW3q`zG|IJn5v;Oy8P`qE zRa`BGLT7}bTJ&V3Gj9o8Oc0WXOnO+1cUg;8<|fatwM-|iWiR@~CMaueph4}a}b&C6#xY~e~994-=OxYr|SRaS-ySSI` zH0m=pwj9lOPYp#(rZiFowNgAW~`&Dt=7i2nOdK0!CgNhQ$Q6WC-u&Gs=PwTei!_cUjF3d*p{Lc#4I&1 zCGk=fXkXw8j4nh2tr=7xJZ)G807nj@slkB|XNW?HDz?(e!w|ds^rSf`jX|P={1;Xt z<&3)$c`0WlBc&FS{uOD!Jtf-9opC~m$w|yUxVhs7J#CbMTJA8Fm*1&n@@t#k zz^dDj9;5Ou7Cy*|Kx#^w3={1)eKi@96D_Z=G8U~B{G^N_9^|**~8s8bap75g=ND-`1H-H=?gq@ob2u5m`pXM zCKey$n)v5O9Go76cPnAMJu1t7)XE#3iSNIZEryB0MiK{Z?etKjcDTPBi|{(7jsy$9B2PXVrEA=V1sJ#D?BQ?(b+sq2-Z5H7JwdNnw}K3y5(Y` zjtonZf!wC%i6Z3|i#zpCr2Zx@Bfn2In|}1=dMfJ4=-s=`_)b=9Be7q8%52}u+i!mv zgkTg!99MGWzVtb#HJJbTzyEi2u_;j80fr0HyTLn+w#L!zh3m_YQ5B7G$Dv#or@G{J zX>+JmsE6l6gXZTE0~rM^NmAq_T&r0288<0rft8>@k%M$|VR2Pf)2117;80&a5IJ1Eg 
zZQ7hsyWX^KKu?*`msZqS*J-;`;>m`<^&aWT;*F~#+As4A@+fqjV-~tB^@S_N%J#2u z0s`Xt-?9ob`56wyWrUBm*l_roYmfQtm{a57K64mbAn8oac#q62E^Iw|MCLL(K5?Bo z!`eh+4WTzSH$VE(k2o|OA08taYB{SnwzjrOW-_9~-90vZ;w2tOXXw<+Ku3RM{m2yW zI!(yY5yE%Vj^G{<+hN{|I_t*VLLoqrB;+P`mRffUl@haMJF#xfDo_pLax7qCP<@W9 zJtf+pO7BMC(!1di8q?(P8}u$;DXq}wrceFRE345|f2-Z4Ii@@^8bq#jQeVcE@VOy0 zdhH|{7r%H*U!O!7xpWl?=%f<5MDP6wD5VOEFmmS8mI2GB*0=~WohllpZ$2^dnWK_=jUaE?+6Jl#kJE!s*1;Y?3xqhnLHo-N4UeP7QA*> z*50mC*{DGb!7wZ*#0=`5cf~;e==;-+nVYJNfjZPw*v=KKXQiTW7A3AlWHuT#YK}>E+qG=#c7pJKfFklI>;)2BhgaDN%J6TgBleoA+A$RV<7*BEyY0K&HI~3 zzsFv0zfS6CHJUNRA6(+3YOcn7MkQ?WEL_w*VPm}z^}40|N+B#_!7PTAjdUNV#@68+ z1KZhF?Sihqr$R!q0Vz`J$zbVZQtN|bn9_-aGzC}G;@1l8v!{(F-qkc+9`Wj6BXuHW zX}mhZjMrL&fhN$sD^p?>sU0LN zl8ZGeIsa_ScS6rRSbv$c_X5P~xEq|2Hzt}PWiYy_=%*8UJ4y+_+euwb#-s?0BLZXe z+ppsolAZ-_dA5qov`DYVw+eRM#u9d~mseLGKf1(M9vmDJ9E_nt!!3wS;5LRZ^V>4~ zlt<#sXd)7OJq#1Af~X)b4l(U-sd?b*?3w#-j|{Kc#1&*gF@qv>!Y3%XUB2n3?jDwd_-PX zJgP;>kjR*y6zwt|jM>WiCXbM703_qb{DNsf9S?SP^ymKU1O>B|)f4;|9WA!#)vFi! zzp}cv{_r8s@6+Ss>+3iCEpci1I^!{))2%*B_Ej^%YJXk)&omvbC=!F_Pl*^qRw*y{#$KiXU~5moZiD}jGcb~U z7lR!l!?DxNfX3*eBE-7%o;sb44Q;y1O5ce}H~zsu#osSv05?}HWU#kE`7PNrRt&cT z7e>kT!JdTW9E3i+(hzw2mLHaHj$IY;eeYAgn zEq@Gk`e~Tp5{k00byeWLFFYGm7`1tC@%Z<$~ zdxbul+tgp5j092hP_RgXBVgFD^g+TAzA_1euxcrbVyB7jW(Z0QWJs2hUlX?*)%aG1pBw%CKHVmhZ=I)VctRnsrV2N*L>iy-Z)C z?$y*hjm3;ToG}9GQOICTX6b|c+-8#r7g^sxQisSxA}#wY?$neyyfLzq|94CRG3Dg= z5o!xMpuWy4#mmu&VikSRE3%p z1#HK;qWvXz(E=%eU#^z0Op4m*Ut)+0Z)-4`IXgYoh5#4Wq|`9EOKZ!oUc6pgU+15~ zS1`TOGOYH<9GP=v!x;7Akn7O#v0q!Kb84WjxK7H|0rv~!C#3G}?o!^(H2p%NfgXJs zriuB?_!F-eOg1P?jeg<{AssFXZx%SgPYM8TT@n^?lE%p8c#v?X)J_g&(x z6EG!>K{9z&1MUe?2vBA_i32o=Q)?)lwMS&Afou{Ht5eTD+d~ZJ;9-=mb7BWtSy`iS zME=+ly$e0%rhs@w-`r4F@uiYGaJpDu->O$*WSZsiPEy|T%9<_B(t~H+6d_~D{74#Y z0Jyil^|&&8w`+^BAVAc46$jJ)T+q-WHM{XH9{=&uyw&XZJ+9E9D-bQ-C zHzK_x7D$BAwYj?Zkd9f701E+%_je9mT$oveN~`M|DirA}ilfp`bex2sxY$@WQsh^^ z{ngpg+5X`Xy1M=R8RWcd*GEK;64|U+G=rc6+TPwVfe>`ph*=kI~cN9#L6Gi`9bkQ#-r$_vZTFAP| 
z(aFDyMr&<-edF1)Z_P96?h@kzYP_>2cbyngso@~Dk$8@KrvLk(_5=CD2=HGD?a zYB$!e>B+4df?m`!X|W_Uc-M}R2`DKZHJ+5?9o-O?uaGNg0T7}D(WO2~lv{w!5>Cxl z-QlG5>Oyy|LN94FhYbpG(V3zhCzyjaxBE1xq9IM;9pR63#>MvTyid5A_WJgl3cG&Y zR^Hl&-%gI?3Gs3SlOe+mvUaR#U_7a*3jHoR*&ES>Kk$Laux~fM5oi{uM&rCYVqsDx zHy(sVSyDTs`hC&A-`O-8bn+AeL=HsRT7T8*#zaqEx5H5~!?rQ#;`D>7{XOP?ZQ@+! zm!?k+51}7Y0(z+6-O2h6?v4A&$4jycniiuJKq@B8+oq@Hu0O!3*shliM0kW6gs29D zK|j&)z2M1{kK-K6l;@?r+s(Y*K3QAc&@rhCaAz1O21r)N%F=N4 zJ&UBtPoxe{F^n`oTitw8!dJT&E-kMYQoiGM~o`|KT=R83hceIrqXfz_wG#iY~L5{Y1`ww>$>RIehETQC#`j^^S&~f z0Kml8<};F6dkXL^Z)PDeuKlJ_C3YmskAR)J5mJ`D+IjQh^{ek*JbUr#!QWu1$x_q9lk17D6V%%B*l#Zcgm#ma)xZJ?2Stu1?GQl z^A!lzyY9uxQxTG$FKu*3SJ^9(M|gm9BVTp^?Ul_HaEL~;#o|f4(~{bI7gx8IR@ODL zxBaH-=ae|G!5GMvmu=19B6%4K<@D;-7IfhSg`&ZKEmY%ZHu~6UP0WQkOHE`*Y$#TR zmz0&~nYpmCgxHy0YZqTZQ|TwqhIy>rqE)<+CTv<+;kZTFlT1cwh;IYdldIcG8mTwN zfygb*=9XA53km{+ZP7K;<$e>aW7F*)h*nxzvLrtbC(7`$OMr7rY%1}`T@-WPsC0uD z0_QJ}udJ?OzfrUlR`R${Uk?xVahl*-44oAu`Fkna(SqXS^4f;#L-9Z0%^({~Kom5Q z<=FuV1)_Jjl<_q&^D=N74xTQ+ia_gdd&Etf?ryVCFYaT`-8}Tl4mXGTgaHh~I|&td zFSSsP_xyTtjPvpJeSOflep^AOU64kM`gluj8?^6kEg;<+cOowR-Qtbh(-D$y6LN*7 z##N)-odB4JhFY$m_=$y$4)WyoB8+~j`=9Q2kuCUjzPYYmJY z?(Erq7`Y?^a*v6!yTU7DoM|ZFQ-zO1yqefbhxpj&O79>iv^i~=8NfqMZ|7PmEJJG- zC%CRd#w^8VQ^M4KFn@Z!w7M}lH8;cM`uGspmQa;5f3Xu13|L*iz;PIly*fEN#YZJSgjYX# z@~LTj{rbg&t;erkKS$s!?ZZb;q&#^2@)@EG4w^eJ2SSOp=jP1blD*6T%m>O3lIIqe z@sGMqNXLg#qA66sFX8R^*<_^%p6T9B1f-*mNn(w7BO9sEu;da;;AL_)-c{~6A4g;# z7enX=#d+`ZJvGg^@}7?Qy{9w?@5gOUMuv=B3?CF(EHcw*Fjkn+Y!RT_0o;Gz={d^k zgqOsUrd=m_Pk5Tp9Crlenw&MMPJxedDCONl1}6c}&b~{?1fb(c6Cg`LRE*E|&hE2U zy9dWtM+cWDty?w83yU9Ko$K(pL z&Fz=rKPezmV&$e8Ih;uHRikHe6)i2UFRe)EypCZ=w#@_}5(Y3D5vOgr$&?t@rhkV* zk=t9i9Y)GlD(jwJp<+>PHPuUb7K$!~+YA3mFj9%3abAQ{$RVk_UPN-_MkuKT$|B=O z+GiTO?)Fg%Tk*;9k)>H!T{e@`#j%war#?^~;a=93)=m5J#zxAqV7IvWXoMH#B*g5X)3az+b3(AwRs5ho6I7SjiMYKuX7tTx%G16r+Ef`}KC>-U<+%?3#Aeh3#rJQNU|9V%0X_cEgDb?7YZbTV&f}P`ReW z`uR1AmrFO?R0Byfha)*0nbiX>+k0XHLT<`%!u4wC0kzpI3Fd?~Hg#Edz>;}`|SGa7_g 
zWR~n4iL^{eZLR!GiMPd!UEfv!7Sl%VHf=GoH$62}AkU6X89_p}`q>z|e4t`+1HmO>txIxN0{@LOjW8frtTUlK< z)1}nX0iuz+5vTWbK!4DX^HkHy z;XL%A6{#a5+Ix-XxpXzhc2Pbp2KGE{+p@xMXV$& zsVJ(E(kyjF%}!Fsfho%W>V8C~h+=NI?&4gf*Sw1RnQb3Kp>uSK*n79nEs4@!<^ETO zux6X=-If4q>3Z_>M}n(dAGg{6ZPp#CjHjVM#jkk_H09oM=6~;6t&Wb+YLLX*2r`TH z$D&=ML|vJ>O(8S1;hw|zv1Cuuu(wWuxde}PdUPVP;c)NG@!Z zWzkdkHQSzA!$lRHR4B47VN!T^)dXKt`B$bo z>#K41=%+>yDZ~dYzUlOv^yuD$gEQ^s08>Lq&^1=iob*XeVZcf6?H%>o|jqW zJqu3Fnnga`+nFPmsu|hIYN@won%`XAtbifQ%`<3mWsPiWP@I3tHg5bK(PI+y&*r+Y zv;;I=P7<x@!2G0WQonq(b(M7%4fhoBqI6DbA#yo@U}sT#>Mw}wrIiZ zLK2K zd2^Wp$0>oAfR$DozsvghwOx3xf-Mer$JI5t0lJ_mt?EqTxzI%@A{EZ%x$4Ge)FMku zo2y$7=5FJ>-iC~|2Xh&flgqA6^+;0J1-XV@72$DuY7q^ySP?)1r(H3*H8-v zi~omLNb}smS=+3^XyLSwOD$9o4`PoH|na@i2yEiNxuH{EVdr;@a- ztnrRn;Sglv=)b4t<~ANZ+2461$G5y|*}cULGh&()(_*AP{l7FRX5zARwDIGyFct$} z9O)v3o0I0tg2sAbe_33rm?*075y+0FkD92!38QhZ(bCH%mr6N}9lR^E*kzWM7p)6{ zNOErQ(A6R@8L%|AvbaR{K^CJ-p%50GMBg*>;S#ae&85w0CJiv!349NOjNfsR)R#IsI1U#R5jrY-4K!>W zPF!IIm+&s^QikiipNgdReszj*3=;D`dCJLPbG#N%M~|LTKo}ZPiwc>xyi4emMJD+f%o(mz=lwG} z6DT1QR6vIKI-(tB&Yk+dv_hCzA+w;m=twhDZ*d2&U%p5lC4@ydomxWl%Ga0Ac_S^Z zEX8s}v?BE&v+x|&-L#iCe8%?Tq`;~%PPJGx5{a3RVgw+bmakIl!W&BMesHkMzuP~3 zjus>7f!bBN8r_N1b`QN;m^1`pn>6mH~)=9u_N{H54f0Q%YPe!(u@eC$PJ_VUPPfV5}`Y(#`8YclXnfS1ips` zC3+lU<$7)hD_$P`&fsz0v!KP9D5G$3q#}9my*csX=y@CVMlAr}E55NgpcH-XBV!Bo zd5=Ke5$zJl?Ie7Wk%!)RroQHuw*1zl9>&J3c&o zaZC>AQm1aYAG1I4gFbI&LWCE|>o!i;XI7%JE&`XdKf2m_Od`jHMT#qR<||QJ!|_bM zIsAx|&|Gk#GMrS7G&^Uq&_{ZskS{+ChH}5d72=d9ns}5X(jpwhoUw!jp_s!nf zuYdC`X+p9^WyPe(r%1PvQ)Em$yf?KlZ;6wmntlgCAyV~3T$qA5987#a<^OmPg22I> zHx%$ym6^7p5l)A_I@{O%Bq3o#!6zHbyakM~#hWJpJ^egPk|fSpN}g zEDvGu`=-9kibXXhYhvkZjX&NJ?g^Ko!4%9YcsKdY7x+vvB=Z(yqE8Uv>BL(#BQwjw zZ!D{;>xhe498NAH0khjKF7IoY!;(RTxTASQDld;802^47<1P^y zPJ@m?xk5*NkTBzYXQE`G9~3T6#rrpv9Vf<2Mm=s+gJufBSBdRER(7#+EO=B6jz%6Ei6 z>HHi^<~aJz0Hg%~V(+?1J5To|r!~xBlSq z#>1zJYikTonaXmFb+=XQb+S3a4~);j{w~Y?DP6_MK79)2`d+Jed4foLBvgZ84HPnE z@9phi$R9iqFD~G)hJZ&vG6)*fPrN)NZl%}!pl6uaR#Y3$aB*x`H-;B4w^?fB_5bDX 
z_76`*uF*RjEiKOP93Duw{gY2Wd;DlsyZ{`K_6woI#f9wlyNW45ajbzTQ&;J7pcYob zLh#dpHk1_dGtu%QG}NVtKDG$KwpL{qE0(P&LgE$Jm-lv!cNZ&wU67m&Md^g?ax@;W z^w5TwSPm6*98t+#y}dMiT!yhkX9WKN5QGm%#!0NBave8WCOx!LK1UJM#E7*Q&%CV* zx9Mh~8m>n}WKlZquM*Ls54INYJv+Bz?4~0A(=<5LFHO|mRZ7P1XLVkuP@`ntX@>D0 z7ar3>h(>S=!@35JzxnxPS|uD%2!e#hrn@?qIR{A}PBUp~WE?|K2ojBpL(~}Y?8yw3 zV6L!0VR*Fs2RfI0zGw5wTWUH7gWd!yR4ZZPko33l@~dxQ>a>xRC!`O`^3t$?Pd+v5M|wp* zqDSZiW=jdLqnx4b)-zoP@$qhuR?N*_ud3Q#{_=PK=+`fP`8%1yxE`NE9uZ>n8gvy~ z8(V+&$A9$kr=P8EZK~{E`AI|K6JebS#iN!agV)({g=?Lj?7n`*R!vWZ$3ap#UehQ; z2A<09+<$X$!7zDxZbihkLwWQUY*JViYVc&j#W) zCrgymv~qd*!RpEb6bH<$tvy^=TLD*H?(f=1#8QU0q{@b$FdwdLzj?m>;>GUno9|vc z-{0Q{fM5Lhvv*KiA;#VP*@qiFCvTrW`&RK|$mLnPcfA*)He@R!hVy}3=#;dPW+ajD z7${Kg9GCQFclWP;_5A<$*WbN*bE+gP2ZajG+Sy-QTYUWB!6#3i{P{olvroSG@#@9{ zk-au=<(1nRR~zjdi~x1I50nD!Fh7Xe(azr9_G>OphH1cXM~)k+@q5L`6w$i4II5Vb z&GnPx(-g)eqevtD?AvoROYG~zCyzUf8#OyJzXVB|5u6|-tm!509oBq(_QmJB+i#AK zk3RT*#!TW7!rGvAfwjoO7#lh%FJit5;4wI=%yU}g?><7ix3}kF%A5omLGM%(O;2J$ zaV)3?rJ}cEmHfov17B+zv0t5zde)B5=xBU#uy%h$Brl8|+K7S!pn~viTo5xdRJ0qm z5Zspy)tzn+>m8&r&xqHs`e4GrJgzhJXq00eHGjs}?990`SeqZlf+l!nAUS}sb&pJ} zzn5Lw_AwMG(TF6bzdl_VX{v5@4k=ZLL{LWX2>d?nyw(1QcTaHeI~2dyi;5(#DEcrm zqC{sEDp1Yej0k~xy9?t;-v;Yc1n50QPsa0pnXw*`sT^FH%Y*zG!d=on@{inJ^jCL9 zs~mbf-Z-LiZ9q$_Tc7{;-)x z@Q%yu9HzRpo*b<0+g%08Rwqnwvm%?M18Uu&U~Vc z-tKyJerig(B2+yq>AO&?UU2k^=;^}#QHP;-8@umT$t4M;_^sa-1;v`p2 zsfRiRPN>+hMz_BacDW@fNBGeGNe6xy-uXkrIoZ|kDX^oH!GnQt?g(9TL?1I0x!>om z7OZlAWRvC&f_K%fAkL^LeMH?t_omhGCs797%|3W1Uy#9gO9>F8glx3JVV#nT$F;%r z1S2!*`1FLsinsK{-)3ENUqr(M+DFgjLPkD3gL2$$B(*y}C4L`xcdnNX$1Q`SD`N0Eo^dR3N$| zKylJJ6Vbf4(;1*|I8gH_7rDp`#IvY{^Svs9V1VpDP@QC;t(uL4DuBpT>XdE`r;?VQ%|rS;N~(; zF0{WVa$}soc(eci{QrIZVw-C0l3o@GUd5)(O@I3E;e$=|;qf2;$=~_t)6d0$LAFlS zTDn84WjZ(>wy)C-&e2aKBO~gxXS}6XrQ_y!PHl8{WHh_<&{ONvbwh(4ax*!N30OoK z_ujlQgZL)D={&s^2FFU6!-pS#dZ7duC(E5HM+c)`*&E{1Y%J|CeL}@9G+}vdodiT} zY=++p)JE%Vu0!I`ftwauQ6cL^uqFbfHA@3Fqn#T*qA%J)LY0Zt~U-FfweIj;< z*bSvIQaCqA?No{yuhw!Gt#}uMk3R%JW&cJ{fwZVoKtRJDjsK 
z{4u&=m~%tc+&#HaAo@emE{vhiY-PHU_c>3XWjm!5G`q`a`bBkL@_3j%_qK`)Zxb@a z*5)S{h6b)?KVEw})GBeNgnLaqi!-AYx^!@}wJ%kVi<5~dDx%XAhP?HbVNiiow3Q&< zP99w~#-`IYuMw{%J2|ES17Y}ssj$tiZ9b7K&7=WhGD5_|R4wtzC07T_DVF!~?(1)! zef?%{+mE6n5eTI?5J-FBF|F{-^xFCgI~~P29K#<`EMR|R-N(oXprEAijh0tjXWXJ@ zdT;JIW?ecop_IMJq*coJZ(jZX|K{~?zuD1|#YLz%ySg+Bz#eU`Jy_ZJ$&WsN^z`E& z|KfAx3;{wdk<3!JP1f8px>1-Ron|_?kusVPhOEk3g(BTsUWCEN-yR(8?H?T+&M(hj zmVSJK#)Ryt5k!r&B}#sNX7%MvF>B39UC4&Qno$1^$ZV>q>_FoZn3^pjypjF@9ZJd-G*$Z31*JyL1(< zrv7x&jv-p^S}0S+C{;d((j}U7ujyoK8gYHmd*eGNZp z{S1VLmB1x0#5u<2?1-BA;^d9`JT@gzzju7HyL)tX{ciU_J`BFcOh#wF{q`kvc(l3n zv342kT4A3p4&mUVrCL|Nh+a>e@rX*OYD}K(QZPo}5Fu<|W%nzqe2b02_U| zI66f`_^P;s@)--&1^9Qeury!1o+4BCX3#`Hq&y>1X*PCVz5Ma=&(#t*+S~OOSJ0qS z-q1&~2(9Sy`sTxS@F_DqAh8@eAY-?8k-yX_;y-Qb&dyJns8BZ<@_ADu#Kv7|-l`{2 zg(gpE9nra*6zMquB%{zs^R{7tDswmzkTL)$dD8WB0YskMgjI`+8L#oKkW=5(nI1XF zqXuzZmfxf(#w6mAy~a#5dfWsuL579`ygKT8DVrw~@YsAtvpobz-d|C90HvT&@&XRz zigGxqkqO40amNS4u_aQ28+tEKz4;IC>Wm`g)l08Kf@0Pg8V@IAOp9a&N)dcpbTDN> zi%p5{^_DcmjW$Y*VaYNE0%lW_+%-6tdxIN_K$z4e`jH2RzBt=_>UqR|m!IA<&kq^= z_d1{`mfRWgGW>-!=+m`O!i+thsZn)1lH|4KE@ECk=Bn>Dx3FzZ_-k?AcKMLuz8Rh^ zCb9x9*7o^Tnv)jJ%A=2E6||-ntrxKwfw>-edD(cOr+Ss}*jh-mqIoR;7VW?{zjaql zt&Gk>s#YvXBCs`E)krxVpC6$+z)b%w@c!P5=P$l`_Tt;aqw*NW?3mI{a7VHE?(n3- z1dNVzAUe3pRwweHs|e^r)rOuZA4RNxUA_h(?fs4gn+PlY%D*0w4t{0Cr>_J(d_KZ+QTQ!gH#Lv4lklCK?Y7n$ztk_rozYT zYjwjd;2!tSGDWsZ;oZ{9h&EVV>KgDXwcAhLCv##B@GX=cQb_N<+TDDxx45>f^9N_U zar>r6NAL*9!P2d#AMb8I$7f!g9ft#5m|H+%sGq6ZcraqD;t_j#a(I4@L(y1=Wp!PO z5m%W+Icd`^8ylNQ1f~AOM0`ghmBfl#9#tJvhHIWfaNf;+k1O&pD>%PSbi!Bdka#r- zcoI+Evt9n6O%Wn8#0*zFop}I))QJgIb0qq&ZP=-`qN@`zwRkMCU)rw!3UXVRC=z*IV;lKGXGv zoAjc(9jOa=MX0Q|x8#9T*wzO_;1#~M9^ktBem`7Ts88L{;!f1vLn%ITZj98RL&IgY zq5}V60+6ASALU0(^ljh9(XIE}iqh+t^Kd{h-6g?A1v)fWmb}upG|82IQ#lGvv~J9u zqnvYuy6htF(gzq)>qKg*#a{A*IYkm(L_|E8GADG#h>ND1PU2a@%%>b{=@OGvdLr4X zY)!+-pikmsY9Vv}4v#)~`HK1b%$o&`c^Iv^ zxJW_3@{Z@Aw6(TG)4aMk^U>ptm8FHa4&oHzd_-M6(+Aw6v4-L7Hn>8)N6a`&^XhKZ z1Nma^m(2O!y*WC$#FISPT=~0y{KccKjl~5qUYje+WXbvRhAOwZBJ7rdp4fS3XsbLA 
zEK07^rr2KN2Td}@Z=-2Y-uQ}n)fXUG-h$8bhBbCU6Y=G zV8ceE%F_mg1MBef<+E>|eEK6k)9CVoSdCY&Y#PlGkb9=Itfck)VhN*$k0!dp;zliv zNg?yrM$+rr;E0{J$KY)Xb?DLXiFP1_?zYy2r~TNl)>ROah2H<{G_jhuiU+4;WPug? z(bR||Po|xl-`AEX60Lh@vriRVqoP?L_<#gjs!%ewcHb;6X_*>Iw0L@ME)@TTBNGZR@5}UJ0`*v-AH_C7rd`u zl1Vzc|6sm$hU=W?(}B!Jm3-`J>{1mYyV*_CsJ^vEGh}B>*&q5UcV|*Y++q!XgPY{` z9}_zW&`s0qsCWj^832DkfWOf{X_Z;`4)9?X`BTg_X&~64i!w%P>}qd(>~eFxTOUd>vrt zuTcHce-OZ`744~_K9X`ColE3u+oQtdATl-W{}ii2mu4uTE?%6y**kjqdKZB*A6)>a z0vAbM09$@?+=_{O`f!<9>G(**_|5jthbRxSn8Y;tu(Y%kMTW1I<#2z`OJY|x-GV&X zY7`EkxUsq~U%w&TB?qMr{nLN0N^CY$l8 ziH}8zY*nL<(^K#QRvUU5@M^Q8mOlT!6#R0wR*uBE%hMPfsgR-ZR4saYGps+@+OJNkIb zAUw_7@?$V9VMCC5AQ|1VqY4>+cx@DPf5zuK#g3avuUIPZ=$>RN!>s@SQ)~}U`G@4~ zMtOmwlQ;^ezOb1Y62eGmS*cWtXcz3pE4K-8>`Q^qR6wqcHiC(_$0{G4O_hym4%&p9 zu_hdGj4FM93E-8V77OMybzdU$)~*|B?r;+Gum<(gsNl3^#w zz}miv>NQe$mwQ0h@c0k&V&9IgClMQAUg7~Bi2)30u7u1II}-Ii6@@yzyLsq2arqCA zmPGUxTtP>(8k1a-CeojJC}b4V;I2f&n59qx|0djQ4DmpX6TKB~X3^hO`&0S}mjW&? zJ&p<&JC{hU6#3|eo3+t;vPx^%Bza|x*88LSxf|&JE<9o(jF*I7yEWN{I=@{w$WFRo zp2Ej!zE>#m`uu}xT^ppDujx%M716R$G%=IFRtx@@$Jt?0xzyr<3T8$ z#FApsBComVzBqZu!=TKbLAl`qJ~3$!$@A&o4@*0sJwfK2XVVSZCpgQv{H_t06XSVU zlG{zbJ;M}_ z%&O^vez{AN@kog3wwkTCVhXy$BR05Oj3l1DAX;O@n3*rpKtY{|2jg)Jt?)!Q2WJ|oSh@O@LCVW2DhY&@lhrSV?Is$nPXmeO)v z)1Tw0^@z#UaQ1}IStc-HXiW&1q3K+q{v{>F*~uQ| zh?GDa|1eC3TI<;^#S#R99rT%9Tt}AXr$2u3fYie!*dop`JarsAtCE1lvdVQ9>Vt#5 z9<(Q~MNvT~Km#V{?#&w&db9WJIps83FmeCf8=pVlJ~%i*yXaG&K6!#ISX`D>d4q1Y zGW%Ur)pu>7L_{M}mSrBU2~v!hQj!i?&pMcWTpKCI1PdDR(#hGGw_TczpQn|cnG?N1 z=YIJ7tKV7ipi>2UkCMeyldQOZrwry z76pVC>EWOSBVFl)9Pf^n(Mq83Y&YRjd=SM=gy{XSc1;s6@CbfJ=vI`b{YX>FK@J?L z%bbFRF_t?`UNHzABT0Pjh_K3o)BdI8uP|z?|QDYVMY9n#2Xivm%FKTT`i&rVL-b(1Og`>A^f(x_W zw6r-DPiE8}Y_(IEtay-CTdBC5+3-^l=;$e0*8{(b^pI7wr9-XjF#g#;tl7(=aNVZaK}u=CNJi`GPg=DGsCanYm}Bi6-QkB(0C zl{!c%&f z3jV|dBRn^bGo6}#(g>oe?0sg``6@!g%||1Y^kF6Z6xSO^oK4^rEE*PH?;17ElaD#z zkjCN?eh_0o-b=c>?Cj6WXVI+zTPzGakXTQ;>w#~Jw@7CJ`3q?n9MY_eJsbuAOu zCq;TS6$4x>+|@H&#P=Vn8z&~qJbJCSoz>UVz1`=66vFZD9v*})No#Y{tR~CcS!Zs& 
z^wBx@$Q=@d>CML4;>PMy)5qGZy$InsI6C?I`8G~eDq>i*yMMHQbh$8JaCv%FJ=t1c zm5@W_i6haWtuLROrN&*7<^o0e*-yUsC;s>+NBjGW%iqt+E?n=uK!#9CDa3duzr8H2 z%j1tfi`5)3A$$ZwnsvZei8Am$7iXn|5Y)AIaB{fEAU9E|WG8Z8 ztHulR@lC8+AFi(^rDK-{nk&9zwmx6Id@eNn^|R+4wlbw}I4pk;pFTY~Jbv};#pi$Y z^PN|(-@JUKdHDkFS6uNtQZzUYAWxS^Wmg+OTN!-C2I|jP>#Q>JbaNX;YzGRVNzh-) z*w$wYJ1H9%;U-3;YNI8!1VSfyE`SYrirRs{A|g~jW%mYG2F!{|0|KV$<~?(DTk$G~ zRwoka-kJx$dFpM6V0glq#-f*uM>3f2T{^m(Vt@EvhUL1+tNp<=HzyX|Bt-p9?btOP zw9EQC6t^B7%$J9Gs<{+xPLcu_?2uD$%YQN ziDLVlia4c((CdgXCDXMja|FI8y-&K@MuzHY=E~o#LcP1>uz~(h|J8pP#l9c(tW}ve z@#tx@x_6^(pti|Db5uH~gFO$O%OWK)On`Y!*^E*xxgYBJ$rTTb&xxEi_bpV*j!tA z_4>8wPODG5WO{(GyljUjR~#sR^vOpA9D6BSz04TELx+#{cJMP2_rXS%{hGL56XFuc z#lBJZP%E*Nxicw24tHN8B}KI-iqpg2nNpL>!P{*ohx-5$S>aWv|5Y9AicnT@o91!z z(IYA7&8(S10(Ay8G?T1p>zkX*d{%aDWkpgEznWH_)vN34y0^c(`|#<>5G2n6jC73!7bt)6s05(QZ?Uyf0uJ?dUM zKuds1wT9=s6f801SGTVG-Sn^Mkn8P1?Zyu1?oYqQ^q#qj6)aQ`SO8_iaZR8_3* zL?4V%HQR>MoWwbs6z^oV6@Raue~SPiQ@XDgCzS{EEee8Z&`F!FK(X`Vb1yEF`3KBn zrMp^~n_jDctR=Lbzuf-8{(R@V!j;fkP$(UrT^^oXpParsyLfjlt3EcP7a}%I={H`y zK7iFvA3f;2*flrD$EW+pXS@5yM)}DnkC#^~y$%du$>#WLE8cna%&1?zc((cQA={bG zE4Ha9mO)~COc~3+Bx)u{Qspp^yKlBJCJ;CkC93=}QgwEM^;=wDr}omRscFs7h)rn( zOkW@DLg1tiGE<}{N}tx&mS)tR*4w)|`kuVTLwsdx!z6f@-mo5EiibJ zzbu@QF?!S#L>Y#a`{3FJ3F(HFO(UVI;haW(>nOavRNveXg>Fm_31vh0V=;Ggb1{&& z$yW5+CdB1985tttP!m?qxT`R~=t7gr=FtW@PQflW>oO{(4a<;V*@UX5+G*;b_3;Pk5MuxYV^vLa~P_m$OD;B9f5Sq$?f5DY^@5q0>1iyUSlepOE}b zXBXce99^EP%{>Q(0`DcIp z*=f5`|5YGe7}DA{P9O0J$&@&_3PK)fB)>kqet+^+EG#C+7SQt?z`3F z*hM-LFiD6TND_xwXXM!^Q;|J6fGk1Fs)kVLgs)X_$b*q_$Pqx7K;`~F$bb=lSG!%x z)jFc*0i}s*?*Ce2Qhb>=Mtcb5nl}l|-4+4u+`h0EAx>ztSG(PllO17idhwSLF+z(F zza-g#-C7=spKPU!lgw?zz}}C02XAeTQ&?4QOf9Clk$<O{qlUwqoBt?3rxqR1ot@`=%98V>=Qz?kMrB zP$K+fbl8N@P#R4}=LHdm=V1a(TJ?W3O6fsHO&VcQoxHQ@LOq`43wzo+&YPCNCOzyP zO`DqPv^8ixCplPsB3Dv_Q_Y&}0 zzejrvZlc?Fw_nHx^z~QIe)jX9+B08#^R=Wx$P=cVCErkw_TH>Lcu3cSM%>TdZ=gz0 z&CWaNf{Q1(DpUaeA)Kzzw-b97SC%!k{o*Ckpdz9G6%KJ-&ak)%4uzE8$>wa<$`R@D 
zQ>OH@qZ5LGO;2Fm%KAD2CpDYd_TKGRufF){Pc75o!NJbCyQ&{sDDXM!2bbC(5Qsy?E@QE;rR;WjItqLmCj)Z zz9JY}wH%}b;aW_JDD&i!r!Zp4Y839tM}u1Oau;LophO zNQjrs*duYn$}1=61Z;MO+_&c^d-O+Kn+)NmNfHG%wl|cfiQy$?C6?@_Q_7wfC`Xys ze4T;xeBSoSQa($A>$SeLg%pOP-0iY~M^-5&i7fwf6nSQL^(CIW9@; zVoDd`!XYC<8M4(b)SN*keZszM`zq?1F&#$b5%Y{)*N0PO<8`-SY$IiAfWRko_wz_i zV$T{C7lZ1qM#UT(Fx*a-@g`)sGedR-mkk7pe?6 zd~&geb87LElM3lih?!}&I5Kd-V zNgexpZ)Aj?D;L6hUw!pEJEFZLRq9RQ>c;wSe)Y?zpMEBPj$z@6SnIK7INQ^6DHB(l zr<=g;nFLvN*0&zSds~#&;?m0Y%jYB;TinF?l*`d@o1%`jV&-2!wGvXFIb@Nt&1qaR zvqv92eSQ4`^MX+l%X)csA?LhI)!dzMf=2t*m%oz_$kR~d#kb#XJ${V-V#-ix0th+L zmKaDZ(Wa&57?{EbW!I70&^)6-SAA3`+#id6sRcr?$P1hj%O=Pi*+R#qguksDDPbO1 z1csuV2-g!&8Ye*>Koet7NeCS}+X-8em7=7nkS+AiC+|nZ)7wXF^{582Ri|;F5{Fn* znbbG>L(~nw*5g2DOFvs#6x)g=If_>!5ZWr0eAmoyXH?6cLrUTeq(4S!n21D+-eq?q z@g)&#+%FqjquaTpA~MF-@Y;h)1EHA|SxT{U!x3Ge2{r;iSQ?zku%D$BYI_}Oe(LsH zKg84Fw<##Sbssmyid5DP5=0vfT69OZ7tF6a6As;t_$j)nKTJ#;quB5f zO>dI}C6ZpGasLN-=HYI2=4-cJhykp!n)nWVLqX2YPs}4@yr&Us>SucZ<*vR>1K(Dh zy17gW&8^C!YVdd|BoYe?-T;apuzV=q>%9)wxc*R4AKr-UQAiJ!5XlVYpNhylFK}62 zm|9<%<2}8yx?+>#CvAmzas2vsFOSa7H&<~a^U=7Rft9W}_U60a6=(5g+phWi`Lm6U z2cYEZFTa!_gZ%jN`SabKou$PkS+#+g5y^98^t30Y18AMKDu=J&@&3MW(085rX7}|A zIlZDJJlNU44c=*yyv*(Me8sQpxRM^5%vA=KrUl1``)ivUOKYodUc6e}SYzHhIXp@6 z8F+@MO_x&nR!ZURJetr zIE`z>yzjBMHuyoL+Q#hy?{2vR*RKm4;l8JEDB7P4S5+)@Df=P{r4U5XyY}Eu1WC~g zM9Kr%s5L%%6>tiA-||)yakmSh$z~jfX~+J zopGrK-&3T;h=WN5mu&|H2XLX_e_J{F6s5Rhhw^v~-t9$|DBtmGK1ZnQ@0-e>^=Lt< zZ0GU*cB%iuAN8|$v*-RWou#Aic63l*65OUAuIF$yjuN+1R9@np9d&wXW#i@ZuXp#h z*=pCRl9_`dh;glFymYCV}*bAdAhIME~^R>d!xaAZ-xm7+WG&BW$j>?Cu@y9bPh7 zsOu&7xxQXBFT+Lq`0xM)jMOg$h?Y@>z1=-%`QsmN?Yw@;%?-&Xd-Xy^5!~~HfS!TaqXCO z_=*lX|N1a zoddpgLb7bJ^;d6h83!@yeJ*x=bET1RQ%UI?Xd9Rj^4FTnQl^Gr;R{Sr`x8dhf^r&r z%-&RB*oJPZS(|d3RfvgL&?V#|sDRzl%Q)JI<+C}N^LAgj_R4Fr;bj$Q?ZmRvA4Ixr zBFN$bj;tIZdb;7wRFZ_5gh$-F?_>>;SJXDJsOnsMYnpvG1Fm^ zA%CQ;=#1Xp)%An`y>D*lUPPngi3H+@t%=q-Iv?|!@wxYIB9gl-Pdks7J~b2yHO)(5 zS4f@PLx*CVwoy>}z#=GdcDygMsySF++n9bdL;3p6iB3N#swo>f++u<3|g2=jX2v_N%o0Mcml^H#;-= 
zcYgly?9Bboo^GwJEPH^pYH7K&d<-CZ11hEt?uj`@B9Z z1D*8iT{-r?^$HL#BLDTvSD$_U3r_@plYOku{9KH^ZjMQ^b^1lKjD-W!@t`! zI?ewTt)f^Nc27WUD7?Zb6Q`$J3EkKiOQK#~V!e+~Z)R!>0j#HInxyKqF^>F=P`(2$ z?xt+-jELD`#xapqfXzAD9#q~pTe&Clunix)5Gi7fPQe*GI=`0m;3AU;fR{zxat8^nYV>1C)rNRWs+i z@4i8m^jRng59Z}Als+?Ej*B2z(au+|p92(o6Y$#H+Ei>_iNR}B`_?-V#WAZFA5>Lk zON2UfuunPn0#1f%fYqQjvA=hq{?G0!I<Ig%RF~n~Ra&Z@2l_gJFa>hH#960)9oEL9r~U z$$<$S>X8Ve5@g;>mL4B^n%(46tcbq~)b8?+fxvkBhN&r~L5M?nNaI=CYN10>d=@Y}lA^vwmZH?lmvsabG9! zab4{@LFlHXBjeD4P>dq|O|IWggk^BBSY^S+@(-~*R3MS8&FqW?9-`3Q%(2l2-?~U7 z%qP^RKX23VCreq_m0HnTn7Es}CKyA>0yZw!b2MihQD0{)WI011IhVl0zbFV2T= zwqNu7u?ZAyog1 zUS7gupOkfP+C&LkZCA`woC}4Bq^PEjAu1`{d;-(C#W{`9WO?r1@5>fZkR8Sjg^dSO zbWPu%otbapV<@}Zq!D1fCt3k_C-@6kVQl*ps)nlfHr+UB#A4XkQ-7%vW*jwqkl>4(k)asS|ALqxk!!|edwlpFLk&* zIW`l2(0Nq*e*1sZqfWx$j>Ooq9H2wH>&m(r8;>GsarK)OOk&QPKAq!Z?OxVyV^aCl@ru%KuMRNvZI z?+h>ZD5K5WWu?n0aIuz_91kET#NPe^>cdWr&P%uf*5zKL*-JVuoWFg3er`d{h3#Eg zp-EjgOk~DtMINj#{PX|dXO--ALI%`XUP7J{_^`{f3~=^Y*0}B?4lIL zGMRny@gw#!=jC|KR<)zb#@_16XTSaGw+F{ZIHPZ$Jzra0N1H5lCE4i+!o%a@aUdt~ zHJ#pgc(5ORn;9U20aJ+M{)MF#JUJ^nTR3ROL{JtU9qilu=4Z%;Rapy^8Jc<32aDmW zIIrhleQl~Gw|#pF5zxJTe4{!XdprBYF&pWZzxwJg{>8tLDbd7{&a8s=OK(k%`{0nL zwL*_)N5>#QNE#K_)ZDD6Bq9p%ylsWICq~JHW`$B#_de8#PReNKi@erP7}?3zhUztc z0^kJDni zYivv>=ZoN``7XKHYT#1kCWBxU`Hd^so0O1sfH&Ai!)EGEFQk~$GSb9-Na}?u8|#UM znbN*FdE2&$AUsbY8;?1{hL!q0X;&N4q9E6aU??vQTA(=JnZ)4*3XOu=*BBW*e)NY$>y~v)>7TXg6IK*-E#FOrPFdaa}>G&;SLH#Y-(O&e8NO%rId+IX}B1Jw1EzdV8Ou z_AL(goO^Ih?%SognVcU;Nph{`#+feRy#A;L+ypt2bZ%^0!Yv`RK3z>aPTO z(vhE?9r13Hc%UO%j#k&1@3#1VS|B)cN*me{^VPFXUV)=bgleIER}f*>4~;00B{sCG zmBbeR5qzAPfx!hJJPB^@M>t{{1XyqbO2rAVNb2_!37B-T!W;b(keY1ccfnefEvnwc zO&_p1vdLiyH9AX{gZK^sRmRi2oPgpVTiBULqvl2i1Fc-42X@M_WJ3AjX(byL0nHY1Oja=h8gm}ji8=4vZ@)y{wzf78_V(|$CW!C_hzWgE zOC}0Me*F@KO7dgmr4Y@eHPckOSZ|SO=%&qEe4s}JiIYCAf|2)9hL7_uN zuIn8(n2j20l-LeDI*Gl-47U!#e1-{XHn7mCL&PJXRd)e)&OY!;TpWxsKQu;3S6Bx-K{dk=aw%m)EZ>>B{a z4EeNhbbQ~xyFy1an@qA!QdB0+1V$FBeSC86T)dH;6`O+|=_Uplb-)%HFCMuCzw1t9 
zT$juN$SXpr5=5s1^lNGXHPyY%OYL2~c%33-M!3O;?Pg)gTr?ux(5X)6YLMk`75B`E zVYm#MdpWdXXX94G`rxQR@&i0FRdAyH-eAg?(d&GfBugoBcH*KbmxiEz=Ovje;*03wzk&f z2-C0m>B)ci*T0q@admO_>BIH)6X9g9A&ov9`jmktKo0Z-+fYG!VG;Ez#^J#KgJkW-{~)K_grW(v1snGMPwOSK2VI}hQMfOT8n$<3)_ zb#03>Kv7fZTA5f^SIRH4(?Z+ds0315CUqx`3j1yD=?-iTrc?&B!~Fxf6Gfj^Zltz= zBv05kzx^G(&#(XSZv=jP_Qj{a{>$GS?jJvT^5oTv7hM_2hr|3%bj3jin{(hd!w+kD z89PHu0iuUmFy1aPyk56a32#vO6`7M8;h1^M4@^TMC8mWK@7h>n5tHDx^gc7wZoa3P z_HYRy&=C8NcjcZo!rpIrsu12wD~B`y8g^oP_`8dg-w|0bgp*S4w4o@5bWHrV?je4y+ol&Tf1=-xaB4>j?&CEDV)4& z4@qwuF-+~5=jO{Ky2)I6#sq5Ixa_hTa1)MF0G*vRDF>4L2#3bbg+O^U8T&)~u~+S$ zM(sFRtr8C!u)HuHp`++l941OFt5;J+qg^r zVPUZqu@&j(Z6tNed~~WNCucwl2F41yIRkaRfv=n{rc_8wA(0 z7Xh7n$&A;zO(|kxL8c&gA0tMaq7sRkVii|NOe;8ReTJ(F=n@&{hPut)WjvU}e^r#O zsYskH00wO46ss#!EmTB|60GA+a;w24eX5XVuK|N7aF<0arB7zWJhT8&5|3{4ip7Js z@OVGO(0LV)>Fok8(C8p`CL9mcxr`VDFvW;P!F6=>$(50$!(G2_&fer}SVW|4EeA?I@e0ok8&2rtf}b?O#Zb*)Ew;V7cUNvj+Rz87nfEquL}FTmTgF3Khlay zUi^K~8qa1>$0w)6uA|fIGkb0OKo@#hi3u=$!wA*e(6z`pOX``~W>7ajUYAV6v+(y< zUp`}Kpg-DJUihc}?58hYem6fmLy-P<_plQ&)1FK`dGLV5^z6l}&pv+g%in%ST-x0` zVA?;wI@Jdu9cqhF-*hR63Gw=1OZytx**{ES22_;LZY;Jnl!Egi!5+EgWX1%xwz>w$ zo`3gkQV3e(h51EQm9anNK66o$oow*M;Itnr5={CAX96$$f^N5uR~?P_6O!b^9soyq zk7f}OxCW`3`t5COG4|@$fBDy+|Kbs1T3r$8iY}-dO;8EP8;53RF7)M5o%x&aEe{ooVx-lffP~&Qw9=-yQi#))&oJu zE5%Za8%``nu`0LL79$oyb#`(H$Pr<-Q@bYMtoL+AS~@{VpzM?eNUPILA+2SUFh3kQ zr)SV3jq7=NSK(DX71b&tlHJ|}#Lfk1`!nU3>x`1#uae}R?NSmc^?(fSPWJ>VUAM3( z9iA0fY#g8R($sO_FNRtf;=%5ob<|E09y1qPsCFJqS&PmN7;l(nj^fvf#G6loaEcjP zBV-|FrG9W^ZLOLLwKb($P<*6@1!{=mq2f`vB>zAHRP$@X`T}01if@V%bDO`Rw;mC6 zJ$I$aPvF>1C-J~#=wk7PC&$Koe6)AQOmx|0i>EB{lxfrQY!{q^V8)p|_~?Cx)AcoL zII1&JqdGmc6Q*27C<|(b$9<{@9+ejj_T7V1hA=EKl3Qy$(pr37?j^! zUdQxUBej<=EzIc)*^STF*49?%Z$5neX8+CJ@yhbtgOAoe|Ma7|>Dkx2+k#B(JXFig z(bVp=Fg5+vv*#M}pblV;nxx!&u(kHxtCP+XCf!64K7A&lZ@+$n#2juPBQX}l7FSGr zXPm*Y!)mi?T$(T;o11G*E+<~Uda3^%JMN%Kt=-b5pplKOEmX(8ggbY0Dl9odhY{_kfBe(G`G`V*M=vLRtGqdjO+8T6TYY5wm$R z9qJJsmL3|g|8eq)-#m(-@-6mP5b4bPq88$?A3ocEaG?0R`>Bs2gcv6Cbwp1qKgP%! 
z0nv3Kl&cigCJU*!xGeyfs;Z@wAUn7<4j+XY(X^86=@tF+?fZVu5p z(i)H?^^X*7W-Pf-eQk+VFJ|M{n)td7rMhQAOsZ8IemrI*;Sk=}`IDLh3LP1|%b`is1)fa;!_ysRuW591(X=E=&-rjy+mkTQ>>jSmYPq zKl}x~DT1p3Tse;qrIV?2tN=KQ@1LndpLsUUFRdz6pPZc(+vbYP)9KO4gQuTNPJMku z6?G;bD}M9&=Rf<|^xX8p?%|V9KiYorYHw%f!PBRE+pn<(nl((cBg}Je+E>=XNbwr9 zw)eXBU~&xG5!-{)!ozT5KeIaMES#}Mww&1e01O^ao8{rwq0SYoG&5JKQX81U2}Y!i7|Hr$7d;3`s3 z5SSG%6=0)Kwv%#3Q~AFr#RXZZ_3UqfcX4v2LHN$h)Qs)}EkeJokuL>af_tHDSz!X( ziQD3vi*Y3cSX`8pp6OmEU`uTb%ZL|C<}AvUn2X_dW#=d|2;md~UJM^+XW|5E=twPA zXlZH1@KfyCXG<9IW50(f3qhllo~!6jk{tai|u%$Jy*7gY?`?1xEQ3o(}VUbSR_X^@^?MvgmUw-#F=?S0E^t=XlnUBvpL>hr?aH#d|MlxP z-+uQTHPpQ)Pag6Tdc3vvyKlZR*%xn1(6g02d%07_Od0%J>0rw;oUP3@gv80-!SS1& zeLUR!{OotnUd=De;mdSG1c#cQHlMj!%A;ORLP8C-277w%3L8j3o9hf7P?31mOv4Rz z!p6!1Etf|CxHu_9$1|IoD|iRV@HaOfm||TyI^L&;;wz~<1yc(&HkmT6g`K%yM-c!o zl)z!((Nj2PK|z&zr;i@ZgKCZ7DOT1urQJv}!{^^sjg>!xIFImu zINc?Hwe#}Lo2QQ+fpR;QE1jmQ6PRK6+7$?D!mWbRGQcMgq5itihWiOwgoat;b$pAbL|tgkM9^68IkE`Rw~zn1?HI?c?@eERWIqlY2?pMUh% z?lde3hPF00e)Z+IcABt*dk05q#yg|&Sz2D)+Xr?44*8JZV#!wurn&QX+czcZn-F-5 z`j|=oYk&~bE_6lE=gV^vBjD2^7hP#n{gmT_~qHhBUsAu{5K}_F3#+BEpBJxfLxyBKbvAR)oSAPnxlrl}^ zXYstcHK@$c5a;u576Qi|oz5_vj#44ux9Ll=f8E}e{P3t~oQmb=P3LfoUoLwD%7@`+$5opyvtTxhvfUt;rzrEHH%vSuw!p7>V z9SqycS?;oew62NM2%1%K>&w^MB}jKJ39n+L*EA3Te0r~^3-hy^8!I$2ruWOQzLb7U z0Mq)~>Z3$XQE%cdac7Idw~|96EZ-(oi}^T^2j7d zjKYzy6L6qdfay0N>rOKkLb*_e6Fv`q+&_GgNKn`F>!_`6Qh z?o<_LG8ggab?{916$WH?`}$&kdtpslfsNhm*MFyT5cxAw6|niamh&m(KZ3#J9I`M`S_#|)B1)Rau+^|5&Ru6{Ryef@w$;f*={caUIJh|3&$A>?Q_b0RjxOcA22-+IH?(eeYQ zp0WeZRIpA-5Oy_-6``ba_{FvOPFeS+49zId=StWYkQ08?H;;sOafDK5LHyH!#HL`U zQlera{OWr&Xi%KHpSQv3sdrH?xf`~gq(X5a0tumFIr`Dmmj@mN*nr zmr{on>_B##&d^Hzfo!A6@Z36DgvN^Ey`w6!$1)Faw`r@*jG5orv*#bG#w_;WXFvTI z7sjCxi8(o?@IIa<7`6%4TeQPW<4(2X!4pcrGag;Qs$YI^wY0pT&0>`gb?VX!5uMZs zro)EGrbQepR~?Zw$g63J!}}yg`Qn~v`*yNeI66LIhVRrxMMj>#+HOy|>$T-YK;>P; zjlKOte%KbmHf6#s`QG84K(P42B=BA1m@X%Y@(Z{C5$wxM`&?ag$h)c`q#^e>(SjKh z9kRFmW@TlKlNBSJ;rn~@^5r}>Z+VfXNzIH`&(CDuz)0jQjA%X&chFF^w4c^Jras%4J1Z14}=m20!Rh 
zf_P!3HwVqV*R)!!eASm&u|h^VBrNB#R(X95-_g!NJExAUY78%fZ31QOwVihh|D|GPi<^U7VagdHjeJ z3G#3sryriVxvg(=;z$k-Pcez9CrbAWndLCkbqa4OL|QPpcW|)F)FRz{XU>wK`e3G& zMxc7>h5fUg9+%YJvB4` zwxdAcKk^rCaUu7~Td&lu;r0T0Qv_Ko5RH$>o%w|+!=#L3T_|Vg%krD$!j8He?;ZMy zQdioBLY5NIDsYeJ6nypv{xz*BGP`>)K|w=s6=5&}CQ5OA>liJ>FV>;FMk@ z`T#0)5Af?pCT}X#4dHI)#nP#qQb^sDJop=ur-NW-@H_e!+0cMB#rYu8-$a`@lJ6-# z=o!mI7?i>042dj58jMUX-aQ7RG%BlkI0AMUv*f7_{}!cVDzqdBqZ6(4OzC}l!m-Qx z9l*KS(}n;|wAun5Y&%EUuWs(y8c!}R-Ict-7B}U0I$XHor^`ew1ZjFzrm@#=c9L5X zSE47D#JT=r1B7ijgaA^ec|9}JkAL(9n)a*Te0Q*aBq=-hzoq%vpZ>i+Szew)8a()y z|M0JlPcLJ$JSocz5xw?rwDR=i3<{jL-ke%c9}p#$5qeWbE`bEBz+Zg+$=BaJ7lZlu z(F5um1Ds!&mD~qjF?E#|^P^P+&J=MsLRIk<5?F9V^CH4@%c?@yupx7_rAB0PZ883> ztUD=E?iMU0PT#EX65?WwX22QzW_t#ltWcBj4Ov_QQub;yQ0vv7Wx#Hi4%l7{8bM8g z=A{+QGJDsMFOL=8`Q+12o<0AL3YPU9$)g(LJvH;;Ow!2nndXJ)bL-*bZ%$PG*_E+q zbz|fCcVAoDk^2RU33PGd1i*ONFw(`-12xjahNn_UOOTD{;zcFXi1saI)k)T1=H1Hl396Y^E)`%)F48IptONABM1c5htgoR70<5m z{N;{5K7IPp4rcq{$c=;{`-(S~y__@A!9Gy;s+4&2YC4Q)SZ;#bov$Md0g~b*5t*Zi zpgGP0HDp3kBxxqx21`86y-9A_q{L72?Row8s>m=?v(g98mw?kGk7rqwGuX*^!&tjq z8Fa(mLM!79e)3Y-8l|w=NC^{>V>N{T>A(4p1|9MI`(Dm{ykFAfZ=poVR_1-V)%y}= zANrN@iTn zt&hC+`Hw!bDjbMp9<%HM5)AP0_zXeWTw8!6pk<<_i}D|8&@FSz^XJBfn|w z<-*ML$};lRmg#q$#JYr5=VCYx?S-a;o{Kss~Um+bv|8%&Iq8(y@w3v`_YN8I^Ai=39; z4}PLh(*b>x@p2kGa0N&xStv!?X>q}501s_;<4=k0$>;C&AhW|wYkGSRxSyoUzNKQF z=|Ze^#&Ft@7b`hKe$y-z?tlJo|I<-nJrwah?q<-Pa5_J%^BgicI>^ytHq34j1_wl1mvN4sj=s~1MS2%|0$*;fs zPLe~QpKzZ`h|($7kq@gTT9vZdl(Wi_$ZmCVV(UdvG3rVAQX4*%%I3zpzIxbWPo6xo z(EQLa*z!hO9RdLoq31r^E7!X^ms|5Txgdjot15IUKA5ldF_szMLJou(q(V#21tv16I8&W%5n{O_cYDQxqa#S0}7qZqN<; zG+HDl|G~jNVd24phv1TiiR()220pO{i6a${>bxYWkel`-xK&aN&K-5CNkcLM>B-sL z{CIIRF%|bN(qcg~4IM=4yt5Dc#ZNJ_)cjn#0Uql)niAykzIRmtC1%;#*nDF_PQrB4 zV?Qv-0c>_yR6rgVvBXAAOTf_YJ{zdn!1Mv3m>nkia5BHR#P=uR=4|}KhbH>_An*M? zqGL+Fe5Asz3*iXqhRE~NgLH_LblBk{{T=XuOAWb%nCM%Z5NF-#d}MA*W>qU>m=*%b z4*v1O^mE*Qkl@q_Y_(3PSP`J0wy!D1UABn-^}qYiM)LN%&Ec@cPXG*5mQxwV(g=BLK-g2VQX? 
zv=2Yr+<3UL@n+}E&hFmy#6pfB0lcERr!8J0j7NSK{=D=jc!|mYs6dBBl z2#yt*0a#^ZOcmhMlVTrMSC>&JqP;n=*qD^Hnv<%oMP5u zw#no^#+D|6)XdWtgb^ENcwZBJmgMN(P3aTHAW9Vk2wtUfoHTM^!>=Y?Zxi;#cImTMVJZt$M!^65 zfA}v3^$EFoFBSZI#`vsd?z@}xIiK-1i~~j3r8rjH`eGLa%8{yX=%SXbV(!sKyK>~H zh)|Opb~*+fy~o(2tw~)Jn;=d#)CvSseu6|+8+U^?OwGpn+QSDAC{yu6`}+rP-t3a6 z-s}p&!9U-B`ef^me*T%^e)H|C=P&oL5(uDG`^D!^w>DRHcXoZXWnwdos7(y_W8-5e z;IDmLEJ;C97mTgsPZ}VFQK!H|1eS8gY%t%fGIVjFZoVB)fpjT(h(M_M1Je<;qoXs_ zgcBMx#G#O(?1yn7qqgGsW1_ryZ|~GB5dbam91(1F+3RpU2S<&LUTh3!X!YUDbWpHWP}Q8hnz^x0<#1$ z`dS5oPL7X9)f6`%3UHMwIuToBShs-)hO;DH)DQ^9AjSiQ^O4KhUBxEL6H6tZ6#)Vgw~^RBJqQt|HlBG9q!!9?iFoy~p(o24_&B_mtE5Ov;}8zd!HDV#7-@ z#~n?!MlyT$CDN8&wO`YfpG<@=_sWxy6s|BU?IK?2n0J6j;724(Qp~`6kY(+00E{?Pp|%e|M1sVX=!14Wo6Dj z{9t3{XJ0&&txrm3;%aJ;**)~!{lXdTZMOWK^MeHA(DI-%6(ZCSP3E$F@kp`{lCr@s zuA00?4@@~cH_r%izPK<=_}<>#r5HLmINII2e4~cEcfiJuGsIy5$By>kE`T3QhQOLV z5F(QlXTF8m1q?jWbKR*|A3oZ2gAv-c-n`lQzsY(J^+d94XDlA`2(j4J)0R!MtL2pv(bog>!j4xe`=i-o(j0mS<)HX{X%0(CwfeQXY>qPsACMf$mDy1+u)u2HTa& zWVB>6JabHB>m8eZcf=3L+>Xsf3l#_pH8{=-K07m1s+<)5Epsf&cq9uUvZzI=Vy9e6 z4ztCCJOl}r@VqS!UBJaCfS^_hTsdj3g)*G(0Uss3BA94*QZq8Q^2>L!qbNj07!}JaIqpFl78&%e z9!4f^?ys-p&6h|Npa=T`3S}Q}iHB>Om5SjF}jDZ6+NcUv`P=dH}wpeiw z(GFeqAAY-MQh}CaiC!9h9nU2u37=@K$Knm2O!N*uXM6#=?LmY=(qoLNy(6-UDbkC>%DszPsVJB#4>}LSwxb=l_!dw_Qr7K7W4{um}L*>>pQAeR80PBin$LrGJ^dxNnPDMO%*c>K4gFl$j5O%Fo5qK-hW+Q+yN{Uw8MrYlzX%lf5(z2sCMRV0Ha?>p4IBu9ph%U|#`=jqNvNXF|4Mb`} z7*b+0o(kUX->}*7g#7j!Y4j;+;9c8XB7uf1JCwT-`kF`&B4JO=5zBYRo=ziiUgTaXF>0G6cEZ0}J zlZU;yM&+C$ZEC~k4e5AYMC0Ik%Nt9>djsA=?~KM*Jc0pYS0Luy@8OqSs9>bU2Q$=F z8F+ooIHW!9H}FYJefiAaR($9~iGMi^Hc%c@i;@~Y@hj6}Grd>B)LA3b?ydQgf_x?b z<}x!w76*qRn*R2nnpRoia*&;hf<@41Hdl`x79=!08JnMEV!wjtc%uN077dMV^NN+C`h z9HkiyS8An41IPmGm`@_Yf|5S8$61*>$;v{ZV-1dXwq?3M_rrv@2T6z&66($H4cl%? 
ze$}^0fjbxszK?k;oyuVG=q}RYg?p&f{hH8x{r~>&|J^$`bA-+x+2$qx>@*HE%f@E} z%}M99zqCZt&K*goui~yYSVPv7s4cvy)^$GDYbVoEpmiagtB@@IAJ=S=Rcm7^|JC-~ z+dvP-Y%m&q{CInFGe&Sxy6fBhcbqKWzJ34sXX*~V?ktRGJR@b`52N0Igz(?}H$VRQ z$6Gexr$^Y4m#&#n1H{O#w05@i0AYsz*}MbGB}=K|L*=wdl?;@Rc~8W?qiEF5Duzhg zkHxO6Sd(s=^dh7Ny6v}nDqBd`!(l{Vi+b=Vf0_ppYE25Z_S%}A2oA(0F@F038pZ*O z(-%>g%Gfidt#*J5py0Pv;pg@1{Wm%h>BD)vOE^;)Y_uoO;68pO`JqWArBLDBM<7?o zZ-UlrX?82N@trkI!G(KLo4spdaRELS0`|-)f7K2bisQuG^EtJ;h){V3o2!$TMY!y% z`0@z!N~dugQ1t8;8!WnBf7JBBJT??q?w$htXC~Y8S|fjw4@j|_o6js<*3W2i_vUb! z7I{**jZ)F3YFxE;S~eabk7`=0`m1oV1L~=^APM9xsBDlalyI^D9QsKnJ(E?U#KB|x zR*`Lrdixc`h zq^0AXH~O4)EXL*&N~OlV=T7Xn zUW%lgrkX=ta^YAO=qn~Q%b?GKRq_D4D;UD>k19_Qev``lkwM#01C{k}_PbgStk9_Q z<;r!wP?Jzh?p?&>$VrJ}AB*FUJ1*ilo%b>70<6ZiCEXpQnZBM=5WTgvb=XOEq*} zw5<$&v;Zzk)w%|lebbmVhJzjhP5L_|_6Qg5K>j>Pdq>&U^d`4U&!?^v@`2>|tD9SD zRG8wE!@|;K7*KH-6mgZQlDd+`c@{Pk+1z}4G&s@geOwMk?4dB;zBx^a>@cTkR?K^m zM(Vb5*3cw8+!8yLy-QC0rT4fn$vdPUa1texNrZzsHS)}X-zJ2{f`L`G*B^3*#>g2x z7$T}nJaOc$qmK@*May&F49wLYPUB70N{bP0@d2OJXwfELH2s5TY_>r@rc8_V_V@sr zo;mt($bx74@-@j`wu4E_INqp$VUT+XA*G9jKJgaIay2@JmLvVSCL6hMhC2gf;Yi`e zh^K%d>N@C8<=i@kpjW1oU5LG&_FDkeE0e%tv9ds7BBe_35d-DI{D{5Km~t_F&fTP% z1twtV9bLe8rQ(pViw-76|5EpIsewE}nY(Okz2YV(=@Y3MzTG@Jk#5QElY~w-LL=^* zcaEF$aumxY?#exlDcuF1}qUr5}6ml7EL=Va=j|HK~ zXFlMALR{QGEcqM&XI%p%@sBkwpI(bxGx?epf+LP9#CjzQH5Yzl}Ps5G+lE z(Mkv)sa>g5hwS5x16{a(d{A9O1NL!FJC}Ni(nL3tR*uQY8&aF-?ylLeK}mPARmLYZ zf0^QzqK|_jbE$|{rPNoGle-v;vQpQtS(1UeiD6>-LTWw&2sbm_59Fp^U0v_QdBc}$ zS0$;~tf6qNDJZ7kJqrBZuYcTbQL)XtWTu{PyjBSIP2Z(U6DD;*VC5X61Y<@P)6vT9#mAJ9HEO3)kr=U=Ac5*5pPjB$s<#GFp6j?pdk&{BEICg zutYT&-j-8Xa)oGtv=wJbILblpVGGG`s+0{JNH6%6lV>efq^50K4R-P{d%e!xgh+G; z>iE&HZ^F_QFU|iFUhH>KFQlpUvLN;nh=?ACOJ4FQODdP+z8n|6LM%g13* zX{jx`j6J-PHRxc&@nF5yJJ1D3IrP4L@Wl zfBE50EM8a_&~*5Z1Zc>11i!KigsFvFSLLhr`XLLZ?dwPcLvV1zy`#Hv&!OqD)h(R3 zL=SKwpiDMrzE-&|qMUw5m$ITUVx9)ul8JjNb&2jGF_a_#)l3BtFUch-Jh{FFE=LPS zgDWawIJn+ud*9~j%qwwIFifY?EEW^&7@A8dscfWjv&>9Q@95)|ZHkDbuoZjer)@eJ 
zoUcQ--0(O>UE7D-azZKOg5BXI>xB5-8WXW3T^A6LMM#4;;@!g%8OmzK@64B-<<(T+ zUWCGYr(79_pBy1+23uG@^GP>I-B%WMO5A8IDDvxwyW+FfR`v>p`iFGmMaepr;UTzC zsoQlVVzyF=D2drdzisY_Bg~Aa= z@9C?;%lv=-&;MmG5k*_chj%!j^6hz?KH5``fJ)1KqIq%!e|fzOoTZ^I%LTdZnO(*O zHazI};NmQNIT<;{8Wtu6{X%-C!lHEJ649j4tzI(XPD!B_M)E%NpnV&B`8c_{y2Ju{ zewshr!_c~HHRX;OfDZ0T`~b^M8vh5SiK$-z$fO(O;J^{%dT)t?ZoSLe;FrdZ7&@|c zcRFgNdFS@-YqJ-90Nlt83CSl*kXNV0;Cgtwu#N*lj7uw31H@h36{Bmtkrk?m!U7JH zDR0JY*G@>`Ep8`QNN2UnFa!KhUq0QjH$6P4SXjIXw2W4=pq(`OSSPgzCT_qI z&4jmVWCb_L_qt(LgQoBf`fUerBhL6h|51?GdTETj~+j1w_&au^XeKdJ`h|SjZqv;Ie!jriuWH#R@(xB?5JmR@-Ka zR$_ip+5einio#NYT57duO^5h=!f-8gBDysg8cl2->+BHch?dagj%;#LG5+h+lZ=xr z4fBgmtd%_uNrOO|BoBjoCXqV4i!F`F5v$UZ z-6XC*dohU0$xx)_2u=J;0GWfpsPEcdLW*Sy^VRqt`d~cZNyZ6#|u!EEbP%LFO|ZoORm9S)C^|E;2I0{ z3pyh>=G57y!xx2Q`=#qvxF-|df`>3>yS=YZP z+iKbHd~hoKu=y$8 zgKwKb_XI1*&zVMvczenn;&4frB~|ncJ4~x2HSD`lb$J$y_CoL3f~+|9adeD0;mPWk z+@@->+ZNx5gx)!Aj>`sVRvW6P26*zP@fC+bhKhE1hqnJ)1nI7Tzw=Cc%WDP#&SLP6 zjcB%*v;7qY?qgLo*MP16-~Y>hEnyhJ=#};^=Zky2!ziZF7Pq;eid4#Q<+5`Ck)Kzn zqkWxDs*6vaQLWbkR*YjJ9>HunTEPai@U{@egH}$)iS?vbugCmk-yPmg#`t%)*~eUL z2TTxR0p%8 zarV)2m6N6R7F%yE4*yVKR?y89)i6Q94%O6HEKD#w>0H6ecj$+=AV6NZcCLVA#xd5U z4Yf2CkesZDpiu&)VnP8mVEw1$3q6cnyB-fXnX=KO*Md>x^VV#HtXpTd>?MG@mseL4 z%oJ*PjjzX?$AO(lzl6UN(w(hZ!9a!|6ct>W5ph7cw8b{*WDc7)&dy)(6yzNSRUN{?bpfx67T!)=j5twZUr)FdeS%8G<9K_d7`#F>kDd;F#MVB+@$f3ikSJU(&H4f9 zW1pyj>E3wa*~wfA8pLLr7<`LLoSAMNVQwulfOP1up}XT`&BA#jUheIk1sYs%tJ%pr$ht zC=nwVGt3+GKc~4NrVnG%Awj|38JdfbGGBeap&x>=R{ai|juXng{HCO|m9k;QTqkyV z6{zP|Tr8(mg(amv1#}H6v-s`Ni?WMu`Bl=aY}4@y=s1^yKE%rmeB?T^fE!ZbCpq6B zfVh(8aVuQWaKG|2&STL~Xo3FZf<{}Otlnjhwa(&46^d=;Ag*48+tMdgG7Xaf&!W;H zDpKeQ2(CF7x-DWkAE12@4T1c*pKG-8U|O75JG`)VOSQRnIl4xcgxWpcF8zv+X4Q)1?nbbik+aDlEKYp@>%+XgyeK& zKJ#s4XoLcP-}xL$Bs-R!J;6$xiKFP8Q#5PA1u5nAH`^H~dMSx#3|y3s*hUTEH51RG zS>Ukv6(@sxz|5iFq%;*dWy3QS%>oc3u;m(W>I$_yZkJqOMV%R59G({6OIFGmN zaCw%bjMRkA+dx|6{!qtfvVMA6B0N-LvO+vXds}Ukp6d*rqQb;N0Zx<@ss{18O^K=IsCUd2 
z#?+E?nvRzq)I@yPy&_G)xieCD&>@l}$11X?urhGvSVTXzn6)u%VGJ1tMFd03YHKIA^^^}86kIU$2_yliE&b6zrJ^fE1Bse( zfwA2x(2gQ6gqyigGIqrKkmw|Q#k9fhvX7$?XHcf#)AJ*w8N63o#To9**rBG;Qv~S| z;-qfz6JBff^nEz_G`yZX-+u+o20jQO7(I(y&SyV)O^ddQv0&+Cd!oDOmDE~bq-Y=i zMB&6DL_$h6tmK&v!*r%QpRHz#7RxqqEP}nTLO$|>aT*lO1^ps<;TLC{s2vo+K=8q- z@2z81t1vwn+iRH^H%Dkd>;Y*?+PbPwltineyha8zxO4Tbi!8>Sktdq;6(^&<;HPS* z#R!v_l9ZWFlKU)MI?Iw8gNTD@ETY}{5DBm!i88bSIAb;|Jqwk2CXsC}5y7wIJ5xb= zb!18n)9tdMViH<=h*`SVh$PvikzkpQfCbg`ydZyziDNz2c3BNgG5D`_LThqOsceSt7}L$OD82{*X=b{<9c zuIv>!Q>qjjAx0*xL`g%31oF{iFh^*I;+?KDK?ZrlSOngZ&)v2(3yQO53t-mB4=v_! zrB-$$Om) zTNeCcC1V0|y!tB>B0VYBSWS@k6#T#<+#=1iy&xbuP04*163bp4MyPfxybV>P9!AY6 zJNzgq%$6=_BKDs~MN98YQw$AG>`eWk}l63vbGXMG-@Wf^FwM zK>!m*v|F!(1(Cf}={BxGsF7$!FNik8`5l5js!SwHrkR}=`e!Qfp0-pYo=r(!@VEY* z$}82wX@88@=(AO=NE{99(T>O};8}cRwrMaF0!vP0qtvL0^>$G}xjJLHw~xmp!cN4p zDXbW1k13h5NNjR?m++!XOe%(ig@v{&vPRxqcowB8^*J;y7pODhEn!0duZp0K9Yf#h zq4NDC*4{8%vPxYm3(RR;aiBy)4E9w(9($QA&=EdbU3Nm%l?{i6p04-X7Ne)|@G7}7 zs-f|DYRP$=>ser<%UJHaVGh4X`eo`TRLW8F4|o&+;UG* zY#U^m#a3pC$mmlDEoz)|UCiDB36Zvvd*v5CN4_PTY05*RZvJT>KefnZ~V zp9W62W*zV}ZXL$D!3Er*lFH*GDQU(MWNZ4mH5v8{xe~@frBe^es3XsV;q#1*(WqAz za+Julb3q`%WC(D{AV&ED=q`L_kaaN3JtrZ^00N)s+5&TN^sW9cxx$TDzh(b}=g53=a9f z!?VYokq#^Qi6Pa7_UthpI#q2q`UCODuTW(gN!Ez|A_j&vIyvtnch%bpI8wN4fdL5u z*0H5nu7Xie`p7TC)k{*shcb$KH$}2eV?KP)lree59<|@ zoi?P3T4r|Qa&abWi6x;EoTmzT;ZXoBFzXO{aU}s9x2F9LAqs&J=C(5dU~JH zgYj2JiZ!=}g9+K^;(%9clXSk#4ovI=7k$ zb-MI~NDdKd*rSmasE2e(qOtKA%k&HG+}cziKXJxHl8B{EAa!A4fg{laVV>4$X3~7F zAiWJnetD|$9)&~yD2eOQMYZQU?!;}QyGR2%w{l&TSaYs`n~NteBNQQ0OyYB`CwGjR z8cj7HtTVoOW7+$2vCd>I;i7br!IToKlGB{l?B&C6F;U6$*&F8?FbkJi=;DW?8(3_N zy*N^x<@w7!bO@r2HJC1n3B%Wf{87QE7QyV1 z`GpqOZUrGZ7Ljt219&v@D2P>-8w~?ow6#8kj6f=< z$_W2mkFQweMztJdnc3^~uTt|(RdYGM7JS#0EdVa6@4SS7L$YkRo?g`2hR(TnnY1QW zPv~GQRjt69_=Wp$L2$GIrWNigr8R|!!#;tSeq|abo_ox3H>5uD!fQoRLNdbH8}H)C z`JlLqUq|W{O?BD+Ji(%o3(btm|O0R1Dfo)y^feThMIY!H%Xs;sy_E ztas*pc(_s##7=ijozOo8U%-pRQzpEHbmVB7>TF((3R;!<*CoYx#Vl&X4+sqBCpR1v 
z@Kmi5)lQ~+E5nI_)`R6`N|t^7B887DLTEP=P7hEwc!Dby ztY7(047fGju%!ex8UyQ?kULQ=~DEz)K~XJGs8wL?Z|SidsMh z=?X?!I*CUFwM>6bCO3|mO50+=)8CvI!`c2oSa7)_vEBi%o^+Olav*R~%K@?Cph{Ou zQoNyu#wRjC#33Qe6B^sJ)DN;wRxh+bPK zM2NzN2^g39nvy0QBB{t`jGT$@Mw2Ms^m7Y<6Zvg?^SV?9K~h|885|i}10&MPk8`__ z-X`x6JubxT@j&rj*|~SlB9!<`Egj2&WRvmPke|4}Q<$`o+*HVI5KqT2oTxuiRZ)%` zV-fVE;HQxsS&3Tm633@xE=&rW>IfTl{-B~5X!L$Os_~2QEN;(cYwGr8h;*3iacnUJ zb`{0?+{f`BfpB6wQ+U+0wVK}1k?a58|Nh^<%L3yWg?T$&A4E}LT$0@J@+g88*})MR zs-xV;n@r;O

Hfe}ido2r$1gN+L+Yite*hCkgJ3%Ss4sP3?^4Uzrt}Q(K?nA0I59 z!@SX{F%|8ak^8xwe6TdB%$6zrFMn6%@vijC7}g zk_gsrXk~-~(=wSLt$7m0<8(fUMY~jeqUp>Pyv~=yfmfG=5go&$l+iB#OFb|UdQs?Y=fr`9 z@N{PoWqBEsS4%V}Im{uo!pz!R-fPRHdj)v2Ss`<9>jJq7Q>q}U*AE1yh@GHSZ6EaU zWFwKRjB83WG;x1Qf&}#%6M{eDS28_u7!K>6b^g}Vr=a<~k_CWrD8^^_5b>0I<Y3A)cRzA>ARSVv2BNoALQdJz$sx=R3{F{DEx~6qBj>z}Z8n;~I$jikn3QHIN47AF)$Bn~bRXUUVFnmcO zvHH8_KNF;NdBd>Nr~P(ICH|OfIFYegmSIn?Wd`y%gD0cr*Z;r&js?IYR1g&bq%>*!;3G%5 zfHkbetinkx&xg05yD;wX0kNvFzu-CJ@~7DgT+tY4v>y$NH@TUN5tZ13Hflt0=T}B{ z2+Tz8D0j6lSb2jRlTNUtFiBWS8yH=2s@Tt%;qh$#U*hzzt9jZRI= zOz}+o?Q%SmhoO=I0zfT=)z0RrJz6zxTl~s`qT%IZDg;CuBz9KU!!Qg6-#!D4m|<#K`Q4yKsZ_jc`goL;(aBFNE-bq(%Z*an z>3U@iEmFEh*iGZ?W79|g6?^qO8w2T+E|35zwVclYJJF_+LK0!m5a(9uof2Hg=lqQ> zTHNC?s>24t|5(;Z=yLOtj%Ei|{oo_Df=xDtq)-YK^Tu;jw>2tU^!_Xo^a8GQL~}d)}KWV%6Bu`;_Q&sb6FYJ%XgU8VUTuiiMz;RwI&1dBxWiP?}h^cHm zw7iq8g(3;Ev=zbPqZOLrKp`A2uqV!WF#EnlESo)uVQdKa&Ig z*&kqTU9DwLCHX_`Jk;*LWwYX_=OL*&8IP?8ma?!!(?c~W`(MYG@Kt#KY>aB&x= zRZ^B1yser4!5{LVB(&R|jzC_m0A>> zAy}ra0Wu1vsUs45%>*xOOQlYY730x5ag`j{hPOM*`)eU}2Giyg?s&LWD% zeGWEek1!D}5E|n5V*TTzp%SE+(;W3OQ?pWMc=>260eR#N-V)}Ie6{_>W~@%I!_o_U zxAFLzl-{I>fvUT_HN&*SnMg*C0caT4b@NjAR=c?NZ2`!_K$u$oY{5F|hCKJgXU2a) zmhrUu|MS27w?+EoxG1jdrX!L1tfQz%<+5y~Md+Xiy({0GSstNyPK{X5qQM{eQW>ZT z^~LDrX?Zf1M}B)=BZ$_sXNq>fxp2HF_hT4={H=|<0T`W3Dm&fwaLVu`2d$-Uf4rL* zwnrA_D_kCEIlu!QKi*NcX>V$0pgpX&t%O2#eSO1VJkMT*l4vRb?3}C?ccDG=1uI%9 z(GnuteceYaFFCV~&0+R3NZ!)eMOF+h+6Y<$E1cG*+z|SWpw-@``Ds9@$ycHk^eJQa zaWY2fL;euDLlH@SJmecpbpmZom6StAP)TTB4FO>ydO6E*1GCGc6U92FJDIGe0onid z?U8$pMuYE9vw<)~%S=;-fDAA0A2A1u5V{Xb0*!xkbyFi#d=~!k4F>S*tqIpKBOKV$*ZHu$MNLj z&F8=C53dO-bDvDKa6zCM1YsOcq8BGSXyJ;XDa^NXY;C;9I%t@|yUpA%ihfM4LUvyK z1}p}yaE-+E6>XF7d%P)8Rzfl}#HR4yVel zkA=m4#Cl6*D?Y1*y&nA*Sr+3%U@KQ8;|vDcPOubdQt|y*-wbd}oYO1;v+pVw?7u1U zyWo`*=1cR9)3ZZ6hV)s$KDwf>im80&20Ayja`OD3XU}pz;*5;`?|5*g7vralLTh96 z-6lIlLM<#-in_1{%qZH7{5D4qo`ohZS=0=ZqS`NQzX0Co!DY9D)VLnv&Ase5Z$=bj zy2Old4lxc?NsOo2`Wp#Jl-t{AB9Y9AYm*MZ$LYHA>!1yp4NR8V9BlAb{)7_@pZn0B 
zsQ5fPSPU9-dD$He`f3t{yiW2!05Q+_4SyMmR5sfmbXgFyA_NofBiVW{-CnGzWtjPS z3^bL{v0FFEP>oYL9hL!AeVC`%vOoRu^Y^DmRcb&S_Q>NKW3mB29gHS`7VN7&Fn_sA ze4MU_)n?O&7dliGK@#&xL@hoetH%Ub1rRH%El66USGb$})`gSIGAQ;}*-4A!I->>s z9evI30WV2N{X-$ogycNo5gGDu8CNOCrGv8asr6n## z17am}TRM~UdX{J$CiUEw$Au5bNk|DiS!Yo?dva(~M=QK2QQP>y%Hngl24YER(%bz^ z^Vl93P`Wa1zW8lZp3LG;v{;qH)`@vl`;-QgzGP&BCdqN%a#&s~=Z{nytb6HT+&u+t zTjnA^*boP)^LCgVu-ZGq_mK#bJNZ}#PFzdnz zHnPmmV2HVhb47M8_B;X{PZqWud11^e*9BBSmnuL{ z&o3W84oI1WJ`oZ{Iu(qv3tQGnAz`-0P(VTfLFwIco5g}GfwOSKM zWgyt|kmG1x!GhGJLur!GVCv1&V>dT9Km72?QLi2VBDg)sFlJr}7B3eCfA%<)D{~T_ zGEP$nG%B+q!Eqx-1keLIqgi{(@Jazzbg;yL;;G3bG?F>eDn}OK?|6z`60_iGllZQO z`wq$bm$%zX=67TgYU1&8eAR+3GG!YFlnWVpMq`K`%qy1vu-J<1i^OA}P%5uHXrjTx zUa}&i@0b+)4{8){%5#+>oQmh<$W>y>$i8kDkwL2%#MDq#Bkd5&LJM?;aYOWEe?{?0 z3DY>3nvzpT(Gh4OSJS*Txd;Ads%S+0Nom%ojeKZZFC!Q1zbm5Q%q_=Z@mu+P)GU_P zR;?^q!s(S_njH~MXX#+MVcy&l!iWREO6ie|qrIr8AZz8e`C!haRV=e4h?RCCo{jN! zS6HO`{s(omLu2R*@E<-t&K9mAi^jvW!B@O-Cijn@sO8l{r9vG-|E8;1z0dPG54!B5 z<+R^qOxb5I$ME6f?R84u;+u%VdVM{rOHwS+cu^2(F)9r2JsFdFig2Pjm}Umq4gfts z!oLhl7tO2R<>uOrU6z}lS1zE;JFy+1KQj-Qa$GC}@E~{y9)8oF)?MdYg?$+r{vYW_ z8$b+<$3s;V+l!Wrp+Esa8AH&FN8_J=`I`e(#K<>z<*z3yhBJS83ClnGK8|PF%sols zeVV@DI-vdds-_5=#TvQIb2l5bd!!icT&s2v#Ay!Cqmu$8WJ6R6@Q4@LVSn!I;U3@K zu4HfSqW*9Jqo1vh|6y0;peRi$ny1=KW!k32%gg=4H+GsIfBehs&1d9mvWHCLuUZ`{ zG83XmZ{fzo%WhI~aO>y=4_>1%dxi{5VWS3(Xcax(z@3!nIVKgQHevS5C{||-pPbwm zD*iw;B`h=>OOp0E@wt-86N8FeC}{vprszI#W%41k><5zsg|#y&pw(VCPimfxLPpa09hVJy-6=swq1{ZF?eY6mhCKi>`5 zp>MA)`Qg%a`*^z5dz6LcUw@g81}YJUf%Eu=X~6!!=ZBN=<=x#CkGQ^8>12wgV&kxe zQLt!p*L-$kp44mKKM8D1n{lS>DcwB-Wk*%YPY)*&c+J15JP*q zRs{`oh98pIuxfflXG5j$BYgoTvNUZcrBkTa+p{9i|m?#0Xt3{k)^St zYj$yNyp=T~QXcyai^zy2p1KbJkMmFn4GpPOjEfG5@2wMcrcvQ?mbavRK-m_xrc}HVb!Bs(F0u0(hPnJB+as}x(ved{*X>c#V&-O<=JWeV6 z1ErbAf^{V%8ehE5r?;Pf#PVWa=?w-T&B5uIYgY_n!O+LK5U%xXkCuu1sI)wB@iIq; z8jmNVAw_GIenGF0^v4|-Sad_ZqJi3>7DPTF(T=H};kA-QroQCL2y+@3Cx~2VW=E7P zD2h@Fh?`94jFG;0Bz2DhkB(pY7)!C1Qpj}Pc{YQAjxdT;H1;`BV7Ydp7w`_dgEUj7 zbFxMv&ge%R!FEz$$ 
z4%iE&={Pf1ew9Q&RN%y*>_%SyclE!b4e?o?uY}<(L}`azO`ObV$q3h1>KdAx%%cov z*&JMaN}uzUJ6eaHB8g=&XHeK#2@Q3wuar6!VZ@p_XZFydO+q$!KQr$|uH`{U3T`$Y z_I~`~idLbXAeE@IjP6i#c^r1Xy~)EcKz{wUm`+!8Eu0)Cyss0?QdZ=8%A80+YVqr%+OS2+ zm~x~_{cbcN%Wf@Tq#=R^Im6fiq=qwqfD+P&MuOC#m$ap=H2~kI_jv0u6$y}0$&+Ur z+(Q%|&woyq6TMFufKZV17w4CYtS@zAy>dNWLh69X5i|aW&?I=XiAXRCPBEaU z&YAY-{&FiFbWYQ^EQfB%e9Ou6Xl!=JESz9giY;c7+V_`aaVCU3Ct2qz=wuy)wY|%_ zU_uI>C&9Np`CqDt>>#JORs3Zi48h@DLL;Z=GCA2pt%5OkA~Bm?k*=(t7qaPaLA=gd zW+viSc&zcWh0x;sjc6}e`JkUz2^*(;I}9cFe{@6hG{~Y)Nn1tfCrT>liNKCil@Wjb5`#wR$mS~-!Np8MEMRscZ z=}(_wQ*}e+!Mz!@e)v4;clx+crg&j@f;iJfZ03w9CBPJ7w$AwUG({|LZT7q@mpFNV z#%Q)ky{i7j!_#b};>hEnl9af)Gc5o>Z*FfzP5-jo_Sv!)utYC0Ox;w;1=J0CX3!s? zGPFm1^9R+@ntGk9t(PkzEG~xG>%1)tHFftkA9ja+UmR>mG*_C&R$7xP{^@3nSESc( zGjUy#ykpW>!NHW7%X8V>d~D6mkx`h>Uoyqo0>!3!G6&OARq*$fm{J=p*{V%@o2grU zHBxpDr9Qw)UXKQE)%suj%fEj4Jb8Ye{q)mc|MUO&KYn?g9R%wXU}QdhMD{Uq86#Xj zG-x%SE*3LJicZMR;v7hY;j*rfjFBe&Eh{9~|FMEzM*J-wP77Y_h~CKXurlGIn6vB+pZ^6GqJgP7TWjc`NxkxQo5dN zaZ0lYh`!G5XNX>ADie(W8TO-8tA`3a^9QPE+&OQD&cHYBI&k-)zaa7h=YlLn#PvFws+DT zt8&J@X5)Q<)uUY<~nhQaI5#Sc=q1UyVe-N_|# zT60CB>H3mo$)<+YU{+H^m*9m2<&`GZQOXPf^+p36BJ5@_QhX|zCrDjZ0Dzd6SBh0# z0`sXUjvlc^3OLZYOPaKnCs)}RtV5618x|bhjCYw1I3~fV6I#^aAn*@5nFu;ksn^vu z6#5=sbL1oTDH3qqH{Ffha4u@v^&@s9i&Fm!rOe>NRU-Nfzw|6B)~DL17IqWw>My_k z#+1eKg7dKn28n0ycBpC6+KdJWi>UM?BesbSHfwFRE<_CSofk**TC5$KRr$jW0^R6@ zrvU`5-4^N(Kez9WlCw`2mK{ykCZ=1AuxXmvdKdDoc<`9>v?@<`EyEQ<1>OgFq%;ll6CcT+h`Jqg>^b{YKA-1>Wd#+tbOAClSmO@cxkO#DWCb(_$k zR_A%we-!R@^fIzHm11 zUmDLL7nKBAn^%?4iHP&eG-O&PJ^z~+7EMoDf`q29cJ_xP(x#<(EU&Y&@2Qln zr<<-UUldWD+K>%R&An6a{Q!%P;-!nV9A=Z?kkU2BzZ<87oMC^GajTn0yR&qc-T>Zo zNmFhqns;~i5!EI=KSV$tYF@f>w!5Z|Le7jl%GhYs2Zce-{YHw`nzg|lc+&`ax|=pT zGK%uNUaS_tqqzVuHy3ox!3gq^-uwRanmS0Bb-```P$b9G<)t9*z=JLlYd$~E7yYho zNXQ>O9fSbw18M&8&T%m$`_~KRI;>(u!Fb)lk~MThi-^$4jod zycfM5T_$rrDL4js@?LucK^PIOrtOha8Xq@4>`RG)sUM=e1a=jzO24EH9;8^a=c20R zH^~uE3BwDKA(R%h@tRr_V|8!6amDoVv$VWu6!YTb|IPqt*?R|*%DnfHHn3G{$U}tf 
z!!#5hfDiVtH|mlAqhYtQGD7%?CBfQAjqvHKQsB_N7^u6C6F|$~?x(+gePkj3`t=!W z=;j8$#tS{h578kVk*u(}2$p~PIH4b=&$H>2?HvVJ3uZQZxn!+k;=$2{p9Z5tTly9` zFc<=OAXl&AN`pe{psWXjuFaSS^-!A8#HW zC6`|otCyt|Oq)MPXxzj7BX^=OEbES3x$jR?O2TqxG+e=Q|I=^xt2KGHhktv^(_gtB zQU(v!fdb9A6<+7FfL132`a0!;Z;Bzf?zu?^#mvOw@gn=q?1)5PejqNfy;Sris; ziU|b!wh=}tZJ62{X9m|hCUr?4dpXay_PD)~6b|?nu(iad<(6!V7zC;2D9Up*45waq z%)^h@(i?CiC3!g$>DDwX6{)p+c_qgu#acKhAplbnqE`vSazU=+i%DvwV&x7}P^tt` zFdX3o>=-K{S`TCe{T$PYJE*Cs$?JuSyNLi9*P!m?5wXZ?#c7-Mo}zf6!!j;UCFJ>J zdf575r%e|1C9As;r!Q!-s4(%-SRO~1g`lKf6wcDoCA8ts`*1j#FXmXJHQ2}0`za$7 zQ#MhtgYo#sv3H#2VD}qyQ8Sef&E^cP!{|vmg&W|;YW9diS2&~o?v60F_Cnq>+bP=~ z2w)};d=a{{ER8{i~mT z`#1jl1?~Omc}k1^<*(2G_Frx>01=cpgV5GCnwss0``@_cx}&_gKypwvD+AqJ@llg@ zIJ$azyf<2cMlBe5fZ9l{;rIscHMH|ewj!T~-Cy-c^y_poXOy%hSltQoDoRJkXc9Sh z3Z`uY{ToPAtJ{sFko!}U@KkNt$eMHACyO}%mK?DfxJd1(xe91ZmsoCRNnY=&nU@dw zmQUs6hnAw$IBQGw9tO)7GS*GUk@;tx50t`p|Hyk_^+&mTdChpB;cSX%<5Km}wx*n9 ztd{n%Dms_(fo}nnZ#N4O%k_t+Whg|lmaXG%Llr4$gF2;Q+yrfhNf@KYVpL}J!S1K{ z-2&s(box*%7&)iolUBZv<&mk7mDP+%6#PWPT%8fMdUVsf?zt$5d$?hJFS4!F0xRlI zBpes#+f=;FcqGRnlyMPfkzBd$ha91^jw+cOCj9ALj_@qEXrg%c1g1n1Tn+m~E%?A} zvofrU34JK~!Al_O_@52^-sWwwDL@i)@WO2VPk1l3(qpCUTz|Z~xqrB4X|vmqoWukmm~F2_tz!>MqxJgu zOyWWh>T%pC!3tqmja4m65MlZO{(E``rHP*qNZ0?=4JnisN4qGZho0O;!;gRu4T4af z*^86k$f$FH7|I0Tj#`?$Ywh+;?uhTeg~H^a(}w>2@d+h7yz$qQn;>hsG1pY$SCuGa zYsLZ^414r5qiHZ?8|`R>67+qB*NKt&`~Ub~p%T(-a-}b}2myAo{tdY6@%_VNr>kuF z-E7M+Vj5F>jKyOG;<#N$xyvM@1JQ`1^zjm?v!I`WRr4!Wd-T2tj)R?R5=L#UPZ!Jy zCGm%_`v$*i{lfX>-hr7H2tSMoe#n-_!7Hgeyt$4dK1!y-dz$ltN6CsvYE>{IC6VX^ z@h?9wH!OCUq{_+rnYdVPc?4%i>Pbv9tL5V5v3?Cr(N0GLyY8p*pEs*y&oHI7ZyQT_ zWl$?EHp9nT+tnPJ^YdK)m~~LbxZGRC-H>GrI^QXY*0n9Am&Cc*6dcBJYi_O|yyDQ11aR41$E6-|pVtay?^np3R99 z6N+L|viiGp0tyFfY-Phxh~&EJsdEzoI`+%l{OzZoz%`o)aB+8cM+0A8mh>!PHD-bk zXAUs)Dm1M3xd*HKY`$o@gf{Yv$4tgov>mP=#tR_@;}`vYdC6TBw!-zT{nfw%6e6aA zj}Gcz9G7J#m6^>mlhYW~OZG#57E(`mP>%SGtd}#ro1G?^m+0Dr%)ml1wUO4SUEChb zkd?ck}KjB=OG5rCJlfkNtw(i*Dj?6A2Gg0fU#QUoy3n@33=PkwjZrHs>P4TVv+D 
z``q8A$WNH_g;0(P!R4tHd6U5*6U)Uo8ak`>hVe9}w!S4;?l?U0jR>)o~Dw5B0WMn85!ITD!gy z?y&QmdB0CWX~rx5FV?L?S}EO;B+yVk*9z{p$ZK3j(oeaI=n^$myz5JBCGFNST@xrr zWvf{T+T^KnuMk1R(#x&+d%gyu)bNu)bwWdRLW|-DsUwY8;l7~m(m#j9w74vH;D(5v z6ZzX4>D@8M#14{uMg-$qk3* z5gX+H!>3Qw79(qZ<MI(JC+PO$&BxgkV^Lf1&D9OkjZMm2 zJUiqRgg88rRVu+JP3Hq&)oYUXB4mkaDo)EL(Sfo#bFOU7sc4xYh>-{4b1;+3{yBxlmq8L>Xrb%8laV|VWv}b&M%{J)e%7JfTg;e58 zwpk8L418#$lhJ{{{`9YKq!LK6tD=56x&nZ~4C|5b?O1!2a+<0G;ADl(hDiW75Hknc zfoYX`lbq+u%WwZkFw)cgR#&4Ygb*z zCj43mA2TA=Tk5^#V$PhA>)(yX9nDRU$nZruRbfDdGax<`p8K z9STf&EA!x{ns8V+B3UA6-f_=t<^p59IcMCbZim2ItgqN1r z!^8S2LR)d=;5@lN*7pW{3zHTZn37hmu7}FQz$s5vEJADvhczNH*E?HLg^l2*bKa$aQu`?%_Oqw_nbXdrW0Xn9l z=Br#}%!=eR6{Db#F`1vYgtS*HXr#vE$MqD&9gw3RY%5T!0PoI^I zP;b4=zOy|rCs-TQGv%#$F1d_o^mx`W^|8u_{m$J4(%J^}(f}s;#OAnSH>)IYlkS8Z z0nkKzj}fyb0yb59ZRS=yiHtQOCyP6q8G}M^lM8q8!h8VddCQl)_RI818IRmxvtkhY zgDdP%Uef_G1B#7XiQkG<#Lel=)C=RK!P?(m$#;xzBs!i=()Dec0QbT}FjKTdRG@o> zv20wZR=oq};m|Mx(bmN{RtRvkE#{I`YiEhssZ6G^vy1EDT}^CD;a812FB=mjMI62L zVb)~t)VtYaM0kY`!OKBPE)uWQ3u_cvm1r(koL42~bFPcZ#3THq`1W;3*@=X6sk5Rq zrMUYUVqL2>HEPhzxM=U#9Wy`d{o4EyMSH?G{1Ohew5nBoh@ z$z6di<4K(4yQ5!>&w$S3Tu8V=e@|ue5vY>E02PE85;Va@V@zJr?}D7fqXYg$k$~te@jRP8^H01vlz48qvNZHb#(o^8H6s74 zqJSmjGIvnFLsBkUArBglM_hrRPgU|@FUX>EHN*j(u+h}lu`^Mgi>h`QzQ@N0({wh3 z)!54+J-WWh(&Z&xC`P7vE!pc5tEvo>l2`~oq2?q{-RI&YMSbnj#FSn3M&(wgrh6?l z-4lmOm0@Wz&ax;)H_?yeXH2IXLBLORr^=8N{mNx9IQfo8v%I7r6eCBB5LD~u$MXNKz=InwU^-ZtIPPn^ea^=wAM^n4=GK&X*5Q< zx$C=z_=Mj?2pHnTwy2C#hn2;^Nbr%%zLlCTU` zJbeWw?me8&AIOO{?(c>Mw)UItFpW1+)+PR&*>F7?<7{Sk1w?s$t%NMZ6dj0#NXu8& z@~9RIo1&21O=aPKz8Mog>~^Tuvt6lJnk3`R^%Z*|s^$EZSpMS6$J@vql)Kr>tg_XK zcuKrRSR_?Oq6ek^Q9v~Y181m7&vR9kY{Nt7ie_&(YWKU*76^F49KVTH`hAF^1urwCeYBl#h;*Y4EGjBJCYjrv#5EVpZkEqF<~x5 zz|>`Q9BT$EhrZ*GQycM+Djn9{@WsfP9l{`#Ck6pIN-&T<_%TS&I@>K-jVz++pDGJ&6}eo(FULmfBEx|N;jR8>!|MADk+rItvi~5n;ETrI3A);$Q{$b zBy=>q&1OC(F2$@ie5*-EM{Y9dGTQ>KE53s)jN?BofO0-;&H-*E=Oo@u-(=5ZK50h^ zEvHZ__N49>ksU^59sEL)eL+F@Rz~Q6@mbWzy z!5Fy`@*RTiu0i?&*csu=uppC|yU&la1dnON@Uq8t&j`~gxPB&AyN4+rc}(CnS-4DG 
zELqa)nD7X==jjZE@p(E$JVgC2)hMm8D|;Z50Xz#j5$&NRG33>F8(f`1kLo>;3D_7B zPYn-atBVSmRbAN)jAh^|tmsha5vs1pl9r5c?EpY4ym`uJs+;4vm-vYG;rt};(j^0v zBUNr}F6lwhMq|$CR~Ahna`peb&q_VF(fD^Eet4ga;q%3(uKk#6oLh^CmIZ`ZQGFZf z0mJc_a3TGSP^JxRl?Z5Q0p0>paJ0yqZ6}Hgxx-Dh=xV*%PWmR&$MN+gFEF^e6~~Zh zFomr}%y?^yU%Y|z(J*<#;M@9H1e|ZtdX+lww7b#u7^xS!SbRN1fl#7SkOqk%$4Z9- zf1>&=B(O3aHbEPl=KopoQ5;T)8$7-)#kG+m7<+QJ+AC4@O-hoyfOXmbtCe)@6n->C zHVb}-a-S?-?o|lAQY zK98HS7i2q&fl$l-;c3d2JGr~L{d7w|yu7ZsAF_&mz@Q-H_t&5A&5R)lf>4vst<%K~ zf&YSi1LDsi$cVwkV01Mc^=Xm0JGAFC#H=jYgE*5cw*?#GqeNKC?iaf^lPogljP0Qw zA;g~q0{k_-kCz=a%^7{pS}60kT&yJ_2as>uFe{TxY!~yVMcodHW1mfwpUWA)&J3ro zq&#XT!BW%ka5u+IC6_MkSu5%0g!~qS&T%jJ#}yS!5^B_bMOi zojesTjx|jqiL(CH%~2eR;0U@ZV{y)Tn9#e_(dae)Tbw@mP|ieGE&u#u*^KbmC$lWM za;!l@wmN+xrQ!S0(iPUjn z;adqz8?Afdzde78m zy1UC!C<7zh8ugZk$wvvONnJAW4I38v%|W`KxgZrD5rZ)Tj!p5ItnkGWFq5I%75=5! z!z*jN+mj)I&z|&A-pDx$&6EszeSLY8h}mFm;f-E#%`10QDLw=Uh9Q5O7M&~n^Ntx) zdXvC8Nm#5j(&sLFGJvHEyKbqv+c1@gcDguIR&%SU)cnb`dQX7uxmBR9HvQF!xi+o` z_b=1ipbFhK+j|N+9AQ*(W9BRS<2fw>j&uO~yy`4oVe!ERGm>-TY z8#v$EyP7kQrOWPYj<_Fi0lK#$tVCrZ!_4cl>9H0i%7&Q23JYk=e z*Yg#vO0olujdyVcWb(x1-QOCh!u!|V5fKV9XUtr8p=#al6SbrVno~Suk$oadEB|wz z)E=&w0*UqjNBYF@(#ITs)P3^La`IFk^)w&Oq}4^#-Lpfh7#|RR1`b<@q=qJHK?7Ob z&MoQ#bFqj|bjT0~7IT?<+_==0XTq?tc`SOZjs(4_xP?zapg4&N`z!7kel8b^t2URd zRDV5ZH0j&`CxN5yuu3;?`c^#OOh$pTlHbpT@>C!zgyv&0V{VzTcxdJ~$uetAKr0KX*x(g;8V2TmQS<*z!E;!O$KZH8mYnf8It&R<{2 z6>#VYQ0c`;7%g<`@0Np_n&YhmIcI8>JVd@=zNtfEePOy*8PtKD36# z2jqDi5tG7|yIp)Qn%QSFn4!rLjZY49iv?Nk-ZZt3slaYTun>L>3`!OHGRb4-4o&)ceX~Y2NbY`eIcx8>8f_s?BV* zK-Mq2^($?i&F0^qkwN>g9MKMD3M*rB-M$)Mvb#P%!Kp4JG2DGW{q)=QWQ^8EUlIjk ziXvqH_RGCkIH#Ch5-5hm^4bc%IFdEO1fvi$p>%{A4aVWhbvIWNRmEYizwEc#f$llE zF`G&1z@|z^fJ{y{j0gmlX12ZYXuMpKB*B&wOooFoa|DyJhf&PJ0FIAS+(czU zWevLrOXQ|WsZ%}q%(O9NyeUJK`5D4?8sTrHL&h(Z+60Lu6KHg=7g^~Mr`vtQ0(Xui z;^!%p>R}UYRmSTG;`dR0mE5i(Ygje;@UE7OcAXTS_is5JWV5r>CCnAm>xpe5AWUh3 zg#h3b;d!Tmk>`>2a1};|XUCxk$`Tq+><{2kqTYaZVxTk}WwW-rEjI@%?dno&I7{XR zKS2|_=rv5+N-1$Bs-`?qIRQbpvg1%S_$n%=WtmYco^p}$X+O702@_i_ 
zc5?dQJI2^ri-AN-9rk?9NJ>-iPD+AYE}7O>*Fyv}iWPZD_&r`ce1{aV*&R+qx<3K-hB1DFtzS+4jlM`4l5ik$s z3YT0+0cffERmKua8FdMZ4Q-hK6H!cpJl05UVL5D2u|1#sAO_vn!UP&6xj;yB(7AEr zWWFfIEVEpnm=}dHd+$u;2=wfAIQ$E(pX8pkh7U3NT$Y{%w=cCZ<%>%pI;J`Mt8C`t zo(MrF^3MNGy2CzY0up}^hHJ=3ZK&4uG9w_2fk=yULG)*he`iq2)`=808*(q!Ukrx+PZ3F6V#T`61IuP4T#6a-2yzyKbz}6iupBvP$%ZOUm^e z`M{5gJ1G~Z1>8zqEdwc#x28V(caq?~o)2lpxw7-_Ob%%x{tXu}4>Op0BOlIUVZB&q zi4Vk891d|i`Bt`*mA&=mx(~?6rA>{Hka4Ddz}5k8lDH5?o9q%BisWw5PQ|vSv(3{pf@Z_r;NM|7BxlX&TY-e1pCRE3^Rec&^qiXnt0M*-*dKy@#!t}(KR#lXJ)`SXY?heOSEL=Ovq))U(zlrtUN`VB*+Zu`0^!nabR?0%q2G2^m0=2 z2-DrblT1w?aXlnWID7PYPJq)`ybbXnX`b=0o=PT!(>5@Jl4Wi@rVAg+#L$@vzX4E* z5acjF{Iljo^UV*{U*!~j(8@Ftu&CU!6Pj8Rd`^#~T)>|Dmx!Fp@2GGW#?)vtGN{>F z_j>E{7)l4@RM4l)H)Qnl^CN?b)p()-BCg+?&Yp4JX0$Y9uIQJPF;bEBPBc3Pyt#uQpH<+ z#6-XB7NoaD9_}&w#bY7T!sB!XHaV4tC=U^p~q-?(HSKK(8uisup zYqQbV;fp{VAY!=#)@HD&C@DMws0eS!NKBvS*f=50*6i=7=5KYj+mL|;Xo$j6e4bog zu_w|20`<698cA5AKH60evx}G_zM%-aZ0xCLB%wnlCE>T_7SaDQMQvKn_$U9~t|dv$ zPU3V~jyf%d!ue`69IdFgwH*wrLtij)mQXEH-w5XnKR@G45@~)1(exBgimyaQK%jdq zU>arnJmaP1W$v3KpdM<)RULPeC5Y`c^mi!=r(G9p`u0N30gEV>CG$qzwvj3n(i|UY z4kPI$p_!f7D%F6D!RJpzP?*n@$cVn}5U?xnQ@U_Aryr5vDZOApeXNHG*s5lqzQSfqa)`F2@5I&sV}NlTT+bnjO;k=nboXH$ zivq&Sp4a+HsL_gyl7UV{=7~Z^Y`9xaQO&$~pw+=h;6^V?G0wf1QxQf13;D`a9k1BD zqcKHzC8JO3m#+`WBUcvV4(AIm{@%TPUP8itVtW6NIuSN_mSBkAIq6%4z9Bk@OkjYU znB1r?OqbLSvsgc{>t@Hb52$&vm}<@&4>r@)u>ZW zvn~f7YbsQu?@X?*;j5wRkXeO)FGa+lVnB$R3qo#S96iVdAa*@yk1c)du{Q$H-2Lp~ zXq$v}NWyrTZ>G}?p4Zunvdl2<;5o{G^HNqOwf!2!#9l-xIy zwHGRAJ}2O1_w(9m=PFGb@>bd1ec2oz?8LDSsB)F5L>Q^Z<(LMvg3l;@#Y=Qxu(FR@ z%a;xtQSJSB{lWQ5oQ^JvUy8+~uh{FbiFPZkD+?wLxN@Rdc+zyj9BShJ3f~G!4MF#{ zAU|vHtcuv#J;aVEXJSk0g)756reYMz3)eR+hCkii0Ad#LvVZ}*@ab*d#J?AtGfCKl z-T@*m;iGBRiFe_OhJBUjJ9leZ4tj*_(5((i701Oroqd10e|(;Pe|-J^v_i@ukjx6Z zru&n&PKTHW^I*z~02DTK1?Gnx9~9)?dxNYTzE}7lNnr_+IHe%dDM2dle)&$@wR3Ny z(bxpbRL;EeJ|kPISYFxalBm6ppy3hLPf6ym6k&^!Vys%5iX|X!oX4P{@NL7q5G_E1 zdnlSvzAV1joYXRq1{W-EDyA^^TX8p-G`>$bf7{!*%A{ai@ho^L(rR@ZS&=F4%Qk@g 
zVSyB!aYIAXu$iOR5ffe|W>vbfWSJ41i^=Tm;6SF`wpxqr!B~*+1V%hFTv0MO-Z@b6 zxnx(S4?`Ax8W>aI9Iw|GakA;lvmE;K$a>0V1i$w#MWbj*x*$+S9WuJ4nU>18J|a;v zO>&cSDN}B%H}mH|CgO*;Ps19sT8tO!eSsUpGet*fgf{$Fxu^z8rA_=~7c>wVmO0zZ zI*x36=TGr)N)lP+=|n>Yu|%s_*>rxSN83>jHs{3GEPYbt(VlOP&5|_EWVl3ymtAE7 zCp|+eK>&bV(*{*4@MtTpRMD7}0Qw3aWu)U~MS5qq@O$8aWBEgrMrFQ<$6BZvg^EgB612=#}ViRYoM3ni&54UCJ0x_J7#|R+Z#LRLo{AhFHV4gjM9EjlO?@tf2Z}$(se1jB_T472Sv!l(N zup8c04>~26xM-=JC+m&2HCHaBc00CY!yx;f%*hL-yzO02CQ%zr>8O(@tv)3_;xLd^ zWka~Gg|wGmsXnijg1KIG#cf@-LER&VE4?)I5VS9v74}O%Obep{fNNjJBa~cl54IQf zA))5tF-_3=9iv2Hl{1F;B5!nDY?<|ngh?&jx`KFnFBT!sCwC%SsQy_Q^3tQ_%n(## z>^SaB0$0N>vqTr`ou-<;5q}7YK8!CjmW3g~UNc%H@+Lqn{+~1kma!0igu&p9dUiRSc3KFzB$P$b)wM+Js+J485nE?`Lc4b z3FZ?T6dBK~XZbZqlAu9?c&i>m>XvZps7nWjIm3{KcHj&;aD$tL1vM$a6>yty6q%1* zDQckS@ELPG(t%j)!3f%y`CM^4^FOGfOWdT*#*AY+iwVtijuNOmuHjI=0>3KS7C~R- zz4y9^XLKI?GvlF>n>tSSQYU6kr1sc$OFVx?*_!F|!^6XGU%&qH6=t!;j&&k}RBaRd zhpARV<~+)?PxD0GW37;3-Rcl)?hY9j*MKe4=%U^p47WJgW-&1FSS`Rc&}`^9X{K4< zxE?BVUO&amOUi4#?c5EyV_4@AEc8E^i8xJ@G#d?++dI1$2^7paqQ5dTQ{_o@>H+6u z@k*YrSz3*-|0zsvj~-9t>JS(kSY%%<56^l$$sb-hnns0_aciX@*!$|VH&C)KYC z2I9jXN#lzQDvAU0+PhK2P!ya*%=GiK^`Bn}RAiEkGY<6m_XdPjrmYXQrjrIu$Fv;5@pg`p|fJFG#}^4Rtf&G?3Yer4q7Gk z;uCc=MWg6Wk@+t~v!0*cb`|gtqzp6Akr6Qo5F@9{h(HA%K7V~_){zFTqp(^(K0eaX z3?bs|LyhSxRx=_#oCyaUtUYuP(RX@hG#=so!6>!al?rGHbpxMtDU;<3qHeXAE^u+r z9=|_wO!1A&S9P&D(NeSZ$Wq}38J@Z-;rwg5-h<6;Bni?6PypDHX4ahjPoMwJdvE{w zncdl$k<=tm28W2Zkiu&BYV`E5*#rp!Rau#lk&$W*w$9VKE`-FZD`l!8vSitB! 
z&OMRpHEZ*hw)`x6j`|5pf55%aB#ahdR$_Ux1r!x%x9F;h%e38CypgKzIpyrbhfc38 zZ6C^r-w@pp{V927JFlexXF!<0Ipz{lMw<7GrKa_uC<#2^epx{}4?v4jw9jh*AeFpn z=g-^A1mg2!_aDzJlu9&|7pr26 z2v`1O!&D`vofJ=Y1$g=W ztCJ-FmKrjO-psw>6YM0HiOuTU^5Kz6J&}->k1=V+Fx9-Jh$?0WBsJ0A9%EN}bB;_; zr@#(}*&vRUy~>drY?0B+X;`}$QORHkFSal@1 z^bWsczN&yH*$fhYH&$UJ1n%TOy^s#%Fo1fpJcd9|UQ9DJzbk|zfS5K#byx&$;i8yP zH+y2l+eYbf81CtA^}et6FajOkmfsj$EOd<{?Be9Z?MgN%hoSjw)g;~_Z``ao>Ep+z0=>K*etH{3)FVytaql)#&l=C0%l&WLn%8j z7mZ8jaN+ne*<|+ptC$~;Psj3R8ERkGqZ|wzPUU+|GObsXY#x_`y=B>6uQS0`N>)+J z$KRZ+URC6uZDuZfW z0`H`gYCmOcCUq^H^vsg1uCpvw#Yu1HD@>)fGnBmA_wd-=II`{g>$I4!mr_4n=h$2H zc?9+EWOm(2rnlY0IL)hiOV0iZ(>$jDbW#^-@^-;0q72yF$FX{me4EP5escmvr#5_h z#UoZccVZ%8o-o#oMpnocZ5n37D+4l}UCu&;MVXs++zz~v=-l;lF@1qv2rLa3wRqp6 z)g@vkvB^1Qw94JE!;ja{SpSV$-yY@=2C_I?H|tY0wgbDl7fD1Wo8doqnh&WZ6#RNT z!t-}nXH>>3plC#<&C3;8X(H)vz;vjUOT@`cVNh-Z$A|IPYU>#zv2%ZhfGB>wqgwn6 z9Z)vCTxCfug|}poZ+vVOmG&V0eT!Jz3&edMo^1+plQ{^L#D@iASNw+;6MkO!1rOwRZn zm@>|kBl&MZWdJ{eYRdyzRH(fp1g=)FLGjLUZNuO?M{1F%!`1@k=`usr!B$93t=%jO zrk@PMt$HKA(aj81{u&2HAQFc#(q$!+k;)i0@fB*71I<-A1dD3#j>z(J)0-=0u00*0 zK?D*X(>jQ4O(x!4C5KAkQwv@~uvDDYqWV*^YmUU%9AkA|OHSfjeyuY@5mfC*)EaK(K@M@&Co>>{WSp^J_B$&&s)@GZb z1XMZC(?cmqfpLeEmX=;vs6vF6b3~=JBbm|T4z+UEX7q8|pbI#!hXKVRbp`KU#|Jkj zcfU64Q-xN}t@sG9q#g8NO?|rt*Y`53E=DCLf#@`btTpfu#??=Ru#R0T&r_iBj$)Dc zb9#vLh&9RNJnk)VZaox84dU4N9qy(ul;p!djE2j%x4N-5|}2Q{)za+4JXGKDPCveG+OjOD+zhf*s)D5MlX{_foqK z-aA)%>S+t{#LvoAnaOclye+HIcq)ez3eT_m8DTAHX)EypKCot_`gXp3+V?+cpPts! 
z`nX~C*E7C4>~(*1yS~%?L32wrUylckCPX&Bw}sUKIS#bMtowCjJgX z@LIfWyh)7j6%oh(?F(8+#M-I30${6(xcS>?h&`0;{}K@oL&GzXBMbgfVA9{X6eMMP zZ9_8vXDg5TnDtUu*E6WdsTRv&&8TM_)uH1=8bxqnoZI^LaMPap(ctX9Z;p!SRnhU* zWqy0C>E9PIzrY6E( zJQ$$yL9M)pwRUpXRjSQNb`fd9CG6ryLbuPFll{cCzkEFlJGRg7`ISYP(lwr((N9`~ z>!SSHJdJA`a@BdxhNI<$l;hFJ+&-o`uaEZTddkBv$ZVrOIF!^V`*8Ey9Xp675aoZ& zg8((DAZ@04xs!iO^CM!DG)RRk(mlbkgm&&YR~0GThAunfTj*C{EsW!l-_nZe~ZWJ$wl2RbiT6K~T)1zh-o9 zg9!2FOx^Hl@N0jJUA5*f(j73YG_&ab{xp*38JvUfbk6R5{qFsOz9GkR_n3gCTeY)x zOPs06@}4F*7TUe!G{HWvMcx`^1Mb>fNU9Q797jJTXCW0`IY#T4OlK8&0LvVMIeK&J z=JIIipZ=f!&uBzYwqj+E7jOEgT|irWkg-hjDCE{;NiHaBl1t7!c0JWbRVy*!*=W$-q#cu!a|caI~M=}1ms zOkoO+@7|&w$IXd|w;CP&ks+h@9~6gAw1wnP!)?U=dgb2>WWv8{k{IzMk4^c!e(F1?@X5M+k6S*x|Ay)1W&{@@JVxVY8Sq-DvG{D_loFRUG`6Z?Kqy(0Id;k1PdA-A8Alaa3)5lq` zmB5=`sfyTg?&+j$0~LDv^5qLWY3W_5JnU^zZ_khJy7lQSwm2W22B~ZGbbp}+=5Fpu zMR2px?xs|gILy*MTboM@|n7lBDa z&DZXy*-bEKhCnDaa8hCoKoN*%%#|)fhAOhpb+xM5qMKWrvTw^06Fr7+kfwtG%e@Y1 zs>z$`m|WN~yN{4hwNBG9(<)y#SW6CH$tU28X`nKFGcnDCK!cLu?^P+566d4acW_C z?%vALpNtFpef|s6e}U^)PJ8lCF3OCz%eWnWH-bF4??%hIc0XlMqC=el&4>Chd=lks z--Mz#rBLfWzJzE((U}DgbflO}i>2*8n7mPenfRbm>Z}0e$M|Yg8b2ldUNjZboc`JW z`~Qx^0SGFAvL&GqGB_5qsKVerJO|UYHSCUau~)qexeV!qn>I!&S1;X#X&Fm697h({ zcD(lOl{eLw<5X?u89kpz>!4|_y8!uztf|dsS0khaHD|^BbHeL4rx~X0-}&=&RaUWb zWNSIA0BDel9WBu53soWM=^L5#OrV>L*b6^$Ufpdl5_&Sh6RA}MU!qYGTg8uC8#W4a zUn(a)+W~$4vYmn0m%Y4O#^Q7yURJ(MP{lYWzvs7U&>wr+bX|4Xr# z>4`U;n(*an=jVFuerG$b+*Y&(ZSC4i4qxQM5w__R$GtoiSBS;hADHCE__m(yK+$^( ziLXNs8qRiKJQh^Am>iYXQGn6|rQ!Oe`jhm0{8byth3VY0KYTRzmw_AD?MpwlT^@~P z53kqQQ{#9qa{s%#58WryA-8gK-nPpTP}jm;y{sVz>Ut1%8ISr4qxoG-7e5&x@Wej? 
z3r2SXV_;VJXO7;~QQ7oeMuJ3V7$bO5TmTFySJAGPW~2UEO7l*`#=nV3+%vUQ=PBc) zj{Y)6y1AX~nXt|FRnJizB2aw#awG7H4;jyH4VxgCAxM=x5z)DWd-59V6ZE zkhch3%OUrsFU0_GfSFugC zth)_kMVa!j{jGcHi2p3kQF=VqPs;o$4xAs42_TPP%EOpT174s2$W@4a#H{sW@z?Lz z2vJp+Lo51Z`M?AP$I+uERuGD`9MjnAkbh$I(l+KAKQCcqe?zbpCbECTk_*d$J)%wx z5qJ|O{4Wp^+H$#`h=cqKZ>NVm>BpQkEVaN^`kNmpy-^<%YTp#=HQu3TE$;Q_*Z1`E zd{Yk}3wYExA2bhpNOvDTn)~&Aq)AbhA9o9MJqY8(m%5jLL>m_7LT}@7m5viGa%vg` zVbEWF9W*cFvFY<=-)pn=asD0xdg&vd6LD7a)#7$lkIBqG>n6 z5#Za&-LW>%+i8B*6Qgn0K%k%Rx0l+GLkYJ_4_!*4w6YxsZfLE6+L9YSLU^GS`${$A z+ficalCsi<13>*~2WE$HG}u6>v-3Ld zx?=uR7sa%3ZikW!Bb%GHj#wS@l#Zz;5H4fDnuAQ8$pbExH+otFLmZ)`qm6>VhWvUR z%Y$n+t3S8~)DmKxR!VH+Zlidw#{8bE(wQBv0%7TBRSkC1l6r^r#XW#~;4^iFuL3c&T|Y^BPpyu+2k~43({>35k!#b^dJ8DKS4rlo&|#|i|=D}6ML>fqVisWMQj&I=#=4B zo}WF0Xpoz)&QhBiGfeK5p=5*nqq4uJY1~h2L>(}m(?yHN87Q^RCeb@oQVjAgRp}I! zBbE?L<~lF>GIDxoD`<@PBtAdn7A22+Q|X~b7IJa?K5KjmUx81v3*{i2g^1F0Q?8ZLEhdH8zOU+AsCd9=(vVtLf z&sA~4Jb+BuDlhZ0^MPMPW(FWs+>p8#1rfx?G6?!}@H7t6@la z1Fhj<$-5{$+E7=rfBg^s9^VI@W2y7^aDJj`S-9NuYzd%4_1p*vMHA0O8isc5(K0+_vh57RMm@;b~mG9Aty|sxJ0=`{9!$e#CfI z675aU&qe$R23PNjiS3Xkvv3C~ttWk1lE9J2 zG4=uE0wb)rAx01;Re-+jlg!JdJDRunjuO54=l}oz;}y!gA!h4VJcgoOq>6ajo|fsb zQp))+xCvp-v9+sv02wGL7#H*s_( zp0fFlI>D?*hI;CLj?9nz{=RQ|B&WkbbS>VlzP^|DXZ`Z3r}wj$K3l69dB=g5{C(HD zk&0?_Xt}VWS*PiTPal8#GkvyXaBbE68t=f=N0fK@e~t8*&&RBEEYU8ftFo z=wcp9ivUU&{bL(}^5->~4+IM<(=2{&wOoDIyD{%vOZadoZ+_#_a?(#g` zreB-$R_Hr2)Y0c_Awu#isCvoOvZ34I!Cge(j9+ceJ3D|h#8XPF zkdfri5JbN$5VQR5^yWl?nSSy(FGqtGtvm63lrAIyIs0FuH5ncq#41`=Tv`qX^pU z)vwQOlX^V>O=X>LyMnJ$r`KM#+_Hh!(hgI0I$@g2OS8ip{@NEIVF;xUDs`n})HFrw zj+nY64X=qcIxfND+h{t`Fz9pVec$g4TDo-MxIo<&<+^65vE4uNke^sq3Tcki-=UN} zT1S~@J3Ef+Xu^en(jcOrXD88+2i^8G`@5^gP{B@{7Kr8828#}1-XeKCfrRu;9tDi) zCTRifaZ33Acc+FW*TLh)dEBRiB(Z7S-T;p0Zor1bym3hVyy(t7C{*ZC1a9`{{QxZKRZ#w|#$|ICHhlt=o<>GdU$c zA9M2<)e&bqhit+)Y+4ojIY9OA``N6mz6^7k*(boTSN*7C_FPVrRej}7Pv=zc zJZXme{xwObR^o?`C+Ad>k*k(Jmo~4%c+yQOmJah*7 zXqsVtNL7Fxz|PANQOv=@taco9l6%G^Wj8rc8BHDEC9l$6+6IPv)l5b-e&N|V)$io8 
zrWgNHX|LQ?xo`D#>8$9r3&yrpN~xEeMAnCCmLe3YTV0aWT`x{!F8 z|K(PgexYSVc}KWKy$bJ`!uWBX!0jbm75hrH4Y=RB)XMr2KG&iW9KIFj?963mO?lVc zaV|o4RS~hzsI<;R>$NPD^6QRRoPv1^3$#>anXS`y*gW0gIoUfoTY1=rW?rK2&ODnj ze17C(eWXuAF7wb1;UQh;c08NFNUHSxtAGo~ZM?0O9`yBd&Zf^Aboq%f7dB2j3Z9)uao*%#c@$tTGayDpp+1jZLeF&|*MT-V5$+#k=j9uIlq%I_!bC zGS2nw5VGEd)p8N%X#nY6*Zl*isKRF5ACMM}Dgpi4GHwUqbq^;j+kXBe#jMHXU~*dV zXw#AOJ?-!>tVHtAah^RA#7B@k#yfZp(?Xv3vh@*@`Rr;;c+2q+6PO zv(yzC$p`ToT3Y+~=|)rRfLyRiK-~2?J~jm1z*B&J!+i*($*Vd+Ey0)4ft=n2rNIDV z*iPw-c0bXrt)kH&i~(aneXrs|NAwhaTr^No)!4o2|IhW90+}lQ$h2syHanP0NjKI- z%{n~)n?ns${ng?zcLZ*JP~_aEhndi~5FRg+`I#-Va{?O8g^H(!SMTm}u?6S?fD6wX zm=8Ru2x<9{&U35P=}M-0mbXJ?Q^b~`eDa-U2u2IHq+fOI(rT&g*N&4A*{QZ2qZ@E>|m+LX7>4SC4=8q2%EtLb< z(z?ho@qC75sHC}m%EFN#+cPy7#pm6&a%^?L)2q!FxHg<#Xc2uTc_e!@Yzud=LzSqG zxqz*i)}*)KUZ>d7;f2c7Mpqy80;Vq|t<$a5Rq?Q*OzFmWYEPr{i}p?X^}W@<%sKK0+icouj9O zCk0)Q>#s-7F6vLS-akl^X)@ed4RY6By$q8yEKdAn5ETaMr@s{J&NLvCi(cY2G(Tr6 z0eL~C)`S9xQtR~`viQpbGzR{wf?1mi>p5f#POb_djss1`U>%7EJ5K)A3RG0cY5p!~ zbS?Gt4{~nwGO6As*}sGDW4=vR@r5c;f${5zIbPi68T}`@Kvi)k)yyR~yWqp@w74b2 zMk}gGt^#j3GKB!o&oh$r?A*5PtfkoVhJF9A)bufR zeT2a~zpW=9Z+WaVw^+^}-Bp5zM^5Bz5>KPmS&KXZb$oqw~Rwzi+Qr_tA|Z z7R90Ywq_G~MRPQ`5w`{9vh3OMPgf{n=L(4bB? 
zn+!->j+yfL5dF|md)uZoBX#a6u3@|O0yQ6tJ#_hYQ&kJ>2W{z(J72kV^@c-r{|X+_ zYCMt#FJs)Bpz<>e!C39Pp?9&QkP5+#a#^mo`qfX~l1)$w?_g7K2t5I+wW~?aYM`-o z&(Pn2wjM#-fr`^}?^?b|GLf=XJ(b+`utiTqb$wSMVm`@o#R6VKx|ROAv9+>S*oKzV zu-Km)=EvHy)nJoRc&Tv~j+EPB29RA&SvwcR@6{!xVi=n)#iCqSb@V<3PpdoY2mff_ zcO|&~&bOEKVmhD{yF+wX2m?S= zn=mL$1)FD0d6x5gjE=nfx>fnD*KLZQ)uV_+hRRK!Vqw=VF80XV9bwK& z#?R6p8B>o!!!zaUQ{@*W!&DF0A>zuE;QJtLDx>4bah#_!1Ve-Aa8vx~0bf1nbB`eO zO`Y~g|M}nkgA3dwjBfKX%u2kpd2MO7a?Q+1Ge;GPoX^aUw5$v33e&?!;`xLYk7>Tc zNnVy&@%09*BAZ(e8f(zWQ&diH)8+9tTa5iIEVX{fi9J7HTb>cvX@DrU0DUCDrh`y^ zzL)Ln>Gk=vJwKf-#J;@$q-KL(fBIGW((j@Wem-UBSxd?Hc~%>r>4Ti9I?3s z(lqrLiY4U%jr3DCD8Dwz{d|2D@qPBT<+@k9N`|gkSzM6tfCB3QV`4gD6c{~?%@&j< zwv%x>kZyH=OHog>&^th~nVg^@x)UmjwKX-mOd>qD725 zS2cJy7&6Y`o6lHuw&*kw*alh^jw;{FRwKL^D*O5vZRJSn$7Jj5rjl)kDe#=39g^ci z{ta4bgE1EnynWuV*1==S`_2xH4I-Y;=cpxgFCZ%8z%a5jUM>7)Wq*9<0@y7gylu1w5CBsf&HwCCpYYw zJbjs_ljK{^RCe?R^F^Z)$g zAHO{FFK0o-USH>5x7{}z<;JXK-Rj^LgN{}PyL5+1*;9L!ekuG+^|iN0||Xir@4yLYGaIsmFUyY6uzwj#SRRv9wcSmdl6QA&GYIDaah zwZSb@6yQl`utdr+IZ}R=MgxOMx@=y3|#nAsTMPZ5S-n_ zXz2;@0E}s@G`hKY(vv7CbC=;cwL*UIu5f~f%FFm*=UtqQc1!eRm4h3L&BhDT!4#dj zjgJT4J@gOw_3GC;jKKLK3A#d4Z^IPz2tMKYu&8>0!_j=PWdNUECbtoeXcH+^46}N} z`@M>7kR?553vQ}1;$kX|wn6idUZinP59l98ZLM><+F=ZuLh8fyY!u>=lhyOi^4U-f zSJic>Ug^=K0b_D?@r1e+%aal7Qq&~XQJK0&*H<+QryK0)D-8?5^Le0bmNSCqzmtz} zX7@>Z@hPz8%-Y)9w%6^sp8?jFKmGL6`R}K*0qYC&Kc9pfusJCz&Hm%0$Yzw%Zj^dm0S(HYyty*;VXMFT0PKtz7gh;VY@OS1kK9*@36<+bhBc*6V4|pU{f$Z;S!aMsjx#GNU1UPqD z29$js>q-p+andx!M$*An*xLFQ=6N{!y%RU|`jr&v380ES2a`msuZL z?9?P?)x69t3fBFVF-)vSyElF&Ax8k;k23SFi;oQ;;eFVG^Cz(MN%xgB1p=tZ<-Wd! 
zV~ux1Qjukz0I`BB)NpT1oH363K=JK!i>;jC+ z5*=eBUzRCKprwLZfmM2;^;McjZkC?DX~u>7o0Ko)S%Sy4MMStvPqAo{y&fOAOE#VF+#HT z8?D*-4n%u`CoNRSNSrypS=xT8Ami{z2=;!h__@a8Wr_fdBx0|hlfCOPmz|x2;Q(hJ zTy(07tS-_T(p6N=yXq~h0Pn-wfhTrknV$Okx}WIIFMs;^Pk;R5pZ@gIFTeixKb)-$ zzkK=iOW8ABJ4Po^)Wgs16X$VSXQ4fhls+sXKqPx~Y=8nGY<-KlaX9x1cg(Hd++*$i zxj*-^l~&ohys@+c2T&tq6MUSGq+eCUN1-7ZyjGlTC$ALn&a^8|$GVuQ^iWLt9T(ZF zS8g2*XyD5e0RdeXhMyt&Lrj*79}|5^Gt1*%Y*DinR9?@zy~N-A`t^Kl{PZP!w=#W4 z6h~_-v87@GVfP~hL2%Ul3@>zj0BB3E?W7nGSHD3A=pr7SjTK4pgRYQ9>CS(_12lUu z>EARb5TalSJ1!)hjE>`SfOj0xVZWmo26x^gQhmPK%c5OlN^40Rs- zU+yY+%}pxEqAqaIz54G}ducKFih;=c;r7NlxAPy=kk)2npj7kC^BmXwitIX~N5LJx zOv}udy?Se+yMNWA;13bw=zV3=0~+sNc5M0i(&15ACci`rK&;%OxveQ3RvNv1j)U!x zb3f zI^mz6e){DPfBO8#KYjW2*Dq?uR;t5P?anfL3NepGr*eoPf!zC~3P#?0vY%$!#Z*I$ zh7_?M%MX$tu)ntRL4Ds!u{RNVz}<^6j>_he?O^i?2=+$^xuQ!`ubO-;kpncKjY&bf zOueG!5qvF$ndpfjb?DE=q|0~J2Ecxxh9nH4o-49rWZWuP#x-DAnn;yxfOa!CUshk! zlJJXrQkHN7;6SIu#BH6$aFyldfL*8SDCfwIyEH}3l}f01Ho%t{J%aO~h=(QTxRTaJ zOLBD+nabe`Au*}k74~V1h2&PX{c+}9nGie(S+~wV=>)E#lPQi%Okt2eM(EPVu4G3u?hgLDDu7qk||eWKI|Z zOah;wod`ZwDKp`6WW5&%S<>hEw>}QyrJhebR|(f+VIj zuVE0H{3T!TmMCF-W2S?ZOi3Dvi*zrXFQyH>DW2Y}_;t}V5T#_0pTTMw^pAbOE|St{ zqS%f~5{2}^o_C$x(aBQt!VvI$j;O-~me*9;-plqXuhSJd#os@l-6zhrM1T6_^Ut5( zfBN~$Sx@inj+!Bxm^4{_Fs+KYrpMw2$abC|YfpU6RstHx>#6bP#QZNug%5(1PnC{p z{ra9?1A(`oG%_0ZMgBd zzX`B8XUAJm?dh2`+5+}uD?W@f-%~M{*w#2v@FW8?0|=-M@_2FC(j|{0%!G)~ek+^e z=>Z7?`o5{Yhn*QW&x{Pt1vR^gv%MHI(uECUjXV~Wq^6E*PSGQjOmM#$vuQj(D5tLC zuw05ru=v@uw3sOX_fmRX3Deq*MEBCGYQ_Uutmr`Ca{Q-hcl5<(FTd zC-sNTg-@N1gVfoQk_^zSiK@pB>ux0d+Gj9j2J3Q6lVkU_7zk}FZ`CPOYgBG~7Uynv z|9vvS6!B8!Ij2Y9r8ExKS(i%j3JZo8j2`qhxNYqz8tP@FvaQa&P|cozndlde^YuEPVMu-B%M zF!bS(x+1<~+r#=$cM?UZTs_ccP^4}{+{eu>6yH?2^S**h5#&O1?GZk27d>U)UV0iHpLC;piapH?I#VvLlCj%mWMbP? 
zXaasD42Y@uou>aKVSV%fK1y=P5~qnSHE@pR8OS3ND1BoABSTfs(X|vFq9?ilYIOwr zkxUd|673TaV=YCLr$wtyO)q9kAYSZPhv@sute^M18Kl_r^Y!{7uFw9y)#s*^c}Pdm zMQ0JeW%~G-L6cSm(sVA7ebhyF#(NTx0X?o{lS1hrbE1g~WYx4%hqLxQ7t;u^)ToI2 z=M;$*4=R}NT;#j7C|926;}TIf<4s*$fj7yszReys#J5RF!uvL&EQmKJ2OI^xd|WAB z;U1VFxw%ASdjT-n)T%9$ca{O|3id9&P-WkY&j`NI6=m3LN&UhwCJk3)w$an+Stgmb zr#rPKGo(7|(aqM8N8X3R{nkSe*wuu4F_?R-8A`hE3_wIG?yX#KEt%o~tHD>e%Qn*` zt!tbvv+)u76kYPHeQxvVnm=+8xU41?)Em7p=-heV%KozT$tPpBWEot05QJ%Hs@XGf zuI4U$(K88s0w6)BcaBn98*Q&sih*Mv0_2|hnE-oFz0jw^dkQEHg5nWn!JhfWY&at0 zloE~`7o4)gB2>M^c@o4BZs)vTO{Iku0#T>`^l$(EXmfsVGI<~VXa={l8&9n7WIOU5 zY*}iCS|0wCe-=k!_DfxB2cSQ@f!7DIdlF72qSPIKv`LcHP~f!ip(w)Vo_8lTBNJ7JK1Dg9EjOvcxmdi z=W~01<)=Q=i^7BWv9kY1!WBpCrt@%zRR3yo2xhn}^{i7{mho+sJ$}5o@0m|N9D(V$ z7X~5h(JSWT&fXVlctBu7#g4m9JC{z(-OKY`RV>J)!MsB|4GQVuYNv#hU7}XDlY>~n zC?8!FSe_Uq(>6{s)>IVWIj8lOq=*UYyjL?AO|sW|r6!^qYCjcL^}e>_>PYI|!j~53_#cn+{pU9l z`>QC%Y4Har)uvz%{r$<#(g4wB94&XEQ9PM8SA{)aiWHwOM`7J_A=}zF8!{}>MlE&U zO(T+y!wE$V))C8?NK`|X?du<5F@>VQ;R_5KSjShm> z0=_yy_TMzD52vgEo(8l-AoAP=Vh_a_j&712VhGwGkN!A#Hwc47<51RvK3z`&J)o(u z%Ha>ciG_O(61Nad;n!)yhqBlzUfEH|5ZtjL_iLy9mxt$`-|E`sL;DS(NY;E98Q|Hw zxLw2t9i8((j%WwgXR%!aH<6d@<^@W;vLL%_x$Op+$fh>pO+F&UJ-tdhIHWnnfeQwx z44{pdhxb9j9S$J3nQ~m85()dhwApga%(^h|Q1b7xueNTE`mSu(5m$tE_(fHg8+1f& z)xBk_8Z_H4*^72HoTD zBUYIdm?Fw!*t61hx#s2q2W=o-K&Rrh+JDoOFpNF*KFAh|m}rS$3iZX|T8e9D;_{*} zOk!Sk6>H08ejMvONB6!t1$&Tui_VB?7_6&xS!5+259dk?zSdxjmbx3hAn{jt$lP>V zEj3T0r2KtFBPHi@)g>)Ml2c(XTbuUzadlROT6@&Vu>!QFH!bt^67dgGK`P;(}j)1GH&+V9e5t4YL()Dq+^!V+^AAk45Z+~~z$AA3I4x~DLwK2FNp=jiWJK*#zG!r!QZPR719Rq53 zElrS&`H!yyX$XPj(0r!IpeI@VG6p*Ej9WLc!T~)5_K4Or(S3`J6VMur=5VBqQy(%z zexF?*1Z8)$3LtQ76l8TbcQnqOK`bzQphBd#@oJH0i@p(#iF%54QBRclF1#m5+lKF^ z)jhD*-vck{-9zHb!|~WM=75y72u%8U#EE*Dx4bJ;p**pV2~-g^ig7`bA=^2kEo%*xj0{9&K4xOkyeZfBw%us&n6`56t=J_WJzg>o5*q zpI_5|HW6{U^@ROC!P$jlW=y#6$%N{XT_>+u-{Bs6S{$>wHwMWI>)W8TtO0`)={#s- z9T#%ZY2Xy_2v(QCe|U2#{Q3Z2rG$u^H_&>#lP`o7jrvW88pg4uaA!dpIr~p44*}Ai 
zZEyo?hwVn|t_>AvZoR{+#pS%_CAg1`7dBkOOibU7?4-aQG6;{uhz@D>W43`F4D{ei zay5AI#UyI_4kYCSLW{MuD03&DZt)HYFiz3Q3+-;yr=DMk@F7kukIbQDt%CbwwiP9#v0dT46$GjLOoj7-+<>p+5D?dr&pflI3bLhtsGPwDfBSl z?uYh8i+ZT2E}ll*h8v!JCnD3B<@Ynn%fUxtgVQ;O)=pJidAK?ItU z0E0CwxscG<_U*8AvO9N{sy;DWAz3_#>v#TomWJ=Or2q17|4~eo@BH&vJeSd7XPsCS zVYH{~@paW$QHk^Qu=k;KzBHnblRr z0NQSH>COD_06mw*>|=cqo1QM3jOe>&yDuuOm)OFLE3v=_XE67d9e?_5(L}&w77yI6_7^>_~KtH$+^q#TUof>7~3+EldtU;q6- zzJosB`93_3(h7LiIEQ4}F*lP(uY{xkML@d0>lsyZQ(s~5cd*U#V?{^?3vp!=zVEkx zJ`s6<;fnJ4Y*v}z&btngJrg0;!26d&i`*N#U+hi@UIae^gp9#!Y;b{o#<&7xWLU#& zf>pyM_SrQVuY7g(90>Q=?X^T*i+g1+_}-Cl?Avohqe(yex|)c`yXwAxQu@l%PcbM( zljlL}dgkac--Ru4e^^d9#+IW!N;F4BY+rdjSsM8&@4Hy(n@LYk2lp~P%D9=sIpd4z zKKt|p0LgV1|8;6k?ndQ#aUHgik%C)7?7+z@S#j%W7u|jg%FI_nEe!jgR}LX2&+sK+ ze~x|V8*4Fy2TM_o>=IqJEv{B4U=m$C=2#9s!|2N%Yx`F7^^MZSuP!6odrx}r*Vu(< zKVNe%ZDr+$y@O#i*dx}n#i(2TEi~w78Cj( zeV2N_TcTSm>c@t`uZ@Soi$rJ^HUvrtS$T*zc&*@AKw3Ql^HJF2yXm>rqxI^99!6Ml%lu1Nq{dzwRFB2g~kEql5$u_a{16YtJ8*N_Sb^;N6JY zH`YUex25c~OLr;YyaI?Dy(Kc6T=7L4TeM@o+34FI$tT1DX4C+2jvMd~#zbD+&xRTi zG(4CiVbL0&Xx-Re17LH&A(%y|;DwSCq-9M^cs&n&Bq~dz zA-D$kx2||ho*2Ay=uGueKH33IEF2=rLM)3JC0KMte$S2%=D)%az??xX?@Xdw+cNre zx<}py39-6N5^4dj#RF{$Xyo|sW&irFeNBSwa;x`lfa3e#ANS40fH(%nxoU;qOzP5ibDH`ajkcvLV)0xT|DCk}8EtqMYIH`(9`I zK0^2M+M{-nNg%d{qSgWQC+gH~K+|%W=3d%!FO{-~yXVuizqT`^eZ7po5qg*rOF0GN zN_Z}jat=;Iv8;XY$l2|edFi@zO<9t`a+ zHLZA4skD^1R4Dzd>*KgGl&$s|Rjr~ew2Kyxt#^PTTg(KqrIT2n*6OysJXLmBZE9DlLo|BvH(JZp`_ORkj7RrFq6cm~OYtp-v#@Q* zCC|F=RgsHV-MxQtGC-52f!D{C%*BA8|M@H23I#@m3hCm-(ZH8@f6IGHJ>FUp07^Gx zChQKg>8gO-)aJy=$3|+byxt+93r|V97`gYUer`Ko-8V%ayBfgz`1Y77qZ434+ahAl zc}!N`Ois|y&EOCgkTXtb@eonoSD3uDVpYI)LyUcPCGS}M>pMqBCWxJepN1Nn0oJI8 z!YGqRo9VQ7K&*h6hyN9Y7hd@wu`UpCpi6k$KxDj?vf9=E##L$faO|4$6r@*+Ols{_ z4%FgW64z)W&RQ^%dR?vurn8%J3RxL&S*PCMKkA&*G{f!0e~jA~0U+uzCOzapZ#|o& z6~iaqse@)xLO5bwK))EtkeFPXnEm0PxTHNIhIS^x_Pk>llxu%&AiN|}OD(4y>Aq)T zh}WKd#pON81G;m6cpPrX-M(Hnq5;B_#)8gZo5X5GX-5Mq)(HsYszA`()%QuqxNp& z#VL}AO>AFLT#f>2RELp>cITfhzji|VUSymgU>K4OoXcHDR;QN2f1 
z?#m=3mPEe=9VJd+_7XkTL@j7Toz*s%1v=2iFUAGV%lixK6YYA68)WBFYvy{t9`LxmdA*m$HXJEF!i6046j8Q!8NeR9l2FiV*Qe}$^%+tEL@#$FqSfb9kQcx6O1fc>~-7@$@&`}^Syl26wq zDwQ{hh>s5(dWVv*GO?Xp4G3?*_Wfta_4MYw__fuWe_)D#qj=RmQyjv8c?Z)vTDFHH zfA=R?zZQ<^fn+5{b8=B}0L5HgiX69#0b#wRB((JG1Gew48jWY&fdc57@2*H~-)Ngm zEAwh5SNc!?=I=)k#3|l~pV!K>L5eV8APw&$dh_0cA^=cUvq--qG4lRQP4+9wm+_?H z@{nA|^n^PWwNdn>IIuo>(0(}Xn3JygRhupGKJFFo z3e3FN*h$i4fy|a+;$PjO51ofjXpWV@mVQa`=?3s|%ti1*iiG=@LFvZj0-Sl(keRzd zlMJ@J8$IW$WxwU)Cn~?%t#d3T9o|YEeekvvb7E|QoMA;KL!|%r0C!t;qPs=W zLw8~D67Eas?oD$#F}){`sEp~I#ds?Rn~(+oyt*OtAZ>rIDD?Z#5%8M=)MXXf=KUH5 zx%L(#dU~_P;w>y=XX@YQ)!@We7F@9CcAtDZb)l&W+v&dRg0CrV4SMQIM>zYoo6->; z(B1vHR|6a1Y=`Tg**bvYun{v%2i!Zqe&I{TH@t)Y@t|YU9Mc;-k(uh01o3lCitp4&9s;~4_t5S+Dy-G%5mpxL>J=?F! zRsX8pALz?$Iz5QDdCHwFqT@pRA_qp4wZG63vsekD{Lpak?jmv!B9 zc6qc2Ln07X57#jB_3O-ED=(~q(sv}oik9dupks*6PhaE1-Wf;yKT^g%< zmU-joUD&XYWb3B0qF5FLZ59n>#bP6IBJ2eWho3)sSiD6&ZhQD2f=+H%TTIpfkRWzQ+ zpWflZz7Fpw$JIx_RbMhrj3a$`T313$t}$9{A-XNS#U$L6``*Vyxjy(NCB8*MD8McB7(#|G@n_mwC*g&-@hL~LkaW4MDHB(935i#sRkCcfB%kAB3rwD~m-cb%2 zZqJ4+{Xg`8To?v{+w~U3=pFh*4ohkC;wWi#|B_W@FRleQ--6gG+#l};Ih4#$TD+p{ z;qC1s+PmmolWw5LP0br=$_r-&=9C^ehFIrCDuc0SU*2eg)1p5;&J>XI9ay-zRHPt; z4bm>5`{s^?I47bty2wIN1(^=FZlIrnN37Kldy3*a8*fRhBJ0I6gNMl%bdFM<(cBs1;E@OrsbTk0}a`-%!PlUuH>b- zKWVQu&eQsMG*xJ8-5ehBc8|6$>+`jp|A|vt$}gXP<-#_jt|%h*JnnZ{@OaE!WM|=- zUOMc-A_R9wISAS_(XN>n{q+Gh#qJbu&bziNGa4)`p0|M`%r4^PW?2#kT)E^e=NGo# zG1uNHcGmg?O*I`;#t53Yq*QnHj6DB6xB3$0R~3$V*X(_%+^OiV>Z1W{RD0Y|Yu9${ zl{#UyJt$hTk#%r1Ec5hOm+4>)A0rI|5bE*Q!rYQu1cr?475aNQ$CY#<*(2?f1xY2EN`=EqBD!l(Oz$BrpPAJaL0`x!syLX zY+<8NjBY=j>Os37Z*PtG>1taGV`yYIz4NCHCv|-*0+t(k09MkUK`9YW2h0W91caqP z_wQaj!>36*Fh9QBVTaxV4R{~$W>aY?*Mgw~si)hK&?kda+Wjewdp%GGJ-DsKphWO1 z1I9-DkVu8Y*$hgq!6PqpDDdg<6-w3}!l^;~{A@moJ=yavCqtf5et*7<>W?Q}eHy7^ z(|JCB3X=W*oLK}-cTyrhqh|nLD^i0VcgK=raepVHr!`byK?X5)_l!U=5bu9ry$WWNUlcOB1+@{ ziJ&8*#fz0!hgE&bt2=R&I#}jYx!XKl|qFEH&j0p5ok3eWOrrWuZ2u?Zrqo z?NG*Vv7nOGy3c>BNI@DvInt0zDFNuRw+DHz)Eq79eaIT6GXrNn_37svW^Tbb_5e4H 
zRh>vw+2ZuBUYd3S`5Lty{z+#Js5)Rh<|2x-8mKMg_}PnllKpE@04ylGxFEZiD2AzvFq2kMF0_pa>SwOywe9g%+rFzF)st=CGsGyF zojspg6h@2W4{R@W#_5WP-|5odogkiTqCw9Xm%}Q;*QOMHY|mcZo!Rgb{e|IORBo-G zA&BLuKKM>|mc;G-RfNrS^aIkxim*VuxU~kk>bAWfN82{FE|veMfB76QJtY~5m)KD4 z^(yZ#6YlJ|@;zlt#6I&dz*iX^4v zGW40eM}0ij$wXedp`--6HCaGTr02H}GC=dg7}R5x(UR-)06mo>HfTu{#a{&mPLMwr zPFhDiAdZVdrsp0~&h%zO8?w6!b%+_cX3B{qlZ>vAYQoOYHXoG{#Feo`)T6Ri@!z|v z$hGR-87p=w2!W*1{*3SH22DHz@>I39UsS&1??>cy?|9)oAfgIaEnKCD+E9Bu5suz~ zOj32FIoU&C^1zi!Z+i_hzS?z~gBe7wGfHiHHl`}Obx(%{b>Nc6EM!0Nr-ThM6s?yw z9Q_35R9f4t*o%8hMs{~<**D>M?&1V}`6|cfd~m8z4BGI@uV21yTTcJsU&#aNpa$cn z#M_3;qu;N_y%gNH8yce0t5T{x9Fu_Z zmF=o-RMhKq>yjR8 zdV9=gwedUy!Hnb(L96cVF~Sf<^GKF-u%!WBdRj^Y9Tk4n+pe;C>mpqxbt*ak@vCje zNVl@Z0+!=2Ez1X*FYWnRzl{>5`EH&`b>L&R9a1bd>sa`BzA0VG{$AOO4e|r8YJEtb zio|8CkZNIR=%Zo3FS}s6L<@O|Mq^&Mj=>u~eDZGb)9arx+5ijn#_$BV(S*!7S?A~u z<=*vnqisFL=*}}7lO96?=o*Xy1ZVbWXeAdc6$ylb1$69JFRg_c3wVnm^sdDtzqWZ% zav<`C;mcHV#4uXO=X!3S1-5&z7)iDYd-BeW0V2cDZmPhd(aS!H`uodiaz4M$-g6FH z-d}(HJjch|%`Q%$jQ8wdSZ;6$$z7rS3xRT)xU;5^&bZ2VS}MN=#&r3&WjxZg`M0|g zBcI5g;AXxKR;eE094>a&MXAWel@{5~5mn8-tjYF9K^T=PMp$(VhXGGA3+O3SQ27V? 
zgjCuz>IC5IUPgr4GCD&*1W!!0=GdgvG~{g7&ukJ-SuV}LwHH@QRAuXH0AINi!!KPgHEV=3pHl&=BetTyMzMJ;yZhOr5GuH0Qm41;Di7&+!U{ zgELfvNuxwR!W*7&u)O&eof80U#fHfd-|TBrho@Sv98H$D>SJ=dZ8bx-RR_KmU^dNB-f5AC94&x`4m=&5!fo5mWhAAsNnjI6bkQ zHgm3VO~0vC3?tWc;;6g)FjZ05QG=;^3R&lkgq^$jc;(EB8emr$Ysh52u z<>9{R-cJ$CUbP?)kwLuM9ViV!sKKgci<=tDX%YZb16@j_uSTjRjxaz7t+p0mv`+C?}@ z{M@?U^ljZ>dOVL)zP>n%pY86R znt812zVDg`Y)UgkWV*ANfk^#V!bPH?LQne>m$3dZ1O}34IxT`L-y5Q=>x#*SDA*CG zb}UQ1yR_4lYv<==E5Hg5you5T61!0E@$*49DUn~-M>ExPSuWR4CR=NgzAp#yzhjDh zf4b-X+`rD8yzN;p#p6!(2#!^d9)ldqb)c5`wY?Va|DKjTFo6w=TRK$9MK1H>a2HEL zLY8P}9o-wOr*)KI2f-J)5?^Lj|Crs|9?Ozd>vvMaKK%AKzpHin`PW~YnyoL7^|7nRtou_vR)NrW z4m2BUnB;o~vG+x!$eS#Cu%SKtV2;*a^L6}7-n|qgK;GDU8H{;rS|tCSS$66>nxjx7?C;xrfFU0 z$3u+`^*jQa2y?8>lw0kXyb=U+(lo$<;_K4EJnU}R@n9|L8OW@R=~xyyNFx5M z>yMvK5GH;6@K}w*$UrW=@`I8n`)UPm08fG-`ofs-Kzq#upBn|MVP$0T)Q{yi03wJ!S7aX|2n)NP{#RRP zx+OWTBWZDy7ETIvkb>>e-(XBJ@|)|r zO+mv>Vlf^`~6wMtp44=hWPkCjyX;cWP^}|MUdZvQmbrSRQ z&6R&FX>L4*lnaRom`YG6G4V=wlqgyTIiHUV(EtDczmDTlTGTX)k@-ILsrD)rQgb_A z>w5P)yP70+hYtwQq158Nlji)6JPi@qlVJtvd(u_C*}m_wY1*KsjXiWvv1=GfcgZs; z2LEkDCU8&X659~ZItzu7TPfW(+xzRa92YNp~@xf_n=5qI;0rA(b^IoP= z3ljP>UZ3>CvcVli8E$BPbY3uUCkZ~ruOO`JUA|I~hkTW8Peot*%vd=^`q=qb3pb7= zmAw0G#N>02g$;zn?LwYRX%PE&*Q8pjxa?cpivC!Fe4-I)(%PZ51$+b}y->M&8OY@? 
zsH-YDVV4&`7T;q8n~-(gra*cuC7TdMzhJFZV2`W+#0$;yfqwWl|^>Dsri;M(is>Yh1qao zUW&z+t`88jl!j;Fg?04ID!I_a88Ld6j|0HqLB@p{vEl9ZbX;UCTqI3R=pDcoTPUG0 zgK-5zG$JziA@-smdRlmwnlon4wryIrmrms*{u2O;p(a%QG<)B3NY?WAT#{N^d1bhC zHjY%Xsc@`WrwuOtc^>ui`RnfYzx((9MmkYT5jo6+L~4iT64Z$4k3WAH6WY5Bd6FHa zR5s(gRsGVTWh&s^$Q!@E93YW z#zv8MS~IMjarMKEilWRA4cW=0fgi1`XU+S*E|Rzxc9Yd1)?TS(q>3b4r}l_Bnh~5G z61OeA)+|Bj@U^A)p6#O6V8Pc~;wPaXj`4gn$*KG&-bzdvkz=noV+NXX$L`lW z{YQXEIgB^IqSbyvPySw9YW@EGm+|k)nt%NGS{A_< zC7RQrURBiS@)o84{onp$%<0(y)u(<~7&(ggnf}%~tn-kO=G6xeMmz;k{E-0AS-SJn zDWzImN3!#+*T}JQF#G*qsBkV#T62?FV~1izshMXlF|^{wK}nnwsVC~W;PPy%c^5?` zotrK1xNT-@b{lz9fKy78)L*o()J;RKPMiX@W5%F1xMUxhk+QC0(+9;znJ-gG+xta) zC2JK@^3rj;q?`;g4B_6_Z2uWI?^a>QVjbeM-Ne3^Qp}uX8_;&_gzLJtcBhh1t2ad$ zRa#I+;4bI04Yc{5F{{bOxCOXKB10GMrA&p>n{4D%H5`K3LS)D~d6_W=$exeXwOLpF zHsiv(ZRDbuQF(C9EcSo#uqIy88_NdUI#0{nn&^D$nved9x;gM1ACCMaAA;swaRLkO zmp!e|q%Q*47Y}V(9hkS)d=~}8{TIlAmv;X3H)zJ%?K8=XVM&muse<83JRs04VJ4iU zCbGIrn;*%dLm6iJeUtwsD~?Ade;xadHDPKgQIb@cjZN0a#&QQF4MB)(uT!Xupw?W9 z$rjbbf$PALLf`sJiQ` zQ5LLB_7bdu5``PJt}CiT+BSoGvcfLeon+dhX24Z!Sw<|BBwoY5%^>b@aVUI#5T%?} zdv_^u67ah22BxgYDJOOGur!=q^SAZ(_om1w@b9g{36*ywL z214oJK`8%(FM+2Ps{Z6@#CxtoMOJ21I|ur|C`Xcq;A~zSRpbwVWCWu*yY!(sC&Wei zn{y54a$_{@%UkUs{Yo@CVg_981NKV#-t3n`NOONYq24s4g>@13c(`{ShE*|*gfB8+ zqe^ks1_O~uENznSzK_pw+K|hmB{Nz@F8fwewl%1Ye0{y_mtS9d{*5m9oUGsaqgG*K zj_JCX@${U7=iDtj(2TA7=y=1%YrnA~160#h$w!J2e@+!*5*~|v@w`615S9l{cJJB9 zrqWdqjpV*AZ^aicY3tp2-3)Kfg%^Fhbzz~y*-6bx$}C)dZR_VeyQaSNCQo7PUBgS- zwuPehF_Lq)y^6x8MWLS$!uW|SgkH)fU7=cvy$5SKx3XUQ_1e~N>vY>XJ?KPqMr~Nx z&ATN1v+`zBG7n{43%rljtPwZcu;5oyJhe1sJvt6)HLA<7ayWL`zxM9y|-qH4neQnaY5eWWFD?>82{#^8Nj7Z@XV)Gt!|t-&s=gsplyh zS5pe)Xn&Kw?HZB?^xke(twQW6+mTf`T$d6p5RRTSSgtx6UU?>gpqb&4vuNYk$r-I) zxr#t@x%VU!hMVIK6arx`)M^s8@!$tv$xr9981laUoUF~xSOM%fKd1M z_S(u-FRiH(N4jVr(Ai_p`QBgqaTre6t}9!nd`SyB1J&G| zc9$BjY&q=Pnq;j_G!Hi}gIlMcmJaBXA2{EnJ3OddqKJ*wC>jp9U_eVOLHqEUR#&Us za=s*0xvu&1Is8>hHX<%R;^;m_?Ve za%lB#V#*4$kk~dOnp0$yKM3MjA~6`aNSq#3pWI$50WPZ|cyIgRd!}c@Y6xlDR!1Uc 
zu(PB)jsDTrBx`kv9Q3?hRnjfKV_5pas4H|X7sD`<*{A|I4ueZ>7>r6$TfnSS;jnZ5 zUfU@B*}?|nbkR7iemfG0JY6IUi0*3lB%{x5dP@7aKHFBRYJ8EMea648`UC`dT^6`$ zXN_S&_0!Opecv0g3`5+<2Wq(kivAYiOZf71KlYBK%s!yt(D~{wlkD4B`Ga|N2j}P5bNTkAJnN z_k9IR!@DS_?(tc1HwOGjJTRU+sV!ggPjwQER#2tPQzyxXbFpaDI~AmEzW|bJEc4$| z{%!;Z-ir^y`3ED6li1<-g{TTR$w`o5q75Q$u<#SX7NojM&a`OOkZD*KU(EU+O?oLA zbIlhv8!nb9Ra9?jGDH#~BWzk%{tu**3C=`+VOU7DZrMyswty>y&b9EKQ3WcNs$IHW zUUXd)C^=&|c zh_0~^D0saaFkaV`K15L?V%WWz0G-p*Cci5B8`1{sGQgk|Bi0AK2wG{oxGRFN>b-ut9zM45P3u4j5SP8KBk@xhUy?Zec}<1>>MueK<~2d6!y*# zi#a=q+HXXGffOdlZQBT}2>LKcf^IPzaY4VJkTbbLkk1$$bv{;WUBykyw)BJ<0YZ~L z;u3ktKBKW|2 zFYLLhQ5+xzIsi*=gRKAF(kCJoC*X;kCGD?nw|1cv5LxA-lC9aKov*hni-bI|HFmhI`hUxm{q!Joi+kzYp5X z%z821=(nll&Fv_URjb(0<#76&e#b6#<|Tm5uY3w?p#$PK=yNg@6!g6V?XcbN^Qe}2tWatbC9)owbdE|YYdxOJ%=^JjfyEguB&3Or$xC`Va^!&Vj z4dz4tk%=dn1Mz-Dw}VWo(z9@4MRH>52ZBYVv`rc)iLB3l)y9uY*= z*K7a!`r6hlMbg46s98G<=z|9|hIc)7e}u(AY(q0|^f8vXq_I=jhbVPxD0#jE`^mi= z$;{_WXLuTApoY$q_Vwy@@LHfQR<92kdNX3lqs!$2P(DZ`i>L=EHrRWgXpDO+nOn4) zwZzG=h2*VD!8_E`6-92Fa$m#E*48ao0R5r+yzHg9`YotF9LK3^w+C}bt3KS}b6xCG zIp(G}lCwd%q@lh*iXJsGb*JJMBA3GiTEkVTvQi7;VB~QeuJWUnGQj*stJf7TS8c9p zYJ{;)KYQ6wJz2tqqh3MKj9od2^JW6I7NMLMYtxr8xh}FKWRPlWaw{_kai=$y21H=* z*9UG(L4AvM%jKx*^^2i7K`*i`ZRF*Tp=s4`H~VDbQ8E5mv$QbdhG9Ynj45&0T&hczOB1a-xPAKKYsk&G?tqP zR8QyXs3c_HyxdGIp*`^zS1UsT%7N#Fx+ap`%!u!w9$w)xJErSkHo$kioR{eFm2WIw zkr2f#;oY)_DuVzii`SDFB+-AOHv$!OBhr%aJ~$h*97SoxnvDHgKoS?J=b>xXwGhca zQU;X#sifYFp1Ho5EU=HOc@~TsKU$%NUs1HHm`b)YZPGJ?I=E1~yx5L`5s`(BwOxku z^|35@jaVvcSwkGg%d@A>lxMfqPRMG8fMEvuuKjZvMPru#CEMQ)1WF!zOU1RWa_ArmHz&GhnRU0+<(lAF{(-= zmn2}$a*+*x^sG{n4xL=PX(Z`(npCB>WnGR#z^Kd$CQp)$wvnjV;f`Y|;g1~c%&M;r z&5-p>2M%lXe8!>M-=%@Gs3o$zd-kL|-jiOy?!_~Dy^PHabV!y!`BQtTjL%x~kFTF8 zDc5Ftpq&)({`y+WcAUpr_LPd$pEnv92&Y$J_g$m1qzQ*UEoDvXZt>Pywo(d60im)Q zyhQexe9!T$Vpk~lgnl|%bMQ#M{+H~bzI2WQ+axUHSALkQ*m~05n=Z{lzUcZ`PR+>3 zpeiVZ6GqcGKW)kx)M<_TAq|E!N_LKC))o?swn53B(WZug$P5?km<3S~lxX&FF(JN(XPRG! 
z*jpo0;;rcbU$8q|C-OR!JI0*VsRs$ks8;A?S8$3EqZ!s{lst=C z%}q@h>uz^5S(aRf*}?+i2HJB9NE%l=Ftun_6~eVu2A#cG#lK76QE8TYf* z&v^vZ7)f0DlHJekvN_ax9&Z#dDChS*-m;KW1R6H!x^80OZ!rb^aa}C*e*OIOZ4Pp` zyj3lYA#TsZ0jCE)@IxVNZNr--4~b)dKcddfuzm06(p2w<%7F{(7aA9*u}J^ z*|t?OZL{+~OuBo4-?KdiaEjO&-OIH{PsjlZ=6kttDrl(kOk;8UQ{^7z9h?Uj1M%Y- zQ&PGGC?|JZyjr%Do7~A%v03un%wZ{awp>D9y1i_M+gWzjFotGs(WLM$+@Z+Qqv!vYax~=)Wuh*7p zHQJ~V58*A%ihLKfh{4pYOp1i)@hcCGp?E7cgEm3O>qzUwzbw*sV%{g%zENWlOmeX4K7#?VU6*|`V!h;H_5o& zr=jC2i-jTk5SfvY!qW_4k@feu&s>as2qu013g+B#^Zw>rYAHp|U{_*iI51d9Dl9X^ zu5y;avQ3XES+-kJ{V-b0h>!p!%}1Yful|p!+FMP(((sq}*X#Y-_SYJd3awX@on-sA zaI#tgQN9a?oAeItvRAUo^jTM4M{<>}jU79S8HXZjcd}0@0#u@p4uVoIP48VubkYta zKU;?T8o(20qr>A3Ux`|~NKTvyer+x^98YANa3n>xVk5j~@QHa=E@ zfIe}^df$lFXQ|ZSccIjC93KfJS|H})2U6HC@7LFHT)#dar~}t&^Jc;3mg7CJh}XV% z6#!c9l?Ij|n<3^{zPcF6kaYTeNQ9__q%zt-eskJsz#mDXaiLy+e4?8-+fh;+o&XzqSD5r>OC5MioRF_mR_TSy7c1`GO1i~Xia z08{i_#h8v(qF{c=Q+E#LFXZ18HavtLZ^7Il-L3UMdc0793?`cvI$Tg1(TfteL$^T? zbDo3M`i$%X!)$rNMtNr?Zz<`=)T&;3q`JgsO-3$!Ez3u_@!+{Or=s&h-$QEGIO*R9 zE!C_-ajHYC(H_jQ zmbh>EIMP+3HvIiGe1?Q zPdd}f9Ut3|uMKziJSrrkl5(0lPGdmlr9EdZbPcj~{`K{5mgSbp`s?q30kXp5ZKZ+% zo}W6E*_#8HnPYx-?u=6a#CtsF8XmPTuU zV%3a0wSpscgIX@`i_%A|CaUeA(Ts-aP95mfJb=pJ&Mds)llKN_Fk9(UBjKdKW{8CH zX0lC7C`;m|dWhK{%x}!Yk$Wao(RhL>5A#-~=cZg05thLp?Ke>QNdoc;AXzlmIYSeR z%XUe2{`20oiY=6`on>T0#!My|Iy{**%-?T%uiYzLDAbYEuw8(?Al)>R4ti=}Q78^< z9br7$B7YzC`(vTO|Np*zefmLZunBm3?MQS)lUG0DV~?6!Z+(mRk!D+D9d1Gw-9g;R zwHeu27l*U%0)vOezjHgUgfBfs4zZv%16feILjhm`Q+N~Q% z=c93TkV$gs7A`q5ecZ0prVTl?j?D1o7T1yBi%THISc>&9+Jau?vyJ-6N>{q8UP^c! 
zF&R@HYPq;1?Q@|2Q*%41V2B$8Ma>lsH$*U%m0U|YI(#dMBVh)dTS4vTWg z2C%B0g27T9jxI@qpNLothfOfP1@#1TQ{@Fv4)Z}!Z+)QSjy;2s$0b;8rRJk69*S4n zwlXN$Wy#Z7{v4mHVq4pkC9cCXr+#ZrZq?bl+pbrPnm)$tO@3cR+X#2s}6cTh${))mPh*}hdR96$lVHvW2b5Ii9 zC+XzDtFr9U7uBFBg7K;)vxcthM{G{lq?q#EZ;o>Q-Sg)^#(7C}gvE8VI8yWqH$xCp zo~qlU?;}$O5X;G&ZXSAIOaic5|69UR*DydBs^7C(QPw1o>$TU1;IiaTQ4eAi1{%g7p;E45tX*F0ifUP-rR!ntu|z zL6=_3N|XeSqnsAE zt$W!@NLw7vMFCZCEuP<0`usEgDnA&{faB5L-qCvCUDU4{`mL0z!h!KpN}j@v@ez|2 zG=Qf$nXVE{?iifl7l6|c?%*3z(6Ee0PEe#foTh=&dI7eljL?eYS)f#>`!XPvysa$= z%1Awgg25m*M2aJkW;f``S6F;W{gQq&oTS5aH#QQ~lavGxWjk9^`@*33KF^3iniHJ! zb_Fy~`b_><6-DsTAfvWG6QWX3>S$Vi^}g~_B({`*mFdr;{%}&is@K$!Y3HdPU}(!c z%5bnnw7xT4JMlYA`Y1&X;3dWBTGsBD9h3!i{zMcV#k%H zh2;)RRwE{xc?F>oW0axmmDj-5YOl+D726OJ?az3bZW3X+u{*ETa;T8NwsjR7HI%#= z)kq|u&*u|DqMcq??gtCK_P*jp$`u;z=uj_RoV@N9wpQx#+$wq4=+UA9H_-<6XhO?& z&LQ@z@%`(s1whI$wxib9b**bI3n?UD%t|@kQ={&x$MU(QQZQa2-(aKhr#vG8zJWJK z`HmSdr=g1t)YFv5PTFQoa4DzVW^vyQ;Msrv@yGk?Yu(@b{5I8@qZgD_f6VGJWb+^iNK)=34bV#F}UKA%Iuy4mJ86RZZ`|?>8 zrd}5Wt-gd&pn4qN!bIfgk&D#c-owCCcs5jK%4Xg1PN+6lNo&zDA*XN|1JNwIU7_|T zVpUw&ILeyBD%Qoe#*#8jIy(R!Jqk z-`m&M&rQ4c`~6aGI4yx-v&!{xpK8swaM}4(n5N@UW=^BTG)OmL;!qU5(`R;d~{CC;smQ!L7nxJ=FpU~%!ses z`~6j3`|I`N?@#;o&Sj%bBcLSr`-C<9k0GUL?PLL;9ULeQusa+}(!HZa);;3nh^}U~ zrY+3;!^|~2pl3G$*#TE&g1$YQwWU$aG&h4jcl*ZQMm-MPINGzs+lu72C?|5*3M#VW z$$F^rxZI-%!@2nl^w(5io}6A|U?yP{Mue5`Gs&NDlx$s!8N=EmdKrnT)-QXc0ghh{ z)A1Dc!7^(~c`XUgP2e(>nz5v@gNRn9&xu$ASLtq7GjB>Dk{flAxKfNf=Ca7au>HK` zwbK5fl<&{+Yi;*oE3IyOvC3cR4D7G{`}>2KfOg&yAY2!zclGnOY?Zt`vs33Cn&h1p zyh<~vOB2GpsEldJHA&);q?}#%A`vEB#hfjOv==z3&7pA@>Qdu^2)|ZT(t9TvD%Z!l z>>KNKlWQI(Y&n)Q&y?J>y7F8{{ALRBjB1;qf1*i+;MsGcNhi~6{jh(pHtC`c+-ec z&nQvy)*8aY7u`Gm`g~hB33e{rf&CW2S!xoK-ebn+^Sy1`c_m{cC%d*{oS9N94uD;T z^T=TmBUZU3i3+4;-SW%Ed*8l{GxK^yGV0rFef`*s(o5kX5DafV%S7^3Vzey;4P!0g zth@LL=stPIHu}CLEk4#T<%n9jUXH7u3A6QZvIg?S0JIX^t|RAIlGzbkXlOh|F3qk) z=c{fXSjlcf;hO0e4HOz@gU+N>od=G`g-9*~L?Fgx2L%^Yw|Vv)>%#->LVFbh6Qk;; z5S+gxPDrSQ`N5sUJplqViIj<42vX+Rl1~>(^1)W!<&myYTY3g 
zm`66p9XYJE<^lUCMzkwRFK{>L_ZzcpM@dOQgSB(W23YQ;nY7bq-HY`)I*xXk9)3OB zQLja)EHX|c=;i6xn2g6uJEt{g@ht3^o4R=u=vi{IYqc{J-)@G*q_`yENdZ$al(v#mco;HCOGfEMY-Lr?v;FJu z|JLnQa?#YU5`!`=Q9D&m@MEX|cMjSB4?qn1op_(8z2N6Vggg8uaNZP}u>$$6->J-z z`!WtQv1d69tnkHQjR)yOx@JmWj)PrVXJ9uDTsV|AH-5a^#2Iey0T^YT6iQ;waHqV% tI_DhjUbZk&=n_7Z9ZVVLM3IbA{7;g!m}-Mr@ZbOd002ovPDHLkV1m&FZI}Q6 diff --git a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py index 98994f28a..421ff2258 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parser_router.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parser_router.py @@ -1,24 +1,45 @@ import pytest +from ragbits.core.utils.config_handling import ObjectContructionConfig from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.parsers.base import TextDocumentParser +from ragbits.document_search.ingestion.parsers.base import ImageDocumentParser, TextDocumentParser from ragbits.document_search.ingestion.parsers.exceptions import ParserNotFoundError from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter +from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser -async def test_parser_router() -> None: +def test_parser_router_from_config() -> None: + config = { + "txt": ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.parsers.base:TextDocumentParser"} + ), + "png": ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.parsers.base:ImageDocumentParser"} + ), + "pdf": ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser"} + ), + } + router = DocumentParserRouter.from_config(config) + + assert isinstance(router._parsers[DocumentType.TXT], TextDocumentParser) + assert 
isinstance(router._parsers[DocumentType.PNG], ImageDocumentParser) + assert isinstance(router._parsers[DocumentType.PDF], UnstructuredDocumentParser) + + +def test_parser_router_get() -> None: parser = TextDocumentParser() parser_router = DocumentParserRouter({DocumentType.TXT: parser}) assert parser_router.get(DocumentType.TXT) is parser -async def test_parser_router_raises_when_no_parser_found() -> None: +def test_parser_router_get_raises_when_no_parser_found() -> None: parser_router = DocumentParserRouter() parser_router._parsers = {DocumentType.TXT: TextDocumentParser()} - with pytest.raises(ParserNotFoundError) as err: + with pytest.raises(ParserNotFoundError) as exc: parser_router.get(DocumentType.PDF) - assert err.value.message == f"No parser found for the document type {DocumentType.PDF}" - assert err.value.document_type == DocumentType.PDF + assert exc.value.message == f"No parser found for the document type {DocumentType.PDF}" + assert exc.value.document_type == DocumentType.PDF diff --git a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index 5e65194f1..18fc2a45c 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -1,8 +1,11 @@ +from pathlib import Path + import pytest from ragbits.core.utils.config_handling import ObjectContructionConfig -from ragbits.document_search.documents.document import DocumentType -from ragbits.document_search.ingestion.parsers.base import DocumentParser, TextDocumentParser +from ragbits.document_search.documents.document import DocumentMeta, DocumentType +from ragbits.document_search.documents.element import ImageElement, TextElement +from ragbits.document_search.ingestion.parsers.base import DocumentParser, ImageDocumentParser, TextDocumentParser from ragbits.document_search.ingestion.parsers.exceptions import 
ParserDocumentNotSupportedError from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser @@ -17,15 +20,71 @@ def test_parser_validates_supported_document_types_fails() -> None: UnstructuredDocumentParser.validate_document_type(DocumentType.UNKNOWN) -def test_subclass_from_config() -> None: - config = ObjectContructionConfig.model_validate( - {"type": "ragbits.document_search.ingestion.parsers:TextDocumentParser"} - ) +@pytest.mark.parametrize( + ("parser_type", "expected_parser"), + [ + ("ragbits.document_search.ingestion.parsers.base:TextDocumentParser", TextDocumentParser), + ("ragbits.document_search.ingestion.parsers.base:ImageDocumentParser", ImageDocumentParser), + ( + "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser", + UnstructuredDocumentParser, + ), + ("TextDocumentParser", TextDocumentParser), + ("ImageDocumentParser", ImageDocumentParser), + ], +) +def test_parser_subclass_from_config(parser_type: str, expected_parser: type[DocumentParser]) -> None: + config = ObjectContructionConfig.model_validate({"type": parser_type}) parser = DocumentParser.subclass_from_config(config) - assert isinstance(parser, TextDocumentParser) + assert isinstance(parser, expected_parser) -def test_subclass_from_config_default_path() -> None: - config = ObjectContructionConfig.model_validate({"type": "TextDocumentParser"}) - parser = DocumentParser.subclass_from_config(config) - assert isinstance(parser, TextDocumentParser) + +async def test_text_parser_call() -> None: + document_meta = DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md") + document = await document_meta.fetch() + enricher = TextDocumentParser() + + elements = await enricher.parse(document) + + assert len(elements) == 1 + assert isinstance(elements[0], TextElement) + assert elements[0].content == "# Ragbits\n\nRepository for internal experiment with our upcoming LLM framework.\n" + + +async def 
test_image_parser_call() -> None: + document_meta = DocumentMeta.from_local_path( + Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png" + ) + document = await document_meta.fetch() + parser = ImageDocumentParser() + + elements = await parser.parse(document) + + assert len(elements) == 1 + assert isinstance(elements[0], ImageElement) + assert elements[0].image_bytes == document.local_path.read_bytes() + assert elements[0].description is None + assert elements[0].ocr_extracted_text is None + + +@pytest.mark.parametrize( + "parser_type", + [ + ImageDocumentParser, + TextDocumentParser, + ], +) +async def test_parser_call_fail(parser_type: type[DocumentParser]) -> None: + document_meta = DocumentMeta.from_local_path( + Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf" + ) + document = await document_meta.fetch() + parser = parser_type() + + with pytest.raises(ParserDocumentNotSupportedError) as exc: + await parser.parse(document) + + assert exc.value.message == f"Document type {DocumentType.PDF} is not supported by the {parser_type.__name__}" + assert exc.value.document_type == DocumentType.PDF + assert exc.value.parser_name == parser_type.__name__ diff --git a/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py b/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py index 200e4e279..cde9795e3 100644 --- a/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py @@ -1,25 +1,41 @@ import pytest +from ragbits.core.utils.config_handling import ObjectContructionConfig from ragbits.document_search.documents.element import ImageElement, TextElement from ragbits.document_search.ingestion.enrichers.exceptions import EnricherNotFoundError from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher from ragbits.document_search.ingestion.enrichers.router 
import ElementEnricherRouter -async def test_enricher_router() -> None: +def test_enricher_router_from_config() -> None: + config = { + "TextElement": ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher"} + ), + "ImageElement": ObjectContructionConfig.model_validate( + {"type": "ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher"} + ), + } + router = ElementEnricherRouter.from_config(config) + + assert isinstance(router._enrichers[TextElement], ImageElementEnricher) + assert isinstance(router._enrichers[ImageElement], ImageElementEnricher) + + +async def test_enricher_router_get() -> None: enricher = ImageElementEnricher() enricher_router = ElementEnricherRouter({ImageElement: enricher}) assert enricher_router.get(ImageElement) is enricher -async def test_enricher_router_raises_when_no_enricher_found() -> None: +async def test_enricher_router_get_raises_when_no_enricher_found() -> None: enricher = ImageElementEnricher() enricher_router = ElementEnricherRouter() enricher_router._enrichers = {ImageElement: enricher} - with pytest.raises(EnricherNotFoundError) as err: + with pytest.raises(EnricherNotFoundError) as exc: enricher_router.get(TextElement) - assert err.value.message == f"No enricher found for the element type {TextElement}" - assert err.value.element_type == TextElement + assert exc.value.message == f"No enricher found for the element type {TextElement}" + assert exc.value.element_type == TextElement diff --git a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py index a4f36425a..183e0ef2c 100644 --- a/packages/ragbits-document-search/tests/unit/test_element_enrichers.py +++ b/packages/ragbits-document-search/tests/unit/test_element_enrichers.py @@ -3,33 +3,14 @@ import pytest from ragbits.core.llms.litellm import LiteLLM, LiteLLMOptions +from ragbits.core.utils.config_handling 
import ObjectContructionConfig from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.documents.element import Element, ImageElement +from ragbits.document_search.documents.element import Element, ImageElement, TextElement +from ragbits.document_search.ingestion.enrichers.base import ElementEnricher from ragbits.document_search.ingestion.enrichers.exceptions import EnricherElementNotSupportedError from ragbits.document_search.ingestion.enrichers.image import ImageDescriberPrompt, ImageElementEnricher -@pytest.fixture -def llm() -> LiteLLM: - default_options = LiteLLMOptions(mock_response='{"description": "response"}') - return LiteLLM(model_name="gpt-4o", default_options=default_options) - - -@pytest.fixture -def image_bytes() -> bytes: - with open(Path(__file__).parent.parent / "test.png", "rb") as f: - return f.read() - - -@pytest.fixture -def image_element(image_bytes: bytes) -> ImageElement: - return ImageElement( - document_meta=DocumentMeta.create_text_document_from_literal(""), - image_bytes=image_bytes, - ocr_extracted_text="ocr text", - ) - - def test_enricher_validates_supported_element_types_passes() -> None: ImageElementEnricher.validate_element_type(ImageElement) @@ -42,27 +23,75 @@ class CustomElement(Element): ImageElementEnricher.validate_element_type(CustomElement) # type: ignore -async def test_process(llm: LiteLLM, image_element: ImageElement): +@pytest.mark.parametrize( + ("enricher_type", "expected_enricher"), + [ + ("ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher", ImageElementEnricher), + ("ImageElementEnricher", ImageElementEnricher), + ], +) +def test_enricher_subclass_from_config(enricher_type: str, expected_enricher: type[ImageElementEnricher]) -> None: + config = ObjectContructionConfig.model_validate( + { + "type": enricher_type, + "config": { + "llm": { + "type": "LiteLLM", + "prompt": "ragbits.document_search.ingestion.enrichers.image:ImageDescriberPrompt", + }, + }, + 
} + ) + enricher = ElementEnricher.subclass_from_config(config) # type: ignore + + assert isinstance(enricher, expected_enricher) + assert isinstance(enricher._llm, LiteLLM) + assert enricher._prompt == ImageDescriberPrompt + + +async def test_image_enricher_call() -> None: + default_options = LiteLLMOptions(mock_response='{"description": "response"}') + llm = LiteLLM( + model_name="gpt-4o", + default_options=default_options, + ) + document_meta = DocumentMeta.from_local_path( + Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png" + ) + document = await document_meta.fetch() + element = ImageElement( + document_meta=document_meta, + image_bytes=document.local_path.read_bytes(), + ocr_extracted_text="ocr text", + ) enricher = ImageElementEnricher(llm=llm) - results = await enricher.enrich([image_element]) - assert len(results) == 1 - assert isinstance(results[0], ImageElement) - assert results[0].description == "response" - assert results[0].image_bytes == image_element.image_bytes - assert results[0].ocr_extracted_text == image_element.ocr_extracted_text + enriched_elements = await enricher.enrich([element]) + assert len(enriched_elements) == 1 + assert isinstance(enriched_elements[0], ImageElement) + assert enriched_elements[0].description == "response" + assert enriched_elements[0].image_bytes == element.image_bytes + assert enriched_elements[0].ocr_extracted_text == element.ocr_extracted_text -def test_from_config(): - config = { - "llm": { - "type": "LiteLLM", - "prompt": "ragbits.document_search.ingestion.enrichers.image:ImageDescriberPrompt", - } - } - enricher = ImageElementEnricher.from_config(config) +async def test_image_enricher_call_fail() -> None: + default_options = LiteLLMOptions(mock_response='{"description": "response"}') + llm = LiteLLM( + model_name="gpt-4o", + default_options=default_options, + ) + document_meta = DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md") + document = 
await document_meta.fetch() + element = TextElement( + document_meta=document_meta, + content=document.local_path.read_text(), + ) + enricher = ImageElementEnricher(llm=llm) - assert isinstance(enricher, ImageElementEnricher) - assert isinstance(enricher._llm, LiteLLM) - assert enricher._prompt == ImageDescriberPrompt + with pytest.raises(EnricherElementNotSupportedError) as exc: + await enricher.enrich([element]) # type: ignore + + assert exc.value.message == f"Element type {TextElement} is not supported by the {ImageElementEnricher.__name__}" + assert exc.value.element_type == TextElement + assert exc.value.enricher_name == ImageElementEnricher.__name__ diff --git a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py index 99b2bc5fb..67bf820dd 100644 --- a/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py +++ b/packages/ragbits-document-search/tests/unit/test_ingest_strategies.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from ragbits.core.embeddings.noop import NoopEmbedder @@ -25,18 +27,14 @@ def ingest_strategy_fixture(request: pytest.FixtureRequest) -> IngestStrategy: return request.param -@pytest.fixture(name="documents") -def documents_fixture() -> list[DocumentMeta]: - return [ +async def test_ingest_strategy_call(ingest_strategy: IngestStrategy) -> None: + documents = [ DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), DocumentMeta.create_text_document_from_literal("Name of Peppa's mother is Mummy Pig"), DocumentMeta.create_text_document_from_literal("Name of Peppa's father is Daddy Pig"), DocumentMeta.create_text_document_from_literal("Name of Peppa's grandfather is Grandpa Pig"), DocumentMeta.create_text_document_from_literal("Name of Peppa's grandmother is Granny Pig"), ] - - -async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: list[DocumentMeta]) -> None: 
vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) parser_router = DocumentParserRouter({DocumentType.TXT: TextDocumentParser()}) enricher_router = ElementEnricherRouter() @@ -48,5 +46,29 @@ async def test_ingest_strategy_call(ingest_strategy: IngestStrategy, documents: enricher_router=enricher_router, ) - assert len(results.successful) == len(documents) + assert len(results.successful) == 5 assert len(results.failed) == 0 + + +async def test_ingest_strategy_call_fail(ingest_strategy: IngestStrategy) -> None: + documents = [ + DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), + DocumentMeta.create_text_document_from_literal("Name of Peppa's mother is Mummy Pig"), + DocumentMeta.create_text_document_from_literal("Name of Peppa's father is Daddy Pig"), + DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png"), + DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf"), + ] + vector_store = InMemoryVectorStore(embedder=NoopEmbedder()) + parser_router = DocumentParserRouter() + parser_router._parsers = {DocumentType.TXT: TextDocumentParser()} + enricher_router = ElementEnricherRouter() + + results = await ingest_strategy( + documents=documents, + vector_store=vector_store, + parser_router=parser_router, + enricher_router=enricher_router, + ) + + assert len(results.successful) == 3 + assert len(results.failed) == 2 From b90d5d470a2a529c18b72ee7831c29534291eb1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 24 Mar 2025 00:14:45 +0100 Subject: [PATCH 27/31] fix tests --- .../ragbits-document-search/tests/unit/test_document_parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ragbits-document-search/tests/unit/test_document_parsers.py b/packages/ragbits-document-search/tests/unit/test_document_parsers.py index 18fc2a45c..35f78631e 100644 --- 
a/packages/ragbits-document-search/tests/unit/test_document_parsers.py +++ b/packages/ragbits-document-search/tests/unit/test_document_parsers.py @@ -85,6 +85,6 @@ async def test_parser_call_fail(parser_type: type[DocumentParser]) -> None: with pytest.raises(ParserDocumentNotSupportedError) as exc: await parser.parse(document) - assert exc.value.message == f"Document type {DocumentType.PDF} is not supported by the {parser_type.__name__}" + assert exc.value.message == f"Document type {DocumentType.PDF.value} is not supported by the {parser_type.__name__}" assert exc.value.document_type == DocumentType.PDF assert exc.value.parser_name == parser_type.__name__ From 8520cfa885ebce4f261b67887fc1bbe6ada990cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 24 Mar 2025 16:38:08 +0100 Subject: [PATCH 28/31] fix typos --- .../src/ragbits/document_search/ingestion/enrichers/base.py | 5 ++++- .../src/ragbits/document_search/ingestion/enrichers/image.py | 5 ++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py index 1698589a0..67b73cafa 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py @@ -13,6 +13,9 @@ class ElementEnricher(Generic[ElementT], WithConstructionConfig, ABC): """ Base class for element enrichers, responsible for providing additional information about elements. + + Enrichers operate on raw elements and are used to fill in missing fields that could not be filled in during parsing. + They usually deal with summarizing text or describing images. 
""" default_module: ClassVar[ModuleType | None] = enrichers @@ -36,7 +39,7 @@ async def enrich(self, elements: list[ElementT]) -> list[ElementT]: @classmethod def validate_element_type(cls, element_type: type[Element]) -> None: """ - Check if the enricher supports the enricher type. + Check if the enricher supports the element type. Args: element_type: The element type to validate against the enricher. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index ec14d3064..99e9b538b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -55,7 +55,7 @@ def __init__( async def enrich(self, elements: list[ImageElement]) -> list[ImageElement]: """ - Enrich image elements with additinal description of the image. + Enrich image elements with additional description of the image. Args: elements: The elements to be enriched. 
@@ -70,8 +70,7 @@ async def enrich(self, elements: list[ImageElement]) -> list[ImageElement]: responses: list[ImageDescriberOutput] = [] for element in elements: self.validate_element_type(type(element)) - input_data = self._prompt.input_type(image=element.image_bytes) # type: ignore - prompt = self._prompt(input_data) + prompt = self._prompt(ImageDescriberInput(image=element.image_bytes)) responses.append(await self._llm.generate(prompt)) return [ From 1a8ad31dc14f8951f1d8a86b0198169e2801fd2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Mon, 24 Mar 2025 17:32:03 +0100 Subject: [PATCH 29/31] use super() in from_config --- .../ragbits/document_search/ingestion/enrichers/image.py | 6 +++--- .../ragbits/document_search/ingestion/enrichers/router.py | 2 +- .../src/ragbits/document_search/ingestion/parsers/router.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py index 99e9b538b..d02fe4cc7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py @@ -98,8 +98,8 @@ def from_config(cls, config: dict) -> "ImageElementEnricher": ValidationError: If the configuration doesn't follow the expected format. InvalidConfigError: If llm or prompt can't be found or are not the correct type. 
""" - llm: LLM | None = ( + config["llm"] = ( LLM.subclass_from_config(ObjectContructionConfig.model_validate(config["llm"])) if "llm" in config else None ) - prompt = import_by_path(config["prompt"]) if "prompt" in config else None - return cls(llm=llm, prompt=prompt) + config["prompt"] = import_by_path(config["prompt"]) if "prompt" in config else None + return super().from_config(config) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index fb5c4c6b2..96de28cfd 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -66,7 +66,7 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: import_by_path(element_type, element): ElementEnricher.subclass_from_config(enricher_config) for element_type, enricher_config in config.items() } - return cls(enrichers=enrichers) + return super().from_config({"enrichers": enrichers}) def get(self, element_type: type[Element]) -> ElementEnricher: """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 4554b5e1e..68a5b3c15 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -71,7 +71,7 @@ def from_config(cls, config: dict[str, ObjectContructionConfig]) -> Self: DocumentType(document_type): DocumentParser.subclass_from_config(parser_config) for document_type, parser_config in config.items() } - return cls(parsers=parsers) + return super().from_config({"parsers": parsers}) def get(self, document_type: DocumentType) -> DocumentParser: 
""" From b08d4598e2c74446cd0effb2ef9816d6080f8899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 25 Mar 2025 13:05:59 +0100 Subject: [PATCH 30/31] remove reduce from parser and enricher errors --- .../document_search/ingestion/enrichers/exceptions.py | 8 -------- .../document_search/ingestion/parsers/exceptions.py | 8 -------- 2 files changed, 16 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py index d72c462ea..4f3691809 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py @@ -1,6 +1,4 @@ -import inspect -from typing_extensions import Self from ragbits.document_search.documents.element import Element @@ -14,12 +12,6 @@ def __init__(self, message: str) -> None: super().__init__(message) self.message = message - def __reduce__(self) -> tuple[type[Self], tuple]: - return self.__class__, tuple( - self.__getattribute__(param_name) - for param_name in list(inspect.signature(self.__class__.__init__).parameters)[1:] - ) - class EnricherNotFoundError(EnricherError): """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py index 4c626cb74..c1bbc71ff 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py @@ -1,6 +1,4 @@ -import inspect -from typing_extensions import Self from ragbits.document_search.documents.document import DocumentType @@ -14,12 +12,6 @@ def __init__(self, message: str) -> None: super().__init__(message) 
self.message = message - def __reduce__(self) -> tuple[type[Self], tuple]: - return self.__class__, tuple( - self.__getattribute__(param_name) - for param_name in list(inspect.signature(self.__class__.__init__).parameters)[1:] - ) - class ParserNotFoundError(ParserError): """ From 75a78100a8db03fd7df51af1a2e265e7ec16be93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pstr=C4=85g?= Date: Tue, 25 Mar 2025 13:23:37 +0100 Subject: [PATCH 31/31] rename config keys --- docs/how-to/document_search/search_documents.md | 2 +- examples/document-search/configurable.py | 2 +- .../advanced/config/experiments/chunking-1000.yaml | 2 +- .../advanced/config/experiments/chunking-250.yaml | 2 +- .../advanced/config/experiments/chunking-500.yaml | 2 +- .../advanced/config/pipeline/document_search.yaml | 2 +- .../config/pipeline/document_search_optimization.yaml | 2 +- .../pipeline/{parsers => parser_router}/unstructured.yaml | 0 .../unstructured_optimization.yaml | 0 examples/evaluation/document-search/basic/evaluate.py | 2 +- examples/evaluation/document-search/basic/optimize.py | 2 +- .../src/ragbits/document_search/_main.py | 8 ++++---- .../document_search/ingestion/enrichers/exceptions.py | 2 -- .../ragbits/document_search/ingestion/enrichers/router.py | 2 +- .../document_search/ingestion/parsers/exceptions.py | 2 -- .../ragbits/document_search/ingestion/parsers/router.py | 2 +- 16 files changed, 15 insertions(+), 19 deletions(-) rename examples/evaluation/document-search/advanced/config/pipeline/{parsers => parser_router}/unstructured.yaml (100%) rename examples/evaluation/document-search/advanced/config/pipeline/{parsers => parser_router}/unstructured_optimization.yaml (100%) diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 4453f4db4..2460e6dec 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -112,7 +112,7 @@ There is an additional 
functionality of [`DocumentSearch`][ragbits.document_sear config = { "vector_store": {...}, "reranker": {...}, - "parsers": {...}, + "parser_router": {...}, "rephraser": {...}, } diff --git a/examples/document-search/configurable.py b/examples/document-search/configurable.py index 3f7881937..0c2782228 100644 --- a/examples/document-search/configurable.py +++ b/examples/document-search/configurable.py @@ -90,7 +90,7 @@ class to rephrase the query. }, }, }, - "parsers": {"txt": {"type": "TextDocumentParser"}}, + "parser_router": {"txt": {"type": "TextDocumentParser"}}, "rephraser": { "type": "LLMQueryRephraser", "config": { diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml index 82fe67a09..f8f9f0625 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml @@ -5,7 +5,7 @@ task: pipeline: config: - parsers: + parser_router: txt: config: chunking_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml index 7276e2593..dc948c432 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml @@ -5,7 +5,7 @@ task: pipeline: config: - parsers: + parser_router: txt: config: chunking_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml index 2902f3ce5..1c4cab3b3 100644 --- a/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml +++ b/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml @@ 
-5,7 +5,7 @@ task: pipeline: config: - parsers: + parser_router: txt: config: chunking_kwargs: diff --git a/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml index 4180c8440..093e61170 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml @@ -2,7 +2,7 @@ defaults: - vector_store@config.vector_store: chroma - rephraser@config.rephraser: noop - reranker@config.reranker: noop - - parsers@config.parsers: unstructured + - parser_router@config.parser_router: unstructured - source@config.source: hf - _self_ diff --git a/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml index 2e690c0f2..167235133 100644 --- a/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml +++ b/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml @@ -2,7 +2,7 @@ defaults: - vector_store@config.vector_store: chroma_optimization - rephraser@config.rephraser: noop - reranker@config.reranker: noop - - parsers@config.parsers: unstructured_optimization + - parser_router@config.parser_router: unstructured_optimization - source@config.source: hf - _self_ diff --git a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured.yaml similarity index 100% rename from examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured.yaml diff --git 
a/examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured_optimization.yaml similarity index 100% rename from examples/evaluation/document-search/advanced/config/pipeline/parsers/unstructured_optimization.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured_optimization.yaml diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py index f90bf397d..338eeb57f 100644 --- a/examples/evaluation/document-search/basic/evaluate.py +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -51,7 +51,7 @@ "batch_size": 10, }, }, - "parsers": { + "parser_router": { "txt": { "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser", }, diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py index 9108a303e..634f6f566 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/optimize.py @@ -53,7 +53,7 @@ }, }, }, - "parsers": { + "parser_router": { "txt": { "type": "ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser", }, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 1668459c0..1a19b59f6 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -53,8 +53,8 @@ class DocumentSearchConfig(BaseModel): rephraser: ObjectContructionConfig = ObjectContructionConfig(type="NoopQueryRephraser") reranker: ObjectContructionConfig = ObjectContructionConfig(type="NoopReranker") ingest_strategy: ObjectContructionConfig = 
ObjectContructionConfig(type="SequentialIngestStrategy") - parsers: dict[str, ObjectContructionConfig] = {} - enrichers: dict[str, ObjectContructionConfig] = {} + parser_router: dict[str, ObjectContructionConfig] = {} + enricher_router: dict[str, ObjectContructionConfig] = {} class DocumentSearch(WithConstructionConfig): @@ -119,8 +119,8 @@ def from_config(cls, config: dict) -> Self: vector_store: VectorStore = VectorStore.subclass_from_config(model.vector_store) ingest_strategy = IngestStrategy.subclass_from_config(model.ingest_strategy) - parser_router = DocumentParserRouter.from_config(model.parsers) - enricher_router = ElementEnricherRouter.from_config(model.enrichers) + parser_router = DocumentParserRouter.from_config(model.parser_router) + enricher_router = ElementEnricherRouter.from_config(model.enricher_router) return cls( vector_store=vector_store, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py index 4f3691809..fd381cfe1 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py @@ -1,5 +1,3 @@ - - from ragbits.document_search.documents.element import Element diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py index 96de28cfd..f478a27c8 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py @@ -20,7 +20,7 @@ class ElementEnricherRouter(WithConstructionConfig): The class responsible for routing the element to the correct enricher based on the element type. 
""" - configuration_key: ClassVar[str] = "enrichers" + configuration_key: ClassVar[str] = "enricher_router" _enrichers: Mapping[type[Element], ElementEnricher] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py index c1bbc71ff..db29d7b8a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py @@ -1,5 +1,3 @@ - - from ragbits.document_search.documents.document import DocumentType diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py index 68a5b3c15..af08bdc77 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/router.py @@ -40,7 +40,7 @@ class DocumentParserRouter(WithConstructionConfig): The class responsible for routing the document to the correct parser based on the document type. """ - configuration_key: ClassVar[str] = "parsers" + configuration_key: ClassVar[str] = "parser_router" _parsers: Mapping[DocumentType, DocumentParser]