From 735811b90d8314a8487b2ab6b63562abc921745d Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Fri, 14 Mar 2025 14:23:14 +0100 Subject: [PATCH 1/6] import from config --- packages/ragbits-core/src/ragbits/core/config.py | 16 ++++++++++++++++ .../documents/sources/__init__.py | 3 +++ .../tests/unit/test_document_search.py | 6 +++--- pyproject.toml | 3 +++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/config.py b/packages/ragbits-core/src/ragbits/core/config.py index c7b85af0a..4b1ece95f 100644 --- a/packages/ragbits-core/src/ragbits/core/config.py +++ b/packages/ragbits-core/src/ragbits/core/config.py @@ -5,6 +5,7 @@ from ragbits.core.llms.base import LLMType from ragbits.core.utils._pyproject import get_config_from_yaml, get_config_instance +from ragbits.core.utils.config_handling import import_by_path class CoreConfig(BaseModel): @@ -31,6 +32,8 @@ class CoreConfig(BaseModel): # Path to a YAML file with preferred configuration of varius Ragbits objects component_preference_config_path: Path | None = None + modules_to_import: dict[str, list[str]] = {} + @cached_property def preferred_instances_config(self) -> dict: """ @@ -46,3 +49,16 @@ def preferred_instances_config(self) -> dict: core_config = get_config_instance(CoreConfig, subproject="core") + + +def import_modules_from_config(config_key: str, config: CoreConfig = core_config) -> None: + """ + A function that imports all modules specified in config instance for given key + Args: + config_key: str configuration key + config: CoreConfig instance of configuration + """ + paths_to_import = config.modules_to_import.get(config_key) + if paths_to_import: + for path in paths_to_import: + import_by_path(path) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources/__init__.py index 85c808a7d..8a1674828 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources/__init__.py @@ -5,6 +5,7 @@ from ragbits.document_search.documents.sources.http import HttpSource from ragbits.document_search.documents.sources.local import LocalFileSource from ragbits.document_search.documents.sources.s3 import S3Source +from ragbits.core.config import import_modules_from_config __all__ = [ "AzureBlobStorageSource", @@ -15,3 +16,5 @@ "S3Source", "Source", ] + +import_modules_from_config("sources") diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index b84b285c4..e147e54c2 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -254,9 +254,9 @@ async def test_document_search_ingest_from_uri_with_wildcard( results = await document_search.search(search_query) # Check that we have the expected number of results - assert len(results) == len( - expected_contents - ), f"Expected {len(expected_contents)} result(s) but got {len(results)}" + assert len(results) == len(expected_contents), ( + f"Expected {len(expected_contents)} result(s) but got {len(results)}" + ) # Verify each result is a TextElement assert all(isinstance(result, TextElement) for result in results) diff --git a/pyproject.toml b/pyproject.toml index f7996ee6f..583163e7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -191,3 +191,6 @@ known-third-party = [ "requests", "scipy", "setuptools", "shapely", "skimage", "sklearn", "streamlit", "torch", "torchvision", "tqdm", "typer" ] + + + From 7be34cae7b91980b8f62c600913ea74c8625b553 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Fri, 14 Mar 2025 14:25:34 +0100 Subject: [PATCH 2/6] revert test document search --- .../tests/unit/test_document_search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index e147e54c2..b84b285c4 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -254,9 +254,9 @@ async def test_document_search_ingest_from_uri_with_wildcard( results = await document_search.search(search_query) # Check that we have the expected number of results - assert len(results) == len(expected_contents), ( - f"Expected {len(expected_contents)} result(s) but got {len(results)}" - ) + assert len(results) == len( + expected_contents + ), f"Expected {len(expected_contents)} result(s) but got {len(results)}" # Verify each result is a TextElement assert all(isinstance(result, TextElement) for result in results) From bc59cc967a5fd121d419b660193108bf1d432ab4 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Fri, 14 Mar 2025 15:35:48 +0100 Subject: [PATCH 3/6] remove blank lines --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 583163e7f..f7996ee6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -191,6 +191,3 @@ known-third-party = [ "requests", "scipy", "setuptools", "shapely", "skimage", "sklearn", "streamlit", "torch", "torchvision", "tqdm", "typer" ] - - - From 225d5bf95247f257f965e9c7cb97dbf81ed8b87d Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Mon, 17 Mar 2025 09:10:09 +0100 Subject: [PATCH 4/6] update changelog --- packages/ragbits-core/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/ragbits-core/CHANGELOG.md b/packages/ragbits-core/CHANGELOG.md index ccb4ad35f..4f196a36a 100644 --- a/packages/ragbits-core/CHANGELOG.md +++ b/packages/ragbits-core/CHANGELOG.md @@ -1,6 +1,10 @@ # CHANGELOG ## Unreleased +- configure imported modules (#343) +- improve cli trace handler +- added traceable to some method +- Add support for images in few shot prompts (#155) - Better handling of cases when text and image embeddings are mixed in VectorStore ## 0.10.0 (2025-03-17) From f0ebef4ec67b0e201c9d260b9b62d1a96bbc82a4 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Mon, 17 Mar 2025 10:05:36 +0100 Subject: [PATCH 5/6] update changelog --- packages/ragbits-document-search/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/ragbits-document-search/CHANGELOG.md b/packages/ragbits-document-search/CHANGELOG.md index 8c8744c46..feee3004b 100644 --- a/packages/ragbits-document-search/CHANGELOG.md +++ b/packages/ragbits-document-search/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Configured source classes are auto-imported (#343) ## 0.10.0 (2025-03-17) ### Changed From c33a8b4c1f1ae23c33085e8a550b7d9d963f39a0 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Mon, 17 Mar 2025 15:29:59 +0100 Subject: [PATCH 6/6] commit --- docs/how-to/document_search/search_documents.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/how-to/document_search/search_documents.md b/docs/how-to/document_search/search_documents.md index 3c7d5de5f..00f8ae17e 100644 --- a/docs/how-to/document_search/search_documents.md +++ b/docs/how-to/document_search/search_documents.md @@ -64,6 +64,16 @@ class CustomSource(Source): pass ``` +To register protocol for your custom source class please update `pyproject.toml` within your project root with the following lines: + + +```toml +[tool.ragbits.core] +modules_to_import = {sources=["python.path.to.custom_source:CustomSource"]} +``` + +You can specify any number of custom source classes in that list - they would be imported and registered whenever you import `ragbits.document_search.documents.sources.base:SourceResolver` + ## Processing, embedding and storing Having the documents loaded we can proceed with the pipeline. The next step covers the processing, embedding and storing. Embedders and Vector Stores have their own sections in the documentation, here we will focus on the processing.