Skip to content

Pipeline state dump and load #352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 37 additions & 13 deletions src/neo4j_graphrag/experimental/pipeline/stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,23 @@ async def get_result_for_component(self, run_id: str, task_name: str) -> Any:
return await self.get(self.get_key(run_id, task_name))

@abc.abstractmethod
def dump(self) -> dict[str, Any]:
"""Dump the store state to a serializable dictionary.
def dump(self, run_id: str) -> dict[str, Any]:
"""Dump the store state for a specific run_id to a serializable dictionary.

Args:
run_id (str): The run_id to dump data for

Returns:
dict[str, Any]: A serializable dictionary containing the store state
dict[str, Any]: A serializable dictionary containing the store state for the run_id
"""
pass

@abc.abstractmethod
def load(self, state: dict[str, Any]) -> None:
"""Load the store state from a serializable dictionary.
def load(self, run_id: str, state: dict[str, Any]) -> None:
"""Load the store state for a specific run_id from a serializable dictionary.

Args:
run_id (str): The run_id to load data for
state (dict[str, Any]): A serializable dictionary containing the store state
"""
pass
Expand Down Expand Up @@ -134,18 +138,38 @@ def all(self) -> dict[str, Any]:
def empty(self) -> None:
self._data = {}

def dump(self) -> dict[str, Any]:
"""Dump the store state to a serializable dictionary.
def dump(self, run_id: str) -> dict[str, Any]:
"""Dump the store state for a specific run_id to a serializable dictionary.

Args:
run_id (str): The run_id to dump data for

Returns:
dict[str, Any]: A serializable dictionary containing the store state
dict[str, Any]: A serializable dictionary containing the store state for the run_id
"""
return self._data.copy()

def load(self, state: dict[str, Any]) -> None:
"""Load the store state from a serializable dictionary.
# filter data by run_id prefix
run_id_prefix = f"{run_id}:"
filtered_data = {
key: value
for key, value in self._data.items()
if key.startswith(run_id_prefix)
}
return filtered_data

def load(self, run_id: str, state: dict[str, Any]) -> None:
"""Load the store state for a specific run_id from a serializable dictionary.

Args:
run_id (str): The run_id to load data for
state (dict[str, Any]): A serializable dictionary containing the store state
"""
self._data = state.copy()
# clear existing data for this run_id first
run_id_prefix = f"{run_id}:"
keys_to_remove = [
key for key in self._data.keys() if key.startswith(run_id_prefix)
]
for key in keys_to_remove:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here we are removing all results from a previous run with this run_id, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! Any keys already stored under this run_id are deleted first, and then the new state is loaded.

del self._data[key]

# load the new state data
self._data.update(state)