From f98a447bba6ecae2e806de788f67fe9a2c61dd69 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Mon, 24 Feb 2025 07:37:06 +0100 Subject: [PATCH] improve tutorial --- .../src/sparql_llm/sparql_info_loader.py | 2 +- tutorial/app.py | 191 +++----- tutorial/chain.py | 149 ------ tutorial/graph.py | 255 ++++++++++ tutorial/index.py | 58 ++- tutorial/public/style.css | 8 +- tutorial/pyproject.toml | 4 +- tutorial/slides/index.html | 10 +- tutorial/slides/public/slides.md | 457 ++++++++++++------ tutorial/slides/public/sparql_workflow.png | Bin 0 -> 12748 bytes tutorial/uv.lock | 24 +- 11 files changed, 701 insertions(+), 457 deletions(-) delete mode 100644 tutorial/chain.py create mode 100644 tutorial/graph.py create mode 100644 tutorial/slides/public/sparql_workflow.png diff --git a/packages/sparql-llm/src/sparql_llm/sparql_info_loader.py b/packages/sparql-llm/src/sparql_llm/sparql_info_loader.py index 467ceb9..9c03a33 100644 --- a/packages/sparql-llm/src/sparql_llm/sparql_info_loader.py +++ b/packages/sparql-llm/src/sparql_llm/sparql_info_loader.py @@ -21,7 +21,7 @@ def load(self) -> list[Document]: """Load and return documents from the SPARQL endpoint.""" docs: list[Document] = [] - resources_summary_question = "Which resources are available through this system?" + resources_summary_question = "Which resources do you support?" metadata = { "question": resources_summary_question, "answer": f"This system helps to access the following SPARQL endpoints {self.org_label}:\n- " diff --git a/tutorial/app.py b/tutorial/app.py index 0dc8266..0155385 100644 --- a/tutorial/app.py +++ b/tutorial/app.py @@ -1,16 +1,7 @@ -from typing import Literal -from langchain_qdrant import QdrantVectorStore -from langchain_community.embeddings import FastEmbedEmbeddings -from langchain_core.documents import Document from langchain_core.language_models import BaseChatModel -from langchain_core.prompts import ChatPromptTemplate -from langgraph.graph import StateGraph -from langgraph.graph.message import MessagesState -from qdrant_client.models import FieldCondition, Filter, MatchValue +from qdrant_client.models import FieldCondition, Filter, MatchValue, ScoredPoint import chainlit as cl -# https://docs.chainlit.io/integrations/langchain - def load_chat_model(model: str) -> BaseChatModel: provider, model_name = model.split("/", maxsplit=1) @@ -20,6 +11,7 @@ def load_chat_model(model: str) -> BaseChatModel: return ChatGroq( model_name=model_name, temperature=0, + ) if provider == "openai": # https://python.langchain.com/docs/integrations/chat/openai/ @@ -38,49 +30,37 @@ def load_chat_model(model: str) -> BaseChatModel: raise ValueError(f"Unknown provider: {provider}") # llm = load_chat_model("groq/llama-3.3-70b-versatile") -llm = load_chat_model("openai/gpt-4o-mini") - +# llm = load_chat_model("openai/gpt-4o-mini") +llm = load_chat_model("ollama/mistral") -vectordb = QdrantVectorStore.from_existing_collection( - # path="data/qdrant", - host="localhost", - prefer_grpc=True, - collection_name="sparql-docs", - embedding=FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5"), -) -retriever = vectordb.as_retriever() - -class AgentState(MessagesState): - """State of the agent available inside each node.""" - relevant_docs: str - passed_validation: bool - try_count: int +from index import vectordb, embedding_model, collection_name retrieved_docs_count = 3 -async def retrieve_docs(state: AgentState) -> dict[str, str]: +async def retrieve_docs(question: str) -> str: """Retrieve documents relevant to the user's question.""" - 
last_msg = state["messages"][-1] - retriever = vectordb.as_retriever() - retrieved_docs = retriever.invoke( - last_msg.content, - k=retrieved_docs_count, - filter=Filter( + question_embeddings = next(iter(embedding_model.embed([question]))) + retrieved_docs = vectordb.search( + collection_name=collection_name, + query_vector=question_embeddings, + limit=retrieved_docs_count, + query_filter=Filter( must=[ FieldCondition( - key="metadata.doc_type", + key="doc_type", match=MatchValue(value="SPARQL endpoints query examples"), ) ] - ) + ), ) - retrieved_docs += retriever.invoke( - last_msg.content, - k=retrieved_docs_count, - filter=Filter( + retrieved_docs += vectordb.search( + collection_name=collection_name, + query_vector=question_embeddings, + limit=retrieved_docs_count, + query_filter=Filter( must_not=[ FieldCondition( - key="metadata.doc_type", + key="doc_type", match=MatchValue(value="SPARQL endpoints query examples"), ) ] @@ -89,18 +69,18 @@ async def retrieve_docs(state: AgentState) -> dict[str, str]: relevant_docs = f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_docs)}\n" async with cl.Step(name=f"{len(retrieved_docs)} relevant documents πŸ“šοΈ") as step: step.output = relevant_docs - return {"relevant_docs": relevant_docs} + return relevant_docs -def _format_doc(doc: Document) -> str: +def _format_doc(doc: ScoredPoint) -> str: """Format a question/answer document to be provided as context to the model.""" doc_lang = ( - "sparql" if "query" in doc.metadata.get("doc_type", "") - else "shex" if "schema" in doc.metadata.get("doc_type", "") + "sparql" if "query" in doc.payload.get("doc_type", "") + else "shex" if "schema" in doc.payload.get("doc_type", "") else "" ) - return f"\n{doc.page_content} ({doc.metadata.get('endpoint_url', '')}):\n\n```{doc_lang}\n{doc.metadata.get('answer')}\n```\n" - # # Default formatting + return f"\n{doc.payload['question']} ({doc.payload.get('endpoint_url', '')}):\n\n```{doc_lang}\n{doc.payload.get('answer')}\n```\n" + # # Generic formatting: # meta = "".join(f" {k}={v!r}" for k, v in doc.metadata.items()) # if meta: # meta = f" {meta}" @@ -115,21 +95,6 @@ def _format_doc(doc: Document) -> str: Here is a list of documents (reference questions and query answers, classes schema) relevant to the user question that will help you answer the user question accurately: {relevant_docs} """ -prompt_template = ChatPromptTemplate.from_messages([ - ("system", SYSTEM_PROMPT), - ("placeholder", "{messages}"), -]) - -def call_model(state: AgentState): - """Call the model with the retrieved documents as context.""" - prompt_with_context = prompt_template.invoke({ - "messages": state["messages"], - "relevant_docs": state['relevant_docs'], - }) - response = llm.invoke(prompt_with_context) - # # Fix id of response to use the same as the rest of the messages - # response.id = state["messages"][-1].id - return {"messages": [response]} import logging @@ -144,101 +109,73 @@ def call_model(state: AgentState): from sparql_llm import validate_sparql_in_msg -async def validate_output(state: AgentState) -> dict[str, bool | list[tuple[str, str]] | int]: - """Node to validate the output of a LLM call, e.g. SPARQL queries generated.""" - recall_messages = [] - validation_outputs = validate_sparql_in_msg(state["messages"][-1].content, prefixes_map, endpoints_void_dict) +async def validate_output(last_msg: str) -> str | None: + """Validate the output of a LLM call, e.g. 
SPARQL queries generated.""" + validation_outputs = validate_sparql_in_msg(last_msg, prefixes_map, endpoints_void_dict) for validation_output in validation_outputs: if validation_output["fixed_query"]: async with cl.Step(name="missing prefixes correction βœ…") as step: step.output = f"Missing prefixes added to the generated query:\n```sparql\n{validation_output['fixed_query']}\n```" if validation_output["errors"]: - # errors_str = "- " + "\n- ".join(validation_output["errors"]) recall_msg = f"""Fix the SPARQL query helping yourself with the error message and context from previous messages in a way that it is a fully valid query.\n ### Error messages:\n- {'\n- '.join(validation_output['errors'])}\n -### Erroneous SPARQL query\n```sparql\n{validation_output['original_query']}\n```""" - # print(error_str, flush=True) +### Erroneous SPARQL query\n```sparql\n{validation_output.get('fixed_query', validation_output['original_query'])}\n```""" async with cl.Step(name=f"SPARQL query validation, got {len(validation_output['errors'])} errors to fix 🐞") as step: step.output = recall_msg - # Add a new message to ask the model to fix the error - recall_messages.append(("human", recall_msg)) - return { - "messages": recall_messages, - "try_count": state.get("try_count", 0) + 1, - "passed_validation": not recall_messages, - } - - - - -max_try_fix_sparql = 3 -def route_model_output(state: AgentState) -> Literal["__end__", "call_model"]: - """Determine the next node based on the model's output.""" - if state["try_count"] > max_try_fix_sparql: - return "__end__" - - # # Check for tool calls first - # if isinstance(last_msg, AIMessage) and state["messages"][-1].tool_calls: - # return "tools" - - # If validation failed, we need to call the model again - if not state["passed_validation"]: - return "call_model" - return "__end__" + return recall_msg -# Define the LangGraph graph -builder = StateGraph(AgentState) - -builder.add_node(retrieve_docs) -builder.add_node(call_model) -builder.add_node(validate_output) - -builder.add_edge("__start__", "retrieve_docs") -builder.add_edge("retrieve_docs", "call_model") -builder.add_edge("call_model", "validate_output") -# Add a conditional edge to determine the next step after `validate_output` -builder.add_conditional_edges("validate_output", route_model_output) - -graph = builder.compile() +# Setup chainlit web UI +import chainlit as cl +max_try_count = 3 -# Setup chainlit web UI @cl.on_message async def on_message(msg: cl.Message): - # config = {"configurable": {"thread_id": cl.context.session.id}} - # cb = cl.LangchainCallbackHandler() - print(cl.chat_context.to_openai()) - answer = cl.Message(content="") - async for msg, metadata in graph.astream( - # {"messages": [HumanMessage(content=msg.content)]}, - # {"messages": [("human", msg.content)]}, - {"messages": cl.chat_context.to_openai()}, - stream_mode="messages", - # config=RunnableConfig(callbacks=[cb], **config), - ): - if not msg.response_metadata: - # and msg.content and not isinstance(msg, HumanMessage) and metadata["langgraph_node"] == "call_model" - # print(msg, metadata) - await answer.stream_token(msg.content) + """Main function to handle when user send a message to the assistant.""" + relevant_docs = await retrieve_docs(msg.content) + messages = [ + ("system", SYSTEM_PROMPT.format(relevant_docs=relevant_docs)), + *cl.chat_context.to_openai(), + ] + # # NOTE: to fix issue with ollama only considering the last message: + # messages = [ + # ("human", SYSTEM_PROMPT.format(relevant_docs=relevant_docs) + 
f"\n\nHere is the user question:\n{msg.content}"), + # ] + + for _i in range(max_try_count): + answer = cl.Message(content="") + for resp in llm.stream(messages): + await answer.stream_token(resp.content) + if resp.usage_metadata: + print(resp.usage_metadata) + await answer.send() + + validation_msg = await validate_output(answer.content) + if validation_msg is None: + break else: - await answer.send() - answer = cl.Message(content="") + messages.append(("human", validation_msg)) + @cl.set_starters async def set_starters(): return [ + cl.Starter( + label="Supported resources", + message="Which resources do you support?", + # icon="/public/idea.svg", + ), cl.Starter( label="Rat orthologs", message="What are the rat orthologs of human TP53?", - # icon="/public/idea.svg", ), cl.Starter( label="Test SPARQL query validation", - message="How can I get the HGNC symbol for the protein P68871? (modify your answer to use `rdfs:label` instead of `rdfs:comment`, and add the type `up:Resource` to ?hgnc, and purposefully forget 2 prefixes declarations, it is for a test)", + message="How can I get the HGNC symbol for the protein P68871? (modify your answer to use `rdfs:label` instead of `rdfs:comment`, and add the type `up:Resource` to ?hgnc, and forget all prefixes declarations, it is for a test)", ), ] diff --git a/tutorial/chain.py b/tutorial/chain.py deleted file mode 100644 index 027dbe0..0000000 --- a/tutorial/chain.py +++ /dev/null @@ -1,149 +0,0 @@ -from langchain_core.language_models import BaseChatModel - -# question = "What are the rat orthologs of human TP53?" - -def load_chat_model(model: str) -> BaseChatModel: - provider, model_name = model.split("/", maxsplit=1) - if provider == "groq": - # https://python.langchain.com/docs/integrations/chat/groq/ - from langchain_groq import ChatGroq - return ChatGroq(model_name=model_name, temperature=0) - if provider == "openai": - # https://python.langchain.com/docs/integrations/chat/openai/ - from langchain_openai import ChatOpenAI - return ChatOpenAI(model_name=model_name, temperature=0) - if provider == "ollama": - # https://python.langchain.com/docs/integrations/chat/ollama/ - from langchain_ollama import ChatOllama - return ChatOllama(model=model_name, temperature=0) - raise ValueError(f"Unknown provider: {provider}") - -llm = load_chat_model("groq/llama-3.3-70b-versatile") -# llm = load_chat_model("openai/gpt-4o-mini") -# llm = load_chat_model("ollama/mistral") - -from langchain_qdrant import QdrantVectorStore -from langchain_community.embeddings import FastEmbedEmbeddings - -vectordb = QdrantVectorStore.from_existing_collection( - host="localhost", - prefer_grpc=True, - # path="data/qdrant", - collection_name="sparql-docs", - embedding=FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5"), -) - -retriever = vectordb.as_retriever() -docs_retrieved_count = 5 - -# retrieved_docs = retriever.invoke(question, k=docs_retrieved_count) - -from qdrant_client.models import FieldCondition, Filter, MatchValue -from langchain_core.documents import Document - -def retrieve_docs(question: str) -> str: - retrieved_docs = retriever.invoke( - question, - k=docs_retrieved_count, - filter=Filter( - must=[ - FieldCondition( - key="metadata.doc_type", - match=MatchValue(value="SPARQL endpoints query examples"), - ) - ] - ) - ) - retrieved_docs += retriever.invoke( - question, - k=docs_retrieved_count, - filter=Filter( - must_not=[ - FieldCondition( - key="metadata.doc_type", - match=MatchValue(value="SPARQL endpoints query examples"), - ) - ] - ), - ) - return 
f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_docs)}\n" - -# relevant_docs = "\n".join(doc.page_content + "\n" + doc.metadata.get("answer") for doc in retrieved_docs) -# relevant_docs = retrieve_docs(question) - -# print(f"πŸ“šοΈ Retrieved {len(retrieved_docs)} documents") -# # print(retrieved_docs) -# for doc in retrieved_docs: -# print(f"{doc.metadata.get('doc_type')} - {doc.metadata.get('endpoint_url')} - {doc.page_content}") - - -from langchain_core.prompts import ChatPromptTemplate - -def _format_doc(doc: Document) -> str: - """Format a question/answer document to be provided as context to the model.""" - doc_lang = ( - "sparql" if "query" in doc.metadata.get("doc_type", "") - else "shex" if "schema" in doc.metadata.get("doc_type", "") - else "" - ) - return f"\n{doc.page_content} ({doc.metadata.get('endpoint_url')}):\n\n```{doc_lang}\n{doc.metadata.get('answer')}\n```\n" - - -SYSTEM_PROMPT = """You are an assistant that helps users to write SPARQL queries. -Put the SPARQL query inside a markdown codeblock with the "sparql" language tag, and always add the URL of the endpoint on which the query should be executed in a comment at the start of the query inside the codeblocks. -Use the queries examples and classes shapes provided in the prompt to derive your answer, don't try to create a query from nothing and do not provide a generic query. -Try to always answer with one query, if the answer lies in different endpoints, provide a federated query. -And briefly explain the query. -Here is a list of documents (reference questions and query answers, classes schema) relevant to the user question that will help you answer the user question accurately: -{relevant_docs} -""" -prompt_template = ChatPromptTemplate.from_messages([ - ("system", SYSTEM_PROMPT), - ("placeholder", "{messages}"), -]) - -# prompt_with_context = prompt_template.invoke({ -# "messages": [("human", question)], -# "relevant_docs": relevant_docs, -# }) - -# print(str("\n".join(prompt_with_context.messages))) - -# resp = llm.invoke("What are the rat orthologs of human TP53?") -# print(resp) - -# for msg in llm.stream(prompt_with_context): -# print(msg.content, end='') - - -import chainlit as cl - -@cl.on_message -async def on_message(msg: cl.Message): - relevant_docs = retrieve_docs(msg.content) - async with cl.Step(name="relevant documents") as step: - # step.input = msg.content - step.output = relevant_docs - - prompt_with_context = prompt_template.invoke({ - # "messages": [("human", msg.content)], - "messages": cl.chat_context.to_openai(), - "relevant_docs": relevant_docs, - }) - answer = cl.Message(content="") - for resp in llm.stream(prompt_with_context): - await answer.stream_token(resp.content) - await answer.send() - - -@cl.set_starters -async def set_starters(): - return [ - cl.Starter( - label="Rat orthologs", - message="What are the rat orthologs of human TP53?", - # icon="/public/idea.svg", - ), - ] - -# uv run chainlit run simple.py diff --git a/tutorial/graph.py b/tutorial/graph.py new file mode 100644 index 0000000..6cd1030 --- /dev/null +++ b/tutorial/graph.py @@ -0,0 +1,255 @@ +from typing import Literal +from langchain_qdrant import QdrantVectorStore +from langchain_community.embeddings import FastEmbedEmbeddings +from langchain_core.documents import Document +from langchain_core.language_models import BaseChatModel +from langgraph.graph import StateGraph +from langgraph.graph.message import MessagesState +from qdrant_client.models import FieldCondition, Filter, MatchValue +import chainlit as cl + + 
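+# Load the LangChain chat model matching the provider prefix of the model string (groq, openai or ollama)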
+def load_chat_model(model: str) -> BaseChatModel: + provider, model_name = model.split("/", maxsplit=1) + if provider == "groq": + # https://python.langchain.com/docs/integrations/chat/groq/ + from langchain_groq import ChatGroq + return ChatGroq( + model_name=model_name, + temperature=0, + ) + if provider == "openai": + # https://python.langchain.com/docs/integrations/chat/openai/ + from langchain_openai import ChatOpenAI + return ChatOpenAI( + model_name=model_name, + temperature=0, + ) + if provider == "ollama": + # https://python.langchain.com/docs/integrations/chat/ollama/ + from langchain_ollama import ChatOllama + return ChatOllama( + model=model_name, + temperature=0, + ) + raise ValueError(f"Unknown provider: {provider}") + +llm = load_chat_model("groq/llama-3.3-70b-versatile") +# llm = load_chat_model("openai/gpt-4o-mini") +# llm = load_chat_model("ollama/mistral") + + +vectordb = QdrantVectorStore.from_existing_collection( + # path="data/qdrant", + host="localhost", + prefer_grpc=True, + collection_name="sparql-docs", + embedding=FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5"), +) +retriever = vectordb.as_retriever() + + +class AgentState(MessagesState): + """State of the agent available inside each node.""" + relevant_docs: str + passed_validation: bool + try_count: int + +retrieved_docs_count = 3 +async def retrieve_docs(state: AgentState) -> dict[str, str]: + """Retrieve documents relevant to the user's question.""" + last_msg = state["messages"][-1] + retriever = vectordb.as_retriever() + retrieved_docs = retriever.invoke( + last_msg.content, + k=retrieved_docs_count, + filter=Filter( + must=[ + FieldCondition( + key="metadata.doc_type", + match=MatchValue(value="SPARQL endpoints query examples"), + ) + ] + ) + ) + retrieved_docs += retriever.invoke( + last_msg.content, + k=retrieved_docs_count, + filter=Filter( + must_not=[ + FieldCondition( + key="metadata.doc_type", + match=MatchValue(value="SPARQL endpoints query examples"), + ) + ] + ), + ) + relevant_docs = f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_docs)}\n" + async with cl.Step(name=f"{len(retrieved_docs)} relevant documents πŸ“šοΈ") as step: + step.output = relevant_docs + return {"relevant_docs": relevant_docs} + + +def _format_doc(doc: Document) -> str: + """Format a question/answer document to be provided as context to the model.""" + doc_lang = ( + "sparql" if "query" in doc.metadata.get("doc_type", "") + else "shex" if "schema" in doc.metadata.get("doc_type", "") + else "" + ) + return f"\n{doc.page_content} ({doc.metadata.get('endpoint_url', '')}):\n\n```{doc_lang}\n{doc.metadata.get('answer')}\n```\n" + # # Default formatting + # meta = "".join(f" {k}={v!r}" for k, v in doc.metadata.items()) + # if meta: + # meta = f" {meta}" + # return f"\n{doc.page_content}\n" + + +SYSTEM_PROMPT = """You are an assistant that helps users to write SPARQL queries. +Put the SPARQL query inside a markdown codeblock with the "sparql" language tag, and always add the URL of the endpoint on which the query should be executed in a comment at the start of the query inside the codeblocks. +Use the queries examples and classes shapes provided in the prompt to derive your answer, don't try to create a query from nothing and do not provide a generic query. +Try to always answer with one query, if the answer lies in different endpoints, provide a federated query. +And briefly explain the query. 
+Here is a list of documents (reference questions and query answers, classes schema) relevant to the user question that will help you answer the user question accurately: +{relevant_docs} +""" + +def call_model(state: AgentState): + """Call the model with the retrieved documents as context.""" + response = llm.invoke([ + ("system", SYSTEM_PROMPT.format(relevant_docs=state["relevant_docs"])), + *state["messages"], + ]) + # NOTE: to fix issue with ollama ignoring system messages + # state["messages"][-1].content = SYSTEM_PROMPT.replace("{relevant_docs}", state['relevant_docs']) + "\n\nHere is the user question:\n" + state["messages"][-1].content + # response = llm.invoke(state["messages"]) + + # # Fix id of response to use the same as the rest of the messages + # response.id = state["messages"][-1].id + return {"messages": [response]} + + +import logging +from sparql_llm.utils import get_prefixes_and_schema_for_endpoints +from index import endpoints + +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.info("Initializing endpoints metadata...") +# Retrieve the prefixes map and initialize VoID schema dictionary from the indexed endpoints +prefixes_map, endpoints_void_dict = get_prefixes_and_schema_for_endpoints(endpoints) + + +from sparql_llm import validate_sparql_in_msg +from langchain_core.messages import AIMessage + +async def validate_output(state: AgentState) -> dict[str, bool | list[tuple[str, str]] | int]: + """Node to validate the output of a LLM call, e.g. SPARQL queries generated.""" + recall_messages = [] + print(state["messages"]) + last_msg = next(msg.content for msg in reversed(state["messages"]) if isinstance(msg, AIMessage) and msg.content) + print(last_msg) + # last_msg = state["messages"][-1].content + validation_outputs = validate_sparql_in_msg(last_msg, prefixes_map, endpoints_void_dict) + for validation_output in validation_outputs: + if validation_output["fixed_query"]: + async with cl.Step(name="missing prefixes correction βœ…") as step: + step.output = f"Missing prefixes added to the generated query:\n```sparql\n{validation_output['fixed_query']}\n```" + if validation_output["errors"]: + # errors_str = "- " + "\n- ".join(validation_output["errors"]) + recall_msg = f"""Fix the SPARQL query helping yourself with the error message and context from previous messages in a way that it is a fully valid query.\n +### Error messages:\n- {'\n- '.join(validation_output['errors'])}\n +### Erroneous SPARQL query\n```sparql\n{validation_output.get('fixed_query', validation_output['original_query'])}\n```""" + # print(error_str, flush=True) + async with cl.Step(name=f"SPARQL query validation, got {len(validation_output['errors'])} errors to fix 🐞") as step: + step.output = recall_msg + # Add a new message to ask the model to fix the error + recall_messages.append(("human", recall_msg)) + return { + "messages": recall_messages, + "try_count": state.get("try_count", 0) + 1, + "passed_validation": not recall_messages, + } + + + + +max_try_count = 3 +def route_model_output(state: AgentState) -> Literal["__end__", "call_model"]: + """Determine the next node based on the model's output.""" + if state["try_count"] > max_try_count: + return "__end__" + + # # Check for tool calls first + # if isinstance(last_msg, AIMessage) and state["messages"][-1].tool_calls: + # return "tools" + + # If validation failed, we need to call the model again + if not state["passed_validation"]: + return "call_model" + return "__end__" + + + + +# Define the LangGraph graph +builder = 
StateGraph(AgentState) + +builder.add_node(retrieve_docs) +builder.add_node(call_model) +builder.add_node(validate_output) + +builder.add_edge("__start__", "retrieve_docs") +builder.add_edge("retrieve_docs", "call_model") +builder.add_edge("call_model", "validate_output") +# Add a conditional edge to determine the next step after `validate_output` +builder.add_conditional_edges("validate_output", route_model_output) + +graph = builder.compile() + + +# from langchain_core.runnables.graph import MermaidDrawMethod +# with open('data/sparql_workflow.png', 'wb') as f: +# f.write(graph.get_graph().draw_mermaid_png(draw_method=MermaidDrawMethod.API)) + + +# Setup chainlit web UI +# https://docs.chainlit.io/integrations/langchain + +@cl.on_message +async def on_message(msg: cl.Message): + # config = {"configurable": {"thread_id": cl.context.session.id}} + # cb = cl.LangchainCallbackHandler() + answer = cl.Message(content="") + async for msg, metadata in graph.astream( + {"messages": cl.chat_context.to_openai()}, + stream_mode="messages", + # config=RunnableConfig(callbacks=[cb], **config), + ): + if not msg.response_metadata: + await answer.stream_token(msg.content) + else: + await answer.send() + print(msg.usage_metadata) + # print(metadata) + answer = cl.Message(content="") + + +@cl.set_starters +async def set_starters(): + return [ + cl.Starter( + label="Supported resources", + message="Which resources do you support?", + # icon="/public/idea.svg", + ), + cl.Starter( + label="Rat orthologs", + message="What are the rat orthologs of human TP53?", + ), + cl.Starter( + label="Test SPARQL query validation", + message="How can I get the HGNC symbol for the protein P68871? (modify your answer to use `rdfs:label` instead of `rdfs:comment`, and add the type `up:Resource` to ?hgnc, and forget all prefixes declarations, it is for a test)", + ), + ] + +# uv run chainlit run graph.py diff --git a/tutorial/index.py b/tutorial/index.py index 36578e0..ef67fd5 100644 --- a/tutorial/index.py +++ b/tutorial/index.py @@ -1,5 +1,3 @@ -from langchain_qdrant import QdrantVectorStore -from langchain_community.embeddings import FastEmbedEmbeddings from langchain_core.documents import Document from sparql_llm import SparqlExamplesLoader, SparqlVoidShapesLoader, SparqlInfoLoader @@ -24,6 +22,18 @@ ] +from fastembed import TextEmbedding +from qdrant_client import QdrantClient +from qdrant_client.http.models import Distance, VectorParams + +embedding_model = TextEmbedding( + "BAAI/bge-small-en-v1.5", + # providers=["CUDAExecutionProvider"], # Replace the fastembed dependency with fastembed-gpu to use your GPUs +) +embedding_dimensions = 384 +vectordb = QdrantClient(host="localhost", prefer_grpc=True) +collection_name = "sparql-docs" + def index_endpoints(): # Get documents from the SPARQL endpoints docs: list[Document] = [] @@ -41,20 +51,38 @@ def index_endpoints(): ).load() docs += SparqlInfoLoader(endpoints, source_iri="https://www.expasy.org/").load() - # os.makedirs('data', exist_ok=True) - - QdrantVectorStore.from_documents( - docs, - # path="data/qdrant", - host="localhost", - prefer_grpc=True, - collection_name="sparql-docs", - force_recreate=True, - embedding=FastEmbedEmbeddings( - model_name="BAAI/bge-small-en-v1.5", - # providers=["CUDAExecutionProvider"], # Uncomment this line to use your GPUs - ), + + + if vectordb.collection_exists(collection_name): + vectordb.delete_collection(collection_name) + vectordb.create_collection( + collection_name=collection_name, + 
vectors_config=VectorParams(size=embedding_dimensions, distance=Distance.COSINE), ) + embeddings = embedding_model.embed([q.page_content for q in docs]) + vectordb.upload_collection( + collection_name=collection_name, + vectors=[embed.tolist() for embed in embeddings], + payload=[doc.metadata for doc in docs], + ) + + # # Using LangChain VectorStore object + # from langchain_qdrant import QdrantVectorStore + # from langchain_community.embeddings import FastEmbedEmbeddings + # QdrantVectorStore.from_documents( + # docs, + # host="localhost", + # # location=":memory:", + # # path="data/qdrant", + # prefer_grpc=True, + # collection_name="sparql-docs", + # force_recreate=True, + # embedding=FastEmbedEmbeddings( + # model_name="BAAI/bge-small-en-v1.5", + # # providers=["CUDAExecutionProvider"], # Uncomment this line to use your GPUs + # ), + # ) + if __name__ == "__main__": index_endpoints() diff --git a/tutorial/public/style.css b/tutorial/public/style.css index 12297bf..becf3c5 100644 --- a/tutorial/public/style.css +++ b/tutorial/public/style.css @@ -1,2 +1,6 @@ -pre { padding: .5em; } -a.watermark { display: none !important; } +pre { + padding: .5em; +} +a.watermark { + display: none !important; +} diff --git a/tutorial/pyproject.toml b/tutorial/pyproject.toml index 9b51913..c3a7684 100644 --- a/tutorial/pyproject.toml +++ b/tutorial/pyproject.toml @@ -8,14 +8,14 @@ dependencies = [ # "sparql-llm @ git+https://github.com/sib-swiss/sparql-llm.git#subdirectory=packages/sparql-llm", # "sparql-llm @ file:///home/vemonet/dev/expasy/sparql-llm/packages/sparql-llm", "langchain >=0.3.19", - "langchain-community >=0.3.17", "langchain-openai >=0.3.6", "langchain-groq >=0.2.4", "langchain-ollama >=0.2.3", - "langchain-qdrant >=0.2.0", "qdrant-client >=1.13.0", "fastembed >=0.5.1", # "fastembed-gpu >=0.5.1", # Optional GPU support "chainlit", "langgraph >=0.2.73", + "langchain-qdrant >=0.2.0", + "langchain-community >=0.3.17", ] diff --git a/tutorial/slides/index.html b/tutorial/slides/index.html index 4d04e08..9ef81c5 100644 --- a/tutorial/slides/index.html +++ b/tutorial/slides/index.html @@ -12,7 +12,7 @@
-
+
@@ -32,13 +32,13 @@ hash: true, history: true, hashOneBasedIndex: true, - // slideNumber: true, + slideNumber: true, clipcode: { // https://www.npmjs.com/package/@edc4it/reveal.js-clipcode style: { copybg: 'silver', scale: 0.8, - radius: 1, + radius: 0.5, }, }, }); @@ -51,7 +51,9 @@ font-size: 0.6em; } .reveal section h2 { - font-size: 1em; + font-family: Lato, sans-serif; + font-size: .9em; + text-transform: none; } .reveal section pre code { border-radius: 8px; diff --git a/tutorial/slides/public/slides.md b/tutorial/slides/public/slides.md index 25d3532..8c3e396 100644 --- a/tutorial/slides/public/slides.md +++ b/tutorial/slides/public/slides.md @@ -12,14 +12,16 @@ As we progress, you'll be provided with code snippets to gradually construct the 2. Index documents 3. Use indexed documents as context 4. Add a web UI -5. Define a more complex agent workflow -6. Add SPARQL query validation +5. Add SPARQL query validation +6. Optional: use an agent framework --- ## Setup -[Install `uv`](https://docs.astral.sh/uv/getting-started/installation/) to easily handle dependencies and run scripts +[Install `uv`](https://docs.astral.sh/uv/getting-started/installation/) to easily handle dependencies and run scripts. + +If you use VSCode we recommend to have the [`Python` extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) installed. Create a new folder, you will be using this same folder along the tutorial. @@ -46,21 +48,18 @@ requires-python = "==3.12.*" dependencies = [ "sparql-llm >=0.0.8", "langchain >=0.3.19", - "langchain-community >=0.3.17", "langchain-openai >=0.3.6", "langchain-groq >=0.2.4", "langchain-ollama >=0.2.3", - "langchain-qdrant >=0.2.0", "qdrant-client >=1.13.0", "fastembed >=0.5.1", "chainlit >=2.2.1", - "langgraph >=0.2.73", ] ``` --- -## Call a LLM +## Programmatically query a LLM Create a `app.py` file in the same folder @@ -116,6 +115,8 @@ llm = load_chat_model("groq/llama-3.3-70b-versatile") # llm = load_chat_model("openai/gpt-4o-mini") ``` +> Alternatively you could replace LangChain by [LiteLLM](https://docs.litellm.ai/docs/) here + --- ## Use a local LLM @@ -139,6 +140,10 @@ Add the new provider: llm = load_chat_model("ollama/mistral") ``` +> Ollama is mainly a wrapper around [llama.cpp](https://python.langchain.com/docs/integrations/chat/llamacpp/), you can also [download `.gguf` files](https://huggingface.co/lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF) and use them directly. + +> [vLLM](https://github.com/vllm-project/vllm) and [llamafile](https://github.com/Mozilla-Ocho/llamafile) are other solutions to serve LLMs locally. + --- ## Setup vector store @@ -149,7 +154,7 @@ Deploy a **[Qdrant](https://qdrant.tech/documentation/)** vector store using [do docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/data/qdrant:/qdrant/storage qdrant/qdrant ``` -Or create a `compose.yml` file and start with `docker compose up` +Or create a `compose.yml` file and start with `docker compose up -d` ```yml services: @@ -211,7 +216,6 @@ def index_endpoints(): endpoint["endpoint_url"], examples_file=endpoint.get("examples_file"), ).load() - docs += SparqlVoidShapesLoader( endpoint["endpoint_url"], void_file=endpoint.get("void_file"), @@ -240,25 +244,65 @@ Finally we can load these documents in the **[Qdrant](https://qdrant.tech/docume We use **[FastEmbed](https://qdrant.github.io/fastembed/)** to generate embeddings locally with [open source embedding models](https://qdrant.github.io/fastembed/examples/Supported_Models/#supported-text-embedding-models). 
```python -from langchain_qdrant import QdrantVectorStore -from langchain_community.embeddings import FastEmbedEmbeddings +from fastembed import TextEmbedding +from qdrant_client import QdrantClient +from qdrant_client.http.models import Distance, VectorParams -vectordb = QdrantVectorStore.from_documents( - docs, +embedding_model = TextEmbedding( + "BAAI/bge-small-en-v1.5", + # providers=["CUDAExecutionProvider"], # Replace the fastembed dependency with fastembed-gpu to use your GPUs +) +embedding_dimensions = 384 +collection_name = "sparql-docs" +vectordb = QdrantClient( host="localhost", prefer_grpc=True, - # path="data/qdrant", # if not using Qdrant as a service - collection_name="sparql-docs", - embedding=FastEmbedEmbeddings( - model_name="BAAI/bge-small-en-v1.5", - # providers=["CUDAExecutionProvider"], # Replace the fastembed dependency with fastembed-gpu to use your GPUs - ), - force_recreate=True, + # location=":memory:", # if not using Qdrant as a service ) +def index_endpoints(): + # [...] + if vectordb.collection_exists(collection_name): + vectordb.delete_collection(collection_name) + vectordb.create_collection( + collection_name=collection_name, + vectors_config=VectorParams(size=embedding_dimensions, distance=Distance.COSINE), + ) + embeddings = embedding_model.embed([q.page_content for q in docs]) + vectordb.upload_collection( + collection_name=collection_name, + vectors=[embed.tolist() for embed in embeddings], + payload=[doc.metadata for doc in docs], + ) ``` > Checkout indexed docs at http://localhost:6333/dashboard +---- + +Alternatively you could use a LangChain retriever instead of the Qdrant client directly + +```python +from langchain_qdrant import QdrantVectorStore +from langchain_community.embeddings import FastEmbedEmbeddings + +def index_endpoints(): + # [...] + QdrantVectorStore.from_documents( + docs, + host="localhost", + prefer_grpc=True, + # location=":memory:", # if not using Qdrant as a service + collection_name="sparql-docs", + embedding=FastEmbedEmbeddings( + model_name="BAAI/bge-small-en-v1.5", + # providers=["CUDAExecutionProvider"], # Replace the fastembed dependency with fastembed-gpu to use your GPUs + ), + force_recreate=True, + ) +``` + +> You will need to add the following dependencies to your `pyproject.toml`: `langchain-qdrant` and `langchain-community` + --- ## Provide context to the LLM @@ -267,6 +311,25 @@ Now we can go back to our `app.py` file. 
And retrieve documents related to the user question using the vector store +```python +from index import vectordb, embedding_model, collection_name + +question_embeddings = next(iter(embedding_model.embed([question]))) + +retrieved_docs_count = 3 +retrieved_docs = vectordb.search( + collection_name=collection_name, + query_vector=question_embeddings, + limit=retrieved_docs_count, +) +relevant_docs = "\n".join(doc.payload["question"] + "\n" + doc.payload["answer"] for doc in retrieved_docs) +print(f"πŸ“šοΈ Retrieved {len(retrieved_docs)} documents", retrieved_docs[0]) +``` + +---- + +If you are using LangChain retriever + ```python from langchain_qdrant import QdrantVectorStore from langchain_community.embeddings import FastEmbedEmbeddings @@ -282,11 +345,11 @@ retriever = vectordb.as_retriever() retrieved_docs_count = 3 retrieved_docs = retriever.invoke(question, k=retrieved_docs_count) relevant_docs = "\n".join(doc.page_content + "\n" + doc.metadata.get("answer") for doc in retrieved_docs) - -print(f"πŸ“šοΈ Retrieved {len(retrieved_docs)} documents") -print(retrieved_docs[0]) +print(f"πŸ“šοΈ Retrieved {len(retrieved_docs)} documents", retrieved_docs[0]) ``` +> LangChain retriever returns a list of `Document` instead of `ScoredPoint`, access the fields using `metadata` instead of `payload` + --- ## Provide context to the LLM @@ -294,43 +357,38 @@ print(retrieved_docs[0]) Customize the system prompt to provide the retrieved documents ```python -from langchain_core.prompts import ChatPromptTemplate - SYSTEM_PROMPT = """You are an assistant that helps users to write SPARQL queries. Put the SPARQL query inside a markdown codeblock with the "sparql" language tag, and always add the URL of the endpoint on which the query should be executed in a comment at the start of the query inside the codeblocks. Use the queries examples and classes shapes provided in the prompt to derive your answer, don't try to create a query from nothing and do not provide a generic query. Try to always answer with one query, if the answer lies in different endpoints, provide a federated query. And briefly explain the query. 
Here is a list of documents (reference questions and query answers, classes schema) relevant to the user question that will help you answer the user question accurately: -{relevant_docs} -""" -prompt_template = ChatPromptTemplate.from_messages([ - ("system", SYSTEM_PROMPT), - ("placeholder", "{messages}"), -]) -prompt_with_context = prompt_template.invoke({ - "messages": [("human", question)], - "relevant_docs": relevant_docs, -}) +{relevant_docs}""" +messages = [ + ("system", SYSTEM_PROMPT.format(relevant_docs=relevant_docs)), + ("human", question), +] ``` +> Try now to pass `messages` to `llm.stream()` + --- ## Provide context to the LLM -We can improve how the documents are formatted when passed to the LLM: +We can improve how the documents are formatted when passed to the LLM ```python -from langchain_core.documents import Document +from qdrant_client.models import ScoredPoint -def _format_doc(doc: Document) -> str: - """Format our question/answer document to be provided as context to the model.""" +def _format_doc(doc: ScoredPoint) -> str: + """Format a question/answer document to be provided as context to the model.""" doc_lang = ( - "sparql" if "query" in doc.metadata.get("doc_type", "") - else "shex" if "schema" in doc.metadata.get("doc_type", "") + "sparql" if "query" in doc.payload.get("doc_type", "") + else "shex" if "schema" in doc.payload.get("doc_type", "") else "" ) - return f"\n{doc.page_content} ({doc.metadata.get('endpoint_url', '')}):\n\n```{doc_lang}\n{doc.metadata.get('answer')}\n```\n" + return f"\n{doc.payload['question']} ({doc.payload.get('endpoint_url', '')}):\n\n```{doc_lang}\n{doc.payload.get('answer')}\n```\n" relevant_docs = f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_docs)}\n" ``` @@ -339,7 +397,39 @@ relevant_docs = f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_ ## Provide context to the LLM -We can retrieve documents related to query examples and classes shapes separately, to make sure we always get a number of examples and classes shapes. 
+We can retrieve documents related to query examples and classes shapes separately, to make sure we always get a number of examples and classes shapes + +```python +from qdrant_client.models import FieldCondition, Filter, MatchValue + +def retrieve_docs(question: str) -> str: + question_embeddings = next(iter(embedding_model.embed([question]))) + retrieved_docs = vectordb.search( + collection_name=collection_name, + query_vector=question_embeddings, + limit=retrieved_docs_count, + query_filter=Filter(must=[FieldCondition( + key="doc_type", + match=MatchValue(value="SPARQL endpoints query examples"), + )]), + ) + retrieved_docs += vectordb.search( + collection_name=collection_name, + query_vector=question_embeddings, + limit=retrieved_docs_count, + query_filter=Filter(must_not=[FieldCondition( + key="doc_type", + match=MatchValue(value="SPARQL endpoints query examples"), + )]), + ) + return f"\n{'\n'.join(_format_doc(doc) for doc in retrieved_docs)}\n" + +relevant_docs = retrieve_docs(question) +``` + +---- + +If using LangChain retriever: ```python from qdrant_client.models import FieldCondition, Filter, MatchValue @@ -377,16 +467,19 @@ import chainlit as cl @cl.on_message async def on_message(msg: cl.Message): + """Main function to handle when user send a message to the assistant.""" relevant_docs = retrieve_docs(msg.content) async with cl.Step(name="relevant documents πŸ“šοΈ") as step: step.output = relevant_docs - prompt_with_context = prompt_template.invoke({ - "messages": cl.chat_context.to_openai(), - "relevant_docs": relevant_docs, - }) + messages = [ + ("system", SYSTEM_PROMPT.format(relevant_docs=relevant_docs)), + *cl.chat_context.to_openai(), + ] answer = cl.Message(content="") - for resp in llm.stream(prompt_with_context): + for resp in llm.stream(messages): await answer.stream_token(resp.content) + if resp.usage_metadata: + print(resp.usage_metadata) await answer.send() ``` @@ -413,7 +506,7 @@ async def set_starters(): ] ``` -[Customize the UI](https://docs.chainlit.io/customisation/overview): +And [customize the UI](https://docs.chainlit.io/customisation/overview) - Change general settings in `.chainlit/config.toml` - e.g. set `custom_css= "/public/style.css"` containing: `pre { padding: .5em; } a.watermark { display: none !important; }` @@ -423,151 +516,183 @@ async def set_starters(): --- -## Define an agent workflow +## Deploy with a nice web UI -Create more complex agent workflow agent that can loop over themselves using [LangGraph](https://langchain-ai.github.io/langgraph/#): +You can also change `retrieve_docs()` to make it `async`, and directly define the chainlit step in the retrieval function -- To validate a generated query -- To use tools +```python +async def retrieve_docs(question: str) -> str: + # [...] + async with cl.Step(name=f"{len(retrieved_docs)} relevant documents πŸ“šοΈ") as step: + step.output = relevant_docs + return relevant_docs + +@cl.on_message +async def on_message(msg: cl.Message): + relevant_docs = await retrieve_docs(msg.content) + # [...] +``` --- -## Define an agent workflow +## Add SPARQL query validation -Define the state and update the retrieve function +
+
-```python -from langgraph.graph.message import MessagesState +Why do we add validation of the query generated: -class AgentState(MessagesState): - """State of the agent available inside each node.""" - relevant_docs: str +🧠 fix missing prefixes +πŸ„ detect use of a wrong predicate with a class -async def retrieve_docs(state: AgentState) -> dict[str, str]: - question = state["messages"][-1].content - # [...] - async with cl.Step(name=f"{len(retrieved_docs)} relevant documents πŸ“šοΈ") as step: - step.output = relevant_docs - # This will update relevant_docs in the state: - return {"relevant_docs": relevant_docs} -``` +
+
+ SPARQL agent workflow +
+
--- -## Define an agent workflow +## Add SPARQL query validation -Define the node to call the LLM +Initialize the prefixes map and VoID classes schema that will be used by validation ```python -def call_model(state: AgentState): - """Call the model with the retrieved documents as context.""" - prompt_with_context = prompt_template.invoke({ - "messages": state["messages"], - "relevant_docs": state['relevant_docs'], - }) - response = llm.invoke(prompt_with_context) - return {"messages": [response]} +import logging +from sparql_llm.utils import get_prefixes_and_schema_for_endpoints +from index import endpoints + +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.info("Initializing endpoints metadata...") +prefixes_map, endpoints_void_dict = get_prefixes_and_schema_for_endpoints(endpoints) ``` --- -## Define an agent workflow +## Add SPARQL query validation -Define the workflow "graph" +Create the validation function ```python -from langgraph.graph import StateGraph - -builder = StateGraph(AgentState) - -builder.add_node(retrieve_docs) -builder.add_node(call_model) - -builder.add_edge("__start__", "retrieve_docs") -builder.add_edge("retrieve_docs", "call_model") -builder.add_edge("call_model", "__end__") +from sparql_llm import validate_sparql_in_msg +from langchain_core.messages import AIMessage -graph = builder.compile() +async def validate_output(last_msg: str) -> str | None: + """Validate the output of a LLM call, e.g. SPARQL queries generated.""" + validation_outputs = validate_sparql_in_msg(last_msg, prefixes_map, endpoints_void_dict) + for validation_output in validation_outputs: + # Add step when missing prefixes have been fixed + if validation_output["fixed_query"]: + async with cl.Step(name="missing prefixes correction βœ…") as step: + step.output = f"Missing prefixes added to the generated query:\n```sparql\n{validation_output['fixed_query']}\n```" + # Create a new message to ask the model to fix the errors + if validation_output["errors"]: + recall_msg = f"""Fix the SPARQL query helping yourself with the error message and context from previous messages in a way that it is a fully valid query.\n +### Error messages:\n- {'\n- '.join(validation_output['errors'])}\n +### Erroneous SPARQL query\n```sparql\n{validation_output.get('fixed_query', validation_output['original_query'])}\n```""" + async with cl.Step(name=f"SPARQL query validation, got {len(validation_output['errors'])} errors to fix 🐞") as step: + step.output = recall_msg + return recall_msg ``` --- -## Define an agent workflow +## Add SPARQL query validation -Update the UI +Update the main `on_message` function running the chat to add a loop that makes sure the validation passes, if not we recall the LLM asking to fix the wrong query ```python +max_try_count = 3 + @cl.on_message async def on_message(msg: cl.Message): - answer = cl.Message(content="") - async for msg, metadata in graph.astream( - {"messages": cl.chat_context.to_openai()}, - stream_mode="messages", - ): - if not msg.response_metadata: - await answer.stream_token(msg.content) + # [...] 
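+    # Retry loop: generate an answer, validate the SPARQL it contains, and re-ask the LLM with the validation errors until it passes or max_try_count is reached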
+ for _i in range(max_try_count): + answer = cl.Message(content="") + for resp in llm.stream(messages): + await answer.stream_token(resp.content) + await answer.send() + validation_msg = await validate_output(answer.content) + if validation_msg is None: + break else: - await answer.send() - answer = cl.Message(content="") + messages.append(("human", validation_msg)) ``` > Try running your agent again now --- -## Add SPARQL query validation +## Use an agent framework + +Optionally you can move to fully use an "agent framework" like [LangGraph](https://langchain-ai.github.io/langgraph/#): + +βœ… Give access to some nice features + +- switch between streaming and complete response +- parallel execution of nodes +- generate a visual diagram for your workflow + +βœ… Provide structure to build your workflow -Add fields to the state related to validation +⚠️ Can be slower at runtime than doing things yourself + +⚠️ Relies on more dependencies increasing the overall complexity of the system, some people might find it more confusing than just using good old loops + +--- + +## Use an agent framework + +Add the `langgraph` dependency to your `pyproject.toml` + +Define the state and update the retrieve function ```python +from langgraph.graph.message import MessagesState + class AgentState(MessagesState): - # [...] + """State of the agent available inside each node.""" + relevant_docs: str passed_validation: bool try_count: int + + +async def retrieve_docs(state: AgentState) -> dict[str, str]: + question = state["messages"][-1].content + # [...] + # This will update relevant_docs in the state: + return {"relevant_docs": relevant_docs} ``` --- -## Add SPARQL query validation +## Use an agent framework -Initialize the prefixes map and VoID classes schema that will be used by validation +Define the node to call the LLM ```python -import logging -from sparql_llm.utils import get_prefixes_and_schema_for_endpoints -from index import endpoints - -logging.getLogger("httpx").setLevel(logging.WARNING) -logging.info("Initializing endpoints metadata...") -prefixes_map, endpoints_void_dict = get_prefixes_and_schema_for_endpoints(endpoints) +def call_model(state: AgentState): + """Call the model with the retrieved documents as context.""" + response = llm.invoke([ + ("system", SYSTEM_PROMPT.format(relevant_docs=state["relevant_docs"])), + *state["messages"], + ]) + return {"messages": [response]} ``` --- -## Add SPARQL query validation +## Use an agent framework -Create the validation node +Update the function that does validation ```python -from sparql_llm import validate_sparql_in_msg - -async def validate_output(state: AgentState) -> dict[str, bool | list[tuple[str, str]] | int]: - """Node to validate the output of a LLM call, e.g. 
SPARQL queries generated.""" - recall_messages = [] - validation_outputs = validate_sparql_in_msg(state["messages"][-1].content, prefixes_map, endpoints_void_dict) - for validation_output in validation_outputs: - # Handle when missing prefixes have been fixed - if validation_output["fixed_query"]: - async with cl.Step(name="missing prefixes correction βœ…") as step: - step.output = f"Missing prefixes added to the generated query:\n```sparql\n{validation_output['fixed_query']}\n```" - # Add a new message to ask the model to fix the errors - if validation_output["errors"]: - recall_msg = f"""Fix the SPARQL query helping yourself with the error message and context from previous messages in a way that it is a fully valid query.\n -### Error messages:\n- {'\n- '.join(validation_output['errors'])}\n -### Erroneous SPARQL query\n```sparql\n{validation_output['original_query']}\n```""" - async with cl.Step(name=f"SPARQL query validation, got {len(validation_output['errors'])} errors to fix 🐞") as step: - step.output = recall_msg +async def validate_output(state) -> dict[str, bool | list[tuple[str, str]] | int]: + recall_messages = [] + last_msg = next(msg.content for msg in reversed(state["messages"]) if msg.content) + # [...] + # Add a new message to ask the model to fix the error recall_messages.append(("human", recall_msg)) return { "messages": recall_messages, @@ -576,19 +701,21 @@ async def validate_output(state: AgentState) -> dict[str, bool | list[tuple[str, } ``` + + --- -## Add SPARQL query validation +## Use an agent framework Create a conditional edge to route the workflow based on validation results ```python from typing import Literal -max_try_fix_sparql = 3 +max_try_count = 3 def route_model_output(state: AgentState) -> Literal["call_model", "__end__"]: """Determine the next node based on the model's output.""" - if state["try_count"] > max_try_fix_sparql: + if state["try_count"] > max_try_count: return "__end__" if not state["passed_validation"]: return "call_model" @@ -597,15 +724,67 @@ def route_model_output(state: AgentState) -> Literal["call_model", "__end__"]: --- -## Add SPARQL query validation +## Use an agent framework -Add this new edge to the workflow graph +Define the workflow "graph" ```python +from langgraph.graph import StateGraph + +builder = StateGraph(AgentState) + +builder.add_node(retrieve_docs) +builder.add_node(call_model) builder.add_node(validate_output) +builder.add_edge("__start__", "retrieve_docs") +builder.add_edge("retrieve_docs", "call_model") builder.add_edge("call_model", "validate_output") builder.add_conditional_edges("validate_output", route_model_output) + +graph = builder.compile() +``` + +--- + +## Use an agent framework + +Update the UI + +```python +@cl.on_message +async def on_message(msg: cl.Message): + answer = cl.Message(content="") + async for msg, metadata in graph.astream( + {"messages": cl.chat_context.to_openai()}, + stream_mode="messages", + ): + if not msg.response_metadata: + await answer.stream_token(msg.content) + else: + print(msg.usage_metadata) + await answer.send() + answer = cl.Message(content="") ``` > Try running your agent again now + +--- + +## Thank you + +[Complete script on GitHub](https://github.com/sib-swiss/sparql-llm/blob/main/tutorial/app.py) + +  + +Live deployment for SIB endpoints (UniProt, Bgee, OMA, Rhea…) + +[**chat.expasy.org**](https://chat.expasy.org) + +  + +Code: [**github.com/sib-swiss/sparql-llm**](https://github.com/sib-swiss/sparql-llm) + +Short paper: 
[arxiv.org/abs/2410.06062](https://arxiv.org/abs/2410.06062) + +Standalone components available as a pip package: [pypi.org/project/sparql-llm](https://pypi.org/project/sparql-llm) diff --git a/tutorial/slides/public/sparql_workflow.png b/tutorial/slides/public/sparql_workflow.png new file mode 100644 index 0000000000000000000000000000000000000000..51b3955e8da10c3c3fb98a2d6dddb5021a10ec59 GIT binary patch literal 12748 zcmaibby!^AvgN^DgS!Q{;BLVZoCJ4wcc&o)2mwMGcPE74?iL^fcXxMp0&{YI_kDBU zyqRy#AKks-boJiVwQH}nsyb3tSq=l01Qh@Pj5qSq>fm!W0Kilr!-B7J(C;+BCm1(% zIVqrQm~0OKsDL-plJC9Kk21U@-f6A$sd)avMETGPZyo^~@+nr&h00MTN?YcrY+6?@ z;76U9n975;aAsXuo$lm!jS>wQ!rX7jjM>?_5uecKXhRP(Zeni>-@}E_-E_6yTxhu- zH|{*Tw%=uVjC7OCvb(T z7tYuzktEd{|QV-o^K8kGz?xmh*n{`Uf6c-rPTB=<{KB@=+cz@?WQz zmBWU2-+df+oA{t=iJH;j8rhaBE#^#zR7K`Tjbqkm5vHE72pZ**MJ zb8OlbRJ^?SWedXLZyB63`Y{rig$tg^utm?BTs0)<;8I%vK=+8{T6R!rQLC*r3LevP zQbco}7Yz*?Wl=xRq~eH+qS%f^r6=vrPph4wgzWnA^w6PK3UA-~ z`}?o2uS?U6#nH;)q4wOY1Ua?QB@K8TFE%6;%(!iGbIX z$8aa;-0q*AnSldvad8nb$%qIEtuWD1P^fVu{l&yZL>6(~{@h;)kPSbk$kP?(=UelT zpoW>Uf8n>E0zI)S3 z;DbYF2!dJfXF-qs_%Z96`0@A5Q@QLs*bxH14EKTKr@qI>$L~!PfgWEvJdoX}zLVe~ndCU%#BCLQI@W~98EXk8 z$uWz4CToh~_0`o(E@0d&vDTj)2Pwa;{E%{Oi{n<<0ZX!?=}t(AA1Om;u8`A z&ifhe?(W2&A1@+sXk*9($JMGvmK^ocq7mN>(H3}dN;^$ur;WMX#acc)BkI3{`eJDm zeA-9)H0G9`(~sFS7nswnofKO?Xtb2m0fwJ=;Nxx7T4!e`7KKQK@4#}aUqC>BgYEXk zudVZ#-s%SZvV!udOoL`m1SBM{cw@sR=kWvBwE0=$FT?#p_4U+LU-!g+-DkV&>7_}T z6cF{T3Gs~2&85iKoCHl;^YHQU>9*!i?8(!ON3HpbwdYY+fJtu3;#1@s_By`m*Hrio zO5cVQUMRou3}fwE?%fl;%G(ioC8+sC;0ZgwbF}#TAm=t^~T4r}~1+hx-ag;>idB z)2BZ!QHgtVDgXS%-%#}h>tLWTDQl+Cp#!+g$kuHa;*$F?D~f}wqGYgL_Pc!t;s z_`iGU1$^K;rXRw6?!Jv#H@V1qbEpV;i}bN1A8*Ed2(upI-A@hwotCmLnJ4&~UU>Zs zyH%hH_3uQghVJ#_H8#}1fU9BDT?lV zkUe?V8@F8zy9~dij%&-AG6OnXo4fNZXHr=5(6u)QQ?v$4;)g(n9EiF62$W}0>herBv ze(ls9Ba(+RpircTj%|1D;%b*nB}PY_7a=l$9oTZMLHUe7hKyuD@wrt=N?>+@4m}^jj@4T@L__u?qE~`s z7ndlFFxt7oXAC)9xyUSdUuC1x5ss{IolTO9GEue?m+@7vEFCZX%BkI?!GDk^zs3(uoVVz#@iCU{h z7E+}?tuX0^g@v6Dhetw^lacAP!g0@*GOm8Xoic(sZC0t>-{0@72ajWo<2_qplGy!S zN=hm>FK?|5nP!xR6QE*ZitI5xyYs(naEjF(Lu`FBnV%4K6;P12)kVU?XiSr$xUC+- zd_4D*ujm&H7u^5S6=73LO!XfBp;Ja@y(*2%7&fL1uk?HDVap0q&M0!1FE1vR)W<5s z==ByU)N&f}q$%bR7R zT+I@pSXugayY3$?3Sea!BwF2j%(+sMP;|Bm+U(i_x^DYl^s3Z_dLaKihJ8NW(9){r zDAj2-*-?2eDJi+zKFA2A4d>yRK3Tk5D$VuhO2H+D2)dQ3v zwn4uy;&gXwV0q*0_Fy_lRqgEd*GbV(OqqJ|_F$R-Ti~#;Q@MJ{lwaVI3)4Z1X35#* zsj|E_kK^Ibq^B=G1_-a8uNB_C3yrPM8oQkld7V??F{AQ4!0~J9;K~H9YbDSwAuFr> zP8~b~GULg^!e$cQ%CrYUP%ggq#zeNd(C#FOF6EfyHzap$bt3AI9j*l}>+UpU+yA2wgw?j1sg64gUcfSPD4JzDk%yvEpo{44* zpPrsp<)3!fl_zopIN6*FQyd$bM*gzqv9uJ^Bl0f!X1pY>)vFwl#_QImIDfqp*!~^vtE2FR*R79>~HomK}yu${du3) ziSqV{4%&%_s<)z7`O}z_HThX6X~VldzoE8Pr}(|&e9Ls-!b#o~DKpDJSQmkxxIZKO zv~8`OrV}zIh4GW`>M8|$-`PUBSIQp_PbvNJ18$Wp(Z=;wUPng?Ab52Z#{PPxFk(1W zFlapA`@)KW!HUvJe4xN#p*@p8+;8iX%AiqR?ngG|Og-<5`_p7K5ejkciyu0xgHcv_ zh*($7H!ZGL>q}qPyTixt{273b4`)yf=B+FSOfQ-W*}S>7jWh@xZQF8FcT10)fq~K; zRBqcmORV(%I?Z5PzpIM|7h0Gl;9`Du;PmKeTrW+dJm)q!Fn|OY*R_|mY37Z6CKF`$ z?818xIoyFrfeF!8jS%>fm9?-qjz=gyj+;BNr<QrMV#^;tQ}qgA1)(`3wWSQoS@E5xToj9&}ORc*)TY zI+y{qp6ft(ex?LbW=}TvAc*(%_4Qj@{3*nk5r9p?O<7s#w5i(F?R^Roq0`kIqvH4B zHfPn;+R>O{9hjIk*G4j?CW?c<_>;=5DQE9!3;&2BpqW-;0~NPd{BqG`Gh&Nlaace^ z8rQ{YPJGM3+|W?8e%S`-*=;>5rZYZ)aImyZESlB3Q%%y`HsZHfvpvq`47}mWF3((=7 zMJ1)*4i^->JbEonlhZNRW1l9^T4Tv=26_<}m%!oiFb{)Ni&E4zQMUT&rWS?M{Ut4J z_+qoy&RQN)B6h;QPTve<9{{W!j%+8oySo*roCUqahQ5XvmOHf<9M;^<@jGt5*U(Um zPP6D=GUm)LpbUC-6SF(RfgRgt)Z%{JqmYvWXTh}>igYt*1abv^Ev**>1eid?=LmLY zW^3yo_0rP^SBYiWX(bPbI%u1yK4#6x*8n1x;p2T8Xxsm>-w(_dV4eyI5eXeT%^F&P z1*DMB!FX8k3gosJcaPTO4wzSvjdB)P^g9W>oZF#nuQ%<#v~i0@eBB#1QCaEYwnLXn 