Skip to content

Commit b60df35

Browse files
authored
Feat: Introduce LLM integration (#799)
1 parent 620f114 commit b60df35

File tree

4 files changed

+99
-0
lines changed

4 files changed

+99
-0
lines changed

setup.cfg

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ ignore_missing_imports = True
6666
[mypy-psycopg2.*]
6767
ignore_missing_imports = True
6868

69+
[mypy-langchain.*]
70+
ignore_missing_imports = True
71+
6972
[autoflake]
7073
in-place = True
7174
expand-star-imports = True

setup.py

+4
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@
8787
"dbt": [
8888
"dbt-core<1.5.0",
8989
],
90+
"llm": [
91+
"langchain",
92+
"openai",
93+
],
9094
"postgres": [
9195
"psycopg2",
9296
],

sqlmesh/cli/main.py

+39
Original file line numberDiff line numberDiff line change
@@ -378,5 +378,44 @@ def migrate(ctx: click.Context) -> None:
378378
ctx.obj.migrate()
379379

380380

381+
@cli.command("prompt")
@click.argument("prompt")
@click.option(
    "-e",
    "--evaluate",
    is_flag=True,
    help="Evaluate the generated SQL query and display the results.",
)
@click.option(
    "-t",
    "--temperature",
    type=float,
    help="Sampling temperature. 0.0 - precise and predictable, 0.5 - balanced, 1.0 - creative. Default: 0.7",
    default=0.7,
)
@opt.verbose
@click.pass_context
@error_handler
def prompt(
    ctx: click.Context, prompt: str, evaluate: bool, temperature: float, verbose: bool
) -> None:
    """Uses LLM to generate a SQL query from a prompt."""
    # Imported lazily so the optional "llm" extra is only required for this command.
    from sqlmesh.integrations.llm import LLMIntegration

    context = ctx.obj

    integration = LLMIntegration(
        context.models.values(),
        context.engine_adapter.dialect,
        temperature=temperature,
        verbose=verbose,
    )
    sql = integration.query(prompt)

    context.console.log_status_update(sql)
    if evaluate:
        context.console.log_success(context.fetchdf(sql))
418+
419+
381420
if __name__ == "__main__":
382421
cli()

sqlmesh/integrations/llm.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from __future__ import annotations
2+
3+
import typing as t
4+
5+
from langchain import LLMChain, PromptTemplate
6+
from langchain.chat_models import ChatOpenAI
7+
8+
from sqlmesh.core.model import Model
9+
10+
_QUERY_PROMPT_TEMPLATE = """Given an input request, create a syntactically correct {dialect} SQL query.
11+
Use full table names.
12+
Convert string operands to lowercase in the WHERE clause.
13+
Reply with a SQL query and nothing else.
14+
15+
Use the following tables and columns:
16+
17+
{table_info}
18+
19+
Request: {input}"""
20+
21+
22+
class LLMIntegration:
    """Generates SQL queries from natural-language prompts via an OpenAI chat model.

    Args:
        models: Models whose materialized tables/columns are exposed to the LLM.
        dialect: Target SQL dialect name injected into the prompt template.
        temperature: Sampling temperature forwarded to the chat model. Default: 0.7.
        verbose: If True, the underlying chain logs its intermediate steps.
    """

    def __init__(
        self,
        models: t.Iterable[Model],
        dialect: str,
        temperature: float = 0.7,
        verbose: bool = False,
    ):
        # Bind the static parts of the prompt up front; only "input" varies per call.
        query_prompt_template = PromptTemplate.from_template(_QUERY_PROMPT_TEMPLATE).partial(
            dialect=dialect, table_info=_to_table_info(models)
        )
        llm = ChatOpenAI(temperature=temperature)  # type: ignore
        self._query_chain = LLMChain(llm=llm, prompt=query_prompt_template, verbose=verbose)

    def query(self, prompt: str) -> str:
        """Return a SQL query generated from *prompt*.

        Chat models often prefix the query with prose despite the template's
        instructions; everything before the first occurrence of "SELECT"
        (matched case-insensitively, since replies may use lowercase) is
        stripped. If no SELECT is found, the stripped reply is returned as-is.
        """
        result = self._query_chain.predict(input=prompt).strip()
        # Case-insensitive search; upper() preserves positions so the slice
        # below keeps the reply's original casing.
        select_pos = result.upper().find("SELECT")
        if select_pos >= 0:
            return result[select_pos:]
        return result
42+
43+
44+
def _to_table_info(models: t.Iterable[Model]) -> str:
45+
infos = []
46+
for model in models:
47+
if not model.kind.is_materialized:
48+
continue
49+
50+
columns_csv = ", ".join(model.columns_to_types)
51+
infos.append(f"Table: {model.name}\nColumns: {columns_csv}\n")
52+
53+
return "\n".join(infos)

0 commit comments

Comments
 (0)