
Commit 72c92b1

VertexAI Google Cloud Palm2 Support (#226)
* feat(bard): added
* docs(readme): update
* chore(print): removed
1 parent f432766 commit 72c92b1

File tree

11 files changed, +36 -16 lines changed


.backend_env.example

+3 -1

@@ -3,4 +3,6 @@ SUPABASE_SERVICE_KEY=eyXXXXX
 OPENAI_API_KEY=sk-XXXXXX
 ANTHROPIC_API_KEY=XXXXXX
 JWT_SECRET_KEY=Found in Supabase settings in the API tab
-AUTHENTICATE="true"
+AUTHENTICATE=true
+GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
+GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
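The two new variables follow Google's Application Default Credentials (ADC) convention rather than a plain API key. A minimal sketch of how they get resolved, assuming `google-auth` is available (it is pulled in by `google_cloud_aiplatform`); the values shown here are placeholders, not real ones:

import os

import google.auth

# Placeholders mirroring .backend_env.example; substitute your own values.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/code/application_default_credentials.json")
os.environ.setdefault("GOOGLE_CLOUD_PROJECT", "my-gcp-project-id")  # hypothetical project id

# google.auth.default() reads GOOGLE_APPLICATION_CREDENTIALS for the key file
# and GOOGLE_CLOUD_PROJECT for the project id.
credentials, project_id = google.auth.default()
print("Resolved GCP project:", project_id)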

.gitignore

+1

@@ -50,3 +50,4 @@ streamlit-demo/.streamlit/secrets.toml
 .frontend_env
 backend/pandoc-*
 **/.pandoc-*
+backend/application_default_credentials.json

README.md

+7

@@ -88,8 +88,15 @@ cp .frontend_env.example frontend/.env
 - **Step 3**: Update the `backend/.env` and `frontend/.env` file
 
 > _Your `supabase_service_key` can be found in your Supabase dashboard under Project Settings -> API. Use the `anon` `public` key found in the `Project API keys` section._
+
+
 > _Your `JWT_SECRET_KEY` can be found in your Supabase settings under Project Settings -> JWT Settings -> JWT Secret_
 
+> _To activate Vertex AI with PaLM from GCP, follow the instructions [here](https://python.langchain.com/en/latest/modules/models/llms/integrations/google_vertex_ai_palm.html) and update `backend/.env`. This is an advanced feature; please be familiar with GCP before trying to use it._
+
+- [ ] Change variables in `backend/.env`
+- [ ] Change variables in `frontend/.env`
+
 - **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`)
 
 [Migration Script 1](scripts/supabase_new_store_documents.sql)
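The new README note links to LangChain's Vertex AI PaLM instructions. As a quick smoke test that `backend/.env` and GCP are wired up (a sketch only, assuming valid credentials and project), the `VertexAI` wrapper that `backend/llm/qa.py` now imports can be called directly:

from langchain.llms import VertexAI

# Authenticates via Application Default Credentials and GOOGLE_CLOUD_PROJECT;
# by default this wrapper targets Google's PaLM 2 text model on Vertex AI.
llm = VertexAI()
print(llm("Reply with one short sentence if you can hear me."))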

backend/Dockerfile

+5 -2

@@ -1,4 +1,7 @@
-FROM python:3.11
+FROM python:3.11-buster
+
+# Install GEOS library
+RUN apt-get update && apt-get install -y libgeos-dev
 
 WORKDIR /code
 
@@ -8,4 +11,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
 
 COPY . /code/
 
-CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
+CMD ["uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]

backend/api.py

+5 -4

@@ -1,12 +1,14 @@
 import os
 import shutil
+import time
 from tempfile import SpooledTemporaryFile
 from typing import Annotated, List, Tuple
 
 import pypandoc
 from auth_bearer import JWTBearer
 from crawl.crawler import CrawlWebsite
-from fastapi import Depends, FastAPI, File, Header, HTTPException, UploadFile
+from fastapi import (Depends, FastAPI, File, Header, HTTPException, Request,
+                     UploadFile)
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from llm.qa import get_qa_llm
@@ -25,13 +27,11 @@
 from parsers.powerpoint import process_powerpoint
 from parsers.txt import process_txt
 from pydantic import BaseModel
-from utils import ChatMessage, CommonsDep, similarity_search
-
 from supabase import Client
+from utils import ChatMessage, CommonsDep, similarity_search
 
 logger = get_logger(__name__)
 
-
 app = FastAPI()
 
 origins = [
@@ -49,6 +49,7 @@
 )
 
 
+
 @app.on_event("startup")
 async def startup_event():
     pypandoc.download_pandoc()

backend/auth_handler.py

-1

@@ -23,7 +23,6 @@ def decode_access_token(token: str):
         payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False})
         return payload
     except JWTError as e:
-        print(f"JWTError: {str(e)}")
         return None
 
 def get_user_email_from_token(token: str):

backend/llm/qa.py

+5 -1

@@ -2,10 +2,11 @@
 from typing import Any, List
 
 from langchain.chains import ConversationalRetrievalChain
-from langchain.chat_models import ChatOpenAI
+from langchain.chat_models import ChatOpenAI, ChatVertexAI
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms import VertexAI
 from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import SupabaseVectorStore
 from llm import LANGUAGE_PROMPT
@@ -94,6 +95,9 @@ def get_qa_llm(chat_message: ChatMessage, user_id: str):
             temperature=chat_message.temperature, max_tokens=chat_message.max_tokens),
             vector_store.as_retriever(), memory=memory, verbose=True,
             max_tokens_limit=1024)
+    elif chat_message.model.startswith("vertex"):
+        qa = ConversationalRetrievalChain.from_llm(
+            ChatVertexAI(), vector_store.as_retriever(), memory=memory, verbose=False, max_tokens_limit=1024)
     elif anthropic_api_key and chat_message.model.startswith("claude"):
         qa = ConversationalRetrievalChain.from_llm(
             ChatAnthropic(
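The new branch routes any model name starting with `vertex` to `ChatVertexAI`. Unlike the OpenAI and Anthropic branches it passes no API key: authentication comes from Application Default Credentials. A minimal sketch of the chat model on its own, outside the retrieval chain, assuming the GCP setup above:

from langchain.chat_models import ChatVertexAI
from langchain.schema import HumanMessage

# No key argument needed; ChatVertexAI authenticates via ADC and uses
# the PaLM 2 chat model on Vertex AI.
chat = ChatVertexAI()
response = chat([HumanMessage(content="Summarize what a retrieval chain does.")])
print(response.content)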

backend/parsers/audio.py

+5 -5

@@ -1,15 +1,16 @@
 import os
-from tempfile import NamedTemporaryFile
 import tempfile
-from io import BytesIO
 import time
+from io import BytesIO
+from tempfile import NamedTemporaryFile
+
 import openai
+from fastapi import UploadFile
 from langchain.document_loaders import TextLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from utils import compute_sha1_from_content, documents_vector_store
-from langchain.schema import Document
-from fastapi import UploadFile
 
 
 # # Create a function to transcribe audio using Whisper
 # def _transcribe_audio(api_key, audio_file, stats_db):
@@ -52,7 +53,6 @@ async def process_audio(upload_file: UploadFile, stats_db):
 
     file_sha = compute_sha1_from_content(transcript.text.encode("utf-8"))
     file_size = len(transcript.text.encode("utf-8"))
-    print(file_size)
 
     # Load chunk size and overlap from sidebar
     chunk_size = 500

backend/requirements.txt

+3 -2

@@ -1,4 +1,4 @@
-langchain==0.0.166
+langchain==0.0.187
 Markdown==3.4.3
 openai==0.27.6
 pdf2image==1.16.3
@@ -15,4 +15,5 @@ uvicorn==0.22.0
 pypandoc==1.11
 docx2txt==0.8
 guidance==0.0.53
-python-jose==3.3.0
+python-jose==3.3.0
+google_cloud_aiplatform==1.25.0
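The `langchain` bump to 0.0.187 is what provides the `ChatVertexAI` and `VertexAI` wrappers used in `backend/llm/qa.py`, and `google_cloud_aiplatform` is the SDK they call into. A small sanity-check sketch to run inside the backend environment:

import langchain
from google.cloud import aiplatform

# The Vertex AI integrations imported by backend/llm/qa.py
from langchain.chat_models import ChatVertexAI
from langchain.llms import VertexAI

print(langchain.__version__)   # expected: 0.0.187
print(aiplatform.__version__)  # expected: 1.25.0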

docker-compose.yml

+1

@@ -25,5 +25,6 @@ services:
     restart: always
     volumes:
       - ./backend/:/code/
+      - ~/.config/gcloud:/root/.config/gcloud
    ports:
      - 5050:5050
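The added volume mounts the host's gcloud configuration into the container so Application Default Credentials can resolve without baking a key file into the image. A small sketch to run inside the backend container to confirm that one of the expected credential locations is present (paths taken from this compose file and `.backend_env.example`):

import os

candidates = [
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""),         # e.g. /code/application_default_credentials.json
    "/root/.config/gcloud/application_default_credentials.json",  # provided by the ~/.config/gcloud mount
]
for path in candidates:
    if path and os.path.exists(path):
        print("Found credentials at:", path)
        break
else:
    print("No Application Default Credentials found in the container.")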

frontend/app/chat/page.tsx

+1

@@ -159,6 +159,7 @@ export default function ChatPage() {
             >
               <option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
               <option value="gpt-4">gpt-4</option>
+              <option value="vertexai">vertexai</option>
             </select>
           </fieldset>
           <fieldset className="w-full flex">
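The option value `vertexai` is what the backend dispatch in `backend/llm/qa.py` keys on; the only contract between frontend and backend is the prefix check:

# Mirrors the branch added in backend/llm/qa.py: any model value starting with
# "vertex" is routed to ChatVertexAI.
assert "vertexai".startswith("vertex")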
