-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathChat2VIS.py
162 lines (144 loc) · 8.18 KB
/
Chat2VIS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#################################################################################
# Chat2VIS
# https://chat2vis.streamlit.app/
# Paula Maddigan
#################################################################################
import pandas as pd
import openai
import streamlit as st
#import streamlit_nested_layout
from classes import get_primer,format_question,run_request
import warnings
warnings.filterwarnings("ignore")
# Allow st.pyplot() to be called without an explicit figure: the generated
# scripts draw on matplotlib's global state (see the exec() call below).
st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_page_config(page_icon="chat2vis.png",layout="wide",page_title="Chat2VIS")
# Page title and subtitle rendered as raw HTML.
st.markdown("<h1 style='text-align: center; font-weight:bold; font-family:comic sans ms; padding-top: 0rem;'> \
Chat2VIS</h1>", unsafe_allow_html=True)
st.markdown("<h2 style='text-align: center;padding-top: 0rem;'>Creating Visualisations using Natural Language \
with ChatGPT and Code Llama</h2>", unsafe_allow_html=True)
# Sidebar credit line with a mailto link.
st.sidebar.markdown('</a> Developed by Paula Maddigan <a style="text-align: center;padding-top: 0rem;" href="mailto: i.build.apps.4.u@gmail.com">:email:', unsafe_allow_html=True)
# Display name -> model identifier handed to run_request()/format_question().
# The display names (keys) are also used for the checkbox labels and the
# key-validation checks further down.
available_models = {"ChatGPT-4": "gpt-4","ChatGPT-3.5": "gpt-3.5-turbo","GPT-3": "text-davinci-003",
"GPT-3.5 Instruct": "gpt-3.5-turbo-instruct","Code Llama":"CodeLlama-34b-Instruct-hf"}
# Dictionary of display name -> DataFrame, cached in st.session_state so the
# CSV files are read once per session, not on every Streamlit rerun.
if "datasets" not in st.session_state:
    # Demo datasets shipped alongside the app.
    _preloaded_files = {
        "Movies": "movies.csv",
        "Housing": "housing.csv",
        "Cars": "cars.csv",
        "Colleges": "colleges.csv",
        "Customers & Products": "customers_and_products_contacts.csv",
        "Department Store": "department_store.csv",
        "Energy Production": "energy_production.csv",
    }
    datasets = {name: pd.read_csv(path) for name, path in _preloaded_files.items()}
    st.session_state["datasets"] = datasets
else:
    # Reuse the datasets already loaded (and possibly extended by uploads)
    # earlier in this session.
    datasets = st.session_state["datasets"]
# Two side-by-side password inputs for the API keys consumed by run_request().
key_col1,key_col2 = st.columns(2)
# OpenAI key: required for the GPT-family models (validated as 'sk-...' below).
openai_key = key_col1.text_input(label = ":key: OpenAI Key:", help="Required for ChatGPT-4, ChatGPT-3.5, GPT-3, GPT-3.5 Instruct.",type="password")
# HuggingFace key: required for Code Llama (validated as 'hf_...' below).
hf_key = key_col2.text_input(label = ":hugging_face: HuggingFace Key:",help="Required for Code Llama", type="password")
with st.sidebar:
    # Create the dataset chooser first (empty) so it can be populated after a
    # possible upload has been added to the datasets dictionary.
    dataset_container = st.empty()
    # BUGFIX: index_no must be initialised BEFORE the try block. Previously it
    # was assigned inside the try, so an exception raised before that line left
    # it undefined and the radio() call below crashed with a NameError.
    index_no = 0
    # Facility to upload a user dataset.
    try:
        uploaded_file = st.file_uploader(":computer: Load a CSV file:", type="csv")
        if uploaded_file:
            # Read in the data, add it to the list of available datasets.
            # Give it a nice name: file name minus ".csv", capitalised.
            file_name = uploaded_file.name[:-4].capitalize()
            datasets[file_name] = pd.read_csv(uploaded_file)
            # Default the radio button to the newly added dataset
            # (dicts preserve insertion order, so it is the last entry).
            index_no = len(datasets) - 1
    except Exception as e:
        st.error("File failed to load. Please select a valid CSV file.")
        print("File failed to load.\n" + str(e))
    # Radio buttons for dataset choice
    chosen_dataset = dataset_container.radio(":bar_chart: Choose your data:",datasets.keys(),index=index_no)#,horizontal=True,)
    # Check boxes for model choice
    st.write(":brain: Choose your model(s):")
    # Dictionary of model display name -> whether its checkbox is ticked.
    use_model = {}
    for model_desc,model_name in available_models.items():
        label = f"{model_desc} ({model_name})"
        key = f"key_{model_desc}"
        use_model[model_desc] = st.checkbox(label,value=True,key=key)
    st.info("Note: Upgrade of Code Llama model is causing failures in plot generation. Fix under investigation...")
# Text area for query
question = st.text_area(":eyes: What would you like to visualise?",height=10)
go_btn = st.button("Go...")
# List of selected model DISPLAY names (keys of available_models) — used below
# both for membership checks and to look the model identifier back up.
selected_models = [model_desc for model_desc, is_selected in use_model.items() if is_selected]
model_count = len(selected_models)
# Execute chatbot query
if go_btn and model_count > 0:
    api_keys_entered = True
    # Check the API keys relevant to the selected models are entered before
    # making any request.
    if "ChatGPT-4" in selected_models or "ChatGPT-3.5" in selected_models or "GPT-3" in selected_models or "GPT-3.5 Instruct" in selected_models:
        if not openai_key.startswith('sk-'):
            st.error("Please enter a valid OpenAI API key.")
            api_keys_entered = False
    if "Code Llama" in selected_models:
        if not hf_key.startswith('hf_'):
            st.error("Please enter a valid HuggingFace API key.")
            api_keys_entered = False
    if api_keys_entered:
        # One column per selected model so the plots render side by side.
        plots = st.columns(model_count)
        # Get the primer for this dataset
        primer1, primer2 = get_primer(datasets[chosen_dataset], 'datasets["' + chosen_dataset + '"]')
        # Create model, run the request and print the results
        for plot_num, model_type in enumerate(selected_models):
            with plots[plot_num]:
                st.subheader(model_type)
                try:
                    # Format the question
                    question_to_ask = format_question(primer1, primer2, question, model_type)
                    # Run the question
                    answer = ""
                    answer = run_request(question_to_ask, available_models[model_type], key=openai_key, alt_key=hf_key)
                    # The model completes the script started by primer2, so
                    # prepend primer2 to obtain the full runnable script.
                    answer = primer2 + answer
                    print("Model: " + model_type)
                    print(answer)
                    plot_area = st.empty()
                    # SECURITY NOTE: exec() runs model-generated code with no
                    # sandboxing. exec() always returns None, so st.pyplot()
                    # falls back to the global matplotlib figure the script
                    # drew (enabled by the set_option call at the top).
                    plot_area.pyplot(exec(answer))
                except Exception as e:
                    # Map exception types (pre-v1 openai library) to messages.
                    # Exact type match — NOT isinstance — is deliberate:
                    # ServiceUnavailableError subclasses APIError but must get
                    # its own message.
                    openai_error_messages = {
                        openai.error.APIError: "OpenAI API Error. Please try again a short time later.",
                        openai.error.Timeout: "OpenAI API Error. Your request timed out. Please try again a short time later.",
                        openai.error.RateLimitError: "OpenAI API Error. You have exceeded your assigned rate limit.",
                        openai.error.APIConnectionError: "OpenAI API Error. Error connecting to services. Please check your network/proxy/firewall settings.",
                        openai.error.InvalidRequestError: "OpenAI API Error. Your request was malformed or missing required parameters.",
                        openai.error.AuthenticationError: "Please enter a valid OpenAI API Key.",
                        openai.error.ServiceUnavailableError: "OpenAI Service is currently unavailable. Please try again a short time later.",
                    }
                    message = openai_error_messages.get(type(e))
                    if message is not None:
                        st.error(message + " (" + str(e) + ")")
                    else:
                        # Most likely the generated script itself raised.
                        st.error("Unfortunately the code generated from the model contained errors and was unable to execute.")
# Show every dataset in its own tab at the bottom of the page.
tab_list = st.tabs(datasets.keys())
# Pair each tab with its (name, DataFrame) entry directly via zip, instead of
# indexing back into a materialised key list by position.
for dataset_tab, (dataset_name, dataset_frame) in zip(tab_list, datasets.items()):
    with dataset_tab:
        st.subheader(dataset_name)
        st.dataframe(dataset_frame, hide_index=True)
# Reference the dataset origins. (A dead, never-rendered HTML `footer` string
# that previously sat here has been removed — this caption is what displays.)
st.caption("Datasets courtesy of NL4DV, nvBench and ADVISor")
# Hide Streamlit's default hamburger menu and footer via injected CSS.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)