Commit
1 parent 3038fe1, commit e8daa07
Showing 1 changed file with 70 additions and 66 deletions.
@@ -1,66 +1,70 @@
-import torch
-
-from modules.log import logly
-from modules.pipeline import chatbot
-
-import gradio as gr
-
-if torch.cuda.is_available():
-    device = "cuda"
-    logly.info("CUDA is available and using GPU instead.")
-else:
-    device = "cpu"
-    logly.info("CUDA is not available and using CPU instead.")
-
-with gr.Blocks(theme=gr.themes.Soft(), title="GemGPT") as app:  # head="<link rel='icon' href='' sizes='32x32' />"
-    gr.HTML("<link rel='icon' href='https://gemmamodels.com/wp-content/uploads/2021/10/cropped-gem-32x32.png' sizes='32x32' />")
-    gr.HTML("<h1 style='text-align: center;'>GemGPT</h1>")
-    gr.HTML("<h3 style='text-align: center;'>Talk to GemGPT, Powered by Gemma Models</h3>")
-    with gr.Row():
-        gr.Markdown("Select a model to run. Gemma-2b-it is a smaller model that is faster and uses less memory. Gemma-7b-it is a larger model that is slower and uses more memory.")
-        model_options = gr.Dropdown(label="Select a Model", choices=["google/gemma-2b-it", "google/gemma-7b-it"],
-                                    value="google/gemma-2b-it")
-    with gr.Row():
-        gr.Markdown("Select the device to run the model on. If you are running this on a CPU, select CPU. If you are running this on a GPU, select CUDA.")
-        device = gr.Dropdown(label="Device", choices=["cuda", "cpu"], value=device)
-    with gr.Row():
-        gr.Markdown("Output Generated by Selected Model:")
-    with gr.Row():
-        outputs = gr.Textbox(lines=15, label="Output", value="")
-    with gr.Row():
-        gr.Markdown("Input your Prompt and click Generate to get a response.")
-    with gr.Row():
-        inputs = gr.Textbox(lines=2, label="Prompt", placeholder="Type here")
-    with gr.Row():
-        generate = gr.Button("Generate")
-    with gr.Row():
-        advanced_checkbox = gr.Checkbox(label="Show Advanced Options", container=False, elem_classes='min_check',
-                                        value=False)
-
-    with gr.Column(scale=1, visible=False) as advanced_column:
-        with gr.Row():
-            gr.Markdown("<h4>Adjust the parameters to control the model's output.</h4>")
-        with gr.Row():
-            gr.Markdown("Max New Tokens is the maximum number of tokens that the model will generate.")
-        with gr.Row():
-            tokens = gr.Slider(minimum=50, maximum=2000, label="Max New Tokens", value=1250)
-        with gr.Row():
-            gr.Markdown("Temperature is a parameter that controls the randomness of the model's output. A higher temperature will produce more random output.")
-        with gr.Row():
-            temp = gr.Slider(minimum=0.0, maximum=1.0, label="Temperature", value=0.7)
-        with gr.Row():
-            gr.Markdown("Top K is a parameter that controls the diversity of the model's output. A higher value will produce more diverse output.")
-        with gr.Row():
-            top_k = gr.Slider(minimum=1, maximum=100, label="Top K", value=50)
-        with gr.Row():
-            gr.Markdown("Top P is an alternative to Top K that selects the smallest set of tokens whose cumulative probability exceeds the threshold P.")
-        with gr.Row():
-            top_p = gr.Slider(minimum=0.0, maximum=1.0, label="Top P", value=0.95)
-        with gr.Row():
-            gr.Markdown("Quantization is a technique to reduce the size of the model and speed up inference. 4-bit quantization is faster but less accurate than 8-bit.")
-        with gr.Row():
-            quantization = gr.Dropdown(label="Quantization", choices=["8-bit", "4-bit"], value="4-bit")
-    generate.click(fn=chatbot, inputs=[inputs, tokens, temp, top_k, top_p, model_options, quantization, device], outputs=outputs)
-
-    advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
-                             queue=False, show_progress=False)
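The updated version of the file, shown below, adds an Intel XPU branch to the device-detection block. One caveat worth noting: the torch.xpu namespace is only present on newer PyTorch builds (or when intel_extension_for_pytorch is installed), so an unguarded torch.xpu.is_available() call can raise an AttributeError on older installs. A minimal sketch of a defensive variant, assuming you want detection to fall back to CPU rather than crash (the pick_device helper is illustrative only and is not part of this commit):

import torch

# Guarded device detection: prefer CUDA, then XPU, then CPU, without assuming
# the torch.xpu namespace exists on this PyTorch build.
def pick_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    xpu = getattr(torch, "xpu", None)  # None on builds without XPU support
    if xpu is not None and xpu.is_available():
        return "xpu"
    return "cpu"

device = pick_device()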
+import torch
+
+from modules.log import logly
+from modules.pipeline import chatbot
+
+import gradio as gr
+
+if torch.cuda.is_available():
+    device = "cuda"
+    logly.info("CUDA is available and using GPU instead.")
+elif torch.xpu.is_available():
+    device = "xpu"
+    logly.info("XPU is available and using XPU instead.")
+    logly.warn("this feature is not yet supported. Please use a GPU or CPU instead!.")
+else:
+    device = "cpu"
+    logly.info("CUDA is not available and using CPU instead.")
+
+with gr.Blocks(theme=gr.themes.Soft(), title="GemGPT") as app:  # head="<link rel='icon' href='' sizes='32x32' />"
+    gr.HTML("<link rel='icon' href='https://gemmamodels.com/wp-content/uploads/2021/10/cropped-gem-32x32.png' sizes='32x32' />")
+    gr.HTML("<h1 style='text-align: center;'>GemGPT</h1>")
+    gr.HTML("<h3 style='text-align: center;'>Talk to GemGPT, Powered by Gemma Models</h3>")
+    with gr.Row():
+        gr.Markdown("Select a model to run. Gemma-2b-it is a smaller model that is faster and uses less memory. Gemma-7b-it is a larger model that is slower and uses more memory.")
+        model_options = gr.Dropdown(label="Select a Model", choices=["google/gemma-2b-it", "google/gemma-7b-it"],
+                                    value="google/gemma-2b-it")
+    with gr.Row():
+        gr.Markdown("Select the device to run the model on. If you are running this on a CPU, select CPU. If you are running this on a GPU, select CUDA.")
+        device = gr.Dropdown(label="Device", choices=["cuda", "cpu"], value=device)
+    with gr.Row():
+        gr.Markdown("Output Generated by Selected Model:")
+    with gr.Row():
+        outputs = gr.Textbox(lines=15, label="Output", value="")
+    with gr.Row():
+        gr.Markdown("Input your Prompt and click Generate to get a response.")
+    with gr.Row():
+        inputs = gr.Textbox(lines=2, label="Prompt", placeholder="Type here")
+    with gr.Row():
+        generate = gr.Button("Generate")
+    with gr.Row():
+        advanced_checkbox = gr.Checkbox(label="Show Advanced Options", container=False, elem_classes='min_check',
+                                        value=False)
+
+    with gr.Column(scale=1, visible=False) as advanced_column:
+        with gr.Row():
+            gr.Markdown("<h4>Adjust the parameters to control the model's output.</h4>")
+        with gr.Row():
+            gr.Markdown("Max New Tokens is the maximum number of tokens that the model will generate.")
+        with gr.Row():
+            tokens = gr.Slider(minimum=50, maximum=2000, label="Max New Tokens", value=1250)
+        with gr.Row():
+            gr.Markdown("Temperature is a parameter that controls the randomness of the model's output. A higher temperature will produce more random output.")
+        with gr.Row():
+            temp = gr.Slider(minimum=0.0, maximum=1.0, label="Temperature", value=0.7)
+        with gr.Row():
+            gr.Markdown("Top K is a parameter that controls the diversity of the model's output. A higher value will produce more diverse output.")
+        with gr.Row():
+            top_k = gr.Slider(minimum=1, maximum=100, label="Top K", value=50)
+        with gr.Row():
+            gr.Markdown("Top P is an alternative to Top K that selects the smallest set of tokens whose cumulative probability exceeds the threshold P.")
+        with gr.Row():
+            top_p = gr.Slider(minimum=0.0, maximum=1.0, label="Top P", value=0.95)
+        with gr.Row():
+            gr.Markdown("Quantization is a technique to reduce the size of the model and speed up inference. 4-bit quantization is faster but less accurate than 8-bit.")
+        with gr.Row():
+            quantization = gr.Dropdown(label="Quantization", choices=["8-bit", "4-bit"], value="4-bit")
+    generate.click(fn=chatbot, inputs=[inputs, tokens, temp, top_k, top_p, model_options, quantization, device], outputs=outputs)
+
+    advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
+                             queue=False, show_progress=False)
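The generate.click wiring implies that modules.pipeline.chatbot takes, in order: the prompt, max new tokens, temperature, top-k, top-p, the model id, the quantization mode, and the device. That module is not part of this diff, so the following is only a rough sketch of how such a function is commonly written with Hugging Face transformers and bitsandbytes; every name and default below is an assumption, not the repository's actual implementation.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hypothetical stand-in for modules.pipeline.chatbot; the parameter order
# mirrors the inputs list wired to generate.click above.
def chatbot(prompt, max_new_tokens, temperature, top_k, top_p,
            model_name, quantization, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if device == "cuda":
        # bitsandbytes 4-bit/8-bit loading only applies on CUDA devices.
        quant_config = BitsAndBytesConfig(
            load_in_4bit=(quantization == "4-bit"),
            load_in_8bit=(quantization == "8-bit"),
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name, quantization_config=quant_config, device_map="auto"
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **encoded,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=float(temperature),
        top_k=int(top_k),
        top_p=float(top_p),
    )
    # Return only the newly generated text, without echoing the prompt.
    new_tokens = output_ids[0, encoded["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)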