From 771b08c8826c198ee38fad37c5b7d37d4537773a Mon Sep 17 00:00:00 2001 From: GTimothee Date: Tue, 15 Apr 2025 07:42:30 +0200 Subject: [PATCH 1/4] add audio input --- src/smolagents/gradio_ui.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index c67361edb..bbefd8e32 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -271,10 +271,17 @@ def log_user_message(self, text_input, file_uploads_log): gr.Button(interactive=False), ) - def launch(self, share: bool = True, **kwargs): - self.create_app().launch(debug=True, share=share, **kwargs) + def launch(self, share: bool = True, speech2text_func = None, **kwargs): + self.create_app(speech2text_func).launch(debug=True, share=share, **kwargs) + + def create_app(self, speech2text_func = None): + + def handle_input(text_input, audio_input): + if audio_input: + return speech2text_func(audio_input) + return text_input + - def create_app(self): import gradio as gr with gr.Blocks(theme="ocean", fill_height=True) as demo: @@ -298,6 +305,10 @@ def create_app(self): container=False, placeholder="Enter your prompt here and press Shift+Enter or press the button", ) + if speech2text_func: + audio_input = gr.Audio(source="microphone", type="filepath", label="Voice Input") + else: + audio_input = None submit_btn = gr.Button("Submit", variant="primary") # If an upload folder is provided, enable the upload feature @@ -346,10 +357,18 @@ def create_app(self): ) submit_btn.click( + handle_input, + inputs=[text_input, audio_input], + outputs=[text_input], # Update the text input with transcribed text if audio is provided + ).then( self.log_user_message, [text_input, file_uploads_log], [stored_messages, text_input, submit_btn], - ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then( + ).then( + self.interact_with_agent, + [stored_messages, chatbot, session_state], + [chatbot], + ).then( lambda: ( gr.Textbox( interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button" From 27ab909d682048b61c607ea786b4e0535b267968 Mon Sep 17 00:00:00 2001 From: GTimothee Date: Tue, 15 Apr 2025 07:51:55 +0200 Subject: [PATCH 2/4] add audio input --- src/smolagents/gradio_ui.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index bbefd8e32..52aa7640e 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -306,7 +306,15 @@ def handle_input(text_input, audio_input): placeholder="Enter your prompt here and press Shift+Enter or press the button", ) if speech2text_func: - audio_input = gr.Audio(source="microphone", type="filepath", label="Voice Input") + audio_input = gr.Audio( + sources=["microphone"], + label="Voice Input", + waveform_options=gr.WaveformOptions( + waveform_color="#01C6FF", + waveform_progress_color="#0066B4", + skip_length=2, + show_controls=False, + ),) else: audio_input = None submit_btn = gr.Button("Submit", variant="primary") From 3a756379f7bb39ccc5c868e15105f1c0e288fd50 Mon Sep 17 00:00:00 2001 From: GTimothee Date: Tue, 15 Apr 2025 07:57:16 +0200 Subject: [PATCH 3/4] add audio input file --- src/smolagents/gradio_ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index 52aa7640e..f36280497 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -307,7 +307,7 @@ def handle_input(text_input, audio_input): ) if speech2text_func: audio_input = gr.Audio( - sources=["microphone"], + sources=["upload", "microphone"], label="Voice Input", waveform_options=gr.WaveformOptions( waveform_color="#01C6FF", From 5af2e796582f0f18b9ad38ef68e6cf685a32d562 Mon Sep 17 00:00:00 2001 From: GTimothee Date: Tue, 15 Apr 2025 18:49:30 +0200 Subject: [PATCH 4/4] fix --- src/smolagents/gradio_ui.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index f36280497..4124c2ae4 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -368,6 +368,10 @@ def handle_input(text_input, audio_input): handle_input, inputs=[text_input, audio_input], outputs=[text_input], # Update the text input with transcribed text if audio is provided + ).then( + lambda: None, + inputs=None, + outputs=[audio_input], # Clear audio_input ).then( self.log_user_message, [text_input, file_uploads_log],