diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index c67361edb..4124c2ae4 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -271,10 +271,17 @@ def log_user_message(self, text_input, file_uploads_log): gr.Button(interactive=False), ) - def launch(self, share: bool = True, **kwargs): - self.create_app().launch(debug=True, share=share, **kwargs) + def launch(self, share: bool = True, speech2text_func = None, **kwargs): + self.create_app(speech2text_func).launch(debug=True, share=share, **kwargs) + + def create_app(self, speech2text_func = None): + + def handle_input(text_input, audio_input): + if audio_input: + return speech2text_func(audio_input) + return text_input + - def create_app(self): import gradio as gr with gr.Blocks(theme="ocean", fill_height=True) as demo: @@ -298,6 +305,18 @@ def create_app(self): container=False, placeholder="Enter your prompt here and press Shift+Enter or press the button", ) + if speech2text_func: + audio_input = gr.Audio( + sources=["upload", "microphone"], + label="Voice Input", + waveform_options=gr.WaveformOptions( + waveform_color="#01C6FF", + waveform_progress_color="#0066B4", + skip_length=2, + show_controls=False, + ),) + else: + audio_input = None submit_btn = gr.Button("Submit", variant="primary") # If an upload folder is provided, enable the upload feature @@ -346,10 +365,22 @@ def create_app(self): ) submit_btn.click( + handle_input, + inputs=[text_input, audio_input], + outputs=[text_input], # Update the text input with transcribed text if audio is provided + ).then( + lambda: None, + inputs=None, + outputs=[audio_input], # Clear audio_input + ).then( self.log_user_message, [text_input, file_uploads_log], [stored_messages, text_input, submit_btn], - ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then( + ).then( + self.interact_with_agent, + [stored_messages, chatbot, session_state], + [chatbot], + ).then( lambda: ( gr.Textbox( interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"