1
1
"""
2
- Airtime and Messaging Service using Africa's Talking API
2
+ Airtime and Messaging Service using Africa's Talking API
3
3
4
4
This script provides a Gradio-based web interface for sending airtime and messages
5
5
using the Africa's Talking API. It also tracks the carbon emissions of the operations
37
37
from logging .handlers import RotatingFileHandler
38
38
import asyncio
39
39
from importlib .metadata import version , PackageNotFoundError
40
+ import tempfile
40
41
41
42
# Third-Party Library Imports
42
43
import gradio as gr
45
46
import numpy as np
46
47
import soundfile as sf
47
48
import ollama
49
+ import edge_tts
48
50
49
51
# Local Module Imports
50
52
from utils .function_call import send_airtime , send_message , search_news , translate_text
60
62
langtrace .init (api_key = os .getenv ("LANGTRACE_API_KEY" ))
61
63
groq_client = groq .Client (api_key = os .getenv ("GROQ_API_KEY" ))
62
64
63
-
64
65
# Set up the logger
65
66
logger = logging .getLogger (__name__ )
66
67
logger .setLevel (logging .DEBUG ) # Set the logger to handle all levels DEBUG and above
109
110
"groq" ,
110
111
"soundfile" ,
111
112
"numpy" ,
113
+ "edge-tts" , # Add edge-tts to version checking
112
114
]
113
115
114
116
for pkg in pkgs :
120
122
except Exception as e :
121
123
logger .error ("Failed to retrieve version for %s: %s" , pkg , str (e ))
122
124
125
+ # ------------------------------------------------------------------------------------
126
+ # Text-to-Speech (TTS) Configuration
127
+ # ------------------------------------------------------------------------------------
128
+
129
+ VOICE = "sw-TZ-RehemaNeural"
130
+ OUTPUT_FILE = "tts_output.mp3" # Saved in current working directory
131
+
132
+
133
async def text_to_speech(text: str) -> None:
    """
    Convert ``text`` to speech and save the audio to ``OUTPUT_FILE``.

    The speech is produced with ``edge_tts.Communicate`` using the
    module-level ``VOICE`` and written to the module-level ``OUTPUT_FILE``.

    Parameters
    ----------
    text : str
        The text to convert to speech.

    Raises
    ------
    Exception
        Any error raised while creating or saving the speech is logged and
        then re-raised unchanged.
    """
    try:
        communicate = edge_tts.Communicate(text, VOICE)
        await communicate.save(OUTPUT_FILE)
    except Exception as e:
        # logger.exception records the traceback along with the message;
        # lazy %-style arguments match the logging calls used elsewhere in
        # this module.
        logger.exception("TTS Error: %s", e)
        raise
    else:
        logger.info("Generated speech output: %s", OUTPUT_FILE)
141
+
142
+
123
143
# ------------------------------------------------------------------------------------
124
144
# Define Tools Schema
125
145
# ------------------------------------------------------------------------------------
@@ -384,12 +404,12 @@ async def process_audio_and_llm(audio):
384
404
y /= np .max (np .abs (y ))
385
405
386
406
# Write audio to buffer
407
+ buffer = io .BytesIO ()
387
408
sf .write (buffer , y , sr , format = "wav" )
388
409
buffer .seek (0 )
389
410
390
411
try :
391
412
# Get transcription from Groq
392
- # add the import here then text will be cut out for the client
393
413
transcription = groq_client .audio .transcriptions .create (
394
414
model = "distil-whisper-large-v3-en" ,
395
415
file = ("audio.wav" , buffer ),
@@ -471,6 +491,9 @@ def gradio_interface(message: str, history: list) -> str:
471
491
audio_output = gr .Textbox (
472
492
label = "Final Result" , placeholder = "LLM response will appear here..."
473
493
)
494
+ tts_button = gr .Button ("Play TTS" )
495
+ tts_audio = gr .Audio (label = "TTS Output" )
496
+
474
497
with gr .Row ():
475
498
transcribe_button = gr .Button ("Transcribe" )
476
499
process_button = gr .Button ("Process Edited Text" , variant = "primary" )
@@ -521,6 +544,20 @@ def show_transcription(audio):
521
544
logger .exception ("Error during transcription: %s" , e )
522
545
return f"Error: { str (e )} "
523
546
547
+ # Define TTS Function
548
async def generate_tts(text: str) -> str:
    """
    Generate TTS audio for ``text`` and return the path of the audio file.

    Parameters
    ----------
    text : str
        The text to speak.

    Returns
    -------
    str
        ``OUTPUT_FILE`` when the audio was generated successfully.
        ``None`` is returned instead when ``text`` is empty or contains only
        whitespace, or when speech generation fails; failures are logged and
        not raised.
    """
    # Nothing to synthesize: skip the TTS call entirely instead of letting it
    # fail and be reported as an error.
    if not text or not text.strip():
        return None

    try:
        communicate = edge_tts.Communicate(text, VOICE)
        await communicate.save(OUTPUT_FILE)
    except Exception as e:
        # Keep the traceback in the log; the caller only receives None.
        logger.exception("TTS Generation Error: %s", e)
        return None

    logger.info("TTS audio generated successfully: %s", OUTPUT_FILE)
    return OUTPUT_FILE
560
+
524
561
# Wire up the components
525
562
transcribe_button .click (
526
563
fn = show_transcription , inputs = audio_input , outputs = transcription_preview
@@ -533,6 +570,13 @@ def show_transcription(audio):
533
570
outputs = audio_output ,
534
571
)
535
572
573
+ # Connect TTS Button to Function
574
+ tts_button .click (
575
+ fn = lambda txt : asyncio .run (generate_tts (txt )),
576
+ inputs = audio_output , # Replace with the component holding the final text
577
+ outputs = tts_audio ,
578
+ )
579
+
536
580
# Text input tab
537
581
with gr .Tab ("Text Input" ):
538
582
chat_interface = gr .ChatInterface (
@@ -551,16 +595,27 @@ def show_transcription(audio):
551
595
scan_button = gr .Button ("Scan Receipt" )
552
596
result_text = gr .Textbox (label = "Analysis Result" )
553
597
554
- scan_button .click (
555
- fn = lambda img : asyncio .run (
556
- process_user_message (
557
- "Analyze this receipt" , [], use_vision = True , image_path = img
598
async def process_with_speech(image):
    """
    Analyze a receipt image and return the text result.

    Parameters
    ----------
    image
        The value received from the image input component; forwarded to
        ``process_user_message`` as ``image_path``.

    Returns
    -------
    str
        The result of ``process_user_message`` for the prompt
        "Analyze this receipt" with vision enabled. If no image was
        provided, a short message asking for one is returned instead. If
        processing raises, the exception text is returned and the error is
        logged.
    """
    # Without an image there is nothing to analyze, so skip the model call.
    if image is None:
        return "Please upload a receipt image before scanning."

    try:
        return await process_user_message(
            "Analyze this receipt", [], use_vision=True, image_path=image
        )
    except Exception as e:
        # Log with traceback; the UI still shows the plain exception text.
        logger.exception("Processing error: %s", e)
        return str(e)
608
+
609
+ scan_button .click (
610
+ fn = lambda img : asyncio .run (process_with_speech (img )),
560
611
inputs = image_input ,
561
612
outputs = result_text ,
562
613
)
563
614
615
+ # ------------------------------------------------------------------------------------
616
+ # Launch Gradio Interface
617
+ # ------------------------------------------------------------------------------------
618
+
564
619
if __name__ == "__main__" :
565
620
try :
566
621
logger .info ("Launching Gradio interface..." )
0 commit comments