Speechtotext #5

Merged 6 commits on Dec 26, 2024.

26 changes: 15 additions & 11 deletions Makefile
@@ -21,7 +21,7 @@ venv/bin/activate: requirements.txt # requirements.txt is a requirement, otherwise…
# make command executable
# chmod is a shell command; +x adds execute permission so the script can be run
# (plain +x behaves like a+x, i.e. anyone can run it; u+x would limit it to the file's owner)
chmod +x .venv/bin/activate
# activate virtual environment
# note: each recipe line runs in its own subshell, so activation does not persist across lines
. .venv/bin/activate

@@ -37,19 +37,19 @@ install: venv/bin/activate requirements.txt # prerequisite

docstring: activate
# format docstrings; might have to change this as well
# write a template using the numpydoc convention and output it to my python files
# so basically just document functions, classes etc. in the numpy style
pyment -w -o numpydoc *.py

format: activate
# format code
black *.py utils/*.py tests/*.py

clean:
# clean directory of cache
# files like __pycache__ are generated after running py files
# the cached data speeds up execution of py files in subsequent runs
# removing them reduces the size of the repo
# during version control, removing them avoids conflicts with other devs' cached files
# add code to remove ipynb checkpoints
# the &&\ means: after this command succeeds, run the next one
@@ -63,12 +63,12 @@ clean:
rm -rf *.log
rm -rf tests/__pycache__

lint: activate install
# flake8 or pylint
# in this scenario it will only report errors found in your code
# R - refactor
# C - convention
pylint --disable=R,C --errors-only *.py

test: activate install
# run tests
@@ -87,6 +87,10 @@ run_gradio: activate install format
# run gradio
$(PYTHON) app.py

run_gradio_stt: activate install format
# run the gradio speech-to-text interface
$(PYTHON) voice_stt_mode.py

docker_build: Dockerfile
# build container
# docker build -t $(DOCKER_IMAGE_TAG) .
@@ -95,7 +99,7 @@ docker_run_test: Dockerfile.app Dockerfile.ollama
# linting Dockerfile
docker run --rm -i hadolint/hadolint < Dockerfile.ollama
docker run --rm -i hadolint/hadolint < Dockerfile.app


docker_clean: Dockerfile.ollama Dockerfile.app
# clean docker
@@ -109,7 +113,7 @@ docker_run: Dockerfile.ollama Dockerfile.app
# run docker
# this is basically a test to see if a docker image is being created successfully
docker-compose up --build

setup_readme: ## Create a README.md
@if [ ! -f README.md ]; then \
echo "# Project Name\n\
129 changes: 68 additions & 61 deletions README.md
@@ -6,11 +6,11 @@ Function-calling with Python and ollama. We are going to use the Africa's Talking…

NB: The phone numbers are placeholders for the actual phone numbers.
You need some VRAM to run this project; you can rent GPU instances from [vast.ai](https://vast.ai/).
We recommend 400MB-8GB of VRAM for this project. It can run on CPU; however, smaller models are recommended in that case.

[Mistral 7B](https://ollama.com/library/mistral), **llama 3.2 3B/1B**, [**Qwen 2.5: 0.5/1.5B**](https://ollama.com/library/qwen2.5:1.5b), [nemotron-mini 4b](https://ollama.com/library/nemotron-mini) and [llama3.1 8B](https://ollama.com/library/llama3.1) are the recommended models for this project.

Ensure ollama is installed on your laptop/server and running before starting this project. You can install ollama from [here](https://ollama.com).
Learn more about tool calling <https://gorilla.cs.berkeley.edu/leaderboard.html>


@@ -22,41 +22,41 @@ Learn more about tool calling <https://gorilla.cs.berkeley.edu/leaderboard.html>
- [Usage](#usage)
- [Use cases](#use-cases)
- [Responsible AI Practices](#responsible-ai-practices)
- [Limitations](#limitations)
- [Contributing](#contributing)
- [License](#license)


## File structure
.
├── Dockerfile.app - template to run the gradio dashboard.
├── Dockerfile.ollama - template to run the ollama server.
├── docker-compose.yml - use the ollama project and gradio dashboard.
├── docker-compose-codecarbon.yml - use the codecarbon project, ollama and gradio dashboard.
├── .env - This file contains the environment variables for the project. (Not included in the repository)
├── app.py - the function_call.py using gradio as the User Interface.
├── Makefile - This file contains the commands to run the project.
├── README.md - This file contains the project documentation. This is the file you are currently reading.
├── requirements.txt - This file contains the dependencies for the project.
├── summary.png - How function calling works with a diagram.
├── tests - This directory contains the test files for the project.
│   ├── __init__.py - This file initializes the tests directory as a package.
│   ├── test_cases.py - This file contains the test cases for the project.
│   └── test_run.py - This file contains the code to run the test cases for the function calling LLM.
└── utils - This directory contains the utility files for the project.
    ├── __init__.py - This file initializes the utils directory as a package.
    ├── function_call.py - This file contains the code to call a function using LLMs.
    └── communication_apis.py - This file contains the code to do with communication apis & experiments.

### Attribution
This project uses the Qwen2.5-0.5B model developed by Alibaba Cloud under the Apache License 2.0. The original work is described in the [Qwen technical report](https://arxiv.org/abs/2412.15115).

### License

This project is licensed under the Apache License 2.0. See the [LICENSE](./LICENSE) file for more details.

## Installation
The project uses Python 3.12. To install the project, follow the steps below:

- Clone the repository
```bash
git clone https://github.com/Shuyib/tool_calling_api.git
```
- Change directory to the project directory
```bash
cd tool_calling_api
```
- Create a virtual environment
```bash
python3 -m venv .venv
```
@@ -88,7 +88,7 @@ make install
```bash
make run
```
Long way to run the project

- Change directory to the utils directory
```bash
make docker_run
```

Notes:
- The .env file contains the environment variables for the project. You can create a .env file and add the following environment variables (a sketch of reading them back in Python follows these notes):

```bash
echo "AT_API_KEY = yourapikey" >> .env
echo "AT_USERNAME = yourusername" >> .env
echo "LANGTRACE_API_KEY= yourlangtraceapikey" >> .env
echo "GROQ_API_KEY = yourgroqapikey" >> .env
echo "LANGTRACE_API_KEY= yourlangtraceapikey" >> .env
echo "TEST_PHONE_NUMBER = yourphonenumber" >> .env
echo "TEST_PHONE_NUMBER_2 = yourphonenumber" >> .env
echo "TEST_PHONE_NUMBER_3 = yourphonenumber" >> .env
```
- The Dockerfile creates 2 images: one for the ollama server and one for the gradio dashboard. The ollama server runs on port 11434 and the gradio dashboard on port 7860. You can access the gradio dashboard by visiting <http://localhost:7860> in your browser and the ollama server by visiting <http://localhost:11434>. Together they consume about 2.72GB of storage in the container.
- The docker-compose.yml file is used to run the ollama server and the gradio dashboard. The docker-compose-codecarbon.yml file is used to run the ollama server, the gradio dashboard and the codecarbon project.
- You can learn more about how to make this system even more secure. Do this [course](https://www.kaggle.com/learn-guide/5-day-genai#GenAI).
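
For reference, here is a minimal, hypothetical sketch of reading these variables back in Python. It assumes python-dotenv is installed; check requirements.txt and the project's own code for what is actually used.

```python
# Hypothetical sketch: load the .env file and read the credentials.
# Assumes python-dotenv; the project's real loading logic may differ.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

at_api_key = os.getenv("AT_API_KEY")
at_username = os.getenv("AT_USERNAME")

if not at_api_key or not at_username:
    raise RuntimeError("AT_API_KEY and AT_USERNAME must be set in .env or the environment")
```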


## Run in runpod.io
Make an account if you haven't already. Once that's settled:

- Click on Deploy under Pods.
- Select the cheapest pod option to deploy, for example an RTX 2000 Ada.
- This will create a jupyter lab instance.
- Follow the Installation steps above in the terminal provided, up to `make install`.
- Run this command to install ollama, serve it, and redirect the output to a log file:

```bash
curl -fsSL https://ollama.com/install.sh | sh && ollama serve > ollama.log 2>&1 &
```
- Install your preferred model in the same terminal.

```bash
ollama run qwen2.5:0.5b
```
- Export your credentials. If you are using a .env file, you can skip this step; the exports are mainly useful for Docker.

```bash
export AT_API_KEY=yourapikey
export AT_USERNAME=yourusername
export GROQ_API_KEY=yourgroqapikey
export LANGTRACE_API_KEY=yourlangtraceapikey
export TEST_PHONE_NUMBER=yourphonenumber
export TEST_PHONE_NUMBER_2=yourphonenumber
export TEST_PHONE_NUMBER_3=yourphonenumber
```
- Continue running the installation steps in the terminal.
- Send your first message and airtime with an LLM. 🌠

Read more about setting up ollama and serverless options: <https://blog.runpod.io/run-llama-3-1-405b-with-ollama-a-step-by-step-guide/> & <https://blog.runpod.io/run-llama-3-1-with-vllm-on-runpod-serverless/>

## Usage
This project uses LLMs to send airtime to a phone number, the difference being that we use the Africa's Talking API driven by natural language. Here are examples of prompts you can use (a sketch of how such prompts map to tool calls follows the list):
- Send airtime to xxxxxxxxxx046 and xxxxxxxxxx524 with an amount of 10 in currency KES.
- Send a message to xxxxxxxxxx046 and xxxxxxxxxx524 with a message "Hello, how are you?", using the username "username".
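
As a rough illustration of what happens under the hood (a sketch, not the project's exact code: the real schemas and helpers live in utils/function_call.py), a hypothetical `send_airtime` tool could be wired to ollama's tool-calling API like this, assuming ollama-python >= 0.4:

```python
# Sketch of function calling with ollama. The send_airtime schema is a
# hypothetical illustration; see utils/function_call.py for the real tools.
import ollama

tools = [
    {
        "type": "function",
        "function": {
            "name": "send_airtime",
            "description": "Send airtime to a phone number using Africa's Talking",
            "parameters": {
                "type": "object",
                "properties": {
                    "phone_number": {"type": "string", "description": "e.g. +254700000046"},
                    "currency_code": {"type": "string", "description": "e.g. KES"},
                    "amount": {"type": "string", "description": "e.g. 10"},
                },
                "required": ["phone_number", "currency_code", "amount"],
            },
        },
    }
]

response = ollama.chat(
    model="qwen2.5:0.5b",
    messages=[{"role": "user", "content": "Send airtime to +254700000046 with an amount of 10 in currency KES"}],
    tools=tools,
)

# Instead of free text, the model returns structured tool calls to execute.
for call in response.message.tool_calls or []:
    print(call.function.name, call.function.arguments)
```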

## Updated Usage Instructions
- The app now supports both Text and Voice input tabs.
- In the Voice Input tab, record audio and click "Transcribe" to preview the transcription, then click "Process Edited Text" to execute the voice command (a rough sketch of this layout follows the list).
- In the Text Input tab, type commands directly to send airtime or messages, or to search news.
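
A simplified sketch of that two-tab layout in gradio (not the actual contents of voice_stt_mode.py; `transcribe()` and `process_text()` are placeholders for the app's real speech-to-text and function-calling handlers):

```python
# Simplified sketch of the Text/Voice tab layout; placeholder handlers only.
import gradio as gr


def transcribe(audio_path):
    """Placeholder: run speech-to-text on the recorded audio file."""
    return "transcribed text goes here"


def process_text(command):
    """Placeholder: hand the (edited) command to the function-calling LLM."""
    return f"processed: {command}"


with gr.Blocks() as demo:
    with gr.Tab("Text Input"):
        text_in = gr.Textbox(label="Command")
        text_out = gr.Textbox(label="Result")
        gr.Button("Submit").click(process_text, inputs=text_in, outputs=text_out)
    with gr.Tab("Voice Input"):
        audio_in = gr.Audio(sources=["microphone"], type="filepath")
        draft = gr.Textbox(label="Transcription (editable)")
        result = gr.Textbox(label="Result")
        gr.Button("Transcribe").click(transcribe, inputs=audio_in, outputs=draft)
        gr.Button("Process Edited Text").click(process_text, inputs=draft, outputs=result)

demo.launch()
```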

### Responsible AI Practices
This project implements several responsible AI practices (a sketch of the input-validation idea follows the list):
- All test data is anonymized to protect privacy.
- Input validation to prevent misuse (negative amounts, spam detection).
- Handling of sensitive content and edge cases.
- Comprehensive test coverage for various scenarios.
- Secure handling of credentials and personal information.
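
As an illustration of the input-validation point, a check along these lines rejects malformed numbers and abusive amounts before any API call (the pattern and cap are illustrative, not the project's exact rules):

```python
# Illustrative validation only; thresholds and patterns are assumptions.
import re

E164_PATTERN = re.compile(r"^\+\d{9,15}$")  # e.g. +254700000046


def validate_airtime_request(phone_number: str, amount: float) -> None:
    """Reject obviously malformed or abusive airtime requests."""
    if not E164_PATTERN.match(phone_number):
        raise ValueError(f"invalid phone number: {phone_number!r}")
    if amount <= 0:
        raise ValueError("amount must be positive")
    if amount > 1000:  # illustrative cap to limit misuse
        raise ValueError("amount exceeds the allowed maximum")
```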

![Process Summary](summary.png)

## Use cases
* Non-Technical User Interfaces: Simplifies the process for non-coders to interact with APIs, making it easier for them to send airtime and messages without needing to understand the underlying code.
* Customer Support Automation: Enables customer support teams to quickly send airtime or messages to clients using natural language commands, improving efficiency and response times.
* Marketing Campaigns: Facilitates the automation of promotional messages and airtime rewards to customers, enhancing engagement and retention.
* Emergency Notifications: Allows rapid dissemination of urgent alerts and notifications to a large number of recipients using simple prompts.
* Educational Tools: Provides a practical example for teaching how to integrate APIs with natural language processing, which can be beneficial for coding bootcamps and workshops.
* Multilingual Support: Supports multiple languages when sending messages and airtime, making it accessible to a diverse range of users. Testing for Arabic, French, English and Portuguese.

## Limitations
- The project is limited to sending airtime, searching for news, and sending messages using the Africa's Talking API. The functionality can be expanded to include other APIs and services.

- Jailbreaking of the LLMs is a limitation. The LLMs are not perfect and can be manipulated to produce harmful outputs. This can be mitigated by running in a secure environment and monitoring outputs for malicious content; even so, the Best-of-N technique and prefix injection were effective in changing model behavior.

- A small number of test cases were used to test the project. More test cases can be added to cover a wider range of scenarios and edge cases.

## Contributing
Contributions are welcome. If you would like to contribute to the project, you can fork the repository, create a new branch, make your changes and then create a pull request.