diff --git a/Makefile b/Makefile index e5780c1..9e1eedc 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ docstring: activate format: activate # format code - black utils/*.py *.py + black utils/*.py tests/*.py clean: # clean directory of cache @@ -61,6 +61,7 @@ clean: rm -rf utils/__pycache__ rm -rf utils/*.log rm -rf *.log + rm -rf tests/__pycache__ lint: activate install #flake8 or #pylint @@ -69,6 +70,14 @@ lint: activate install # C - convention pylint --disable=R,C --errors-only *.py +test: activate install + # run tests (hints below name the same files the recipe actually executes) + @echo running tests + @echo we used this signature to run tests: $(PYTHON) -m pytest tests/test_cases.py + @echo for single tests, we used this signature: $(PYTHON) -m pytest tests/test_cases.py::test_function_name + $(PYTHON) -m pytest tests/test_cases.py -v + $(PYTHON) -m pytest tests/test_run.py -v --asyncio-mode=strict + run: activate install format # run test_app # run each file separately, bc if one fails, all fail diff --git a/README.md b/README.md index aefdf05..8aac693 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,15 @@ Learn more about tool calling ├── README.md - This file contains the project documentation. This is the file you are currently reading. ├── requirements.txt - This file contains the dependencies for the project. ├── summary.png - How function calling works with a diagram. +├── tests - This directory contains the test files for the project. +│ ├── __init__.py - This file initializes the tests directory as a package. +│ ├── test_cases.py - This file contains the test cases for the project. +│ └── test_run.py - This file contains the code to run the test cases for the function calling LLM. └── utils - This directory contains the utility files for the project. ├── __init__.py - This file initializes the utils directory as a package. ├── function_call.py - This file contains the code to call a function using LLMs. 
- └── communication_apis.py - This file contains the code to do with communication apis & experiments. + └── communication_apis.py - This file contains the code to do with communication apis & experiments. + ## Installation The project uses python 3.12. To install the project, follow the steps below: @@ -113,6 +118,9 @@ Notes: echo "AT_API_KEY = yourapikey" >> .env echo "AT_USERNAME = yourusername" >> .env echo "LANGTRACE_API_KEY= yourlangtraceapikey" >> .env +echo "TEST_PHONE_NUMBER = yourphonenumber" >> .env +echo "TEST_PHONE_NUMBER_2 = yourphonenumber" >> .env +echo "TEST_PHONE_NUMBER_3 = yourphonenumber" >> .env ``` - The Dockerfile creates 2 images for the ollama server and the gradio dashboard. The ollama server is running on port 11434 and the gradio dashboard is running on port 7860. You can access the gradio dashboard by visiting http://localhost:7860 in your browser & the ollama server by visiting http://localhost:11434 in your browser. They consume about 2.72GB of storage in the container. - The docker-compose.yml file is used to run the ollama server and the gradio dashboard. The docker-compose-codecarbon.yml file is used to run the ollama server, the gradio dashboard and the codecarbon project. @@ -141,6 +149,10 @@ ollama run qwen2.5:0.5b ```bash export AT_API_KEY=yourapikey export AT_USERNAME=yourusername +export LANGTRACE_API_KEY=yourlangtraceapikey +export TEST_PHONE_NUMBER=yourphonenumber +export TEST_PHONE_NUMBER_2=yourphonenumber +export TEST_PHONE_NUMBER_3=yourphonenumber ``` - Continue running the installation steps in the terminal. - Send your first message and airtime with an LLM. 🌠 @@ -152,6 +164,14 @@ This project uses LLMs to send airtime to a phone number. The difference is that - Send airtime to xxxxxxxxxx046 and xxxxxxxxxx524 with an amount of 10 in currency KES. - Send a message to xxxxxxxxxx046 and xxxxxxxxxx524 with a message "Hello, how are you?", using the username "username". 
+### Responsible AI Practices +This project implements several responsible AI practices: +- All test data is anonymized to protect privacy. +- Input validation to prevent misuse (negative amounts, spam detection). +- Handling of sensitive content and edge cases. +- Comprehensive test coverage for various scenarios. +- Secure handling of credentials and personal information. + ![Process Summary](summary.png) ## Use cases @@ -164,5 +184,12 @@ This project uses LLMs to send airtime to a phone number. The difference is that ## Contributing Contributions are welcome. If you would like to contribute to the project, you can fork the repository, create a new branch, make your changes and then create a pull request. +### Testing Guidelines +When contributing, please ensure: +- All test data uses anonymized placeholders +- Edge cases and invalid inputs are properly tested +- Sensitive content handling is verified +- No real personal information is included in tests + ## License [License information](https://github.com/Shuyib/tool_calling_api/blob/main/LICENSE). diff --git a/requirements.txt b/requirements.txt index 3aca41d..54c3eb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ gradio==5.7.1 duckduckgo_search==6.3.2 langtrace-python-sdk==3.3.14 setuptools==75.6.0 +pytest==8.3.4 +pytest-asyncio==0.25.0 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cases.py b/tests/test_cases.py new file mode 100644 index 0000000..f1da9d6 --- /dev/null +++ b/tests/test_cases.py @@ -0,0 +1,131 @@ +""" +Unit tests for the function calling utilities. + +This module contains tests for sending airtime, sending messages, and searching news +using the Africa's Talking API and DuckDuckGo News API. The tests mock external +dependencies to ensure isolation and reliability. 
+""" + +import os +import re +from unittest.mock import patch +from utils.function_call import send_airtime, send_message, search_news + +# Load environment variables: TEST_PHONE_NUMBER +PHONE_NUMBER = os.getenv("TEST_PHONE_NUMBER") + + +@patch("utils.function_call.africastalking.Airtime") +def test_send_airtime_success(mock_airtime): + """ + Test the send_airtime function to ensure it successfully sends airtime. + + This test mocks the Africa's Talking Airtime API and verifies that the + send_airtime function returns a response containing the word 'Sent'. + + Parameters + ---------- + mock_airtime : MagicMock + Mocked Airtime API from Africa's Talking. + """ + # Configure the mock Airtime response + mock_airtime.return_value.send.return_value = { + "numSent": 1, + "responses": [{"status": "Sent"}], + } + + # Call the send_airtime function + result = send_airtime(PHONE_NUMBER, "KES", 5) + + # Define patterns to check in the response + message_patterns = [ + r"Sent", + ] + + # Assert each pattern is found in the response + for pattern in message_patterns: + assert re.search( + pattern, str(result) + ), f"Pattern '{pattern}' not found in response" + + +@patch("utils.function_call.africastalking.SMS") +def test_send_message_success(mock_sms): + """ + Test the send_message function to ensure it successfully sends a message. + + This test mocks the Africa's Talking SMS API and verifies that the + send_message function returns a response containing 'Sent to 1/1'. + + Parameters + ---------- + mock_sms : MagicMock + Mocked SMS API from Africa's Talking. 
+ """ + # Configure the mock SMS response + mock_sms.return_value.send.return_value = { + "SMSMessageData": {"Message": "Sent to 1/1"} + } + + # Call the send_message function + result = send_message(PHONE_NUMBER, "In Qwen, we trust", os.getenv("AT_USERNAME")) + + # Define patterns to check in the response + message_patterns = [r"Sent to 1/1"] + + # Assert each pattern is found in the response + for pattern in message_patterns: + assert re.search( + pattern, str(result) + ), f"Pattern '{pattern}' not found in response" + + +@patch("utils.function_call.DDGS") +def test_search_news_success(mock_ddgs): + """ + Test the search_news function to ensure it retrieves news articles correctly. + + This test mocks the DuckDuckGo News API and verifies that the + search_news function returns results matching the expected patterns. + + Parameters + ---------- + mock_ddgs : MagicMock + Mocked DuckDuckGo DDGS API. + """ + # Configure the mock DDGS response with a realistic news article + mock_ddgs.return_value.news.return_value = [ + { + "date": "2024-12-20T02:07:00+00:00", + "title": "Hedge fund leader loves this AI stock", + "body": "Sample article body text", + "url": "https://example.com/article", + "image": "https://example.com/image.jpg", + "source": "MSN", + } + ] + + # Call the search_news function + result = search_news("AI") + + # Define regex patterns to validate response format + patterns = [ + r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}", # Date format + r'"title":\s*"[^"]+?"', # Title field + r'"source":\s*"[^"]+?"', # Source field + r'https?://[^\s<>"]+?', # URL format + ] + + # Convert result to string for regex matching + result_str = str(result) + + # Assert all patterns match in the result + for pattern in patterns: + assert re.search( + pattern, result_str + ), f"Pattern '{pattern}' not found in response" + + # Verify that the news method was called with expected arguments + mock_ddgs.return_value.news.assert_called_once_with( + keywords="AI", 
region="wt-wt", safesearch="off", timelimit="d", max_results=5 + ) diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..6558a67 --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,198 @@ +""" +Tests the ollama function calling system + +This module tests the function calling system in the ollama package. + +The tests are run using the pytest framework. The tests are run in the following order: +1. test_run_send_airtime: Tests the run function with an airtime request. +2. test_run_send_message: Tests the run function with a message-sending request. +3. test_run_search_news: Tests the run function with a news search request. + +The tests are run asynchronously to allow for the use of the asyncio library. + +NB: ensure you have the environment variables set in the .env file/.bashrc +file before running the tests. + +How to run the tests: +pytest test/test_run.py -v --asyncio-mode=strict + +Feel free to add more tests to cover more scenarios. + +""" + +import os +import pytest +from utils.function_call import run + +# Load environment variables +TEST_PHONE_NUMBER = os.getenv("TEST_PHONE_NUMBER") +TEST_PHONE_NUMBER_2 = os.getenv("TEST_PHONE_NUMBER_2") +TEST_PHONE_NUMBER_3 = os.getenv("TEST_PHONE_NUMBER_3") +USERNAME = os.getenv("USERNAME") + + +@pytest.mark.asyncio +async def test_run_send_airtime(): + """ + Test the run function with an airtime request. + Checks for any runtime errors while processing the prompt. + """ + user_prompt = ( + f"Send airtime to {TEST_PHONE_NUMBER} with an amount of 5 in currency KES" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_message(): + """ + Test the run function with a message-sending request. + Ensures no exceptions are raised while handling the prompt. 
+ """ + user_prompt = ( + f"Send a message to {TEST_PHONE_NUMBER} with the message 'Hello', " + f"using the username {USERNAME}" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_search_news(): + """ + Test the run function with a news search request. + Verifies the function completes without errors. + """ + user_prompt = "Search for news about 'Global Tech Events'" + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_zero_amount(): + """ + Test sending airtime with zero amount. + """ + user_prompt = ( + f"Send airtime to {TEST_PHONE_NUMBER} with an amount of 0 in currency KES" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_invalid_currency(): + """ + Test sending airtime with an invalid currency code. + """ + user_prompt = ( + f"Send airtime to {TEST_PHONE_NUMBER} with an amount of 10 in currency XYZ" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_message_missing_username(): + """ + Test sending a message without providing a username. + """ + user_prompt = f"Send a message to {TEST_PHONE_NUMBER} with the message 'Hello'" + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_search_news_empty_query(): + """ + Test searching news with an empty query. + """ + user_prompt = "Search for news about ''" + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_multiple_numbers(): + """ + Test sending airtime to multiple phone numbers. 
+ """ + user_prompt = f"Send airtime to {TEST_PHONE_NUMBER}, {TEST_PHONE_NUMBER_2}, and {TEST_PHONE_NUMBER_3} with an amount of 5 in currency KES" + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_synonym(): + """ + Test sending airtime using synonymous phrasing. + """ + user_prompt = f"Top-up {TEST_PHONE_NUMBER} with 10 KES airtime." + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_different_order(): + """ + Test sending airtime with parameters in a different order. + """ + user_prompt = f"With an amount of 15 KES, send airtime to {TEST_PHONE_NUMBER}." + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_message_polite_request(): + """ + Test sending a message with a polite request phrasing. + """ + user_prompt = f"Could you please send a message saying 'Good morning' to {TEST_PHONE_NUMBER}, using the username {USERNAME}?" + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_search_news_synonym(): + """ + Test searching news using synonymous phrasing. + """ + user_prompt = "Find articles related to 'Artificial Intelligence advancements'." + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_airtime_invalid_amount(): + """ + Test sending airtime with a negative amount. + """ + user_prompt = ( + f"Send airtime to {TEST_PHONE_NUMBER} with an amount of -5 in currency KES" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_send_message_spam_detection(): + """ + Test sending a message that may be considered spam (one phrase repeated 50 times). + """ + user_prompt = ( + f"Send a message to {TEST_PHONE_NUMBER} with the message '{'Buy now! ' * 50}', " 
+ f"using the username {USERNAME}" + ) + await run("qwen2.5:0.5b", user_prompt) + assert True + + +@pytest.mark.asyncio +async def test_run_search_news_sensitive_content(): + """ + Test searching for news with potentially sensitive content. + """ + user_prompt = "Search for news about 'Illegal Activities'" + await run("qwen2.5:0.5b", user_prompt) + assert True