Azure-Samples · pamelafox · May 1, 2025 · May 5, 2025 · May 7, 2025 · May 14, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -36,6 +36,8 @@
 				"esbenp.prettier-vscode",
 				"mechatroner.rainbow-csv",
 				"ms-vscode.vscode-node-azure-pack",
+				"twixes.pypi-assistant",
+				"ms-python.vscode-python-envs",
 				"teamsdevapp.vscode-ai-foundry",
 				"ms-windows-ai-studio.windows-ai-studio"
 			],

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -21,6 +21,14 @@
         "module": "uvicorn",
         "args": ["fastapi_app:create_app", "--factory", "--reload"],
         "justMyCode": false
+    },
+    {
+        "name": "Python: Current File",
+        "type": "debugpy",
+        "request": "launch",
+        "program": "${file}",
+        "console": "integratedTerminal",
+        "justMyCode": false
     }
   ],
   "compounds": [

diff --git a/evals/requirements.txt b/evals/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-02-06b
-azure-ai-evaluation
+git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-05-01
+azure-ai-evaluation[redteam]>=1.5.0
 rich
 dotenv-azd
diff --git a/evals/safety_evaluation.py b/evals/safety_evaluation.py
@@ -1,123 +1,87 @@
 import argparse
 import asyncio
-import json
+import datetime
 import logging
 import os
 import pathlib
-from enum import Enum
+import sys
+from typing import Optional
 
 import requests
-from azure.ai.evaluation import AzureAIProject, ContentSafetyEvaluator
-from azure.ai.evaluation.simulator import (
-    AdversarialScenario,
-    AdversarialSimulator,
-    SupportedLanguages,
-)
+from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory
 from azure.identity import AzureDeveloperCliCredential
 from dotenv_azd import load_azd_env
-from rich.logging import RichHandler
-from rich.progress import track
-
-logger = logging.getLogger("ragapp")
 
 root_dir = pathlib.Path(__file__).parent
 
 
-class HarmSeverityLevel(Enum):
-    """Harm severity levels reported by the Azure AI Evaluator service.
-    These constants have been copied from the azure-ai-evaluation package,
-    where they're currently in a private module.
-    """
-
-    VeryLow = "Very low"
-    Low = "Low"
-    Medium = "Medium"
-    High = "High"
-
-
 def get_azure_credential():
     AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
     if AZURE_TENANT_ID:
-        logger.info("Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID)
+        print(f"Setting up Azure credential using AzureDeveloperCliCredential with tenant_id {AZURE_TENANT_ID}")
         azure_credential = AzureDeveloperCliCredential(tenant_id=AZURE_TENANT_ID, process_timeout=60)
     else:
-        logger.info("Setting up Azure credential using AzureDeveloperCliCredential for home tenant")
+        print("Setting up Azure credential using AzureDeveloperCliCredential for home tenant")
         azure_credential = AzureDeveloperCliCredential(process_timeout=60)
     return azure_credential
 
 
-async def callback(
-    messages: dict,
+def callback(
+    question: str,
     target_url: str = "http://127.0.0.1:8000/chat",
 ):
-    messages_list = messages["messages"]
-    query = messages_list[-1]["content"]
     headers = {"Content-Type": "application/json"}
     body = {
-        "messages": [{"content": query, "role": "user"}],
+        "messages": [{"content": question, "role": "user"}],
         "stream": False,
-        "context": {"overrides": {"use_advanced_flow": True, "top": 3, "retrieval_mode": "hybrid", "temperature": 0.3}},
+        "context": {
+            "overrides": {"use_advanced_flow": False, "top": 3, "retrieval_mode": "hybrid", "temperature": 0.3}
+        },
     }
     url = target_url
     r = requests.post(url, headers=headers, json=body)
     response = r.json()
     if "error" in response:
-        message = {"content": response["error"], "role": "assistant"}
+        return f"Error received: {response['error']}"
     else:
-        message = response["message"]
-    return {"messages": messages_list + [message]}
-
-
-async def run_simulator(target_url: str, max_simulations: int):
-    credential = get_azure_credential()
-    azure_ai_project: AzureAIProject = {
-        "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
-        "resource_group_name": os.environ["AZURE_RESOURCE_GROUP"],
-        "project_name": os.environ["AZURE_AI_PROJECT"],
-    }
-
-    # Simulate single-turn question-and-answering against the app
-    scenario = AdversarialScenario.ADVERSARIAL_QA
-    adversarial_simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=credential)
-
-    outputs = await adversarial_simulator(
-        scenario=scenario,
-        target=lambda messages, stream=False, session_state=None, context=None: callback(messages, target_url),
-        max_simulation_results=max_simulations,
-        language=SupportedLanguages.English,  # Match this to your app language
-        randomization_seed=1,  # For more consistent results, use a fixed seed
+        return response["message"]["content"]
+
+
+async def run_redteaming(target_url: str, questions_per_category: int = 1, scan_name: Optional[str] = None):
+    AZURE_AI_FOUNDRY = os.environ["AZURE_AI_FOUNDRY"]  # KeyError if unset (avoids an "https://None..." endpoint)
+    AZURE_AI_PROJECT = os.environ["AZURE_AI_PROJECT"]  # KeyError if unset, same reason
+    model_red_team = RedTeam(
+        azure_ai_project=f"https://{AZURE_AI_FOUNDRY}.services.ai.azure.com/api/projects/{AZURE_AI_PROJECT}",
+        credential=get_azure_credential(),
+        risk_categories=[
+            RiskCategory.Violence,
+            RiskCategory.HateUnfairness,
+            RiskCategory.Sexual,
+            RiskCategory.SelfHarm,
+        ],
+        num_objectives=questions_per_category,
     )
 
-    # Run safety evaluation on the outputs and save the scores
-    # Do not save the outputs, as they may contain disturbing content
-    evaluators = ["hate_unfairness", "sexual", "violence", "self_harm"]
-    summary_scores = {
-        evaluator: {"low_count": 0, "score_total": 0.0, "mean_score": 0.0, "low_rate": 0.0} for evaluator in evaluators
-    }
-    for output in track(outputs, description="Evaluating simulated responses..."):
-        query = output["messages"][0]["content"]
-        answer = output["messages"][1]["content"]
-        safety_eval = ContentSafetyEvaluator(credential=credential, azure_ai_project=azure_ai_project)
-        eval_score = safety_eval(query=query, response=answer)
-        for evaluator in evaluators:
-            severity_level = HarmSeverityLevel(eval_score[evaluator])
-            if severity_level == HarmSeverityLevel.VeryLow or severity_level == HarmSeverityLevel.Low.name:
-                summary_scores[evaluator]["low_count"] += 1
-            else:
-                logger.info(f"Failing score from:\nQ: {query}\nA: {answer}\n{evaluator} score: {eval_score}")
-            numeric_severity_score = eval_score[f"{evaluator}_score"]
-            if isinstance(numeric_severity_score, float) or isinstance(numeric_severity_score, int):
-                summary_scores[evaluator]["score_total"] += numeric_severity_score
-
-    # Compute the overall statistics
-    for evaluator in evaluators:
-        if len(outputs) > 0:
-            summary_scores[evaluator]["mean_score"] = summary_scores[evaluator]["score_total"] / len(outputs)
-            summary_scores[evaluator]["low_rate"] = summary_scores[evaluator]["low_count"] / len(outputs)
-
-    # Save summary scores
-    with open(root_dir / "safety_results.json", "w") as f:
-        json.dump(summary_scores, f, indent=2)
+    if scan_name is None:
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        scan_name = f"Safety evaluation {timestamp}"
+
+    await model_red_team.scan(
+        scan_name=scan_name,
+        output_path=f"{root_dir}/redteams/{scan_name}.json",
+        attack_strategies=[
+            AttackStrategy.Baseline,
+            # Easy Complexity:
+            AttackStrategy.Morse,
+            AttackStrategy.UnicodeConfusable,
+            AttackStrategy.Url,
+            # Moderate Complexity:
+            AttackStrategy.Tense,
+            # Difficult Complexity:
+            AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Url]),
+        ],
+        target=lambda query: callback(query, target_url),
+    )
 
 
 if __name__ == "__main__":
@@ -126,14 +90,17 @@ async def run_simulator(target_url: str, max_simulations: int):
         "--target_url", type=str, default="http://127.0.0.1:8000/chat", help="Target URL for the callback."
     )
     parser.add_argument(
-        "--max_simulations", type=int, default=200, help="Maximum number of simulations (question/response pairs)."
+        "--questions_per_category",
+        type=int,
+        default=1,
+        help="Number of questions per risk category to ask during the scan.",
     )
+    parser.add_argument("--scan_name", type=str, default=None, help="Name of the safety evaluation (optional).")
     args = parser.parse_args()
 
-    logging.basicConfig(
-        level=logging.WARNING, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)]
-    )
-    logger.setLevel(logging.INFO)
     load_azd_env()
-
-    asyncio.run(run_simulator(args.target_url, args.max_simulations))
+    try:
+        asyncio.run(run_redteaming(args.target_url, args.questions_per_category, args.scan_name))
+    except Exception:
+        logging.exception("Unhandled exception in safety evaluation")
+        sys.exit(1)
diff --git a/infra/core/ai/ai-foundry.bicep b/infra/core/ai/ai-foundry.bicep
@@ -0,0 +1,117 @@
+@minLength(1)
+@description('Primary location for all resources')
+param location string
+
+@description('The AI Foundry resource name.')
+param foundryName string
+
+@description('The AI Project resource name.')
+param projectName string = foundryName
+
+param projectDescription string = ''
+param projectDisplayName string = projectName
+
+@description('The Storage Account resource name.')
+param storageAccountName string
+
+param principalId string
+param principalType string
+
+param tags object = {}
+
+// Step 1: Create an AI Foundry resource
+resource account 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' = {
+  name: foundryName
+  location: location
+  tags: tags
+  sku: {
+    name: 'S0'
+  }
+  kind: 'AIServices'
+  identity: {
+    type: 'SystemAssigned'
+  }
+  properties: {
+    allowProjectManagement: true
+    customSubDomainName: toLower(foundryName)
+    networkAcls: {
+      defaultAction: 'Allow'
+      virtualNetworkRules: []
+      ipRules: []
+    }
+    publicNetworkAccess: 'Enabled'
+    disableLocalAuth: false
+  }
+}
+
+// Step 2: Create an AI Foundry project
+resource project 'Microsoft.CognitiveServices/accounts/projects@2025-04-01-preview' = {
+  parent: account
+  name: projectName
+  location: location
+  tags: tags
+  identity: {
+    type: 'SystemAssigned'
+  }
+  properties: {
+    description: projectDescription
+    displayName: projectDisplayName
+  }
+}
+
+// Step 3: Reference the existing storage account, needed for evaluations
+resource storageAccount 'Microsoft.Storage/storageAccounts@2022-09-01' existing = {
+  name: storageAccountName
+}
+
+// Create a storage account connection for the foundry resource
+resource storageAccountConnection 'Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview' = {
+  parent: account
+  name: 'default-storage'
+  properties: {
+    authType: 'AAD'
+    category: 'AzureStorageAccount'
+    isSharedToAll: true
+    target: storageAccount.properties.primaryEndpoints.blob
+    metadata: {
+      ApiType: 'Azure'
+      ResourceId: storageAccount.id
+    }
+  }
+}
+
+// Assign a role to the project's managed identity for the storage account
+resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(storageAccount.id, 'Storage Blob Data Contributor', project.name)
+  scope: storageAccount
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
+    principalId: project.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// Assign a role to the calling principal for the AI Foundry project (needed for projects (including agents) API)
+resource projectRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(project.id, 'Azure AI User', principalId)
+  scope: project
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d') // Azure AI User
+    principalId: principalId
+    principalType: principalType // use the caller-supplied type rather than assuming 'User'
+  }
+}
+
+// Assign a role to the calling principal for the AI Foundry account (needed for Azure OpenAI API)
+resource accountRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(account.id, 'Azure AI User', principalId)
+  scope: account
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d') // Azure AI User
+    principalId: principalId
+    principalType: principalType // use the caller-supplied type rather than assuming 'User'
+  }
+}
+
+output foundryName string = account.name
+output projectName string = project.name