Skip to content

Commit ed11262

Browse files
committed
feat: Add output schema enhancement utilities and tests for semantic metadata detection
1 parent 9e8bae2 commit ed11262

File tree

4 files changed

+932
-0
lines changed

4 files changed

+932
-0
lines changed

src/mcp/server/fastmcp/tools/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from mcp.server.fastmcp.exceptions import ToolError
1010
from mcp.server.fastmcp.utilities.func_metadata import FuncMetadata, func_metadata
11+
from mcp.server.fastmcp.utilities.schema import enhance_output_schema
1112
from mcp.types import ToolAnnotations
1213

1314
if TYPE_CHECKING:
@@ -97,6 +98,12 @@ def from_function(
9798
# Try to generate schema using TypeAdapter
9899
return_type_adapter = TypeAdapter(sig.return_annotation)
99100
output_schema = return_type_adapter.json_schema()
101+
102+
# Enhance the schema with detailed field information
103+
if output_schema:
104+
output_schema = enhance_output_schema(
105+
output_schema, sig.return_annotation
106+
)
100107
except Exception:
101108
# If we can't generate a schema, we'll leave it as None
102109
pass
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
"""Schema enhancement utilities for FastMCP tools.
2+
3+
This module provides utilities for enhancing JSON Schema definitions with semantic
4+
metadata that helps client applications render and display tool outputs intelligently.
5+
The enhancement process detects semantic meaning from field names and types, adding
6+
metadata like semantic_type, datetime_type, and media_format to JSON Schema properties.
7+
"""
8+
9+
from typing import Any
10+
11+
12+
# Ordered detection rules: (semantic_type, keywords, numeric_only).
# The first rule that matches wins, so earlier entries take precedence
# (e.g. "image_url" is a url, "video_id" is a video).
_SEMANTIC_RULES: tuple[tuple[str, frozenset[str], bool], ...] = (
    ("url", frozenset({"url", "uri", "link", "href"}), False),
    ("email", frozenset({"email"}), False),
    (
        "datetime",
        frozenset(
            {
                "date",
                "time",
                "datetime",
                "timestamp",
                "created",
                "updated",
                "modified",
            }
        ),
        False,
    ),
    (
        "audio",
        frozenset({"audio", "sound", "music", "voice", "recording"}),
        False,
    ),
    ("video", frozenset({"video", "movie", "clip"}), False),
    (
        "image",
        frozenset({"image", "photo", "picture", "img", "thumbnail", "avatar"}),
        False,
    ),
    ("file_path", frozenset({"path", "file", "filename", "filepath"}), False),
    ("color", frozenset({"color", "colour"}), False),
    (
        "currency",
        frozenset({"price", "cost", "amount", "money", "currency", "fee"}),
        True,
    ),
    ("percentage", frozenset({"percent", "percentage", "rate"}), True),
    ("identifier", frozenset({"id", "identifier", "uuid", "guid"}), False),
    ("status", frozenset({"status", "state", "condition"}), False),
)

# Per media semantic type: (file-extension words, media_format value to report).
_MEDIA_FORMATS: dict[str, tuple[frozenset[str], str]] = {
    "audio": (frozenset({"mp3", "wav", "ogg", "m4a", "flac"}), "audio_file"),
    "video": (frozenset({"mp4", "avi", "mov", "mkv", "webm"}), "video_file"),
    "image": (
        frozenset({"jpg", "jpeg", "png", "gif", "svg", "webp"}),
        "image_file",
    ),
}

_NUMERIC_TYPES = frozenset({"number", "integer"})


def _field_name_words(field_name: str) -> set[str]:
    """Split a field name into its lowercase words.

    Handles snake_case, kebab-case and camelCase. For every word ending in "s"
    the form without the trailing "s" is added as well, so simple plurals
    ("urls", "ids") match their singular keyword.
    """
    import re  # local import keeps this block independent of the file's imports

    # Mark lower->upper camelCase boundaries before the case information is lost.
    separated = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", field_name)
    words = {word for word in re.split(r"[^a-z0-9]+", separated.lower()) if word}
    words |= {word[:-1] for word in words if len(word) > 1 and word.endswith("s")}
    return words


def detect_semantic_format(
    field_name: str, field_schema: dict[str, Any]
) -> dict[str, Any]:
    """Detect semantic format information for a field from its name and schema.

    The field name is split into words (snake_case, kebab-case and camelCase are
    understood) and each word is compared against keyword lists. Matching is done
    on whole words rather than substrings: "updated_at" is not mistaken for a
    date-only field because "updated" contains "date", and "width" or "valid" are
    not reported as identifiers because they contain "id". Compound names written
    without any separator only match if the compound itself is a keyword
    (e.g. "filename", "timestamp", "datetime").

    Rules are evaluated in a fixed order and the first match wins. Currency and
    percentage keywords only apply to numeric fields; on a non-numeric field they
    are skipped so that later rules can still match (e.g. a string "price_id" is
    an identifier).

    Args:
        field_name: The name of the field to analyze.
        field_schema: JSON Schema definition for the field. Only its "type" entry
            is consulted; it may be a single type name or a list of type names.

    Returns:
        Dictionary containing the detected semantic information, empty when
        nothing was detected:
        - semantic_type: The detected semantic type (url, email, datetime, etc.)
        - datetime_type: For datetime fields, one of date_only, time_only or
          datetime
        - media_format: For media fields whose name also contains a known file
          extension word, the format type (audio_file, video_file, image_file)

    Examples:
        >>> detect_semantic_format("email", {"type": "string"})
        {'semantic_type': 'email'}

        >>> detect_semantic_format("created_date", {"type": "string"})
        {'semantic_type': 'datetime', 'datetime_type': 'date_only'}

        >>> detect_semantic_format("updated_at", {"type": "string"})
        {'semantic_type': 'datetime', 'datetime_type': 'datetime'}

        >>> detect_semantic_format("profile_image", {"type": "string"})
        {'semantic_type': 'image'}
    """
    words = _field_name_words(field_name)
    if not words:
        return {}

    # JSON Schema allows "type" to be a single name or a list of names.
    raw_type = field_schema.get("type", "")
    if isinstance(raw_type, str):
        declared_types = {raw_type}
    elif isinstance(raw_type, (list, tuple)):
        declared_types = {name for name in raw_type if isinstance(name, str)}
    else:
        declared_types = set()
    is_numeric = not declared_types.isdisjoint(_NUMERIC_TYPES)

    for semantic_type, keywords, numeric_only in _SEMANTIC_RULES:
        if words.isdisjoint(keywords):
            continue
        if numeric_only and not is_numeric:
            # Keep looking instead of giving up, so a later rule can still apply.
            continue

        format_info: dict[str, Any] = {"semantic_type": semantic_type}

        if semantic_type == "datetime":
            has_date = "date" in words
            has_time = "time" in words
            if has_date and not has_time:
                format_info["datetime_type"] = "date_only"
            elif has_time and not has_date:
                format_info["datetime_type"] = "time_only"
            else:
                format_info["datetime_type"] = "datetime"
        elif semantic_type in _MEDIA_FORMATS:
            extensions, media_format = _MEDIA_FORMATS[semantic_type]
            if not words.isdisjoint(extensions):
                format_info["media_format"] = media_format

        return format_info

    return {}
129+
130+
131+
def _annotate_property(field_name: str, field_schema: Any) -> Any:
    """Return a copy of one property schema with semantic metadata added."""
    if not isinstance(field_schema, dict):
        # JSON Schema also allows boolean schemas; there is nothing to annotate.
        return field_schema

    # Recurse first so nested objects / arrays inside this property are covered.
    annotated = _annotate_schema_node(field_schema)

    primary_type = field_schema.get("type", "unknown")
    any_of = field_schema.get("anyOf")
    if isinstance(any_of, list):
        # For unions such as "X | None", use the first non-null branch's type.
        non_null_branches = [
            branch
            for branch in any_of
            if isinstance(branch, dict) and branch.get("type") != "null"
        ]
        if non_null_branches:
            primary_type = non_null_branches[0].get("type", "unknown")

    format_info = detect_semantic_format(field_name, {"type": primary_type})
    if format_info.get("semantic_type"):
        annotated.update({key: value for key, value in format_info.items() if value})
    return annotated


def _annotate_schema_node(node: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a schema node with every reachable property annotated.

    Walks object properties, array items and "$defs" entries. The input node is
    never modified; only the levels that are rewritten are copied.
    """
    annotated = dict(node)

    properties = node.get("properties")
    if node.get("type") == "object" and isinstance(properties, dict):
        annotated["properties"] = {
            field_name: _annotate_property(field_name, field_schema)
            for field_name, field_schema in properties.items()
        }

    items = node.get("items")
    if node.get("type") == "array" and isinstance(items, dict):
        # Array items carry no field name of their own, but when they are objects
        # their properties do, so descend into them.
        annotated["items"] = _annotate_schema_node(items)

    definitions = node.get("$defs")
    if isinstance(definitions, dict):
        # Schemas referenced through "$ref" live under "$defs"; annotate them too
        # so referenced object schemas are not left out.
        annotated["$defs"] = {
            def_name: (
                _annotate_schema_node(def_schema)
                if isinstance(def_schema, dict)
                else def_schema
            )
            for def_name, def_schema in definitions.items()
        }

    return annotated


def enhance_output_schema(schema: dict[str, Any], return_type: Any) -> dict[str, Any]:
    """Enhance an output schema with semantic metadata embedded in the JSON Schema.

    Every property found in the schema is passed by name to
    ``detect_semantic_format``; any detected ``semantic_type`` (plus
    ``datetime_type`` / ``media_format`` when present) is added to that property's
    schema as extra keys. Properties are looked for at the top level, inside
    nested object properties, inside array ``items`` and inside ``$defs``
    entries. Primitive schemas are returned as an unchanged copy.

    For a top-level object schema with properties, the ``required`` list is
    removed from the result. Nested ``required`` lists are left untouched.

    The input schema is not modified.

    Args:
        schema: Standard JSON Schema definition to enhance.
        return_type: Python type annotation for the return type. Currently unused
            (reserved for future use).

    Returns:
        Enhanced copy of the JSON Schema with embedded semantic metadata.

    Examples:
        >>> schema = {
        ...     "type": "object",
        ...     "properties": {
        ...         "email": {"type": "string", "title": "Email"},
        ...         "created_date": {"type": "string", "title": "Created Date"}
        ...     }
        ... }
        >>> enhanced = enhance_output_schema(schema, None)
        >>> enhanced["properties"]["email"]["semantic_type"]
        'email'
        >>> enhanced["properties"]["created_date"]["semantic_type"]
        'datetime'
    """
    enhanced_schema = _annotate_schema_node(schema)

    # Only the top-level object drops "required"; see the docstring.
    if schema.get("type") == "object" and "properties" in schema:
        enhanced_schema.pop("required", None)

    return enhanced_schema

0 commit comments

Comments
 (0)