Skip to content

Commit 59c2d9b

Browse files
authored
Output field descriptions as extra output for Ollama, which doesn't really support descriptions (#220)
* Refactor `JsonSchemaBuilder` into a struct with state.
* Dump descriptions as extra output for Ollama.
1 parent 15967c4 commit 59c2d9b

File tree

6 files changed

+191
-108
lines changed

6 files changed

+191
-108
lines changed

src/base/json_schema.rs

Lines changed: 164 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
use super::schema;
1+
use crate::utils::immutable::RefList;
2+
3+
use super::{schema, spec::FieldName};
4+
use anyhow::Result;
5+
use indexmap::IndexMap;
26
use schemars::schema::{
3-
ArrayValidation, InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SingleOrVec,
7+
ArrayValidation, InstanceType, ObjectValidation, Schema, SchemaObject, SingleOrVec,
48
};
9+
use std::fmt::Write;
510

611
pub struct ToJsonSchemaOptions {
712
/// If true, mark all fields as required.
@@ -11,16 +16,47 @@ pub struct ToJsonSchemaOptions {
1116

1217
/// If true, the JSON schema supports the `format` keyword.
1318
pub supports_format: bool,
19+
20+
/// If true, extract descriptions to a separate extra instruction.
21+
pub extract_descriptions: bool,
1422
}
1523

16-
pub trait ToJsonSchema {
17-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject;
24+
struct JsonSchemaBuilder {
25+
options: ToJsonSchemaOptions,
26+
extra_instructions_per_field: IndexMap<String, String>,
1827
}
1928

20-
impl ToJsonSchema for schema::BasicValueType {
21-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
29+
impl JsonSchemaBuilder {
30+
fn new(options: ToJsonSchemaOptions) -> Self {
31+
Self {
32+
options,
33+
extra_instructions_per_field: IndexMap::new(),
34+
}
35+
}
36+
37+
fn set_description(
38+
&mut self,
39+
schema: &mut SchemaObject,
40+
description: impl ToString,
41+
field_path: RefList<'_, &'_ FieldName>,
42+
) {
43+
if self.options.extract_descriptions {
44+
let mut fields: Vec<_> = field_path.iter().map(|f| f.as_str()).collect();
45+
fields.reverse();
46+
self.extra_instructions_per_field
47+
.insert(fields.join("."), description.to_string());
48+
} else {
49+
schema.metadata.get_or_insert_default().description = Some(description.to_string());
50+
}
51+
}
52+
53+
fn for_basic_value_type(
54+
&mut self,
55+
basic_type: &schema::BasicValueType,
56+
field_path: RefList<'_, &'_ FieldName>,
57+
) -> SchemaObject {
2258
let mut schema = SchemaObject::default();
23-
match self {
59+
match basic_type {
2460
schema::BasicValueType::Str => {
2561
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
2662
}
@@ -52,51 +88,66 @@ impl ToJsonSchema for schema::BasicValueType {
5288
max_items: Some(2),
5389
..Default::default()
5490
}));
55-
schema.metadata.get_or_insert_default().description =
56-
Some("A range, start pos (inclusive), end pos (exclusive).".to_string());
91+
self.set_description(
92+
&mut schema,
93+
"A range represented by a list of two positions, start pos (inclusive), end pos (exclusive).",
94+
field_path,
95+
);
5796
}
5897
schema::BasicValueType::Uuid => {
5998
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
60-
if options.supports_format {
99+
if self.options.supports_format {
61100
schema.format = Some("uuid".to_string());
62-
} else {
63-
schema.metadata.get_or_insert_default().description =
64-
Some("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000".to_string());
65101
}
102+
self.set_description(
103+
&mut schema,
104+
"A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000",
105+
field_path,
106+
);
66107
}
67108
schema::BasicValueType::Date => {
68109
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
69-
if options.supports_format {
110+
if self.options.supports_format {
70111
schema.format = Some("date".to_string());
71-
} else {
72-
schema.metadata.get_or_insert_default().description =
73-
Some("A date, e.g. 2025-03-27".to_string());
74112
}
113+
self.set_description(
114+
&mut schema,
115+
"A date in YYYY-MM-DD format, e.g. 2025-03-27",
116+
field_path,
117+
);
75118
}
76119
schema::BasicValueType::Time => {
77120
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
78-
if options.supports_format {
121+
if self.options.supports_format {
79122
schema.format = Some("time".to_string());
80-
} else {
81-
schema.metadata.get_or_insert_default().description =
82-
Some("A time, e.g. 13:32:12".to_string());
83123
}
124+
self.set_description(
125+
&mut schema,
126+
"A time in HH:MM:SS format, e.g. 13:32:12",
127+
field_path,
128+
);
84129
}
85130
schema::BasicValueType::LocalDateTime => {
86131
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
87-
if options.supports_format {
132+
if self.options.supports_format {
88133
schema.format = Some("date-time".to_string());
89134
}
90-
schema.metadata.get_or_insert_default().description =
91-
Some("Date time without timezone offset, e.g. 2025-03-27T13:32:12".to_string());
135+
self.set_description(
136+
&mut schema,
137+
"Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12",
138+
field_path,
139+
);
92140
}
93141
schema::BasicValueType::OffsetDateTime => {
94142
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
95-
if options.supports_format {
143+
if self.options.supports_format {
96144
schema.format = Some("date-time".to_string());
97145
}
98-
schema.metadata.get_or_insert_default().description =
99-
Some("Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00".to_string());
146+
self.set_description(
147+
&mut schema,
148+
"Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00",
149+
field_path,
150+
);
100151
}
101152
schema::BasicValueType::Json => {
102153
// Can be any value. No type constraint.
@@ -105,7 +156,8 @@ impl ToJsonSchema for schema::BasicValueType {
105156
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::Array)));
106157
schema.array = Some(Box::new(ArrayValidation {
107158
items: Some(SingleOrVec::Single(Box::new(
108-
s.element_type.to_json_schema(options).into(),
159+
self.for_basic_value_type(&s.element_type, field_path)
160+
.into(),
109161
))),
110162
min_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
111163
max_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
@@ -115,70 +167,106 @@ impl ToJsonSchema for schema::BasicValueType {
115167
}
116168
schema
117169
}
118-
}
119170

120-
impl ToJsonSchema for schema::StructSchema {
121-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
122-
SchemaObject {
123-
metadata: Some(Box::new(Metadata {
124-
description: self.description.as_ref().map(|s| s.to_string()),
125-
..Default::default()
126-
})),
127-
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))),
128-
object: Some(Box::new(ObjectValidation {
129-
properties: self
130-
.fields
131-
.iter()
132-
.map(|f| {
133-
let mut schema = f.value_type.to_json_schema(options);
134-
if options.fields_always_required && f.value_type.nullable {
135-
if let Some(instance_type) = &mut schema.instance_type {
136-
let mut types = match instance_type {
137-
SingleOrVec::Single(t) => vec![**t],
138-
SingleOrVec::Vec(t) => std::mem::take(t),
139-
};
140-
types.push(InstanceType::Null);
141-
*instance_type = SingleOrVec::Vec(types);
142-
}
171+
fn for_struct_schema(
172+
&mut self,
173+
struct_schema: &schema::StructSchema,
174+
field_path: RefList<'_, &'_ FieldName>,
175+
) -> SchemaObject {
176+
let mut schema = SchemaObject::default();
177+
if let Some(description) = &struct_schema.description {
178+
self.set_description(&mut schema, description, field_path);
179+
}
180+
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::Object)));
181+
schema.object = Some(Box::new(ObjectValidation {
182+
properties: struct_schema
183+
.fields
184+
.iter()
185+
.map(|f| {
186+
let mut schema =
187+
self.for_enriched_value_type(&f.value_type, field_path.prepend(&f.name));
188+
if self.options.fields_always_required && f.value_type.nullable {
189+
if let Some(instance_type) = &mut schema.instance_type {
190+
let mut types = match instance_type {
191+
SingleOrVec::Single(t) => vec![**t],
192+
SingleOrVec::Vec(t) => std::mem::take(t),
193+
};
194+
types.push(InstanceType::Null);
195+
*instance_type = SingleOrVec::Vec(types);
143196
}
144-
(f.name.to_string(), schema.into())
145-
})
146-
.collect(),
147-
required: self
148-
.fields
149-
.iter()
150-
.filter(|&f| (options.fields_always_required || !f.value_type.nullable))
151-
.map(|f| f.name.to_string())
152-
.collect(),
153-
additional_properties: Some(Schema::Bool(false).into()),
154-
..Default::default()
155-
})),
197+
}
198+
(f.name.to_string(), schema.into())
199+
})
200+
.collect(),
201+
required: struct_schema
202+
.fields
203+
.iter()
204+
.filter(|&f| (self.options.fields_always_required || !f.value_type.nullable))
205+
.map(|f| f.name.to_string())
206+
.collect(),
207+
additional_properties: Some(Schema::Bool(false).into()),
156208
..Default::default()
157-
}
209+
}));
210+
schema
158211
}
159-
}
160212

161-
impl ToJsonSchema for schema::ValueType {
162-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
163-
match self {
164-
schema::ValueType::Basic(b) => b.to_json_schema(options),
165-
schema::ValueType::Struct(s) => s.to_json_schema(options),
213+
fn for_value_type(
214+
&mut self,
215+
value_type: &schema::ValueType,
216+
field_path: RefList<'_, &'_ FieldName>,
217+
) -> SchemaObject {
218+
match value_type {
219+
schema::ValueType::Basic(b) => self.for_basic_value_type(b, field_path),
220+
schema::ValueType::Struct(s) => self.for_struct_schema(s, field_path),
166221
schema::ValueType::Collection(c) => SchemaObject {
167222
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Array))),
168223
array: Some(Box::new(ArrayValidation {
169224
items: Some(SingleOrVec::Single(Box::new(
170-
c.row.to_json_schema(options).into(),
225+
self.for_struct_schema(&c.row, field_path).into(),
171226
))),
172227
..Default::default()
173228
})),
174229
..Default::default()
175230
},
176231
}
177232
}
178-
}
179233

180-
impl ToJsonSchema for schema::EnrichedValueType {
181-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
182-
self.typ.to_json_schema(options)
234+
fn for_enriched_value_type(
235+
&mut self,
236+
enriched_value_type: &schema::EnrichedValueType,
237+
field_path: RefList<'_, &'_ FieldName>,
238+
) -> SchemaObject {
239+
self.for_value_type(&enriched_value_type.typ, field_path)
240+
}
241+
242+
fn build_extra_instructions(&self) -> Result<Option<String>> {
243+
if self.extra_instructions_per_field.is_empty() {
244+
return Ok(None);
245+
}
246+
247+
let mut instructions = String::new();
248+
write!(&mut instructions, "Instructions for specific fields:\n\n")?;
249+
for (field_path, instruction) in self.extra_instructions_per_field.iter() {
250+
write!(
251+
&mut instructions,
252+
"- {}: {}\n\n",
253+
if field_path.is_empty() {
254+
"(root object)"
255+
} else {
256+
field_path.as_str()
257+
},
258+
instruction
259+
)?;
260+
}
261+
Ok(Some(instructions))
183262
}
184263
}
264+
265+
pub fn build_json_schema(
266+
value_type: &schema::EnrichedValueType,
267+
options: ToJsonSchemaOptions,
268+
) -> Result<(SchemaObject, Option<String>)> {
269+
let mut builder = JsonSchemaBuilder::new(options);
270+
let schema = builder.for_enriched_value_type(value_type, RefList::Nil);
271+
Ok((schema, builder.build_extra_instructions()?))
272+
}

src/base/value.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::{api_bail, api_error};
22

33
use super::schema::*;
4-
use anyhow::{Context, Result};
4+
use anyhow::Result;
55
use base64::prelude::*;
66
use chrono::Offset;
77
use log::warn;
@@ -10,7 +10,7 @@ use serde::{
1010
ser::{SerializeMap, SerializeSeq, SerializeTuple},
1111
Deserialize, Serialize,
1212
};
13-
use std::{collections::BTreeMap, ops::Deref, str::FromStr, sync::Arc};
13+
use std::{collections::BTreeMap, ops::Deref, sync::Arc};
1414

1515
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
1616
pub struct RangeValue {

src/llm/mod.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,7 @@ pub trait LlmGenerationClient: Send + Sync {
4747
request: LlmGenerateRequest<'req>,
4848
) -> Result<LlmGenerateResponse>;
4949

50-
/// If true, the LLM only accepts a JSON schema with all fields required.
51-
/// This is a limitation of LLM models such as OpenAI.
52-
/// Otherwise, the LLM will accept a JSON schema with optional fields.
53-
fn json_schema_fields_always_required(&self) -> bool {
54-
false
55-
}
56-
57-
/// If true, the LLM supports the `format` keyword in the JSON schema.
58-
fn json_schema_supports_format(&self) -> bool {
59-
true
60-
}
61-
62-
fn to_json_schema_options(&self) -> ToJsonSchemaOptions {
63-
ToJsonSchemaOptions {
64-
fields_always_required: self.json_schema_fields_always_required(),
65-
supports_format: self.json_schema_supports_format(),
66-
}
67-
}
50+
fn json_schema_options(&self) -> ToJsonSchemaOptions;
6851
}
6952

7053
mod ollama;

src/llm/ollama.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,12 @@ impl LlmGenerationClient for Client {
7575
text: json.response,
7676
})
7777
}
78+
79+
/// JSON schema options for this client: descriptions are extracted into a
/// separate extra instruction, the `format` keyword is treated as
/// supported, and fields are not all marked as required.
fn json_schema_options(&self) -> super::ToJsonSchemaOptions {
    super::ToJsonSchemaOptions {
        extract_descriptions: true,
        supports_format: true,
        fields_always_required: false,
    }
}
7886
}

src/llm/openai.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ impl LlmGenerationClient for Client {
9898
Ok(super::LlmGenerateResponse { text })
9999
}
100100

101-
fn json_schema_fields_always_required(&self) -> bool {
102-
true
103-
}
104-
105-
fn json_schema_supports_format(&self) -> bool {
106-
false
101+
/// JSON schema options for this client: every field is marked as required,
/// the `format` keyword is treated as unsupported, and descriptions stay
/// inside the schema rather than being extracted.
fn json_schema_options(&self) -> super::ToJsonSchemaOptions {
    super::ToJsonSchemaOptions {
        extract_descriptions: false,
        supports_format: false,
        fields_always_required: true,
    }
}
108108
}

0 commit comments

Comments
 (0)