Skip to content

Commit 3dac5ae

Browse files
authored
[Refactor][Breaking] Rename start->begin in StructuralTag (#221)
This PR renames the field "start" to "begin" in StructuralTagItem. The name "begin" aligns better with Python naming conventions.
1 parent 6996ded commit 3dac5ae

File tree

7 files changed

+32
-32
lines changed

7 files changed

+32
-32
lines changed

cpp/grammar_compiler.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ template <>
2222
struct hash<xgrammar::StructuralTagItem> {
2323
size_t operator()(const xgrammar::StructuralTagItem& tag) const {
2424
return xgrammar::HashCombine(
25-
std::hash<std::string>{}(tag.start),
25+
std::hash<std::string>{}(tag.begin),
2626
std::hash<std::string>{}(tag.schema),
2727
std::hash<std::string>{}(tag.end)
2828
);

cpp/grammar_functor.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -639,11 +639,11 @@ class StructuralTagGrammarCreatorImpl : public SubGrammarAdder {
639639
std::vector<int32_t> seq_elements;
640640
seq_elements.reserve(3);
641641

642-
// Add start suffix (everything after trigger)
643-
XGRAMMAR_DCHECK(tag.start.size() >= triggers[i].size())
644-
<< "Tag start must be at least as long as trigger";
645-
if (tag.start.size() > triggers[i].size()) {
646-
seq_elements.push_back(builder_.AddByteString(tag.start.substr(triggers[i].size())));
642+
// Add begin suffix (everything after trigger)
643+
XGRAMMAR_DCHECK(tag.begin.size() >= triggers[i].size())
644+
<< "Tag begin must be at least as long as trigger";
645+
if (tag.begin.size() > triggers[i].size()) {
646+
seq_elements.push_back(builder_.AddByteString(tag.begin.substr(triggers[i].size())));
647647
}
648648

649649
// Create and visit schema grammar for this tag

cpp/structural_tag.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,24 +44,24 @@ Grammar StructuralTagToGrammar(
4444
bool found = false;
4545
for (int it_trigger = 0; it_trigger < static_cast<int>(sorted_triggers.size()); ++it_trigger) {
4646
const auto& trigger = sorted_triggers[it_trigger];
47-
if (trigger.size() <= tag.start.size() &&
48-
std::string_view(tag.start).substr(0, trigger.size()) == trigger) {
47+
if (trigger.size() <= tag.begin.size() &&
48+
std::string_view(tag.begin).substr(0, trigger.size()) == trigger) {
4949
tag_groups[it_trigger].push_back(std::make_pair(tag, schema_grammars[it_tag]));
5050
found = true;
5151
break;
5252
}
5353
}
54-
XGRAMMAR_CHECK(found) << "Tag " << tag.start << " does not match any trigger";
54+
XGRAMMAR_CHECK(found) << "Tag " << tag.begin << " does not match any trigger";
5555
}
5656

5757
// Step 3: Combine the tags to form a grammar
5858
// root ::= TagDispatch((trigger1, rule1), (trigger2, rule2), ...)
5959
// Suppose tag1 and tag2 match trigger1, then
60-
// rule1 ::= (tag1.start[trigger1.size():] + ToEBNF(tag1.schema) + tag1.end) |
61-
// (tag2.start[trigger1.size():] + ToEBNF(tag2.schema) + tag2.end) | ...
60+
// rule1 ::= (tag1.begin[trigger1.size():] + ToEBNF(tag1.schema) + tag1.end) |
61+
// (tag2.begin[trigger1.size():] + ToEBNF(tag2.schema) + tag2.end) | ...
6262
//
6363
// Suppose tag3 matches trigger2, then
64-
// rule2 ::= (tag3.start[trigger2.size():] + ToEBNF(tag3.schema) + tag3.end)
64+
// rule2 ::= (tag3.begin[trigger2.size():] + ToEBNF(tag3.schema) + tag3.end)
6565
//
6666
// ...
6767
return StructuralTagGrammarCreator::Apply(sorted_triggers, tag_groups);

include/xgrammar/grammar.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
namespace xgrammar {
1818

1919
struct StructuralTagItem {
20-
std::string start;
20+
std::string begin;
2121
std::string schema;
2222
std::string end;
2323

2424
bool operator==(const StructuralTagItem& other) const {
25-
return start == other.start && schema == other.schema && end == other.end;
25+
return begin == other.begin && schema == other.schema && end == other.end;
2626
}
2727
};
2828

python/xgrammar/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def compile_structural_tag(
147147
compiled_grammar : CompiledGrammar
148148
The compiled grammar.
149149
"""
150-
tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags]
150+
tags_tuple = [(tag.begin, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags]
151151
return CompiledGrammar._create_from_handle(
152152
self._handle.compile_structural_tag(tags_tuple, triggers)
153153
)

python/xgrammar/grammar.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ class StructuralTagItem(BaseModel):
1313
1414
Attributes
1515
----------
16-
start : str
17-
The start tag.
16+
begin : str
17+
The begin tag.
1818
1919
schema_ : Union[str, Type[BaseModel]]
2020
The schema.
@@ -23,7 +23,7 @@ class StructuralTagItem(BaseModel):
2323
The end tag.
2424
"""
2525

26-
start: str
26+
begin: str
2727
schema_: Union[str, Type[BaseModel]] = Field(alias="schema")
2828
end: str
2929

@@ -190,14 +190,14 @@ def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> "
190190
The tags parameter is used to specify the output pattern. It is especially useful for LLM
191191
function calling, where the pattern is:
192192
<function=func_name>{"arg1": ..., "arg2": ...}</function>.
193-
This pattern consists of three parts: a start tag (<function=func_name>), a parameter list
193+
This pattern consists of three parts: a begin tag (<function=func_name>), a parameter list
194194
according to some schema ({"arg1": ..., "arg2": ...}), and an end tag (</function>). This
195-
pattern can be described in a StructuralTagItem with a start tag, a schema, and an end tag.
195+
pattern can be described in a StructuralTagItem with a begin tag, a schema, and an end tag.
196196
The structural tag is able to handle multiple such patterns by passing them into multiple
197197
tags.
198198
199199
The triggers parameter is used to trigger the dispatching of different grammars. The trigger
200-
should be a prefix of a provided start tag. When the trigger is encountered, the
200+
should be a prefix of a provided begin tag. When the trigger is encountered, the
201201
corresponding tag should be used to constrain the following output. There can be multiple
202202
tags matching the same trigger. Then if the trigger is encountered, the following output
203203
should match one of the tags. For example, in function calling, the triggers can be
@@ -235,13 +235,13 @@ def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> "
235235
>>> arg3: float
236236
>>> arg4: List[str]
237237
>>> tags = [
238-
>>> StructuralTagItem(start="<function=f>", schema=Schema1, end="</function>"),
239-
>>> StructuralTagItem(start="<function=g>", schema=Schema2, end="</function>"),
238+
>>> StructuralTagItem(begin="<function=f>", schema=Schema1, end="</function>"),
239+
>>> StructuralTagItem(begin="<function=g>", schema=Schema2, end="</function>"),
240240
>>> ]
241241
>>> triggers = ["<function="]
242242
>>> grammar = Grammar.from_structural_tag(tags, triggers)
243243
"""
244-
tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags]
244+
tags_tuple = [(tag.begin, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags]
245245
return Grammar._create_from_handle(_core.Grammar.from_structural_tag(tags_tuple, triggers))
246246

247247
@staticmethod

tests/python/test_grammar_matcher_tag_dispatch.py renamed to tests/python/test_grammar_matcher_structural_tag.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ class Schema2(BaseModel):
171171
arg4: List[str]
172172

173173
tags = [
174-
xgr.StructuralTagItem(start="<function=f1>", schema=Schema1, end="</function>"),
175-
xgr.StructuralTagItem(start="<function=f2>", schema=Schema1, end="</function>"),
176-
xgr.StructuralTagItem(start="<function=g>", schema=Schema2, end="</function>"),
174+
xgr.StructuralTagItem(begin="<function=f1>", schema=Schema1, end="</function>"),
175+
xgr.StructuralTagItem(begin="<function=f2>", schema=Schema1, end="</function>"),
176+
xgr.StructuralTagItem(begin="<function=g>", schema=Schema2, end="</function>"),
177177
]
178178
# in real cases, we should use one trigger: "<function=" and dispatch to two tags
179179
# but here we use two triggers for testing such cases
@@ -203,9 +203,9 @@ class Schema2(BaseModel):
203203
arg4: List[str]
204204

205205
tags = [
206-
xgr.StructuralTagItem(start="<function=f1>", schema=Schema1, end="</function>"),
207-
xgr.StructuralTagItem(start="<function=f2>", schema=Schema1, end="</function>"),
208-
xgr.StructuralTagItem(start="<function=g>", schema=Schema2, end="</function>"),
206+
xgr.StructuralTagItem(begin="<function=f1>", schema=Schema1, end="</function>"),
207+
xgr.StructuralTagItem(begin="<function=f2>", schema=Schema1, end="</function>"),
208+
xgr.StructuralTagItem(begin="<function=g>", schema=Schema2, end="</function>"),
209209
]
210210

211211
# in real cases, we should use one trigger: "<function=" and dispatch to two tags
@@ -232,10 +232,10 @@ class Schema2(BaseModel):
232232
# Set up grammar from schemas
233233
tags = [
234234
xgr.StructuralTagItem(
235-
start="<function=f>", schema=json.dumps(Schema1.model_json_schema()), end="</function>"
235+
begin="<function=f>", schema=json.dumps(Schema1.model_json_schema()), end="</function>"
236236
),
237237
xgr.StructuralTagItem(
238-
start="<function=g>", schema=json.dumps(Schema2.model_json_schema()), end="</function>"
238+
begin="<function=g>", schema=json.dumps(Schema2.model_json_schema()), end="</function>"
239239
),
240240
]
241241
triggers = ["<function=f", "<function=g"]

0 commit comments

Comments
 (0)