diff --git a/cpp/grammar_compiler.cc b/cpp/grammar_compiler.cc index 58706a24..7fddb3f3 100644 --- a/cpp/grammar_compiler.cc +++ b/cpp/grammar_compiler.cc @@ -22,7 +22,7 @@ template <> struct hash { size_t operator()(const xgrammar::StructuralTagItem& tag) const { return xgrammar::HashCombine( - std::hash{}(tag.start), + std::hash{}(tag.begin), std::hash{}(tag.schema), std::hash{}(tag.end) ); diff --git a/cpp/grammar_functor.cc b/cpp/grammar_functor.cc index 653e38fe..09a2928e 100644 --- a/cpp/grammar_functor.cc +++ b/cpp/grammar_functor.cc @@ -639,11 +639,11 @@ class StructuralTagGrammarCreatorImpl : public SubGrammarAdder { std::vector seq_elements; seq_elements.reserve(3); - // Add start suffix (everything after trigger) - XGRAMMAR_DCHECK(tag.start.size() >= triggers[i].size()) - << "Tag start must be at least as long as trigger"; - if (tag.start.size() > triggers[i].size()) { - seq_elements.push_back(builder_.AddByteString(tag.start.substr(triggers[i].size()))); + // Add begin suffix (everything after trigger) + XGRAMMAR_DCHECK(tag.begin.size() >= triggers[i].size()) + << "Tag begin must be at least as long as trigger"; + if (tag.begin.size() > triggers[i].size()) { + seq_elements.push_back(builder_.AddByteString(tag.begin.substr(triggers[i].size()))); } // Create and visit schema grammar for this tag diff --git a/cpp/structural_tag.cc b/cpp/structural_tag.cc index 9d3e9b56..f7d4196d 100644 --- a/cpp/structural_tag.cc +++ b/cpp/structural_tag.cc @@ -44,24 +44,24 @@ Grammar StructuralTagToGrammar( bool found = false; for (int it_trigger = 0; it_trigger < static_cast(sorted_triggers.size()); ++it_trigger) { const auto& trigger = sorted_triggers[it_trigger]; - if (trigger.size() <= tag.start.size() && - std::string_view(tag.start).substr(0, trigger.size()) == trigger) { + if (trigger.size() <= tag.begin.size() && + std::string_view(tag.begin).substr(0, trigger.size()) == trigger) { tag_groups[it_trigger].push_back(std::make_pair(tag, schema_grammars[it_tag])); found = true; break; } } - XGRAMMAR_CHECK(found) << "Tag " << tag.start << " does not match any trigger"; + XGRAMMAR_CHECK(found) << "Tag " << tag.begin << " does not match any trigger"; } // Step 3: Combine the tags to form a grammar // root ::= TagDispatch((trigger1, rule1), (trigger2, rule2), ...) // Suppose tag1 and tag2 matches trigger1, then - // rule1 ::= (tag1.start[trigger1.size():] + ToEBNF(tag1.schema) + tag1.end) | - // (tag2.start[trigger1.size():] + ToEBNF(tag2.schema) + tag2.end) | ... + // rule1 ::= (tag1.begin[trigger1.size():] + ToEBNF(tag1.schema) + tag1.end) | + // (tag2.begin[trigger1.size():] + ToEBNF(tag2.schema) + tag2.end) | ... // // Suppose tag3 matches trigger2, then - // rule2 ::= (tag3.start[trigger2.size():] + ToEBNF(tag3.schema) + tag3.end) + // rule2 ::= (tag3.begin[trigger2.size():] + ToEBNF(tag3.schema) + tag3.end) // // ... return StructuralTagGrammarCreator::Apply(sorted_triggers, tag_groups); diff --git a/include/xgrammar/grammar.h b/include/xgrammar/grammar.h index 7f2ce965..616b5a2b 100644 --- a/include/xgrammar/grammar.h +++ b/include/xgrammar/grammar.h @@ -17,12 +17,12 @@ namespace xgrammar { struct StructuralTagItem { - std::string start; + std::string begin; std::string schema; std::string end; bool operator==(const StructuralTagItem& other) const { - return start == other.start && schema == other.schema && end == other.end; + return begin == other.begin && schema == other.schema && end == other.end; } }; diff --git a/python/xgrammar/compiler.py b/python/xgrammar/compiler.py index b5736b4b..cdf2c7e6 100644 --- a/python/xgrammar/compiler.py +++ b/python/xgrammar/compiler.py @@ -147,7 +147,7 @@ def compile_structural_tag( compiled_grammar : CompiledGrammar The compiled grammar. """ - tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + tags_tuple = [(tag.begin, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] return CompiledGrammar._create_from_handle( self._handle.compile_structural_tag(tags_tuple, triggers) ) diff --git a/python/xgrammar/grammar.py b/python/xgrammar/grammar.py index 8cf33582..7e01489f 100644 --- a/python/xgrammar/grammar.py +++ b/python/xgrammar/grammar.py @@ -13,8 +13,8 @@ class StructuralTagItem(BaseModel): Attributes ---------- - start : str - The start tag. + begin : str + The begin tag. schema_ : Union[str, Type[BaseModel]] The schema. @@ -23,7 +23,7 @@ class StructuralTagItem(BaseModel): The end tag. """ - start: str + begin: str schema_: Union[str, Type[BaseModel]] = Field(alias="schema") end: str @@ -190,14 +190,14 @@ def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> " The tags parameter is used to specify the output pattern. It is especially useful for LLM function calling, where the pattern is: {"arg1": ..., "arg2": ...}. - This pattern consists of three parts: a start tag (), a parameter list + This pattern consists of three parts: a begin tag (), a parameter list according to some schema ({"arg1": ..., "arg2": ...}), and an end tag (). This - pattern can be described in a StructuralTagItem with a start tag, a schema, and an end tag. + pattern can be described in a StructuralTagItem with a begin tag, a schema, and an end tag. The structural tag is able to handle multiple such patterns by passing them into multiple tags. The triggers parameter is used to trigger the dispatching of different grammars. The trigger - should be a prefix of a provided start tag. When the trigger is encountered, the + should be a prefix of a provided begin tag. When the trigger is encountered, the corresponding tag should be used to constrain the following output. There can be multiple tags matching the same trigger. Then if the trigger is encountered, the following output should match one of the tags. For example, in function calling, the triggers can be @@ -235,13 +235,13 @@ def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> " >>> arg3: float >>> arg4: List[str] >>> tags = [ - >>> StructuralTagItem(start="", schema=Schema1, end=""), - >>> StructuralTagItem(start="", schema=Schema2, end=""), + >>> StructuralTagItem(begin="", schema=Schema1, end=""), + >>> StructuralTagItem(begin="", schema=Schema2, end=""), >>> ] >>> triggers = [">> grammar = Grammar.from_structural_tag(tags, triggers) """ - tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + tags_tuple = [(tag.begin, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] return Grammar._create_from_handle(_core.Grammar.from_structural_tag(tags_tuple, triggers)) @staticmethod diff --git a/tests/python/test_grammar_matcher_tag_dispatch.py b/tests/python/test_grammar_matcher_structural_tag.py similarity index 96% rename from tests/python/test_grammar_matcher_tag_dispatch.py rename to tests/python/test_grammar_matcher_structural_tag.py index 75ef2aac..a2456a3d 100644 --- a/tests/python/test_grammar_matcher_tag_dispatch.py +++ b/tests/python/test_grammar_matcher_structural_tag.py @@ -171,9 +171,9 @@ class Schema2(BaseModel): arg4: List[str] tags = [ - xgr.StructuralTagItem(start="", schema=Schema1, end=""), - xgr.StructuralTagItem(start="", schema=Schema1, end=""), - xgr.StructuralTagItem(start="", schema=Schema2, end=""), + xgr.StructuralTagItem(begin="", schema=Schema1, end=""), + xgr.StructuralTagItem(begin="", schema=Schema1, end=""), + xgr.StructuralTagItem(begin="", schema=Schema2, end=""), ] # in real cases, we should use one trigger: "", schema=json.dumps(Schema1.model_json_schema()), end="" + begin="", schema=json.dumps(Schema1.model_json_schema()), end="" ), xgr.StructuralTagItem( - start="", schema=json.dumps(Schema2.model_json_schema()), end="" + begin="", schema=json.dumps(Schema2.model_json_schema()), end="" ), ] triggers = ["