@@ -891,6 +891,7 @@ def handle_repetitions(min_times: int, max_times: int) -> None:
891
891
pos += 1
892
892
last_sym_start = out_elements .size ()
893
893
while pos [0 ] != '"' :
894
+ assert pos [0 ] is not None , "Unexpected end of input"
894
895
char_pair = parse_char (pos ) # type: Tuple[int, const_char_p]
895
896
pos = char_pair [1 ]
896
897
out_elements .push_back (
@@ -920,6 +921,7 @@ def handle_repetitions(min_times: int, max_times: int) -> None:
920
921
# : start_type;
921
922
# out_elements.push_back({type, char_pair.first});
922
923
while pos [0 ] != "]" :
924
+ assert pos [0 ] is not None , "Unexpected end of input"
923
925
char_pair = parse_char (pos ) # type: Tuple[int, const_char_p]
924
926
pos = char_pair [1 ]
925
927
_type = (
@@ -935,6 +937,7 @@ def handle_repetitions(min_times: int, max_times: int) -> None:
935
937
# }
936
938
# }
937
939
if pos [0 ] == "-" and pos [1 ] != "]" :
940
+ assert pos [1 ] is not None , "Unexpected end of input"
938
941
endchar_pair = parse_char (pos + 1 ) # type: Tuple[int, const_char_p]
939
942
pos = endchar_pair [1 ]
940
943
out_elements .push_back (
@@ -1159,33 +1162,59 @@ def parse_rule(state: parse_state, src: const_char_p) -> const_char_p:
1159
1162
elif pos [0 ]:
1160
1163
raise RuntimeError ("expecting newline or end at " + str (pos ))
1161
1164
return parse_space (pos , True )
1165
+
1166
+ #parse_state parse(const char * src) {
1167
+ # try {
1168
+ # parse_state state;
1169
+ # const char * pos = parse_space(src, true);
1170
+ # while (*pos) {
1171
+ # pos = parse_rule(state, pos);
1172
+ # }
1173
+ # // Validate the state to ensure that all rules are defined
1174
+ # for (const auto & rule : state.rules) {
1175
+ # for (const auto & elem : rule) {
1176
+ # if (elem.type == LLAMA_GRETYPE_RULE_REF) {
1177
+ # // Ensure that the rule at that location exists
1178
+ # if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
1179
+ # // Get the name of the rule that is missing
1180
+ # for (const auto & kv : state.symbol_ids) {
1181
+ # if (kv.second == elem.value) {
1182
+ # throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
1183
+ # }
1184
+ # }
1185
+ # }
1186
+ # }
1187
+ # }
1188
+ # }
1189
+ # return state;
1190
+ # } catch (const std::exception & err) {
1191
+ # fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
1192
+ # return parse_state();
1193
+ # }
1194
+ #}
1162
1195
1163
1196
1164
- # parse_state parse(const char * src) {
1165
- # try {
1166
- # parse_state state;
1167
- # const char * pos = parse_space(src, true);
1168
- # while (*pos) {
1169
- # pos = parse_rule(state, pos);
1170
- # }
1171
- # return state;
1172
- # } catch (const std::exception & err) {
1173
- # fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
1174
- # return parse_state();
1175
- # }
1176
- # }
1177
1197
def parse(src: const_char_p) -> parse_state:
    """Parse grammar source text into a ``parse_state``.

    Calls ``parse_space(src, True)`` to obtain the starting position, then
    calls ``parse_rule`` repeatedly until the character at the current
    position is falsy. Afterwards every ``LLAMA_GRETYPE_RULE_REF`` element is
    checked so that a reference to a missing or empty rule is reported as
    an error.

    Errors are never propagated to the caller: any exception raised while
    parsing or validating is reported on stderr (as the C++ reference
    implementation does) and an empty ``parse_state`` is returned instead.

    Args:
        src: Grammar text to parse.

    Returns:
        The populated parse state, or a fresh ``parse_state()`` if parsing
        or validation failed.
    """
    import sys  # local import: only needed for the diagnostic below

    try:
        state = parse_state()  # type: parse_state
        pos = parse_space(src, True)  # type: const_char_p
        while pos[0]:
            pos = parse_rule(state, pos)

        # Validate the state to ensure that all referenced rules are defined.
        rule_count = len(state.rules)
        for rule in state.rules:
            for elem in rule:
                if elem.type != llama_gretype.LLAMA_GRETYPE_RULE_REF:
                    continue
                # A reference is valid when it points at an existing,
                # non-empty rule.
                if elem.value < rule_count and state.rules[elem.value]:
                    continue
                # Look up the name of the missing rule for the error message.
                # NOTE(review): this assumes iterating ``symbol_ids`` yields
                # pair-like objects exposing ``.first`` / ``.second``; if it
                # behaves like a plain dict, iteration yields keys only —
                # confirm against the container's definition.
                for kv in state.symbol_ids:
                    if kv.second == elem.value:
                        raise RuntimeError(
                            "Undefined rule identifier '" + kv.first + "'"
                        )
        return state
    except Exception as err:
        # Deliberate best-effort boundary mirroring the reference
        # implementation: report the problem and hand back an empty state.
        print(
            f"{parse.__name__}: error parsing grammar: {err}",
            file=sys.stderr,
        )
        return parse_state()
1187
1217
1188
-
1189
1218
# void print_grammar_char(FILE * file, uint32_t c) {
1190
1219
# if (0x20 <= c && c <= 0x7f) {
1191
1220
# fprintf(file, "%c", static_cast<char>(c));
@@ -1283,6 +1312,7 @@ def print_rule(
1283
1312
# }
1284
1313
1285
1314
1315
+
1286
1316
for i , elem in enumerate (rule [:- 1 ]):
1287
1317
case = elem .type # type: llama_gretype
1288
1318
if case is llama_gretype .LLAMA_GRETYPE_END :
0 commit comments