
Commit ff685ef

Use Token.done field instead of Token.more

* use `done` as a positive end-of-parse indicator instead of `more`
* optimize the `c2recipe.Token` struct layout
* simplify `Tokenizer.init()`
* initialize the `result` token in `lex_internal()`; this is more consistent, and the token was not properly initialized in multiple cases, including lookahead
* pass `result` to `parse_ppexpr()` so that the `done` flag is properly updated

1 parent 64329a7 · commit ff685ef
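
The polarity flip pays off in Token.init(): a zero-filled token is now already in its correct starting state. A minimal sketch of the idiom, using only constructs visible in the diffs below:

    // Before: all-zeroes meant "no more tokens", so every init path had
    // to remember to patch the flag after clearing the struct:
    //     string.memset(t, 0, sizeof(Token));
    //     t.more = true;
    //
    // After: "not done" is the all-zeroes state, so the memset alone is a
    // complete initialization, and scan loops simply invert their test:
    //     while (tok.more) { ... }   becomes   while (!tok.done) { ... }
    fn void Token.init(Token* t) {
        string.memset(t, 0, sizeof(Token));
    }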

File tree

5 files changed: +34 −47 lines


compiler/c2recipe_parser.c2

Lines changed: 4 additions & 5 deletions

@@ -73,15 +73,14 @@ const char*[] kind_names = {
 static_assert(elemsof(Kind), elemsof(kind_names));
 
 type Token struct {
-    Kind kind;
     SrcLoc loc;
-    bool more;
+    Kind kind;
+    bool done;
     u32 value; // allocated in StringPool
 }
 
 fn void Token.init(Token* t) {
     string.memset(t, 0, sizeof(Token));
-    t.more = true;
 }
 
 type Parser struct {
@@ -115,7 +114,7 @@ fn bool Parser.parse(Recipe* recipe, string_pool.Pool* pool, source_mgr.SourceMg
     p.token.init();
 
 #if 0
-    while (p.token.more) {
+    while (!p.token.done) {
         p.consumeToken();
     }
 #else
@@ -184,7 +183,7 @@ fn void Parser.lex(Parser* p, Token* result) {
         p.cur--;
         result.loc = p.loc_start + cast<SrcLoc>(p.cur - p.input_start);
         result.kind = Kind.Eof;
-        result.more = false;
+        result.done = true;
         return;
     case ' ':
    case '\t':
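
A note on the field reorder above, explaining the "optimize the c2recipe.Token struct layout" bullet. Assuming Kind is a one-byte enum and SrcLoc a 32-bit offset (consistent with the 16-byte static_assert on the parser's Token in parser/token.c2 below), leading with the u32 and grouping the byte-sized fields removes alignment padding:

    // Hypothetical sizes under those assumptions:
    // old: kind(1) + pad(3) + loc(4) + more(1) + pad(3) + value(4) = 16 bytes
    // new: loc(4) + kind(1) + done(1) + pad(2) + value(4)          = 12 bytes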

parser/c2_parser.c2

Lines changed: 2 additions & 2 deletions

@@ -94,7 +94,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
                     true);
     p.tok.init();
     u32 token_count = 0;
-    while (p.tok.more) {
+    while (!p.tok.done) {
         p.tokenizer.lex(&p.tok);
         p.dump_token(&p.tok);
         p.tok.has_error = 0;
@@ -120,7 +120,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
     p.parseModule(is_generated);
     p.parseImports();
 
-    while (p.tok.more) {
+    while (!p.tok.done) {
         p.parseTopLevel();
     }
 }

parser/c2_tokenizer.c2

Lines changed: 24 additions & 35 deletions

@@ -283,26 +283,18 @@ public fn void Tokenizer.init(Tokenizer* t,
                              const string_list.List* features,
                              bool raw_mode)
 {
+    string.memset(t, 0, sizeof(Tokenizer));
     t.cur = input;
     t.loc_start = loc_start;
     t.input_start = input;
     t.kwinfo = kwinfo;
 
-    for (u32 i=0; i<MaxLookahead; i++) {
-        t.next[i].init();
-    }
-    t.next_count = 0;
-    t.next_head = 0;
     t.line_start = input;
     t.pool = pool;
     t.buf = buf;
 
-    string.memset(&t.feature_stack, 0, sizeof(t.feature_stack));
-    t.feature_count = 0;
     t.features = features;
     t.raw_mode = raw_mode;
-    t.stop_at_eol = false;
-    t.error_msg[0] = 0;
 }
 
 public fn void Tokenizer.lex(Tokenizer* t, Token* result) {
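
The init rewrite above replaces per-field zeroing with a single string.memset() over the whole Tokenizer, after which only fields with non-zero initial values need explicit assignment; the lookahead ring (next, next_count, next_head), feature_stack, stop_at_eol and error_msg are all covered by the zero fill. Note that dropping the t.next[i].init() loop is only safe because of the more→done flip: a zero-filled lookahead Token is now already in its valid initial state.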
@@ -320,6 +312,7 @@ fn void Tokenizer.lex_internal(Tokenizer* t, Token* result) {
     // TODO if end/error stop (dont retry) (t.done = 1)
 
     while (1) {
+        result.init();
         result.loc = t.loc_start + cast<SrcLoc>(t.cur - t.input_start);
         Action act = Char_lookup[cast<u8>(*t.cur)];
         switch (act) {
@@ -650,7 +643,7 @@ fn void Tokenizer.lex_internal(Tokenizer* t, Token* result) {
                 return;
             }
             result.kind = Kind.Eof;
-            result.more = false;
+            result.done = true;
             return;
         }
     }
@@ -692,7 +685,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
     result.loc = t.loc_start + cast<SrcLoc>(t.cur - t.input_start);
     result.kind = Kind.Error;
     result.error_msg = t.error_msg;
-    result.more = false;
+    result.done = true;
     result.has_error = true;
 }
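
Per the commit message, the new result.init() at the top of each scan iteration is the fix for tokens that were "not properly initialized in multiple cases, including lookahead": every path through the switch now starts from a fully zeroed token instead of inheriting stale fields from whatever the caller passed in.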

@@ -1006,7 +999,7 @@ too_large:
 // Returns how much to shift in source code (0 = error)
 fn u32 Tokenizer.lex_escaped_char(Tokenizer* t, Token* result, const char* stype) {
     // Note: t.cur is on '\'
-    const char* input = t.cur + 1; // skip backspace
+    const char* input = t.cur + 1; // skip backslash
     switch (input[0]) {
     case 0:
     case '\r':
@@ -1139,7 +1132,6 @@ fn void Tokenizer.lex_char_literal(Tokenizer* t, Token* result) {
         t.error(result, "multi-character character constant");
     } else {
         t.error(result, "missing terminating ' character (GOT %c)", *t.cur);
-        //t.error(result, "missing terminating ' character");
     }
     return;
 }
@@ -1357,36 +1349,35 @@ type Operand struct {
     u8 prec;
 }
 
-fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
+fn i64 Tokenizer.parse_ppexpr(Tokenizer* t, Token *result) {
     Operand[MAX_LEVEL] stack;
     Operand *sp;
     Kind op;
     u8 prec;
     i64 val = 0;
-    Token tok;
     bool prefix = true;
 
     for (sp = stack;;) {
-        op = t.lex_preproc(&tok);
+        op = t.lex_preproc(result);
         if (prefix) {
             switch (op) {
             case Identifier:
                 val = 0;
-                const char *id = t.pool.idx2str(tok.text_idx);
+                const char *id = t.pool.idx2str(result.text_idx);
                 if (!string.strcmp(id, "defined")) {
                     bool has_paren = false;
-                    if (t.lex_preproc(&tok) == Kind.LParen) {
+                    if (t.lex_preproc(result) == Kind.LParen) {
                         has_paren = true;
-                        t.lex_preproc(&tok);
+                        t.lex_preproc(result);
                     }
-                    if (tok.kind == Kind.Identifier) {
-                        id = t.pool.idx2str(tok.text_idx);
+                    if (result.kind == Kind.Identifier) {
+                        id = t.pool.idx2str(result.text_idx);
                     } else {
-                        t.error(&tok, "missing identifier after 'defined'");
+                        t.error(result, "missing identifier after 'defined'");
                         return 0;
                     }
                     if (has_paren) {
-                        if (t.lex_preproc(&tok) != Kind.RParen)
+                        if (t.lex_preproc(result) != Kind.RParen)
                             goto syntax_error;
                     }
                     val = t.features.contains(id);
@@ -1398,11 +1389,11 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
                 continue;
             case IntegerLiteral:
                 // TODO: handle signed/unsigned issues
-                val = cast<i64>(tok.int_value);
+                val = cast<i64>(result.int_value);
                 prefix = false;
                 continue;
             case CharLiteral:
-                val = tok.char_value;
+                val = result.char_value;
                 prefix = false;
                 continue;
             case LParen:
@@ -1423,15 +1414,15 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             default:
                 break;
             }
-            t.error(&tok, "missing operand in preprocessor expression");
+            t.error(result, "missing operand in preprocessor expression");
             return 0;
         }
         switch (op) {
         case Identifier:
         case IntegerLiteral:
         case CharLiteral:
         case LParen:
-            t.error(&tok, "missing operator in preprocessor expression");
+            t.error(result, "missing operator in preprocessor expression");
             return 0;
         default:
             break;
@@ -1510,8 +1501,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             prec = 13;
             break;
         default:
-            t.error(&tok, "invalid token in preprocessor expression '%s'",
-                    tok.kind.str());
+            t.error(result, "invalid token in preprocessor expression '%s'", result.kind.str());
             return 0;
         }
 
@@ -1521,7 +1511,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
         switch (sp.op) {
         case LParen:
             if (op != Kind.RParen) {
-                t.error(&tok, "missing parenthesis in preprocessor expression");
+                t.error(result, "missing parenthesis in preprocessor expression");
                 return 0;
             }
             op = Kind.None;
@@ -1589,8 +1579,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             }
             fallthrough;
         default:
-            t.error(&tok, "invalid token in preprocessor expression '%s'",
-                    sp.op.str());
+            t.error(result, "invalid token in preprocessor expression '%s'", sp.op.str());
             return 0;
         }
         break;
@@ -1599,7 +1588,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             break;
         if (sp >= stack + MAX_LEVEL) {
 too_deep:
-            t.error(&tok, "preprocessor expression too complex");
+            t.error(result, "preprocessor expression too complex");
             return 0;
         }
         sp.val = val;
@@ -1609,7 +1598,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
     }
     if (sp > stack) {
 syntax_error:
-        t.error(&tok, "syntax error in preprocessor expression");
+        t.error(result, "syntax error in preprocessor expression");
         return 0;
     }
     return val;
@@ -1650,7 +1639,7 @@ fn bool Tokenizer.handle_if(Tokenizer* t, Token* result, Kind kind) {
     }
 
     if (kind == Kind.Feat_if || kind == Kind.Feat_elif) {
-        if (!t.parse_ppexpr())
+        if (!t.parse_ppexpr(result))
             top.skipping = 1;
     } else {
         /* handle Kind.Feat_ifdef, Kind.Feat_ifndef */
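
Threading the caller's result through parse_ppexpr() instead of a function-local Token tok matters for error handling: Tokenizer.error() writes Kind.Error, error_msg, has_error and now done = true into the token it is given. With the local token those flags were discarded when parse_ppexpr() returned, so the done flag was never properly updated on a failed #if expression; routing everything through result lets handle_if()'s caller observe the error and end-of-parse state directly.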

parser/token.c2

Lines changed: 1 addition & 2 deletions

@@ -301,7 +301,7 @@ public type Radix enum u8 {
 public type Token struct {
     SrcLoc loc;
     Kind kind;
-    bool more;
+    bool done;
     bool has_error;
     Radix radix; // Radix: for IntegerLiteral (2,8,10,16), FloatLiteral(10,16) and CharLiteral (8,16)
     union {
@@ -320,7 +320,6 @@ static_assert(16, sizeof(Token));
 
 public fn void Token.init(Token* tok) {
     string.memset(tok, 0, sizeof(Token));
-    tok.more = true;
 }
 
 public type KWInfo struct {
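
For reference, static_assert(16, sizeof(Token)) still holds after the rename, since bool done occupies the same byte bool more did. Assuming SrcLoc is a u32 and Kind is a one-byte enum like the Radix enum u8 declared just above, the accounting is: loc(4) + kind(1) + done(1) + has_error(1) + radix(1) + union(8) = 16 bytes.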

tools/c2cat.c2

Lines changed: 3 additions & 3 deletions

@@ -123,7 +123,7 @@ fn void C2cat.print_token(C2cat* ctx, const Token* tok) {
 
     if (ctx.offset != 0) {
         // copy stuff from file to out (from end of last token to start of current)
-        if (!tok.more) return;
+        if (tok.done) return;
         if (ctx.offset <= tok.loc) {
             u32 len = tok.loc - ctx.offset;
             if (len) out.add2(ctx.input + ctx.offset, len);
@@ -300,7 +300,7 @@ public fn i32 c2cat(const char* filename)
     Token tok;
     tok.init();
 
-    while (tok.more) {
+    while (!tok.done) {
         tokenizer.lex(&tok);
         //printf("%4d %s\n", tok.loc, tok.kind.str());
 
@@ -319,7 +319,7 @@ public fn i32 c2cat(const char* filename)
         ctx.out.color(col_normal);
         ctx.out.add1('\n');
     }
-    printf("%s", ctx.out.data());
+    fputs(ctx.out.data(), stdout);
     fflush(stdout);
 
     ctx.pool.free();
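
The final printf("%s", ...) → fputs(..., stdout) swap is behavior-preserving for c2cat's output: fputs() writes the accumulated buffer verbatim with no format-string scan, which is cheaper on a large highlighted file and also sidesteps the classic pitfall of a later "simplification" to printf(buf), where a literal % in the displayed source would be misread as a conversion specifier.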
