
Commit ff685ef

Use Token.done field instead of Token.more

* use `done` as a positive end-of-parse indicator instead of `more`
* optimize the `c2recipe.Token` struct layout
* simplify `Tokenizer.init()`
* initialize the `result` token in `lex_internal()`; this is more consistent, and the token was not properly initialized in multiple cases, including lookahead
* pass `result` to `parse_ppexpr()` so that the `done` flag is properly updated

1 parent 64329a7 · commit ff685ef
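
The polarity flip pays off in Token.init(): a zero-filled token is now already in its correct starting state. A minimal sketch of the idiom, using only constructs visible in the diffs below:

    // Before: all-zeroes meant "no more tokens", so every init path had
    // to remember to patch the flag after clearing the struct:
    //     string.memset(t, 0, sizeof(Token));
    //     t.more = true;
    //
    // After: "not done" is the all-zeroes state, so the memset alone is a
    // complete initialization, and scan loops simply invert their test:
    //     while (tok.more) { ... }   becomes   while (!tok.done) { ... }
    fn void Token.init(Token* t) {
        string.memset(t, 0, sizeof(Token));
    }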

File tree

5 files changed: +34 −47 lines


compiler/c2recipe_parser.c2

Lines changed: 4 additions & 5 deletions

@@ -73,15 +73,14 @@ const char*[] kind_names = {
 static_assert(elemsof(Kind), elemsof(kind_names));
 
 type Token struct {
-    Kind kind;
     SrcLoc loc;
-    bool more;
+    Kind kind;
+    bool done;
     u32 value; // allocated in StringPool
 }
 
 fn void Token.init(Token* t) {
     string.memset(t, 0, sizeof(Token));
-    t.more = true;
 }
 
 type Parser struct {
@@ -115,7 +114,7 @@ fn bool Parser.parse(Recipe* recipe, string_pool.Pool* pool, source_mgr.SourceMg
     p.token.init();
 
 #if 0
-    while (p.token.more) {
+    while (!p.token.done) {
         p.consumeToken();
     }
 #else
@@ -184,7 +183,7 @@ fn void Parser.lex(Parser* p, Token* result) {
         p.cur--;
         result.loc = p.loc_start + cast<SrcLoc>(p.cur - p.input_start);
         result.kind = Kind.Eof;
-        result.more = false;
+        result.done = true;
         return;
     case ' ':
    case '\t':
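
A note on the field reorder above, explaining the "optimize the c2recipe.Token struct layout" bullet. Assuming Kind is a one-byte enum and SrcLoc a 32-bit offset (consistent with the 16-byte static_assert on the parser's Token in parser/token.c2 below), leading with the u32 and grouping the byte-sized fields removes alignment padding:

    // Hypothetical sizes under those assumptions:
    // old: kind(1) + pad(3) + loc(4) + more(1) + pad(3) + value(4) = 16 bytes
    // new: loc(4) + kind(1) + done(1) + pad(2) + value(4)          = 12 bytes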

parser/c2_parser.c2

Lines changed: 2 additions & 2 deletions

@@ -94,7 +94,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
                     true);
     p.tok.init();
     u32 token_count = 0;
-    while (p.tok.more) {
+    while (!p.tok.done) {
         p.tokenizer.lex(&p.tok);
         p.dump_token(&p.tok);
         p.tok.has_error = 0;
@@ -120,7 +120,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
     p.parseModule(is_generated);
     p.parseImports();
 
-    while (p.tok.more) {
+    while (!p.tok.done) {
         p.parseTopLevel();
     }
 }

parser/c2_tokenizer.c2

Lines changed: 24 additions & 35 deletions

@@ -283,26 +283,18 @@ public fn void Tokenizer.init(Tokenizer* t,
                              const string_list.List* features,
                              bool raw_mode)
 {
+    string.memset(t, 0, sizeof(Tokenizer));
     t.cur = input;
     t.loc_start = loc_start;
     t.input_start = input;
     t.kwinfo = kwinfo;
 
-    for (u32 i=0; i<MaxLookahead; i++) {
-        t.next[i].init();
-    }
-    t.next_count = 0;
-    t.next_head = 0;
     t.line_start = input;
     t.pool = pool;
     t.buf = buf;
 
-    string.memset(&t.feature_stack, 0, sizeof(t.feature_stack));
-    t.feature_count = 0;
     t.features = features;
     t.raw_mode = raw_mode;
-    t.stop_at_eol = false;
-    t.error_msg[0] = 0;
 }
 
 public fn void Tokenizer.lex(Tokenizer* t, Token* result) {
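
The init rewrite above replaces per-field zeroing with a single string.memset() over the whole Tokenizer, after which only fields with non-zero initial values need explicit assignment; the lookahead ring (next, next_count, next_head), feature_stack, stop_at_eol and error_msg are all covered by the zero fill. Note that dropping the t.next[i].init() loop is only safe because of the more→done flip: a zero-filled lookahead Token is now already in its valid initial state.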
@@ -320,6 +312,7 @@ fn void Tokenizer.lex_internal(Tokenizer* t, Token* result) {
     // TODO if end/error stop (dont retry) (t.done = 1)
 
     while (1) {
+        result.init();
         result.loc = t.loc_start + cast<SrcLoc>(t.cur - t.input_start);
         Action act = Char_lookup[cast<u8>(*t.cur)];
         switch (act) {
@@ -650,7 +643,7 @@ fn void Tokenizer.lex_internal(Tokenizer* t, Token* result) {
                 return;
             }
             result.kind = Kind.Eof;
-            result.more = false;
+            result.done = true;
             return;
         }
     }
@@ -692,7 +685,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
     result.loc = t.loc_start + cast<SrcLoc>(t.cur - t.input_start);
     result.kind = Kind.Error;
     result.error_msg = t.error_msg;
-    result.more = false;
+    result.done = true;
     result.has_error = true;
 }
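
Per the commit message, the new result.init() at the top of each scan iteration is the fix for tokens that were "not properly initialized in multiple cases, including lookahead": every path through the switch now starts from a fully zeroed token instead of inheriting stale fields from whatever the caller passed in.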

@@ -1006,7 +999,7 @@ too_large:
 // Returns how much to shift in source code (0 = error)
 fn u32 Tokenizer.lex_escaped_char(Tokenizer* t, Token* result, const char* stype) {
     // Note: t.cur is on '\'
-    const char* input = t.cur + 1; // skip backspace
+    const char* input = t.cur + 1; // skip backslash
     switch (input[0]) {
     case 0:
     case '\r':
@@ -1139,7 +1132,6 @@ fn void Tokenizer.lex_char_literal(Tokenizer* t, Token* result) {
         t.error(result, "multi-character character constant");
     } else {
         t.error(result, "missing terminating ' character (GOT %c)", *t.cur);
-        //t.error(result, "missing terminating ' character");
     }
     return;
 }
@@ -1357,36 +1349,35 @@ type Operand struct {
     u8 prec;
 }
 
-fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
+fn i64 Tokenizer.parse_ppexpr(Tokenizer* t, Token *result) {
     Operand[MAX_LEVEL] stack;
     Operand *sp;
     Kind op;
     u8 prec;
     i64 val = 0;
-    Token tok;
     bool prefix = true;
 
     for (sp = stack;;) {
-        op = t.lex_preproc(&tok);
+        op = t.lex_preproc(result);
         if (prefix) {
             switch (op) {
             case Identifier:
                 val = 0;
-                const char *id = t.pool.idx2str(tok.text_idx);
+                const char *id = t.pool.idx2str(result.text_idx);
                 if (!string.strcmp(id, "defined")) {
                     bool has_paren = false;
-                    if (t.lex_preproc(&tok) == Kind.LParen) {
+                    if (t.lex_preproc(result) == Kind.LParen) {
                         has_paren = true;
-                        t.lex_preproc(&tok);
+                        t.lex_preproc(result);
                     }
-                    if (tok.kind == Kind.Identifier) {
-                        id = t.pool.idx2str(tok.text_idx);
+                    if (result.kind == Kind.Identifier) {
+                        id = t.pool.idx2str(result.text_idx);
                     } else {
-                        t.error(&tok, "missing identifier after 'defined'");
+                        t.error(result, "missing identifier after 'defined'");
                         return 0;
                     }
                     if (has_paren) {
-                        if (t.lex_preproc(&tok) != Kind.RParen)
+                        if (t.lex_preproc(result) != Kind.RParen)
                             goto syntax_error;
                     }
                     val = t.features.contains(id);
@@ -1398,11 +1389,11 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
                 continue;
             case IntegerLiteral:
                 // TODO: handle signed/unsigned issues
-                val = cast<i64>(tok.int_value);
+                val = cast<i64>(result.int_value);
                 prefix = false;
                 continue;
             case CharLiteral:
-                val = tok.char_value;
+                val = result.char_value;
                 prefix = false;
                 continue;
             case LParen:
@@ -1423,15 +1414,15 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             default:
                 break;
             }
-            t.error(&tok, "missing operand in preprocessor expression");
+            t.error(result, "missing operand in preprocessor expression");
             return 0;
         }
         switch (op) {
         case Identifier:
         case IntegerLiteral:
         case CharLiteral:
         case LParen:
-            t.error(&tok, "missing operator in preprocessor expression");
+            t.error(result, "missing operator in preprocessor expression");
             return 0;
         default:
             break;
@@ -1510,8 +1501,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             prec = 13;
             break;
         default:
-            t.error(&tok, "invalid token in preprocessor expression '%s'",
-                    tok.kind.str());
+            t.error(result, "invalid token in preprocessor expression '%s'", result.kind.str());
             return 0;
         }
 
@@ -1521,7 +1511,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
         switch (sp.op) {
         case LParen:
             if (op != Kind.RParen) {
-                t.error(&tok, "missing parenthesis in preprocessor expression");
+                t.error(result, "missing parenthesis in preprocessor expression");
                 return 0;
             }
             op = Kind.None;
@@ -1589,8 +1579,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             }
             fallthrough;
         default:
-            t.error(&tok, "invalid token in preprocessor expression '%s'",
-                    sp.op.str());
+            t.error(result, "invalid token in preprocessor expression '%s'", sp.op.str());
             return 0;
         }
         break;
@@ -1599,7 +1588,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
             break;
         if (sp >= stack + MAX_LEVEL) {
 too_deep:
-            t.error(&tok, "preprocessor expression too complex");
+            t.error(result, "preprocessor expression too complex");
             return 0;
         }
         sp.val = val;
@@ -1609,7 +1598,7 @@ fn i64 Tokenizer.parse_ppexpr(Tokenizer* t) {
     }
     if (sp > stack) {
 syntax_error:
-        t.error(&tok, "syntax error in preprocessor expression");
+        t.error(result, "syntax error in preprocessor expression");
         return 0;
     }
     return val;
@@ -1650,7 +1639,7 @@ fn bool Tokenizer.handle_if(Tokenizer* t, Token* result, Kind kind) {
     }
 
     if (kind == Kind.Feat_if || kind == Kind.Feat_elif) {
-        if (!t.parse_ppexpr())
+        if (!t.parse_ppexpr(result))
             top.skipping = 1;
     } else {
         /* handle Kind.Feat_ifdef, Kind.Feat_ifndef */
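
Threading the caller's result through parse_ppexpr() instead of a function-local Token tok matters for error handling: Tokenizer.error() writes Kind.Error, error_msg, has_error and now done = true into the token it is given. With the local token those flags were discarded when parse_ppexpr() returned, so the done flag was never properly updated on a failed #if expression; routing everything through result lets handle_if()'s caller observe the error and end-of-parse state directly.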

parser/token.c2

Lines changed: 1 addition & 2 deletions

@@ -301,7 +301,7 @@ public type Radix enum u8 {
 public type Token struct {
     SrcLoc loc;
     Kind kind;
-    bool more;
+    bool done;
     bool has_error;
     Radix radix; // Radix: for IntegerLiteral (2,8,10,16), FloatLiteral(10,16) and CharLiteral (8,16)
     union {
@@ -320,7 +320,6 @@ static_assert(16, sizeof(Token));
 
 public fn void Token.init(Token* tok) {
     string.memset(tok, 0, sizeof(Token));
-    tok.more = true;
 }
 
 public type KWInfo struct {
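
For reference, static_assert(16, sizeof(Token)) still holds after the rename, since bool done occupies the same byte bool more did. Assuming SrcLoc is a u32 and Kind is a one-byte enum like the Radix enum u8 declared just above, the accounting is: loc(4) + kind(1) + done(1) + has_error(1) + radix(1) + union(8) = 16 bytes.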

tools/c2cat.c2

Lines changed: 3 additions & 3 deletions

@@ -123,7 +123,7 @@ fn void C2cat.print_token(C2cat* ctx, const Token* tok) {
 
     if (ctx.offset != 0) {
         // copy stuff from file to out (from end of last token to start of current)
-        if (!tok.more) return;
+        if (tok.done) return;
         if (ctx.offset <= tok.loc) {
             u32 len = tok.loc - ctx.offset;
             if (len) out.add2(ctx.input + ctx.offset, len);
@@ -300,7 +300,7 @@ public fn i32 c2cat(const char* filename)
     Token tok;
     tok.init();
 
-    while (tok.more) {
+    while (!tok.done) {
         tokenizer.lex(&tok);
         //printf("%4d %s\n", tok.loc, tok.kind.str());
 
@@ -319,7 +319,7 @@ public fn i32 c2cat(const char* filename)
         ctx.out.color(col_normal);
         ctx.out.add1('\n');
     }
-    printf("%s", ctx.out.data());
+    fputs(ctx.out.data(), stdout);
     fflush(stdout);
 
     ctx.pool.free();
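
The final printf("%s", ...) → fputs(..., stdout) swap is behavior-preserving for c2cat's output: fputs() writes the accumulated buffer verbatim with no format-string scan, which is cheaper on a large highlighted file and also sidesteps the classic pitfall of a later "simplification" to printf(buf), where a literal % in the displayed source would be misread as a conversion specifier.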
