Skip to content

Commit fbf3596

Browse files
authored
gh-87092: change assembler to use instruction sequence instead of CFG (#103933)
1 parent 84e7d0f commit fbf3596

File tree

5 files changed

+92
-80
lines changed

5 files changed

+92
-80
lines changed

Include/internal/pycore_compile.h

+11 -2
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ PyAPI_FUNC(PyCodeObject*) _PyAST_Compile(
1919
int optimize,
2020
struct _arena *arena);
2121

22+
static const _PyCompilerSrcLocation NO_LOCATION = {-1, -1, -1, -1};
2223

2324
typedef struct {
2425
int optimize;
@@ -33,15 +34,21 @@ extern int _PyAST_Optimize(
3334
struct _arena *arena,
3435
_PyASTOptimizeState *state);
3536

37+
typedef struct {
38+
int h_offset;
39+
int h_startdepth;
40+
int h_preserve_lasti;
41+
} _PyCompile_ExceptHandlerInfo;
3642

3743
typedef struct {
3844
int i_opcode;
3945
int i_oparg;
4046
_PyCompilerSrcLocation i_loc;
41-
} _PyCompilerInstruction;
47+
_PyCompile_ExceptHandlerInfo i_except_handler_info;
48+
} _PyCompile_Instruction;
4249

4350
typedef struct {
44-
_PyCompilerInstruction *s_instrs;
51+
_PyCompile_Instruction *s_instrs;
4552
int s_allocated;
4653
int s_used;
4754

@@ -82,6 +89,8 @@ int _PyCompile_EnsureArrayLargeEnough(
8289

8390
int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
8491

92+
int _PyCompile_InstrSize(int opcode, int oparg);
93+
8594
/* Access compiler internals for unit testing */
8695

8796
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(

Include/internal/pycore_flowgraph.h

+1 -3
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ extern "C" {
1111
#include "pycore_opcode_utils.h"
1212
#include "pycore_compile.h"
1313

14-
static const _PyCompilerSrcLocation NO_LOCATION = {-1, -1, -1, -1};
1514

1615
typedef struct {
1716
int i_opcode;
@@ -97,7 +96,6 @@ int _PyCfg_OptimizeCodeUnit(_PyCfgBuilder *g, PyObject *consts, PyObject *const_
9796
int _PyCfg_Stackdepth(_PyCfgBasicblock *entryblock, int code_flags);
9897
void _PyCfg_ConvertExceptionHandlersToNops(_PyCfgBasicblock *entryblock);
9998
int _PyCfg_ResolveJumps(_PyCfgBuilder *g);
100-
int _PyCfg_InstrSize(_PyCfgInstruction *instruction);
10199

102100

103101
static inline int
@@ -113,7 +111,7 @@ basicblock_nofallthrough(const _PyCfgBasicblock *b) {
113111

114112
PyCodeObject *
115113
_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *u, PyObject *const_cache,
116-
PyObject *consts, int maxdepth, _PyCfgBasicblock *entryblock,
114+
PyObject *consts, int maxdepth, _PyCompile_InstructionSequence *instrs,
117115
int nlocalsplus, int code_flags, PyObject *filename);
118116

119117
#ifdef __cplusplus

Python/assemble.c

+46 -49
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
#include <stdbool.h>
22

33
#include "Python.h"
4-
#include "pycore_flowgraph.h"
4+
#include "pycore_code.h" // write_location_entry_start()
55
#include "pycore_compile.h"
6+
#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
67
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
7-
#include "pycore_code.h" // write_location_entry_start()
88

99

1010
#define DEFAULT_CODE_SIZE 128
@@ -22,8 +22,8 @@
2222
}
2323

2424
typedef _PyCompilerSrcLocation location;
25-
typedef _PyCfgInstruction cfg_instr;
26-
typedef _PyCfgBasicblock basicblock;
25+
typedef _PyCompile_Instruction instruction;
26+
typedef _PyCompile_InstructionSequence instr_sequence;
2727

2828
static inline bool
2929
same_location(location a, location b)
@@ -117,21 +117,22 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
117117
#define MAX_SIZE_OF_ENTRY 20
118118

119119
static int
120-
assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler)
120+
assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
121+
_PyCompile_ExceptHandlerInfo *handler)
121122
{
122123
Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
123124
if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
124125
RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2));
125126
}
126127
int size = end-start;
127128
assert(end > start);
128-
int target = handler->b_offset;
129-
int depth = handler->b_startdepth - 1;
130-
if (handler->b_preserve_lasti) {
129+
int target = handler->h_offset;
130+
int depth = handler->h_startdepth - 1;
131+
if (handler->h_preserve_lasti) {
131132
depth -= 1;
132133
}
133134
assert(depth >= 0);
134-
int depth_lasti = (depth<<1) | handler->b_preserve_lasti;
135+
int depth_lasti = (depth<<1) | handler->h_preserve_lasti;
135136
assemble_emit_exception_table_item(a, start, (1<<7));
136137
assemble_emit_exception_table_item(a, size, 0);
137138
assemble_emit_exception_table_item(a, target, 0);
@@ -140,29 +141,26 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end, bas
140141
}
141142

142143
static int
143-
assemble_exception_table(struct assembler *a, basicblock *entryblock)
144+
assemble_exception_table(struct assembler *a, instr_sequence *instrs)
144145
{
145-
basicblock *b;
146146
int ioffset = 0;
147-
basicblock *handler = NULL;
147+
_PyCompile_ExceptHandlerInfo handler;
148+
handler.h_offset = -1;
148149
int start = -1;
149-
for (b = entryblock; b != NULL; b = b->b_next) {
150-
ioffset = b->b_offset;
151-
for (int i = 0; i < b->b_iused; i++) {
152-
cfg_instr *instr = &b->b_instr[i];
153-
if (instr->i_except != handler) {
154-
if (handler != NULL) {
155-
RETURN_IF_ERROR(
156-
assemble_emit_exception_table_entry(a, start, ioffset, handler));
157-
}
158-
start = ioffset;
159-
handler = instr->i_except;
150+
for (int i = 0; i < instrs->s_used; i++) {
151+
instruction *instr = &instrs->s_instrs[i];
152+
if (instr->i_except_handler_info.h_offset != handler.h_offset) {
153+
if (handler.h_offset >= 0) {
154+
RETURN_IF_ERROR(
155+
assemble_emit_exception_table_entry(a, start, ioffset, &handler));
160156
}
161-
ioffset += _PyCfg_InstrSize(instr);
157+
start = ioffset;
158+
handler = instr->i_except_handler_info;
162159
}
160+
ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
163161
}
164-
if (handler != NULL) {
165-
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, handler));
162+
if (handler.h_offset >= 0) {
163+
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
166164
}
167165
return SUCCESS;
168166
}
@@ -316,31 +314,31 @@ assemble_emit_location(struct assembler* a, location loc, int isize)
316314
}
317315

318316
static int
319-
assemble_location_info(struct assembler *a, basicblock *entryblock, int firstlineno)
317+
assemble_location_info(struct assembler *a, instr_sequence *instrs,
318+
int firstlineno)
320319
{
321320
a->a_lineno = firstlineno;
322321
location loc = NO_LOCATION;
323322
int size = 0;
324-
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
325-
for (int j = 0; j < b->b_iused; j++) {
326-
if (!same_location(loc, b->b_instr[j].i_loc)) {
323+
for (int i = 0; i < instrs->s_used; i++) {
324+
instruction *instr = &instrs->s_instrs[i];
325+
if (!same_location(loc, instr->i_loc)) {
327326
RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
328-
loc = b->b_instr[j].i_loc;
327+
loc = instr->i_loc;
329328
size = 0;
330-
}
331-
size += _PyCfg_InstrSize(&b->b_instr[j]);
332329
}
330+
size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
333331
}
334332
RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
335333
return SUCCESS;
336334
}
337335

338336
static void
339-
write_instr(_Py_CODEUNIT *codestr, cfg_instr *instruction, int ilen)
337+
write_instr(_Py_CODEUNIT *codestr, instruction *instr, int ilen)
340338
{
341-
int opcode = instruction->i_opcode;
339+
int opcode = instr->i_opcode;
342340
assert(!IS_PSEUDO_OPCODE(opcode));
343-
int oparg = instruction->i_oparg;
341+
int oparg = instr->i_oparg;
344342
assert(HAS_ARG(opcode) || oparg == 0);
345343
int caches = _PyOpcode_Caches[opcode];
346344
switch (ilen - caches) {
@@ -380,12 +378,12 @@ write_instr(_Py_CODEUNIT *codestr, cfg_instr *instruction, int ilen)
380378
*/
381379

382380
static int
383-
assemble_emit_instr(struct assembler *a, cfg_instr *i)
381+
assemble_emit_instr(struct assembler *a, instruction *instr)
384382
{
385383
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
386384
_Py_CODEUNIT *code;
387385

388-
int size = _PyCfg_InstrSize(i);
386+
int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
389387
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
390388
if (len > PY_SSIZE_T_MAX / 2) {
391389
return ERROR;
@@ -394,25 +392,24 @@ assemble_emit_instr(struct assembler *a, cfg_instr *i)
394392
}
395393
code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
396394
a->a_offset += size;
397-
write_instr(code, i, size);
395+
write_instr(code, instr, size);
398396
return SUCCESS;
399397
}
400398

401399
static int
402-
assemble_emit(struct assembler *a, basicblock *entryblock, int first_lineno,
403-
PyObject *const_cache)
400+
assemble_emit(struct assembler *a, instr_sequence *instrs,
401+
int first_lineno, PyObject *const_cache)
404402
{
405403
RETURN_IF_ERROR(assemble_init(a, first_lineno));
406404

407-
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
408-
for (int j = 0; j < b->b_iused; j++) {
409-
RETURN_IF_ERROR(assemble_emit_instr(a, &b->b_instr[j]));
410-
}
405+
for (int i = 0; i < instrs->s_used; i++) {
406+
instruction *instr = &instrs->s_instrs[i];
407+
RETURN_IF_ERROR(assemble_emit_instr(a, instr));
411408
}
412409

413-
RETURN_IF_ERROR(assemble_location_info(a, entryblock, a->a_lineno));
410+
RETURN_IF_ERROR(assemble_location_info(a, instrs, a->a_lineno));
414411

415-
RETURN_IF_ERROR(assemble_exception_table(a, entryblock));
412+
RETURN_IF_ERROR(assemble_exception_table(a, instrs));
416413

417414
RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off));
418415
RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table));
@@ -586,13 +583,13 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
586583

587584
PyCodeObject *
588585
_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
589-
PyObject *consts, int maxdepth, basicblock *entryblock,
586+
PyObject *consts, int maxdepth, instr_sequence *instrs,
590587
int nlocalsplus, int code_flags, PyObject *filename)
591588
{
592589
PyCodeObject *co = NULL;
593590

594591
struct assembler a;
595-
int res = assemble_emit(&a, entryblock, umd->u_firstlineno, const_cache);
592+
int res = assemble_emit(&a, instrs, umd->u_firstlineno, const_cache);
596593
if (res == SUCCESS) {
597594
co = makecode(umd, &a, const_cache, consts, maxdepth, nlocalsplus,
598595
code_flags, filename);

Python/compile.c

+27 -13
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,18 @@ enum {
149149
COMPILER_SCOPE_COMPREHENSION,
150150
};
151151

152-
typedef _PyCompilerInstruction instruction;
152+
153+
int
154+
_PyCompile_InstrSize(int opcode, int oparg)
155+
{
156+
assert(!IS_PSEUDO_OPCODE(opcode));
157+
assert(HAS_ARG(opcode) || oparg == 0);
158+
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
159+
int caches = _PyOpcode_Caches[opcode];
160+
return extended_args + 1 + caches;
161+
}
162+
163+
typedef _PyCompile_Instruction instruction;
153164
typedef _PyCompile_InstructionSequence instr_sequence;
154165

155166
#define INITIAL_INSTR_SEQUENCE_SIZE 100
@@ -6968,10 +6979,6 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
69686979
goto error;
69696980
}
69706981

6971-
if (cfg_to_instr_sequence(&g, &optimized_instrs) < 0) {
6972-
goto error;
6973-
}
6974-
69756982
/** Assembly **/
69766983
int nlocalsplus = prepare_localsplus(u, &g, code_flags);
69776984
if (nlocalsplus < 0) {
@@ -6990,15 +6997,15 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
69906997
if (_PyCfg_ResolveJumps(&g) < 0) {
69916998
goto error;
69926999
}
7000+
7001+
/* Can't modify the bytecode after computing jump offsets. */
7002+
69937003
if (cfg_to_instr_sequence(&g, &optimized_instrs) < 0) {
69947004
goto error;
69957005
}
69967006

6997-
6998-
/* Can't modify the bytecode after computing jump offsets. */
6999-
70007007
co = _PyAssemble_MakeCodeObject(&u->u_metadata, const_cache, consts,
7001-
maxdepth, g.g_entryblock, nlocalsplus,
7008+
maxdepth, &optimized_instrs, nlocalsplus,
70027009
code_flags, filename);
70037010

70047011
error:
@@ -7039,11 +7046,18 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
70397046
RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
70407047
for (int i = 0; i < b->b_iused; i++) {
70417048
cfg_instr *instr = &b->b_instr[i];
7042-
int arg = HAS_TARGET(instr->i_opcode) ?
7043-
instr->i_target->b_label.id :
7044-
instr->i_oparg;
70457049
RETURN_IF_ERROR(
7046-
instr_sequence_addop(seq, instr->i_opcode, arg, instr->i_loc));
7050+
instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
7051+
7052+
_PyCompile_ExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
7053+
if (instr->i_except != NULL) {
7054+
hi->h_offset = instr->i_except->b_offset;
7055+
hi->h_startdepth = instr->i_except->b_startdepth;
7056+
hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
7057+
}
7058+
else {
7059+
hi->h_offset = -1;
7060+
}
70477061
}
70487062
}
70497063
return SUCCESS;

Python/flowgraph.c

+7 -13
Original file line number | Diff line number | Diff line change
@@ -166,24 +166,18 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
166166
return SUCCESS;
167167
}
168168

169-
int
170-
_PyCfg_InstrSize(cfg_instr *instruction)
169+
static int
170+
instr_size(cfg_instr *instruction)
171171
{
172-
int opcode = instruction->i_opcode;
173-
assert(!IS_PSEUDO_OPCODE(opcode));
174-
int oparg = instruction->i_oparg;
175-
assert(HAS_ARG(opcode) || oparg == 0);
176-
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
177-
int caches = _PyOpcode_Caches[opcode];
178-
return extended_args + 1 + caches;
172+
return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
179173
}
180174

181175
static int
182176
blocksize(basicblock *b)
183177
{
184178
int size = 0;
185179
for (int i = 0; i < b->b_iused; i++) {
186-
size += _PyCfg_InstrSize(&b->b_instr[i]);
180+
size += instr_size(&b->b_instr[i]);
187181
}
188182
return size;
189183
}
@@ -492,7 +486,7 @@ resolve_jump_offsets(basicblock *entryblock)
492486
bsize = b->b_offset;
493487
for (int i = 0; i < b->b_iused; i++) {
494488
cfg_instr *instr = &b->b_instr[i];
495-
int isize = _PyCfg_InstrSize(instr);
489+
int isize = instr_size(instr);
496490
/* jump offsets are computed relative to
497491
* the instruction pointer after fetching
498492
* the jump instruction.
@@ -508,7 +502,7 @@ resolve_jump_offsets(basicblock *entryblock)
508502
assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
509503
instr->i_oparg -= bsize;
510504
}
511-
if (_PyCfg_InstrSize(instr) != isize) {
505+
if (instr_size(instr) != isize) {
512506
extended_arg_recompile = 1;
513507
}
514508
}
@@ -520,7 +514,7 @@ resolve_jump_offsets(basicblock *entryblock)
520514
with a better solution.
521515
522516
The issue is that in the first loop blocksize() is called
523-
which calls _PyCfg_InstrSize() which requires i_oparg be set
517+
which calls instr_size() which requires i_oparg be set
524518
appropriately. There is a bootstrap problem because
525519
i_oparg is calculated in the second loop above.
526520

0 commit comments

Comments
 (0)