micropython/py/emitnative.c

2810 lines
115 KiB
C
Raw Normal View History

/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2013, 2014 Damien P. George
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
// Essentially normal Python has 1 type: Python objects
// Viper has more than 1 type, and is just a more complicated (a superset of) Python.
// If you declare everything in Viper as a Python object (ie omit type decls) then
// it should in principle be exactly the same as Python native.
// Having types means having more opcodes, like binary_op_nat_nat, binary_op_nat_obj etc.
// In practice we won't have a VM but rather do this in asm which is actually very minimal.
// Because it breaks strict Python equivalence it should be a completely separate
// decorator. It breaks equivalence because overflow on integers wraps around.
// It shouldn't break equivalence if you don't use the new types, but since the
// type decls might be used in normal Python for other reasons, it's probably safest,
// cleanest and clearest to make it a separate decorator.
// Actually, it does break equivalence because integers default to native integers,
// not Python objects.
// for x in l[0:8]: can be compiled into a native loop if l has pointer type
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "py/emit.h"
#include "py/bc.h"
// Debug output switch.  With MICROPY_DEBUG_VERBOSE enabled, DEBUG_PRINT is set
// and the DEBUG_printf macro expands to itself, so calls go to whatever
// DEBUG_printf is provided elsewhere.  Otherwise every DEBUG_printf(...) call
// expands to the no-op expression (void)0.
#if MICROPY_DEBUG_VERBOSE // print debugging info
#define DEBUG_PRINT (1)
#define DEBUG_printf DEBUG_printf
#else // don't print debugging info
#define DEBUG_printf(...) (void)0
#endif
// wrapper around everything in this file
#if N_X64 || N_X86 || N_THUMB || N_ARM || N_XTENSA
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// C stack layout for native functions:
//  0:                          nlr_buf_t [optional]
//  emit->code_state_start:     mp_code_state_t
//  emit->stack_start:          Python object stack             | emit->n_state
//                              locals (reversed, L0 at end)    |
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
//
// C stack layout for native generator functions:
//  0=emit->stack_start:        nlr_buf_t
//
//  Then REG_GENERATOR_STATE points to:
//  0=emit->code_state_start:   mp_code_state_t
//  emit->stack_start:          Python object stack             | emit->n_state
//                              locals (reversed, L0 at end)    |
//
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// C stack layout for viper functions:
//  0:                          nlr_buf_t [optional]
//  emit->code_state_start:     fun_obj, old_globals [optional]
//  emit->stack_start:          Python object stack             | emit->n_state
//                              locals (reversed, L0 at end)    |
//                              (L0-L2 may be in regs instead)
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Word index of nlr_buf_t.ret_val
#define NLR_BUF_IDX_RET_VAL (1)
// Whether the viper function needs access to fun_obj.
// Non-zero when the scope has a non-empty exception stack (exc_stack_size > 0)
// or when its scope_flags contain MP_SCOPE_FLAG_REFGLOBALS or
// MP_SCOPE_FLAG_HASCONSTS.
#define NEED_FUN_OBJ(emit) ((emit)->scope->exc_stack_size > 0 \
|| ((emit)->scope->scope_flags & (MP_SCOPE_FLAG_REFGLOBALS | MP_SCOPE_FLAG_HASCONSTS)))
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Whether the native/viper function needs to be wrapped in an exception handler
py: Fix native functions so they run with their correct globals context. Prior to this commit a function compiled with the native decorator @micropython.native would not work correctly when accessing global variables, because the globals dict was not being set upon function entry. This commit fixes this problem by, upon function entry, setting as the current globals dict the globals dict context the function was defined within, as per normal Python semantics, and as bytecode does. Upon function exit the original globals dict is restored. In order to restore the globals dict when an exception is raised the native function must guard its internals with an nlr_push/nlr_pop pair. Because this push/pop is relatively expensive, in both C stack usage for the nlr_buf_t and CPU execution time, the implementation here optimises things as much as possible. First, the compiler keeps track of whether a function even needs to access global variables. Using this information the native emitter then generates three different kinds of code: 1. no globals used, no exception handlers: no nlr handling code and no setting of the globals dict. 2. globals used, no exception handlers: an nlr_buf_t is allocated on the C stack but it is not used if the globals dict is unchanged, saving execution time because nlr_push/nlr_pop don't need to run. 3. function has exception handlers, may use globals: an nlr_buf_t is allocated and nlr_push/nlr_pop are always called. In the end, native functions that don't access globals and don't have exception handlers will run more efficiently than those that do. Fixes issue #1573.
2018-09-13 13:03:48 +01:00
// Non-zero when the scope has a non-empty exception stack (exc_stack_size > 0)
// or when its scope_flags contain MP_SCOPE_FLAG_GENERATOR or
// MP_SCOPE_FLAG_REFGLOBALS.
#define NEED_GLOBAL_EXC_HANDLER(emit) ((emit)->scope->exc_stack_size > 0 \
|| ((emit)->scope->scope_flags & (MP_SCOPE_FLAG_GENERATOR | MP_SCOPE_FLAG_REFGLOBALS)))
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Whether registers can be used to store locals (only true if there are no
// exception handlers, because otherwise an nlr_jump will restore registers to
// their state at the start of the function and updates to locals will be lost).
// Also false when the scope is flagged MP_SCOPE_FLAG_GENERATOR.
// The argument is parenthesised at every use so that any pointer-valued
// expression (not just a plain identifier) can be passed safely.
#define CAN_USE_REGS_FOR_LOCALS(emit) ((emit)->scope->exc_stack_size == 0 && !((emit)->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR))
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Indices within the local C stack for various variables
// The first four ignore their argument and resolve to fixed NLR_BUF_IDX_xxx
// word indices.
#define LOCAL_IDX_EXC_VAL(emit) (NLR_BUF_IDX_RET_VAL)
#define LOCAL_IDX_EXC_HANDLER_PC(emit) (NLR_BUF_IDX_LOCAL_1)
#define LOCAL_IDX_EXC_HANDLER_UNWIND(emit) (NLR_BUF_IDX_LOCAL_2)
#define LOCAL_IDX_RET_VAL(emit) (NLR_BUF_IDX_LOCAL_3)
// The remaining ones are emit->code_state_start plus the word offset of a
// member of mp_code_state_t (byte offset divided by sizeof(uintptr_t)).
#define LOCAL_IDX_FUN_OBJ(emit) ((emit)->code_state_start + offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t))
// NOTE: OLD_GLOBALS and GEN_PC both resolve to the `ip` member, i.e. they
// name the same slot.
#define LOCAL_IDX_OLD_GLOBALS(emit) ((emit)->code_state_start + offsetof(mp_code_state_t, ip) / sizeof(uintptr_t))
#define LOCAL_IDX_GEN_PC(emit) ((emit)->code_state_start + offsetof(mp_code_state_t, ip) / sizeof(uintptr_t))
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Index of local variable `local_num`: locals are counted backwards from the
// last of the n_state slots that begin at stack_start, so local 0 is at
// stack_start + n_state - 1.
#define LOCAL_IDX_LOCAL_VAR(emit, local_num) ((emit)->stack_start + (emit)->n_state - 1 - (local_num))
// The generator-state register is the same register as REG_LOCAL_3.
#define REG_GENERATOR_STATE (REG_LOCAL_3)
// Record a viper type error: create a ViperTypeError exception from the given
// printf-style arguments and store it through emit->error_slot.  Any exception
// object already stored there is overwritten.
// The emit argument is parenthesised so that any pointer-valued expression
// can be passed safely.
#define EMIT_NATIVE_VIPER_TYPE_ERROR(emit, ...) do { \
        *(emit)->error_slot = mp_obj_new_exception_msg_varg(&mp_type_ViperTypeError, __VA_ARGS__); \
    } while (0)
// Tag stored in the `kind` field of a stack_info_t entry.
typedef enum {
    STACK_VALUE = 0,
    STACK_REG = 1,
    STACK_IMM = 2,
} stack_info_kind_t;
// these enums must be distinct and the bottom 4 bits
// must correspond to the correct MP_NATIVE_TYPE_xxx value
// The first eight entries are exactly their MP_NATIVE_TYPE_xxx value (upper
// bits zero).  The last three reuse the low bits of PTR / OBJ and set non-zero
// upper bits (0x50, 0x60, 0x70) so that they remain distinct from the plain
// types while still carrying a valid MP_NATIVE_TYPE_xxx in the low bits.
typedef enum {
VTYPE_PYOBJ = 0x00 | MP_NATIVE_TYPE_OBJ,
VTYPE_BOOL = 0x00 | MP_NATIVE_TYPE_BOOL,
VTYPE_INT = 0x00 | MP_NATIVE_TYPE_INT,
VTYPE_UINT = 0x00 | MP_NATIVE_TYPE_UINT,
VTYPE_PTR = 0x00 | MP_NATIVE_TYPE_PTR,
VTYPE_PTR8 = 0x00 | MP_NATIVE_TYPE_PTR8,
VTYPE_PTR16 = 0x00 | MP_NATIVE_TYPE_PTR16,
VTYPE_PTR32 = 0x00 | MP_NATIVE_TYPE_PTR32,
VTYPE_PTR_NONE = 0x50 | MP_NATIVE_TYPE_PTR,
VTYPE_UNBOUND = 0x60 | MP_NATIVE_TYPE_OBJ,
VTYPE_BUILTIN_CAST = 0x70 | MP_NATIVE_TYPE_OBJ,
} vtype_kind_t;
// Translate a type-name qstr (object, bool, int, uint, ptr, ptr8, ptr16,
// ptr32) into the matching MP_NATIVE_TYPE_xxx code.
// Returns -1 when qst is not one of those names.
int mp_native_type_from_qstr(qstr qst) {
    int native_type = -1;

    switch (qst) {
        case MP_QSTR_object:
            native_type = MP_NATIVE_TYPE_OBJ;
            break;
        case MP_QSTR_bool:
            native_type = MP_NATIVE_TYPE_BOOL;
            break;
        case MP_QSTR_int:
            native_type = MP_NATIVE_TYPE_INT;
            break;
        case MP_QSTR_uint:
            native_type = MP_NATIVE_TYPE_UINT;
            break;
        case MP_QSTR_ptr:
            native_type = MP_NATIVE_TYPE_PTR;
            break;
        case MP_QSTR_ptr8:
            native_type = MP_NATIVE_TYPE_PTR8;
            break;
        case MP_QSTR_ptr16:
            native_type = MP_NATIVE_TYPE_PTR16;
            break;
        case MP_QSTR_ptr32:
            native_type = MP_NATIVE_TYPE_PTR32;
            break;
        default:
            // unrecognised name: keep the -1 sentinel
            break;
    }

    return native_type;
}
// Give the qstr naming a vtype.  The eight plain types map to their own type
// name; VTYPE_PTR_NONE and every other value map to MP_QSTR_None.
STATIC qstr vtype_to_qstr(vtype_kind_t vtype) {
    qstr type_name = MP_QSTR_None;

    switch (vtype) {
        case VTYPE_PYOBJ:
            type_name = MP_QSTR_object;
            break;
        case VTYPE_BOOL:
            type_name = MP_QSTR_bool;
            break;
        case VTYPE_INT:
            type_name = MP_QSTR_int;
            break;
        case VTYPE_UINT:
            type_name = MP_QSTR_uint;
            break;
        case VTYPE_PTR:
            type_name = MP_QSTR_ptr;
            break;
        case VTYPE_PTR8:
            type_name = MP_QSTR_ptr8;
            break;
        case VTYPE_PTR16:
            type_name = MP_QSTR_ptr16;
            break;
        case VTYPE_PTR32:
            type_name = MP_QSTR_ptr32;
            break;
        case VTYPE_PTR_NONE:
        default:
            // keep MP_QSTR_None
            break;
    }

    return type_name;
}
// Description of one entry in the emitter's stack_info array.
typedef struct _stack_info_t {
// type of the entry (one of the VTYPE_xxx values)
vtype_kind_t vtype;
// STACK_VALUE, STACK_REG or STACK_IMM
stack_info_kind_t kind;
// payload; presumably u_reg is meaningful for STACK_REG and u_imm for
// STACK_IMM (inferred from the names -- confirm against the users)
union {
int u_reg;
mp_int_t u_imm;
} data;
} stack_info_t;
// Reserved label values.  Both fit in the 15-bit unwind_label bit-field of
// exc_stack_entry_t (0x7fff is the largest value that field can hold).
// NOTE(review): from the names these look like sentinels for "no unwind
// pending" and "perform the final unwind" -- confirm at the use sites.
#define UNWIND_LABEL_UNUSED (0x7fff)
#define UNWIND_LABEL_DO_FINAL_UNWIND (0x7ffe)
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// One entry of the emitter's exception-handler stack (emit_t.exc_stack).
// The four bit-fields are declared as two 15+1 bit pairs of uint16_t.
typedef struct _exc_stack_entry_t {
    uint16_t label : 15;        // 15-bit label number
    uint16_t is_finally : 1;    // single-bit flag
    uint16_t unwind_label : 15; // 15-bit label number; the UNWIND_LABEL_xxx values fit here
    uint16_t is_active : 1;     // single-bit flag
} exc_stack_entry_t;
struct _emit_t {
mp_obj_t *error_slot;
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
uint *label_slot;
uint exit_label;
int pass;
bool do_viper_types;
mp_uint_t local_vtype_alloc;
vtype_kind_t *local_vtype;
mp_uint_t stack_info_alloc;
stack_info_t *stack_info;
vtype_kind_t saved_stack_vtype;
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
size_t exc_stack_alloc;
size_t exc_stack_size;
exc_stack_entry_t *exc_stack;
int prelude_offset;
int start_offset;
int n_state;
uint16_t code_state_start;
uint16_t stack_start;
int stack_size;
uint16_t const_table_cur_obj;
uint16_t const_table_num_obj;
uint16_t const_table_cur_raw_code;
mp_uint_t *const_table;
#if MICROPY_PERSISTENT_CODE_SAVE
uint16_t qstr_link_cur;
mp_qstr_link_entry_t *qstr_link;
#endif
bool last_emit_was_return_value;
scope_t *scope;
ASM_T *as;
};
// Lookup table of the registers REG_LOCAL_1..REG_LOCAL_3, indexed from 0.
// The array is sized by REG_LOCAL_NUM.
STATIC const uint8_t reg_local_table[REG_LOCAL_NUM] = {REG_LOCAL_1, REG_LOCAL_2, REG_LOCAL_3};
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Forward declarations of file-local helpers that are used before their
// definitions.
STATIC void emit_native_global_exc_entry(emit_t *emit);
STATIC void emit_native_global_exc_exit(emit_t *emit);
STATIC void emit_native_load_const_obj(emit_t *emit, mp_obj_t obj);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Create a new emitter.
//   error_slot     - stored in the emitter; EMIT_NATIVE_VIPER_TYPE_ERROR writes
//                    exceptions through it
//   label_slot     - stored in the emitter as-is
//   max_num_labels - passed on to mp_asm_base_init for the assembler
// Returns the new emit_t, which must be released with EXPORT_FUN(free).
emit_t *EXPORT_FUN(new)(mp_obj_t *error_slot, uint *label_slot, mp_uint_t max_num_labels) {
    // Fields not assigned below (e.g. local_vtype / local_vtype_alloc) keep
    // whatever m_new0 provides -- presumably zero; confirm against m_new0.
    emit_t *emit = m_new0(emit_t, 1);
    emit->error_slot = error_slot;
    emit->label_slot = label_slot;

    // Both work stacks start with room for 8 entries.
    emit->stack_info_alloc = 8;
    emit->stack_info = m_new(stack_info_t, emit->stack_info_alloc);

    emit->exc_stack_alloc = 8;
    emit->exc_stack = m_new(exc_stack_entry_t, emit->exc_stack_alloc);

    emit->as = m_new0(ASM_T, 1);
    mp_asm_base_init(&emit->as->base, max_num_labels);

    return emit;
}
// Release an emitter and everything it owns.
//
// The assembler is de-initialised and freed first, then the exception stack,
// the local-variable type table and the stack-info table are freed using the
// allocation counts recorded in the emitter, and finally the emitter object
// itself is freed.  `emit` must not be used after this call.
void EXPORT_FUN(free)(emit_t *emit) {
    mp_asm_base_deinit(&emit->as->base, false);
    m_del_obj(ASM_T, emit->as);
    m_del(exc_stack_entry_t, emit->exc_stack, emit->exc_stack_alloc);
    m_del(vtype_kind_t, emit->local_vtype, emit->local_vtype_alloc);
    m_del(stack_info_t, emit->stack_info, emit->stack_info_alloc);
    m_del_obj(emit_t, emit);
}
// Forward declaration: emit_native_start_pass calls this helper before the
// point in the file where it is defined.
STATIC void emit_call_with_imm_arg(emit_t *emit, mp_fun_kind_t fun_kind, mp_int_t arg_val, int arg_reg);
// Emit a load of reg_dest from the table addressed by REG_FUN_TABLE.
// const_val is the offset of the wanted entry, passed straight through to
// the load-with-offset assembler macro.
STATIC void emit_native_mov_reg_const(emit_t *emit, int reg_dest, int const_val) {
    const int table_offset = const_val;
    ASM_LOAD_REG_REG_OFFSET(emit->as, reg_dest, REG_FUN_TABLE, table_offset);
}
// Emit a store of register reg_src into state slot local_num.
// For generator scopes the slot is addressed relative to REG_GENERATOR_STATE,
// otherwise it is a C-stack local.
STATIC void emit_native_mov_state_reg(emit_t *emit, int local_num, int reg_src) {
    const bool is_generator = (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) != 0;
    if (!is_generator) {
        ASM_MOV_LOCAL_REG(emit->as, local_num, reg_src);
        return;
    }
    ASM_STORE_REG_REG_OFFSET(emit->as, reg_src, REG_GENERATOR_STATE, local_num);
}
// Emit a load of state slot local_num into register reg_dest.
// For generator scopes the slot is addressed relative to REG_GENERATOR_STATE,
// otherwise it is a C-stack local.
STATIC void emit_native_mov_reg_state(emit_t *emit, int reg_dest, int local_num) {
    const bool is_generator = (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) != 0;
    if (!is_generator) {
        ASM_MOV_REG_LOCAL(emit->as, reg_dest, local_num);
        return;
    }
    ASM_LOAD_REG_REG_OFFSET(emit->as, reg_dest, REG_GENERATOR_STATE, local_num);
}
// Emit code that puts the address of state slot local_num into reg_dest.
// For generator scopes the address is computed as
// REG_GENERATOR_STATE + local_num * ASM_WORD_SIZE; otherwise the assembler's
// local-address instruction is used.
STATIC void emit_native_mov_reg_state_addr(emit_t *emit, int reg_dest, int local_num) {
    const bool is_generator = (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) != 0;
    if (!is_generator) {
        ASM_MOV_REG_LOCAL_ADDR(emit->as, reg_dest, local_num);
        return;
    }
    // byte offset of the slot first, then add the base of the generator state
    ASM_MOV_REG_IMM(emit->as, reg_dest, local_num * ASM_WORD_SIZE);
    ASM_ADD_REG_REG(emit->as, reg_dest, REG_GENERATOR_STATE);
}
// Emit a load of the qstr value qst into arg_reg.
// When MICROPY_PERSISTENT_CODE_SAVE is enabled a fixed-size encoding is used
// and, on the final emit pass, the location of the immediate is recorded in
// the qstr link table (location shifted left by 2, tagged with 1).
STATIC void emit_native_mov_reg_qstr(emit_t *emit, int arg_reg, qstr qst) {
    #if MICROPY_PERSISTENT_CODE_SAVE
    const size_t imm_loc = ASM_MOV_REG_IMM_FIX_U16(emit->as, arg_reg, qst);
    // a link slot is consumed on every pass so that the count is known
    // before the table is filled in
    const size_t slot = emit->qstr_link_cur++;
    if (emit->pass == MP_PASS_EMIT) {
        mp_qstr_link_entry_t *entry = &emit->qstr_link[slot];
        entry->off = (imm_loc << 2) | 1;
        entry->qst = qst;
    }
    #else
    ASM_MOV_REG_IMM(emit->as, arg_reg, qst);
    #endif
}
// Emit a load of the qstr object MP_OBJ_NEW_QSTR(qst) into reg_dest.
// When MICROPY_PERSISTENT_CODE_SAVE is enabled a fixed-size word encoding is
// used and, on the final emit pass, the location of the immediate is recorded
// in the qstr link table (location shifted left by 2, tagged with 2).
STATIC void emit_native_mov_reg_qstr_obj(emit_t *emit, int reg_dest, qstr qst) {
    #if MICROPY_PERSISTENT_CODE_SAVE
    const size_t imm_loc = ASM_MOV_REG_IMM_FIX_WORD(emit->as, reg_dest, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
    // a link slot is consumed on every pass so that the count is known
    // before the table is filled in
    const size_t slot = emit->qstr_link_cur++;
    if (emit->pass == MP_PASS_EMIT) {
        mp_qstr_link_entry_t *entry = &emit->qstr_link[slot];
        entry->off = (imm_loc << 2) | 2;
        entry->qst = qst;
    }
    #else
    ASM_MOV_REG_IMM(emit->as, reg_dest, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
    #endif
}
// Store the immediate value `imm` into state slot `local_num`.
// The value is first loaded into `reg_temp` (which is therefore overwritten)
// and then stored with emit_native_mov_state_reg.
#define emit_native_mov_state_imm_via(emit, local_num, imm, reg_temp) \
do { \
ASM_MOV_REG_IMM((emit)->as, (reg_temp), (imm)); \
emit_native_mov_state_reg((emit), (local_num), (reg_temp)); \
} while (false)
// Begin a compilation pass over `scope`.
//
// Resets the per-pass emitter state (Python stack size, const-table cursors,
// qstr-link cursor, types of locals and stack entries), starts the assembler
// pass and emits the function entry sequence.  One of three entry sequences
// is generated:
//  - viper functions: the positional arguments are checked and unpacked
//    directly from the args array into locals, converting to native types
//    where the local's declared type is not a Python object;
//  - generators: the code-state pointer passed in is kept in
//    REG_GENERATOR_STATE and the thrown value is stored for yield/yield-from;
//  - all other native functions: a code_state structure is reserved on the
//    C stack and filled in by calling mp_setup_code_state.
// On the final pass (MP_PASS_EMIT) of a non-viper function the argument names
// are also written to the start of the const table as qstr objects.
STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
    DEBUG_printf("start_pass(pass=%u, scope=%p)\n", pass, scope);

    emit->pass = pass;
    emit->do_viper_types = scope->emit_options == MP_EMIT_OPT_VIPER;
    emit->stack_size = 0;
    emit->const_table_cur_obj = 0;
    emit->const_table_cur_raw_code = 0;
    #if MICROPY_PERSISTENT_CODE_SAVE
    emit->qstr_link_cur = 0;
    #endif
    emit->last_emit_was_return_value = false;
    emit->scope = scope;

    // allocate memory for keeping track of the types of locals
    if (emit->local_vtype_alloc < scope->num_locals) {
        emit->local_vtype = m_renew(vtype_kind_t, emit->local_vtype, emit->local_vtype_alloc, scope->num_locals);
        emit->local_vtype_alloc = scope->num_locals;
    }

    // set default type for arguments
    mp_uint_t num_args = emit->scope->num_pos_args + emit->scope->num_kwonly_args;
    if (scope->scope_flags & MP_SCOPE_FLAG_VARARGS) {
        num_args += 1;
    }
    if (scope->scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) {
        num_args += 1;
    }
    for (mp_uint_t i = 0; i < num_args; i++) {
        emit->local_vtype[i] = VTYPE_PYOBJ;
    }

    // Set viper type for arguments
    if (emit->do_viper_types) {
        for (int i = 0; i < emit->scope->id_info_len; ++i) {
            id_info_t *id = &emit->scope->id_info[i];
            if (id->flags & ID_FLAG_IS_PARAM) {
                assert(id->local_num < emit->local_vtype_alloc);
                emit->local_vtype[id->local_num] = id->flags >> ID_FLAG_VIPER_TYPE_POS;
            }
        }
    }

    // local variables begin unbound, and have unknown type
    for (mp_uint_t i = num_args; i < emit->local_vtype_alloc; i++) {
        emit->local_vtype[i] = VTYPE_UNBOUND;
    }

    // values on stack begin unbound
    for (mp_uint_t i = 0; i < emit->stack_info_alloc; i++) {
        emit->stack_info[i].kind = STACK_VALUE;
        emit->stack_info[i].vtype = VTYPE_UNBOUND;
    }

    mp_asm_base_start_pass(&emit->as->base, pass == MP_PASS_EMIT ? MP_ASM_PASS_EMIT : MP_ASM_PASS_COMPUTE);

    // generate code for entry to function

    // Work out start of code state (mp_code_state_t or reduced version for viper)
    emit->code_state_start = 0;
    if (NEED_GLOBAL_EXC_HANDLER(emit)) {
        // the nlr_buf_t comes first, so the code state starts after it (in words)
        emit->code_state_start = sizeof(nlr_buf_t) / sizeof(uintptr_t);
    }

    if (emit->do_viper_types) {
        // Work out size of state (locals plus stack)
        // n_state counts all stack and locals, even those in registers
        emit->n_state = scope->num_locals + scope->stack_size;
        int num_locals_in_regs = 0;
        if (CAN_USE_REGS_FOR_LOCALS(emit)) {
            num_locals_in_regs = scope->num_locals;
            if (num_locals_in_regs > REG_LOCAL_NUM) {
                num_locals_in_regs = REG_LOCAL_NUM;
            }
            // Need a spot for REG_LOCAL_3 if 4 or more args (see below)
            if (scope->num_pos_args >= 4) {
                --num_locals_in_regs;
            }
        }

        // Work out where the locals and Python stack start within the C stack
        if (NEED_GLOBAL_EXC_HANDLER(emit)) {
            // Reserve 2 words for function object and old globals
            emit->stack_start = emit->code_state_start + 2;
        } else if (scope->scope_flags & MP_SCOPE_FLAG_HASCONSTS) {
            // Reserve 1 word for function object, to access const table
            emit->stack_start = emit->code_state_start + 1;
        } else {
            emit->stack_start = emit->code_state_start + 0;
        }

        // Entry to function
        ASM_ENTRY(emit->as, emit->stack_start + emit->n_state - num_locals_in_regs);

        #if N_X86
        asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
        #endif

        // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
        ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
        ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, 0);

        // Store function object (passed as first arg) to stack if needed
        if (NEED_FUN_OBJ(emit)) {
            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1);
        }

        // Put n_args in REG_ARG_1, n_kw in REG_ARG_2, args array in REG_LOCAL_3
        #if N_X86
        asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_1);
        asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_2);
        asm_x86_mov_arg_to_r32(emit->as, 3, REG_LOCAL_3);
        #else
        ASM_MOV_REG_REG(emit->as, REG_ARG_1, REG_ARG_2);
        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_3);
        ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_4);
        #endif

        // Check number of args matches this function, and call mp_arg_check_num_sig if not
        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_ARG_2, *emit->label_slot + 4, true);
        ASM_MOV_REG_IMM(emit->as, REG_ARG_3, scope->num_pos_args);
        ASM_JUMP_IF_REG_EQ(emit->as, REG_ARG_1, REG_ARG_3, *emit->label_slot + 5);
        mp_asm_base_label_assign(&emit->as->base, *emit->label_slot + 4);
        ASM_MOV_REG_IMM(emit->as, REG_ARG_3, MP_OBJ_FUN_MAKE_SIG(scope->num_pos_args, scope->num_pos_args, false));
        ASM_CALL_IND(emit->as, MP_F_ARG_CHECK_NUM_SIG);
        mp_asm_base_label_assign(&emit->as->base, *emit->label_slot + 5);

        // Store arguments into locals (reg or stack), converting to native if needed
        for (int i = 0; i < emit->scope->num_pos_args; i++) {
            int r = REG_ARG_1;
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_1, REG_LOCAL_3, i);
            if (emit->local_vtype[i] != VTYPE_PYOBJ) {
                emit_call_with_imm_arg(emit, MP_F_CONVERT_OBJ_TO_NATIVE, emit->local_vtype[i], REG_ARG_2);
                r = REG_RET;
            }
            // REG_LOCAL_3 points to the args array so be sure not to overwrite it if it's still needed
            if (i < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit) && (i != 2 || emit->scope->num_pos_args == 3)) {
                ASM_MOV_REG_REG(emit->as, reg_local_table[i], r);
            } else {
                emit_native_mov_state_reg(emit, LOCAL_IDX_LOCAL_VAR(emit, i), r);
            }
        }
        // Get 3rd local from the stack back into REG_LOCAL_3 if this reg couldn't be written to above
        if (emit->scope->num_pos_args >= 4 && CAN_USE_REGS_FOR_LOCALS(emit)) {
            ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_3, LOCAL_IDX_LOCAL_VAR(emit, 2));
        }

        emit_native_global_exc_entry(emit);

    } else {
        // work out size of state (locals plus stack)
        emit->n_state = scope->num_locals + scope->stack_size;

        if (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) {
            emit->code_state_start = 0;
            emit->stack_start = sizeof(mp_code_state_t) / sizeof(mp_uint_t);
            // two data words (prelude offset and start offset) precede the entry code
            mp_asm_base_data(&emit->as->base, ASM_WORD_SIZE, (uintptr_t)emit->prelude_offset);
            mp_asm_base_data(&emit->as->base, ASM_WORD_SIZE, (uintptr_t)emit->start_offset);
            ASM_ENTRY(emit->as, sizeof(nlr_buf_t) / sizeof(uintptr_t));

            // Put address of code_state into REG_GENERATOR_STATE
            #if N_X86
            asm_x86_mov_arg_to_r32(emit->as, 0, REG_GENERATOR_STATE);
            #else
            ASM_MOV_REG_REG(emit->as, REG_GENERATOR_STATE, REG_ARG_1);
            #endif

            // Put throw value into LOCAL_IDX_EXC_VAL slot, for yield/yield-from
            #if N_X86
            asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
            #endif
            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_ARG_2);

            // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_GENERATOR_STATE, LOCAL_IDX_FUN_OBJ(emit));
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_TEMP0, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_TEMP0, emit->scope->num_pos_args + emit->scope->num_kwonly_args);
        } else {
            // The locals and stack start after the code_state structure
            emit->stack_start = emit->code_state_start + sizeof(mp_code_state_t) / sizeof(mp_uint_t);

            // Allocate space on C-stack for code_state structure, which includes state
            ASM_ENTRY(emit->as, emit->stack_start + emit->n_state);

            // Prepare incoming arguments for call to mp_setup_code_state

            #if N_X86
            asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
            asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
            asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_3);
            asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4);
            #endif

            // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, emit->scope->num_pos_args + emit->scope->num_kwonly_args);

            // Set code_state.fun_bc
            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1);

            // Set code_state.ip (offset from start of this function to prelude info)
            // TODO this encoding may change size in the final pass, need to make it fixed
            emit_native_mov_state_imm_via(emit, emit->code_state_start + offsetof(mp_code_state_t, ip) / sizeof(uintptr_t), emit->prelude_offset, REG_ARG_1);

            // Put address of code_state into first arg
            ASM_MOV_REG_LOCAL_ADDR(emit->as, REG_ARG_1, emit->code_state_start);

            // Call mp_setup_code_state to prepare code_state structure
            #if N_THUMB
            asm_thumb_bl_ind(emit->as, MP_F_SETUP_CODE_STATE, ASM_THUMB_REG_R4);
            #elif N_ARM
            asm_arm_bl_ind(emit->as, MP_F_SETUP_CODE_STATE, ASM_ARM_REG_R4);
            #else
            ASM_CALL_IND(emit->as, MP_F_SETUP_CODE_STATE);
            #endif
        }

        emit_native_global_exc_entry(emit);

        // cache some locals in registers, but only if no exception handlers
        if (CAN_USE_REGS_FOR_LOCALS(emit)) {
            for (int i = 0; i < REG_LOCAL_NUM && i < scope->num_locals; ++i) {
                ASM_MOV_REG_LOCAL(emit->as, reg_local_table[i], LOCAL_IDX_LOCAL_VAR(emit, i));
            }
        }

        // set the type of closed over variables
        for (mp_uint_t i = 0; i < scope->id_info_len; i++) {
            id_info_t *id = &scope->id_info[i];
            if (id->kind == ID_INFO_KIND_CELL) {
                emit->local_vtype[id->local_num] = VTYPE_PYOBJ;
            }
        }

        if (pass == MP_PASS_EMIT) {
            // write argument names as qstr objects
            // see comment in corresponding part of emitbc.c about the logic here
            for (int i = 0; i < scope->num_pos_args + scope->num_kwonly_args; i++) {
                // default name, used when no parameter has local_num == i
                qstr qst = MP_QSTR__star_;
                for (int j = 0; j < scope->id_info_len; ++j) {
                    id_info_t *id = &scope->id_info[j];
                    if ((id->flags & ID_FLAG_IS_PARAM) && id->local_num == i) {
                        qst = id->qst;
                        break;
                    }
                }
                emit->const_table[i] = (mp_uint_t)MP_OBJ_NEW_QSTR(qst);
            }
        }
    }
}
// Finish the current compilation pass.
//
// Emits the global exception-handler exit code and, for non-viper functions,
// appends the prelude data after the machine code:
//   - n_state as two bytes of 7 bits each (the first has bit 7 set),
//   - a zero n_exc_stack byte,
//   - scope_flags, num_pos_args, num_kwonly_args, num_def_pos_args,
//   - the code-info section (its size byte, plus the low and high bytes of
//     simple_name and source_file when MICROPY_PERSISTENT_CODE is enabled),
//   - the list of locals to be converted to cells, terminated by 255.
// It then ends the assembler pass and checks that the Python stack and
// exception stack are empty.  In MP_PASS_CODE_SIZE the const table (and,
// with MICROPY_PERSISTENT_CODE_SAVE, the qstr link table) is allocated; in
// MP_PASS_EMIT the generated code is handed over to
// mp_emit_glue_assign_native.
STATIC void emit_native_end_pass(emit_t *emit) {
    emit_native_global_exc_exit(emit);

    if (!emit->do_viper_types) {
        emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
        mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
        mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
        mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
        mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->num_def_pos_args);

        // write code info
        #if MICROPY_PERSISTENT_CODE
        mp_asm_base_data(&emit->as->base, 1, 5);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name >> 8);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->source_file);
        mp_asm_base_data(&emit->as->base, 1, emit->scope->source_file >> 8);
        #else
        mp_asm_base_data(&emit->as->base, 1, 1);
        #endif

        // bytecode prelude: initialise closed over variables
        for (int i = 0; i < emit->scope->id_info_len; i++) {
            id_info_t *id = &emit->scope->id_info[i];
            if (id->kind == ID_INFO_KIND_CELL) {
                // 255 is reserved as the end-of-list sentinel written below
                assert(id->local_num < 255);
                mp_asm_base_data(&emit->as->base, 1, id->local_num); // write the local which should be converted to a cell
            }
        }
        mp_asm_base_data(&emit->as->base, 1, 255); // end of list sentinel
    }

    ASM_END_PASS(emit->as);

    // check stack is back to zero size
    assert(emit->stack_size == 0);
    assert(emit->exc_stack_size == 0);

    // Deal with const table accounting
    assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->const_table_num_obj == emit->const_table_cur_obj));
    emit->const_table_num_obj = emit->const_table_cur_obj;
    if (emit->pass == MP_PASS_CODE_SIZE) {
        // one extra entry is needed for the mp_fun_table pointer
        size_t const_table_alloc = 1 + emit->const_table_num_obj + emit->const_table_cur_raw_code;
        size_t nqstr = 0;
        if (!emit->do_viper_types) {
            // Add room for qstr names of arguments
            nqstr = emit->scope->num_pos_args + emit->scope->num_kwonly_args;
            const_table_alloc += nqstr;
        }
        emit->const_table = m_new(mp_uint_t, const_table_alloc);
        // Store mp_fun_table pointer just after qstrs
        emit->const_table[nqstr] = (mp_uint_t)(uintptr_t)mp_fun_table;

        #if MICROPY_PERSISTENT_CODE_SAVE
        size_t qstr_link_alloc = emit->qstr_link_cur;
        if (qstr_link_alloc > 0) {
            emit->qstr_link = m_new(mp_qstr_link_entry_t, qstr_link_alloc);
        }
        #endif
    }

    if (emit->pass == MP_PASS_EMIT) {
        void *f = mp_asm_base_get_code(&emit->as->base);
        mp_uint_t f_len = mp_asm_base_get_code_size(&emit->as->base);

        mp_emit_glue_assign_native(emit->scope->raw_code,
            emit->do_viper_types ? MP_CODE_NATIVE_VIPER : MP_CODE_NATIVE_PY,
            f, f_len, emit->const_table,
            #if MICROPY_PERSISTENT_CODE_SAVE
            emit->prelude_offset,
            emit->const_table_cur_obj, emit->const_table_cur_raw_code,
            emit->qstr_link_cur, emit->qstr_link,
            #endif
            emit->scope->num_pos_args, emit->scope->scope_flags, 0);
    }
}
// Report the emitter's last_emit_was_return_value flag.
STATIC bool emit_native_last_emit_was_return_value(emit_t *emit) {
    const bool was_return_value = emit->last_emit_was_return_value;
    return was_return_value;
}
// Make sure the stack-info table has room for `delta` more entries above the
// current stack size, growing it if necessary.  The new capacity is the
// required size plus 8, with the low two bits cleared.
STATIC void ensure_extra_stack(emit_t *emit, size_t delta) {
    const size_t required = emit->stack_size + delta;
    if (required <= emit->stack_info_alloc) {
        // already large enough
        return;
    }
    const size_t grown_alloc = (required + 8) & ~3;
    emit->stack_info = m_renew(stack_info_t, emit->stack_info, emit->stack_info_alloc, grown_alloc);
    emit->stack_info_alloc = grown_alloc;
}
// Change the tracked Python stack size by stack_size_delta (may be negative).
// The resulting size must lie within [0, stack_info_alloc].  After the scope
// pass the scope's recorded maximum stack size is raised when exceeded.
STATIC void adjust_stack(emit_t *emit, mp_int_t stack_size_delta) {
    assert((mp_int_t)emit->stack_size + stack_size_delta >= 0);
    assert((mp_int_t)emit->stack_size + stack_size_delta <= (mp_int_t)emit->stack_info_alloc);

    emit->stack_size += stack_size_delta;

    if (emit->pass > MP_PASS_SCOPE) {
        // track the high-water mark of the stack for this scope
        if (emit->stack_size > emit->scope->stack_size) {
            emit->scope->stack_size = emit->stack_size;
        }
    }

    #ifdef DEBUG_PRINT
    DEBUG_printf(" adjust_stack; stack_size=%d+%d; stack now:", emit->stack_size - stack_size_delta, stack_size_delta);
    for (int slot = 0; slot < emit->stack_size; slot++) {
        stack_info_t *entry = &emit->stack_info[slot];
        DEBUG_printf(" (v=%d k=%d %d)", entry->vtype, entry->kind, entry->data.u_reg);
    }
    DEBUG_printf("\n");
    #endif
}
// Adjust the tracked Python stack size by `delta` entries.
//
// When pushing (delta > 0) the stack-info table is grown if needed and the
// new entries are marked as STACK_VALUE (ie not reg or imm): such an
// adjustment should only be needed because of a jump to this point in the
// code, and the stack is settled before a jump.
STATIC void emit_native_adjust_stack_size(emit_t *emit, mp_int_t delta) {
    DEBUG_printf("adjust_stack_size(" INT_FMT ")\n", delta);

    if (delta > 0) {
        ensure_extra_stack(emit, delta);

        // TODO we don't know the vtype to use here.  At the moment this is a
        // hack to get the case of multi comparison working: a single pushed
        // entry takes the saved vtype, otherwise entries are Python objects.
        const vtype_kind_t fill_vtype = (delta == 1) ? emit->saved_stack_vtype : VTYPE_PYOBJ;

        stack_info_t *slot = &emit->stack_info[emit->stack_size];
        for (mp_int_t remaining = delta; remaining > 0; --remaining, ++slot) {
            slot->kind = STACK_VALUE;
            slot->vtype = fill_vtype;
        }
    }

    adjust_stack(emit, delta);
}
// Source-line information is not used by this emitter; both arguments are
// ignored.
STATIC void emit_native_set_source_line(emit_t *emit, mp_uint_t source_line) {
    (void)source_line;
    (void)emit;
}
// This must be called at the start of emit functions.
// It clears last_emit_was_return_value, so the flag is false unless an emit
// function sets it again afterwards.
STATIC void emit_native_pre(emit_t *emit) {
emit->last_emit_was_return_value = false;
}
// Return a pointer to the stack-info entry `depth` entries below the top of
// the Python stack: depth==0 is the top, depth==1 is the one before, etc.
// No bounds checking is done.
STATIC stack_info_t *peek_stack(emit_t *emit, mp_uint_t depth) {
    return emit->stack_info + (emit->stack_size - 1 - depth);
}
// Return the type of the stack entry `depth` entries below the top
// (depth==0 is the top, depth==1 is the one before, etc).
STATIC vtype_kind_t peek_vtype(emit_t *emit, mp_uint_t depth) {
    if (!emit->do_viper_types) {
        // Outside viper mode the type is always PYOBJ, even if the
        // intermediate stored value is not
        return VTYPE_PYOBJ;
    }
    return peek_stack(emit, depth)->vtype;
}
// Free up register reg_needed: every stack entry currently held in that
// register is stored to its state slot and re-marked as STACK_VALUE.
// skip_stack_pos names one entry to leave untouched (1 is TOS, 2 is the
// next, etc); pass 0 to skip nothing.
STATIC void need_reg_single(emit_t *emit, int reg_needed, int skip_stack_pos) {
    const int skip_index = emit->stack_size - skip_stack_pos;
    for (int slot = 0; slot < emit->stack_size; slot++) {
        if (slot == skip_index) {
            continue;
        }
        stack_info_t *entry = &emit->stack_info[slot];
        if (entry->kind != STACK_REG || entry->data.u_reg != reg_needed) {
            continue;
        }
        entry->kind = STACK_VALUE;
        emit_native_mov_state_reg(emit, emit->stack_start + slot, entry->data.u_reg);
    }
}
// Free up all registers: every stack entry held in a register is stored to
// its state slot and re-marked as STACK_VALUE.  Immediates are left alone.
STATIC void need_reg_all(emit_t *emit) {
    for (int slot = 0; slot < emit->stack_size; slot++) {
        stack_info_t *entry = &emit->stack_info[slot];
        if (entry->kind != STACK_REG) {
            continue;
        }
        entry->kind = STACK_VALUE;
        emit_native_mov_state_reg(emit, emit->stack_start + slot, entry->data.u_reg);
    }
}
// Emit code to load the immediate described by `si` into reg_dest and return
// the type of the value now held in the register.
//
// In viper mode with convert_to_pyobj false the raw immediate is loaded and
// its own vtype is returned.  Otherwise the immediate is turned into a Python
// object (bool, small int or None; PYOBJ immediates are loaded as they are)
// and VTYPE_PYOBJ is returned.  Any other vtype raises NotImplementedError.
STATIC vtype_kind_t load_reg_stack_imm(emit_t *emit, int reg_dest, const stack_info_t *si, bool convert_to_pyobj) {
    if (convert_to_pyobj || !emit->do_viper_types) {
        switch (si->vtype) {
            case VTYPE_PYOBJ:
                ASM_MOV_REG_IMM(emit->as, reg_dest, si->data.u_imm);
                break;
            case VTYPE_BOOL:
                // u_imm selects the entry relative to the false object
                emit_native_mov_reg_const(emit, reg_dest, MP_F_CONST_FALSE_OBJ + si->data.u_imm);
                break;
            case VTYPE_INT:
            case VTYPE_UINT:
                ASM_MOV_REG_IMM(emit->as, reg_dest, (uintptr_t)MP_OBJ_NEW_SMALL_INT(si->data.u_imm));
                break;
            case VTYPE_PTR_NONE:
                emit_native_mov_reg_const(emit, reg_dest, MP_F_CONST_NONE_OBJ);
                break;
            default:
                mp_raise_NotImplementedError("conversion to object");
                break;
        }
        return VTYPE_PYOBJ;
    }

    ASM_MOV_REG_IMM(emit->as, reg_dest, si->data.u_imm);
    return si->vtype;
}
// Settle the Python stack: afterwards every entry is a STACK_VALUE stored in
// its state slot.  Register-held entries are stored first; immediates are
// then loaded via REG_TEMP0 (without conversion to a Python object) and
// stored in a second sweep.
STATIC void need_stack_settled(emit_t *emit) {
    DEBUG_printf(" need_stack_settled; stack_size=%d\n", emit->stack_size);

    // first sweep: entries living in registers
    for (int slot = 0; slot < emit->stack_size; slot++) {
        stack_info_t *entry = &emit->stack_info[slot];
        if (entry->kind != STACK_REG) {
            continue;
        }
        DEBUG_printf(" reg(%u) to local(%u)\n", entry->data.u_reg, emit->stack_start + slot);
        entry->kind = STACK_VALUE;
        emit_native_mov_state_reg(emit, emit->stack_start + slot, entry->data.u_reg);
    }

    // second sweep: entries that are still immediates
    for (int slot = 0; slot < emit->stack_size; slot++) {
        stack_info_t *entry = &emit->stack_info[slot];
        if (entry->kind != STACK_IMM) {
            continue;
        }
        DEBUG_printf(" imm(" INT_FMT ") to local(%u)\n", entry->data.u_imm, emit->stack_start + slot);
        entry->kind = STACK_VALUE;
        entry->vtype = load_reg_stack_imm(emit, REG_TEMP0, entry, false);
        emit_native_mov_state_reg(emit, emit->stack_start + slot, REG_TEMP0);
    }
}
// Read the stack entry at depth `pos` into reg_dest without popping it, and
// report its type through *vtype.
// pos=1 is TOS, pos=2 is next, etc
STATIC void emit_access_stack(emit_t *emit, int pos, vtype_kind_t *vtype, int reg_dest) {
    // Make reg_dest available before it gets overwritten
    need_reg_single(emit, reg_dest, pos);

    stack_info_t *entry = &emit->stack_info[emit->stack_size - pos];
    *vtype = entry->vtype;

    if (entry->kind == STACK_VALUE) {
        // Value lives in its local slot
        emit_native_mov_reg_state(emit, reg_dest, emit->stack_start + emit->stack_size - pos);
    } else if (entry->kind == STACK_REG) {
        // Value lives in a register; copy only if it is a different one
        if (entry->data.u_reg != reg_dest) {
            ASM_MOV_REG_REG(emit->as, reg_dest, entry->data.u_reg);
        }
    } else if (entry->kind == STACK_IMM) {
        // Immediate: loading it may also change the reported type
        *vtype = load_reg_stack_imm(emit, reg_dest, entry, false);
    }
}
// Does an efficient X=pop(); discard(); push(X).
// Needs a (non-temp) register in case the popped element was stored in the stack.
STATIC void emit_fold_stack_top(emit_t *emit, int reg_dest) {
    stack_info_t *below = &emit->stack_info[emit->stack_size - 2];
    const stack_info_t *top = below + 1;

    // Overwrite the second-from-top entry with the top entry
    *below = *top;

    if (below->kind == STACK_VALUE) {
        // The folded element was in the top slot, which is about to go away,
        // so bring it into reg_dest and track it there instead
        emit_native_mov_reg_state(emit, reg_dest, emit->stack_start + emit->stack_size - 1);
        below->kind = STACK_REG;
        below->data.u_reg = reg_dest;
    }

    adjust_stack(emit, -1);
}
// Pop the top of stack into a register, reusing the register it already
// occupies where possible.
// If the stacked value is in a register and that register is neither not_r1
// nor not_r2, then *reg_dest is set to that register.  Otherwise the value is
// put in the register that *reg_dest names on entry.
STATIC void emit_pre_pop_reg_flexible(emit_t *emit, vtype_kind_t *vtype, int *reg_dest, int not_r1, int not_r2) {
    emit->last_emit_was_return_value = false;

    stack_info_t *top = peek_stack(emit, 0);
    bool can_reuse = top->kind == STACK_REG
        && top->data.u_reg != not_r1
        && top->data.u_reg != not_r2;

    if (!can_reuse) {
        emit_access_stack(emit, 1, vtype, *reg_dest);
    } else {
        *vtype = top->vtype;
        *reg_dest = top->data.u_reg;
        need_reg_single(emit, *reg_dest, 1);
    }

    adjust_stack(emit, -1);
}
// Drop the top stack entry without reading it: clears the
// last_emit_was_return_value flag and adjusts the tracked stack by -1.
STATIC void emit_pre_pop_discard(emit_t *emit) {
    emit->last_emit_was_return_value = false;
    adjust_stack(emit, -1);
}
// Pop the top stack entry into reg_dest, reporting its type through *vtype.
// Also clears the last_emit_was_return_value flag.
STATIC void emit_pre_pop_reg(emit_t *emit, vtype_kind_t *vtype, int reg_dest) {
    emit->last_emit_was_return_value = false;

    // Load TOS (pos=1) first, then shrink the tracked stack
    emit_access_stack(emit, 1, vtype, reg_dest);
    adjust_stack(emit, -1);
}
// Pop two entries: the top of stack goes to rega (type in *vtypea), the next
// one goes to regb (type in *vtypeb).
STATIC void emit_pre_pop_reg_reg(emit_t *emit, vtype_kind_t *vtypea, int rega, vtype_kind_t *vtypeb, int regb) {
    emit_pre_pop_reg(emit, vtypea, rega);
    emit_pre_pop_reg(emit, vtypeb, regb);
}
// Pop three entries in order: top of stack to rega, then regb, then regc,
// with each entry's type reported through the matching vtype pointer.
STATIC void emit_pre_pop_reg_reg_reg(emit_t *emit, vtype_kind_t *vtypea, int rega, vtype_kind_t *vtypeb, int regb, vtype_kind_t *vtypec, int regc) {
    // First two pops, in a/b order
    emit_pre_pop_reg_reg(emit, vtypea, rega, vtypeb, regb);
    // Third pop
    emit_pre_pop_reg(emit, vtypec, regc);
}
// Hook invoked after an operation has been emitted.  It currently does
// nothing; the parameter is explicitly marked as unused.
STATIC void emit_post(emit_t *emit) {
    (void)emit;
}
// Overwrite the recorded type of the current top-of-stack entry.
STATIC void emit_post_top_set_vtype(emit_t *emit, vtype_kind_t new_vtype) {
    emit->stack_info[emit->stack_size - 1].vtype = new_vtype;
}
// Push a new stack entry of type `vtype` whose value is held in register `reg`.
STATIC void emit_post_push_reg(emit_t *emit, vtype_kind_t vtype, int reg) {
    ensure_extra_stack(emit, 1);

    // Fill in the slot just above the current top, then grow the stack
    stack_info_t *slot = &emit->stack_info[emit->stack_size];
    slot->kind = STACK_REG;
    slot->vtype = vtype;
    slot->data.u_reg = reg;

    adjust_stack(emit, 1);
}
// Push a new stack entry of type `vtype` whose value is the immediate `imm`.
STATIC void emit_post_push_imm(emit_t *emit, vtype_kind_t vtype, mp_int_t imm) {
    ensure_extra_stack(emit, 1);

    // Fill in the slot just above the current top, then grow the stack
    stack_info_t *slot = &emit->stack_info[emit->stack_size];
    slot->kind = STACK_IMM;
    slot->vtype = vtype;
    slot->data.u_imm = imm;

    adjust_stack(emit, 1);
}
// Push two register-held entries: (vtypea, rega) first, then (vtypeb, regb),
// so the second one ends up on top.
STATIC void emit_post_push_reg_reg(emit_t *emit, vtype_kind_t vtypea, int rega, vtype_kind_t vtypeb, int regb) {
    emit_post_push_reg(emit, vtypea, rega);
    emit_post_push_reg(emit, vtypeb, regb);
}
// Push three register-held entries in a, b, c order (c ends up on top).
STATIC void emit_post_push_reg_reg_reg(emit_t *emit, vtype_kind_t vtypea, int rega, vtype_kind_t vtypeb, int regb, vtype_kind_t vtypec, int regc) {
    // First two pushes, in a/b order
    emit_post_push_reg_reg(emit, vtypea, rega, vtypeb, regb);
    // Third push
    emit_post_push_reg(emit, vtypec, regc);
}
// Push four register-held entries in a, b, c, d order (d ends up on top).
STATIC void emit_post_push_reg_reg_reg_reg(emit_t *emit, vtype_kind_t vtypea, int rega, vtype_kind_t vtypeb, int regb, vtype_kind_t vtypec, int regc, vtype_kind_t vtyped, int regd) {
    // Pushed as two consecutive pairs, preserving the a, b, c, d order
    emit_post_push_reg_reg(emit, vtypea, rega, vtypeb, regb);
    emit_post_push_reg_reg(emit, vtypec, regc, vtyped, regd);
}
// Emit a call to the function selected by fun_kind.  Register-held stack
// entries are stored to their local slots first (need_reg_all).
STATIC void emit_call(emit_t *emit, mp_fun_kind_t fun_kind) {
    need_reg_all(emit);
    ASM_CALL_IND(emit->as, fun_kind);
}
// Emit a call to the function selected by fun_kind, after loading the
// immediate arg_val into arg_reg.  Register-held stack entries are stored to
// their local slots before arg_reg is written.
STATIC void emit_call_with_imm_arg(emit_t *emit, mp_fun_kind_t fun_kind, mp_int_t arg_val, int arg_reg) {
    need_reg_all(emit);
    ASM_MOV_REG_IMM(emit->as, arg_reg, arg_val);
    ASM_CALL_IND(emit->as, fun_kind);
}
// Emit a call to the function selected by fun_kind, after loading two
// immediates: arg_val1 into arg_reg1, then arg_val2 into arg_reg2.
// Register-held stack entries are stored to their local slots first.
STATIC void emit_call_with_2_imm_args(emit_t *emit, mp_fun_kind_t fun_kind, mp_int_t arg_val1, int arg_reg1, mp_int_t arg_val2, int arg_reg2) {
    need_reg_all(emit);
    ASM_MOV_REG_IMM(emit->as, arg_reg1, arg_val1);
    ASM_MOV_REG_IMM(emit->as, arg_reg2, arg_val2);
    ASM_CALL_IND(emit->as, fun_kind);
}
// Emit a call to the function selected by fun_kind, after loading the qstr
// `qst` into arg_reg.  Register-held stack entries are stored to their local
// slots first.
STATIC void emit_call_with_qstr_arg(emit_t *emit, mp_fun_kind_t fun_kind, qstr qst, int arg_reg) {
    need_reg_all(emit);
    emit_native_mov_reg_qstr(emit, arg_reg, qst);
    ASM_CALL_IND(emit->as, fun_kind);
}
// Pop n_pop items from the tracked stack and load into reg_dest the address
// of the lowest popped slot, so the popped items can be read as an array.
//
// vtype of all n_pop objects is VTYPE_PYOBJ.
// Will convert any items that are not VTYPE_PYOBJ to this type and put them back on the stack.
// If any conversions of non-immediate values are needed, then it uses REG_ARG_1, REG_ARG_2 and REG_RET.
// Otherwise, it does not use any temporary registers (but may use reg_dest before loading it with stack pointer).
STATIC void emit_get_stack_pointer_to_reg_for_pop(emit_t *emit, mp_uint_t reg_dest, mp_uint_t n_pop) {
    need_reg_all(emit);

    // First, store any immediate values to their respective place on the stack.
    for (mp_uint_t i = 0; i < n_pop; i++) {
        stack_info_t *si = &emit->stack_info[emit->stack_size - 1 - i];
        // must push any imm's to stack
        // must convert them to VTYPE_PYOBJ for viper code
        if (si->kind == STACK_IMM) {
            si->kind = STACK_VALUE;
            // reg_dest is free to use as scratch here; it is only loaded with
            // the stack pointer at the very end
            si->vtype = load_reg_stack_imm(emit, reg_dest, si, true);
            emit_native_mov_state_reg(emit, emit->stack_start + emit->stack_size - 1 - i, reg_dest);
        }

        // verify that this value is on the stack
        assert(si->kind == STACK_VALUE);
    }

    // Second, convert any non-VTYPE_PYOBJ to that type.
    for (mp_uint_t i = 0; i < n_pop; i++) {
        stack_info_t *si = &emit->stack_info[emit->stack_size - 1 - i];
        if (si->vtype != VTYPE_PYOBJ) {
            mp_uint_t local_num = emit->stack_start + emit->stack_size - 1 - i;
            emit_native_mov_reg_state(emit, REG_ARG_1, local_num);
            emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, si->vtype, REG_ARG_2); // arg2 = type
            emit_native_mov_state_reg(emit, local_num, REG_RET);
            si->vtype = VTYPE_PYOBJ;
            DEBUG_printf(" convert_native_to_obj(local_num=" UINT_FMT ")\n", local_num);
        }
    }

    // Adjust the stack for a pop of n_pop items, and load the stack pointer into reg_dest.
    // Negate as a signed value: unary minus on the unsigned n_pop would wrap
    // and then depend on an implementation-defined conversion back to signed.
    adjust_stack(emit, -(mp_int_t)n_pop);
    emit_native_mov_reg_state_addr(emit, reg_dest, emit->stack_start + emit->stack_size);
}
// Reserve n_push new stack entries and load into reg_dest the address of the
// first reserved slot.
// vtype of all n_push objects is VTYPE_PYOBJ
STATIC void emit_get_stack_pointer_to_reg_for_push(emit_t *emit, mp_uint_t reg_dest, mp_uint_t n_push) {
    need_reg_all(emit);
    ensure_extra_stack(emit, n_push);

    // Mark the new slots as in-memory Python objects
    stack_info_t *first_new = &emit->stack_info[emit->stack_size];
    for (mp_uint_t k = 0; k < n_push; ++k) {
        first_new[k].kind = STACK_VALUE;
        first_new[k].vtype = VTYPE_PYOBJ;
    }

    // Take the address before the stack size is bumped, so it points at the
    // first of the new slots
    emit_native_mov_reg_state_addr(emit, reg_dest, emit->stack_start + emit->stack_size);
    adjust_stack(emit, n_push);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Push a new, active entry onto the emitter's exception-handler stack and
// emit code that records the handler's PC.
//   label      - label of the handler
//   is_finally - stored in the entry's is_finally flag
STATIC void emit_native_push_exc_stack(emit_t *emit, uint label, bool is_finally) {
    // Grow the entry array in steps of 4 when it is full
    if (emit->exc_stack_size + 1 > emit->exc_stack_alloc) {
        size_t new_alloc = emit->exc_stack_alloc + 4;
        emit->exc_stack = m_renew(exc_stack_entry_t, emit->exc_stack, emit->exc_stack_alloc, new_alloc);
        emit->exc_stack_alloc = new_alloc;
    }

    exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size++];
    e->label = label;
    e->is_finally = is_finally;
    e->unwind_label = UNWIND_LABEL_UNUSED;
    e->is_active = true;

    // Store the PC of the new handler into the handler-PC local
    ASM_MOV_REG_PCREL(emit->as, REG_RET, label);
    ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
}
// Deactivate the innermost exception-handler entry (without removing it from
// the stack) and emit code that makes the next active enclosing handler, if
// any, the current one.
//   start_of_handler - when true and no active handler remains, no code is
//                      emitted because the handler PC is already cleared.
STATIC void emit_native_leave_exc_stack(emit_t *emit, bool start_of_handler) {
    assert(emit->exc_stack_size > 0);

    // Get current exception handler and deactivate it
    size_t n_below = emit->exc_stack_size - 1;
    emit->exc_stack[n_below].is_active = false;

    // Find next innermost active exception handler, to restore as current handler.
    // n_below counts the candidate entries still to be examined; indexing is
    // used so that no pointer is ever formed before the start of the array.
    while (n_below > 0 && !emit->exc_stack[n_below - 1].is_active) {
        --n_below;
    }

    // Update the PC of the new exception handler
    if (n_below == 0) {
        // No active handler, clear handler PC to zero
        if (start_of_handler) {
            // Optimisation: PC is already cleared by global exc handler
            return;
        }
        ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
    } else {
        // Found new active handler, get its PC
        ASM_MOV_REG_PCREL(emit->as, REG_RET, emit->exc_stack[n_below - 1].label);
    }
    ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
}
// Remove the innermost entry from the exception-handler stack and return a
// pointer to it.  The entry must already have been deactivated.
// The returned pointer refers to storage inside emit->exc_stack.
STATIC exc_stack_entry_t *emit_native_pop_exc_stack(emit_t *emit) {
    assert(emit->exc_stack_size > 0);
    exc_stack_entry_t *e = &emit->exc_stack[--emit->exc_stack_size];
    assert(e->is_active == false);
    return e;
}
// Emit code that loads into `reg` the entry at offset table_off of the
// function object's constant table, and (in the emit pass only) record `ptr`
// as the value of that entry.  REG_TEMP0 is used as scratch.
STATIC void emit_load_reg_with_ptr(emit_t *emit, int reg, mp_uint_t ptr, size_t table_off) {
    size_t entry = table_off;
    if (!emit->do_viper_types) {
        // Skip qstr names of arguments
        entry += emit->scope->num_pos_args + emit->scope->num_kwonly_args;
    }

    // The table is only written during the emit pass
    if (emit->pass == MP_PASS_EMIT) {
        emit->const_table[entry] = ptr;
    }

    // function object -> its const_table -> requested entry
    emit_native_mov_reg_state(emit, REG_TEMP0, LOCAL_IDX_FUN_OBJ(emit));
    ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_TEMP0, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
    ASM_LOAD_REG_REG_OFFSET(emit->as, reg, REG_TEMP0, entry);
}
// Emit code to load the constant object `obj` into `reg`, assigning it the
// next free object slot of the constant table.
STATIC void emit_load_reg_with_object(emit_t *emit, int reg, mp_obj_t obj) {
    // Claim the next object index
    size_t obj_index = emit->const_table_cur_obj++;
    // First entry is for mp_fun_table
    emit_load_reg_with_ptr(emit, reg, (mp_uint_t)obj, 1 + obj_index);
}
// Emit code to load the raw-code pointer `rc` into `reg`, assigning it the
// next free raw-code slot of the constant table.
STATIC void emit_load_reg_with_raw_code(emit_t *emit, int reg, mp_raw_code_t *rc) {
    // Claim the next raw-code index
    size_t rc_index = emit->const_table_cur_raw_code++;
    // First entry is for mp_fun_table, then constant objects
    size_t table_off = 1 + emit->const_table_num_obj + rc_index;
    emit_load_reg_with_ptr(emit, reg, (mp_uint_t)rc, table_off);
}
// Bind label `l` to the current position in the emitted code.
// If `l` is the label of the innermost exception-stack entry and that entry
// is a finally handler, the top of stack is first moved into the exception
// value slot, and after the label is bound that handler entry is left.
STATIC void emit_native_label_assign(emit_t *emit, mp_uint_t l) {
    DEBUG_printf("label_assign(" UINT_FMT ")\n", l);

    bool is_finally = false;
    if (emit->exc_stack_size > 0) {
        exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size - 1];
        is_finally = e->is_finally && e->label == l;
    }

    if (is_finally) {
        // Label is at start of finally handler: store TOS into exception slot
        vtype_kind_t vtype;
        emit_pre_pop_reg(emit, &vtype, REG_TEMP0);
        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_TEMP0);
    }

    emit_native_pre(emit);
    // need to commit stack because we can jump here from elsewhere
    need_stack_settled(emit);
    mp_asm_base_label_assign(&emit->as->base, l);
    emit_post(emit);

    if (is_finally) {
        // Label is at start of finally handler: leave (deactivate) its
        // exception stack entry
        emit_native_leave_exc_stack(emit, false);
    }
}
STATIC void emit_native_global_exc_entry(emit_t *emit) {
// Note: 4 labels are reserved for this function, starting at *emit->label_slot
emit->exit_label = *emit->label_slot;
if (NEED_GLOBAL_EXC_HANDLER(emit)) {
mp_uint_t nlr_label = *emit->label_slot + 1;
mp_uint_t start_label = *emit->label_slot + 2;
mp_uint_t global_except_label = *emit->label_slot + 3;
if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) {
// Set new globals
emit_native_mov_reg_state(emit, REG_ARG_1, LOCAL_IDX_FUN_OBJ(emit));
ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_1, REG_ARG_1, offsetof(mp_obj_fun_bc_t, globals) / sizeof(uintptr_t));
emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
// Save old globals (or NULL if globals didn't change)
emit_native_mov_state_reg(emit, LOCAL_IDX_OLD_GLOBALS(emit), REG_RET);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
py: Fix native functions so they run with their correct globals context. Prior to this commit a function compiled with the native decorator @micropython.native would not work correctly when accessing global variables, because the globals dict was not being set upon function entry. This commit fixes this problem by, upon function entry, setting as the current globals dict the globals dict context the function was defined within, as per normal Python semantics, and as bytecode does. Upon function exit the original globals dict is restored. In order to restore the globals dict when an exception is raised the native function must guard its internals with an nlr_push/nlr_pop pair. Because this push/pop is relatively expensive, in both C stack usage for the nlr_buf_t and CPU execution time, the implementation here optimises things as much as possible. First, the compiler keeps track of whether a function even needs to access global variables. Using this information the native emitter then generates three different kinds of code: 1. no globals used, no exception handlers: no nlr handling code and no setting of the globals dict. 2. globals used, no exception handlers: an nlr_buf_t is allocated on the C stack but it is not used if the globals dict is unchanged, saving execution time because nlr_push/nlr_pop don't need to run. 3. function has exception handlers, may use globals: an nlr_buf_t is allocated and nlr_push/nlr_pop are always called. In the end, native functions that don't access globals and don't have exception handlers will run more efficiently than those that do. Fixes issue #1573.
2018-09-13 13:03:48 +01:00
if (emit->scope->exc_stack_size == 0) {
if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) {
// Optimisation: if globals didn't change don't push the nlr context
ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, false);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
py: Fix native functions so they run with their correct globals context. Prior to this commit a function compiled with the native decorator @micropython.native would not work correctly when accessing global variables, because the globals dict was not being set upon function entry. This commit fixes this problem by, upon function entry, setting as the current globals dict the globals dict context the function was defined within, as per normal Python semantics, and as bytecode does. Upon function exit the original globals dict is restored. In order to restore the globals dict when an exception is raised the native function must guard its internals with an nlr_push/nlr_pop pair. Because this push/pop is relatively expensive, in both C stack usage for the nlr_buf_t and CPU execution time, the implementation here optimises things as much as possible. First, the compiler keeps track of whether a function even needs to access global variables. Using this information the native emitter then generates three different kinds of code: 1. no globals used, no exception handlers: no nlr handling code and no setting of the globals dict. 2. globals used, no exception handlers: an nlr_buf_t is allocated on the C stack but it is not used if the globals dict is unchanged, saving execution time because nlr_push/nlr_pop don't need to run. 3. function has exception handlers, may use globals: an nlr_buf_t is allocated and nlr_push/nlr_pop are always called. In the end, native functions that don't access globals and don't have exception handlers will run more efficiently than those that do. Fixes issue #1573.
2018-09-13 13:03:48 +01:00
// Wrap everything in an nlr context
ASM_MOV_REG_LOCAL_ADDR(emit->as, REG_ARG_1, 0);
py: Fix native functions so they run with their correct globals context. Prior to this commit a function compiled with the native decorator @micropython.native would not work correctly when accessing global variables, because the globals dict was not being set upon function entry. This commit fixes this problem by, upon function entry, setting as the current globals dict the globals dict context the function was defined within, as per normal Python semantics, and as bytecode does. Upon function exit the original globals dict is restored. In order to restore the globals dict when an exception is raised the native function must guard its internals with an nlr_push/nlr_pop pair. Because this push/pop is relatively expensive, in both C stack usage for the nlr_buf_t and CPU execution time, the implementation here optimises things as much as possible. First, the compiler keeps track of whether a function even needs to access global variables. Using this information the native emitter then generates three different kinds of code: 1. no globals used, no exception handlers: no nlr handling code and no setting of the globals dict. 2. globals used, no exception handlers: an nlr_buf_t is allocated on the C stack but it is not used if the globals dict is unchanged, saving execution time because nlr_push/nlr_pop don't need to run. 3. function has exception handlers, may use globals: an nlr_buf_t is allocated and nlr_push/nlr_pop are always called. In the end, native functions that don't access globals and don't have exception handlers will run more efficiently than those that do. Fixes issue #1573.
2018-09-13 13:03:48 +01:00
emit_call(emit, MP_F_NLR_PUSH);
ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, true);
} else {
// Clear the unwind state
ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_TEMP0);
// Put PC of start code block into REG_LOCAL_1
ASM_MOV_REG_PCREL(emit->as, REG_LOCAL_1, start_label);
// Wrap everything in an nlr context
emit_native_label_assign(emit, nlr_label);
ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_2, LOCAL_IDX_EXC_HANDLER_UNWIND(emit));
ASM_MOV_REG_LOCAL_ADDR(emit->as, REG_ARG_1, 0);
py: Fix native functions so they run with their correct globals context. Prior to this commit a function compiled with the native decorator @micropython.native would not work correctly when accessing global variables, because the globals dict was not being set upon function entry. This commit fixes this problem by, upon function entry, setting as the current globals dict the globals dict context the function was defined within, as per normal Python semantics, and as bytecode does. Upon function exit the original globals dict is restored. In order to restore the globals dict when an exception is raised the native function must guard its internals with an nlr_push/nlr_pop pair. Because this push/pop is relatively expensive, in both C stack usage for the nlr_buf_t and CPU execution time, the implementation here optimises things as much as possible. First, the compiler keeps track of whether a function even needs to access global variables. Using this information the native emitter then generates three different kinds of code: 1. no globals used, no exception handlers: no nlr handling code and no setting of the globals dict. 2. globals used, no exception handlers: an nlr_buf_t is allocated on the C stack but it is not used if the globals dict is unchanged, saving execution time because nlr_push/nlr_pop don't need to run. 3. function has exception handlers, may use globals: an nlr_buf_t is allocated and nlr_push/nlr_pop are always called. In the end, native functions that don't access globals and don't have exception handlers will run more efficiently than those that do. Fixes issue #1573.
2018-09-13 13:03:48 +01:00
emit_call(emit, MP_F_NLR_PUSH);
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_LOCAL_2);
ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true);
// Clear PC of current code block, and jump there to resume execution
ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_TEMP0);
ASM_JUMP_REG(emit->as, REG_LOCAL_1);
// Global exception handler: check for valid exception handler
emit_native_label_assign(emit, global_except_label);
ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_1, LOCAL_IDX_EXC_HANDLER_PC(emit));
ASM_JUMP_IF_REG_NONZERO(emit->as, REG_LOCAL_1, nlr_label, false);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) {
// Restore old globals
emit_native_mov_reg_state(emit, REG_ARG_1, LOCAL_IDX_OLD_GLOBALS(emit));
emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
}
if (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) {
// Store return value in state[0]
ASM_MOV_REG_LOCAL(emit->as, REG_TEMP0, LOCAL_IDX_EXC_VAL(emit));
ASM_STORE_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_GENERATOR_STATE, offsetof(mp_code_state_t, state) / sizeof(uintptr_t));
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Load return kind
ASM_MOV_REG_IMM(emit->as, REG_RET, MP_VM_RETURN_EXCEPTION);
ASM_EXIT(emit->as);
} else {
// Re-raise exception out to caller
ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
emit_call(emit, MP_F_NATIVE_RAISE);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Label for start of function
emit_native_label_assign(emit, start_label);
if (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) {
emit_native_mov_reg_state(emit, REG_TEMP0, LOCAL_IDX_GEN_PC(emit));
ASM_JUMP_REG(emit->as, REG_TEMP0);
emit->start_offset = mp_asm_base_get_code_pos(&emit->as->base);
// This is the first entry of the generator
// Check LOCAL_IDX_EXC_VAL for any injected value
ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
emit_call(emit, MP_F_NATIVE_RAISE);
}
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
}
}
// Emit the common exit sequence of a native function.
//
// Assigns emit->exit_label, and, when the function needs the global exception
// handler, restores the saved globals (non-generator functions only), pops the
// nlr context and loads the return value into REG_RET.  The function exit
// itself (ASM_EXIT) is always emitted.
//
// For non-generator functions without any exception handlers
// (exc_stack_size == 0) a run-time shortcut is emitted: if the saved "old
// globals" value is zero then both the globals restore and the nlr_pop are
// jumped over.  The label id emit->exit_label + 1 is used as the destination
// of that jump.
STATIC void emit_native_global_exc_exit(emit_t *emit) {
    // Label for end of function
    emit_native_label_assign(emit, emit->exit_label);

    if (NEED_GLOBAL_EXC_HANDLER(emit)) {
        // Globals are only saved/restored here for non-generator functions
        const bool is_generator = (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) != 0;
        // The skip-over optimisation applies only when there are no exception handlers
        const bool can_skip_nlr_pop = !is_generator && emit->scope->exc_stack_size == 0;

        if (!is_generator) {
            // Get old globals
            emit_native_mov_reg_state(emit, REG_ARG_1, LOCAL_IDX_OLD_GLOBALS(emit));

            if (can_skip_nlr_pop) {
                // Optimisation: if globals didn't change then don't restore them and don't do nlr_pop
                ASM_JUMP_IF_REG_ZERO(emit->as, REG_ARG_1, emit->exit_label + 1, false);
            }

            // Restore old globals (REG_ARG_1 still holds the saved value)
            emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
        }

        // Pop the nlr context
        emit_call(emit, MP_F_NLR_POP);

        if (can_skip_nlr_pop) {
            // Destination label for above optimisation
            emit_native_label_assign(emit, emit->exit_label + 1);
        }

        // Load return value
        ASM_MOV_REG_LOCAL(emit->as, REG_RET, LOCAL_IDX_RET_VAL(emit));
    }

    ASM_EXIT(emit->as);
}
// Emit code for "import <qst>": pops fromlist and level from the emitter's
// value stack into arg2/arg3, calls MP_F_IMPORT_NAME with the module name as
// arg1, and pushes the result as a Python object.
STATIC void emit_native_import_name(emit_t *emit, qstr qst) {
    vtype_kind_t vtype_fromlist;
    vtype_kind_t vtype_level;

    DEBUG_printf("import_name %s\n", qstr_str(qst));

    // The two stack arguments (arg2 = fromlist, arg3 = level) must reach the
    // runtime as proper objects.  When viper types are in use that requires a
    // conversion, which is obtained by switching viper typing off just for the
    // duration of the pop and then putting the flag back.
    const bool saved_do_viper_types = emit->do_viper_types;
    emit->do_viper_types = false;
    emit_pre_pop_reg_reg(emit, &vtype_fromlist, REG_ARG_2, &vtype_level, REG_ARG_3);
    assert(vtype_fromlist == VTYPE_PYOBJ);
    assert(vtype_level == VTYPE_PYOBJ);
    emit->do_viper_types = saved_do_viper_types;

    // arg1 = import name
    emit_call_with_qstr_arg(emit, MP_F_IMPORT_NAME, qst, REG_ARG_1);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit code for "from <module> import <qst>": reads the module from the
// emitter's value stack via emit_access_stack, calls MP_F_IMPORT_FROM with the
// name as arg2, and pushes the result as a Python object.
STATIC void emit_native_import_from(emit_t *emit, qstr qst) {
DEBUG_printf("import_from %s\n", qstr_str(qst));
emit_native_pre(emit);
vtype_kind_t vtype_module;
// NOTE(review): emit_access_stack (rather than a pop) presumably leaves the
// module on the stack for further import-from operations - confirm.
emit_access_stack(emit, 1, &vtype_module, REG_ARG_1); // arg1 = module
assert(vtype_module == VTYPE_PYOBJ);
emit_call_with_qstr_arg(emit, MP_F_IMPORT_FROM, qst, REG_ARG_2); // arg2 = import name
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit code for "from <module> import *": pops the module into arg1 and calls
// MP_F_IMPORT_ALL.  Nothing is pushed back onto the emitter's value stack.
STATIC void emit_native_import_star(emit_t *emit) {
DEBUG_printf("import_star\n");
vtype_kind_t vtype_module;
emit_pre_pop_reg(emit, &vtype_module, REG_ARG_1); // arg1 = module
assert(vtype_module == VTYPE_PYOBJ);
emit_call(emit, MP_F_IMPORT_ALL);
// no result to push, so finish with the plain post step
emit_post(emit);
}
// Dispatch an import operation to the matching emitter according to `kind`:
// MP_EMIT_IMPORT_NAME and MP_EMIT_IMPORT_FROM use `qst`; every other kind is
// treated as a star import, which ignores `qst`.
STATIC void emit_native_import(emit_t *emit, qstr qst, int kind) {
    if (kind == MP_EMIT_IMPORT_NAME) {
        emit_native_import_name(emit, qst);
        return;
    }

    if (kind == MP_EMIT_IMPORT_FROM) {
        emit_native_import_from(emit, qst);
        return;
    }

    // Fallback: import *
    emit_native_import_star(emit);
}
// Push the constant denoted by token `tok` onto the emitter's value stack.
//  - MP_TOKEN_ELLIPSIS: the ellipsis object, loaded through the constant-object
//    path when MICROPY_PERSISTENT_CODE_SAVE is enabled, otherwise pushed as an
//    immediate pointer value.
//  - MP_TOKEN_KW_NONE: immediate 0 of type VTYPE_PTR_NONE.
//  - anything else: a VTYPE_BOOL immediate, 0 for MP_TOKEN_KW_FALSE and 1 otherwise.
STATIC void emit_native_load_const_tok(emit_t *emit, mp_token_kind_t tok) {
    DEBUG_printf("load_const_tok(tok=%u)\n", tok);

    if (tok == MP_TOKEN_ELLIPSIS) {
        #if MICROPY_PERSISTENT_CODE_SAVE
        emit_native_load_const_obj(emit, MP_OBJ_FROM_PTR(&mp_const_ellipsis_obj));
        #else
        emit_post_push_imm(emit, VTYPE_PYOBJ, (mp_uint_t)MP_OBJ_FROM_PTR(&mp_const_ellipsis_obj));
        #endif
        return;
    }

    emit_native_pre(emit);

    if (tok == MP_TOKEN_KW_NONE) {
        emit_post_push_imm(emit, VTYPE_PTR_NONE, 0);
        return;
    }

    // Remaining tokens are booleans: only the False keyword maps to 0
    const int bool_imm = (tok != MP_TOKEN_KW_FALSE);
    emit_post_push_imm(emit, VTYPE_BOOL, bool_imm);
}
// Push the integer `arg` onto the emitter's value stack as an immediate of
// type VTYPE_INT.
STATIC void emit_native_load_const_small_int(emit_t *emit, mp_int_t arg) {
DEBUG_printf("load_const_small_int(int=" INT_FMT ")\n", arg);
emit_native_pre(emit);
emit_post_push_imm(emit, VTYPE_INT, arg);
}
// Push the string constant identified by `qst` onto the emitter's value stack
// as a Python object, using REG_TEMP0 as the carrier register.
STATIC void emit_native_load_const_str(emit_t *emit, qstr qst) {
    emit_native_pre(emit);

    // TODO: Eventually we want to be able to work with raw pointers in viper to
    // do native array access (i.e. when do_viper_types is set, push a VTYPE_PTR
    // immediate pointing at the asciiz data from qstr_str(qst)).  For now
    // strings are loaded just like any other object.

    // Make REG_TEMP0 available, materialise the qstr object in it, then push it
    need_reg_single(emit, REG_TEMP0, 0);
    emit_native_mov_reg_qstr_obj(emit, REG_TEMP0, qst);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_TEMP0);
}
// Push the constant object `obj` onto the emitter's value stack as a Python
// object, loading it through REG_RET.
STATIC void emit_native_load_const_obj(emit_t *emit, mp_obj_t obj) {
// record on the scope that this function uses constant objects
emit->scope->scope_flags |= MP_SCOPE_FLAG_HASCONSTS;
emit_native_pre(emit);
// make REG_RET available before it is overwritten with the object
need_reg_single(emit, REG_RET, 0);
emit_load_reg_with_object(emit, REG_RET, obj);
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Push an immediate value of 0, typed as VTYPE_PYOBJ, onto the emitter's value
// stack.
// NOTE(review): 0 presumably corresponds to the null object (MP_OBJ_NULL) - confirm.
STATIC void emit_native_load_null(emit_t *emit) {
emit_native_pre(emit);
emit_post_push_imm(emit, VTYPE_PYOBJ, 0);
}
// Push local variable number `local_num` (named `qst`) onto the emitter's
// value stack, keeping the type recorded for it in emit->local_vtype.
//
// A viper type error is reported through EMIT_NATIVE_VIPER_TYPE_ERROR if the
// local's type is still VTYPE_UNBOUND; code emission then carries on as before.
STATIC void emit_native_load_fast(emit_t *emit, qstr qst, mp_uint_t local_num) {
    DEBUG_printf("load_fast(%s, " UINT_FMT ")\n", qstr_str(qst), local_num);

    vtype_kind_t vtype = emit->local_vtype[local_num];
    if (vtype == VTYPE_UNBOUND) {
        EMIT_NATIVE_VIPER_TYPE_ERROR(emit, "local '%q' used before type known", qst);
    }

    emit_native_pre(emit);

    if (local_num < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit)) {
        // The first REG_LOCAL_NUM locals live in dedicated registers (when the
        // emitter is allowed to use registers for locals): push that register
        emit_post_push_reg(emit, vtype, reg_local_table[local_num]);
    } else {
        // Otherwise the local lives in the state area: load it via REG_TEMP0
        need_reg_single(emit, REG_TEMP0, 0);
        emit_native_mov_reg_state(emit, REG_TEMP0, LOCAL_IDX_LOCAL_VAR(emit, local_num));
        emit_post_push_reg(emit, vtype, REG_TEMP0);
    }
}
// Push the value of closed-over variable `local_num` (named `qst`) onto the
// emitter's value stack.
//
// The local itself is first loaded like a fast local, popped into a register,
// and then the value at offset 1 from that register is loaded into REG_RET and
// pushed as a Python object.
STATIC void emit_native_load_deref(emit_t *emit, qstr qst, mp_uint_t local_num) {
    DEBUG_printf("load_deref(%s, " UINT_FMT ")\n", qstr_str(qst), local_num);

    // REG_RET is written below, so make sure it is available first
    need_reg_single(emit, REG_RET, 0);

    // Get the local onto the stack, then straight back off into a register;
    // REG_RET is only the preferred choice, the helper may pick another one
    emit_native_load_fast(emit, qst, local_num);
    vtype_kind_t vtype;
    int reg_base = REG_RET;
    emit_pre_pop_reg_flexible(emit, &vtype, &reg_base, -1, -1);

    // NOTE(review): offset 1 presumably selects the object stored in the
    // closure cell, in the units used by ASM_LOAD_REG_REG_OFFSET - confirm.
    ASM_LOAD_REG_REG_OFFSET(emit->as, REG_RET, reg_base, 1);

    // closed over vars are always Python objects
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Load local variable `local_num`: MP_EMIT_IDOP_LOCAL_FAST selects a plain
// fast-local load, any other `kind` is handled as a closed-over (deref) load.
STATIC void emit_native_load_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind) {
    if (kind != MP_EMIT_IDOP_LOCAL_FAST) {
        emit_native_load_deref(emit, qst, local_num);
        return;
    }

    emit_native_load_fast(emit, qst, local_num);
}
// Emit a load of the name `qst`, either as a "name" lookup
// (MP_EMIT_IDOP_GLOBAL_NAME) or as a "global" lookup (any other `kind`), and
// push the result as a Python object.
//
// In viper mode a global lookup of a builtin cast operator is not emitted as a
// runtime call; a VTYPE_BUILTIN_CAST immediate carrying the native type is
// pushed instead.
STATIC void emit_native_load_global(emit_t *emit, qstr qst, int kind) {
    // The runtime function index is computed as MP_F_LOAD_NAME + kind below,
    // so the two enumerations have to line up
    MP_STATIC_ASSERT(MP_F_LOAD_NAME + MP_EMIT_IDOP_GLOBAL_NAME == MP_F_LOAD_NAME);
    MP_STATIC_ASSERT(MP_F_LOAD_NAME + MP_EMIT_IDOP_GLOBAL_GLOBAL == MP_F_LOAD_GLOBAL);

    emit_native_pre(emit);

    const bool is_name_lookup = (kind == MP_EMIT_IDOP_GLOBAL_NAME);
    if (is_name_lookup) {
        DEBUG_printf("load_name(%s)\n", qstr_str(qst));
    } else {
        DEBUG_printf("load_global(%s)\n", qstr_str(qst));
    }

    if (!is_name_lookup && emit->do_viper_types) {
        // check for builtin casting operators
        int cast_type = mp_native_type_from_qstr(qst);
        if (cast_type >= MP_NATIVE_TYPE_INT) {
            emit_post_push_imm(emit, VTYPE_BUILTIN_CAST, cast_type);
            return;
        }
    }

    // Ordinary lookup through the runtime, with the name in arg1
    emit_call_with_qstr_arg(emit, MP_F_LOAD_NAME + kind, qst, REG_ARG_1);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit code for an attribute load "base.<qst>": pops the base object into
// arg1, calls MP_F_LOAD_ATTR with the attribute name as arg2, and pushes the
// result as a Python object.  Only Python-object bases are handled here (the
// assert below enforces it).
STATIC void emit_native_load_attr(emit_t *emit, qstr qst) {
// depends on type of subject:
// - integer, function, pointer to integers: error
// - pointer to structure: get member, quite easy
// - Python object: call mp_load_attr, and needs to be typed to convert result
vtype_kind_t vtype_base;
emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
assert(vtype_base == VTYPE_PYOBJ);
emit_call_with_qstr_arg(emit, MP_F_LOAD_ATTR, qst, REG_ARG_2); // arg2 = attribute name
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit code for a method load of `qst`.  The runtime helper writes its results
// through a destination pointer into the emitter's value stack.
//  - regular case: the base object is popped into arg1 and MP_F_LOAD_METHOD is
//    called with arg2 = method name and arg3 = destination pointer.
//  - super case (`is_super`): MP_F_LOAD_SUPER_METHOD is called with
//    arg1 = method name and arg2 = destination pointer.
STATIC void emit_native_load_method(emit_t *emit, qstr qst, bool is_super) {
    if (!is_super) {
        vtype_kind_t vtype_base;
        emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
        assert(vtype_base == VTYPE_PYOBJ);
        // arg3 = dest ptr
        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2);
        // arg2 = method name
        emit_call_with_qstr_arg(emit, MP_F_LOAD_METHOD, qst, REG_ARG_2);
        return;
    }

    // Super method: a pop of 3 stack entries followed by a push of 2, both
    // requesting the stack pointer in arg2 (the second request is the one
    // whose value remains in the register)
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, 3); // arg2 = dest ptr
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_2, 2); // arg2 = dest ptr
    // arg1 = method name
    emit_call_with_qstr_arg(emit, MP_F_LOAD_SUPER_METHOD, qst, REG_ARG_1);
}
// Emit a call to MP_F_LOAD_BUILD_CLASS (no arguments) and push its result onto
// the emitter's value stack as a Python object.
STATIC void emit_native_load_build_class(emit_t *emit) {
emit_native_pre(emit);
emit_call(emit, MP_F_LOAD_BUILD_CLASS);
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
STATIC void emit_native_load_subscr(emit_t *emit) {
DEBUG_printf("load_subscr\n");
// need to compile: base[index]
// pop: index, base
// optimise case where index is an immediate
vtype_kind_t vtype_base = peek_vtype(emit, 1);
if (vtype_base == VTYPE_PYOBJ) {
// standard Python subscr
// TODO factor this implicit cast code with other uses of it
vtype_kind_t vtype_index = peek_vtype(emit, 0);
if (vtype_index == VTYPE_PYOBJ) {
emit_pre_pop_reg(emit, &vtype_index, REG_ARG_2);
} else {
emit_pre_pop_reg(emit, &vtype_index, REG_ARG_1);
emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, vtype_index, REG_ARG_2); // arg2 = type
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
}
emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1);
emit_call_with_imm_arg(emit, MP_F_OBJ_SUBSCR, (mp_uint_t)MP_OBJ_SENTINEL, REG_ARG_3);
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
} else {
// viper load
// TODO The different machine architectures have very different
// capabilities and requirements for loads, so probably best to
// write a completely separate load-optimiser for each one.
stack_info_t *top = peek_stack(emit, 0);
if (top->vtype == VTYPE_INT && top->kind == STACK_IMM) {
// index is an immediate
mp_int_t index_value = top->data.u_imm;
emit_pre_pop_discard(emit); // discard index
int reg_base = REG_ARG_1;
int reg_index = REG_ARG_2;
emit_pre_pop_reg_flexible(emit, &vtype_base, &reg_base, reg_index, reg_index);
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
// TODO optimise to use thumb ldrb r1, [r2, r3]
if (index_value != 0) {
// index is non-zero
#if N_THUMB
if (index_value > 0 && index_value < 32) {
asm_thumb_ldrb_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
break;
}
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value);
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base
reg_base = reg_index;
}
ASM_LOAD8_REG_REG(emit->as, REG_RET, reg_base); // load from (base+index)
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
if (index_value != 0) {
// index is a non-zero immediate
#if N_THUMB
if (index_value > 0 && index_value < 32) {
asm_thumb_ldrh_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
break;
}
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 1);
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 2*index to base
reg_base = reg_index;
}
ASM_LOAD16_REG_REG(emit->as, REG_RET, reg_base); // load from (base+2*index)
break;
}
case VTYPE_PTR32: {
// pointer to 32-bit memory
if (index_value != 0) {
// index is a non-zero immediate
#if N_THUMB
if (index_value > 0 && index_value < 32) {
asm_thumb_ldr_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
break;
}
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 2);
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base
reg_base = reg_index;
}
ASM_LOAD32_REG_REG(emit->as, REG_RET, reg_base); // load from (base+4*index)
break;
}
default:
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't load from '%q'", vtype_to_qstr(vtype_base));
}
} else {
// index is not an immediate
vtype_kind_t vtype_index;
int reg_index = REG_ARG_2;
emit_pre_pop_reg_flexible(emit, &vtype_index, &reg_index, REG_ARG_1, REG_ARG_1);
emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1);
if (vtype_index != VTYPE_INT && vtype_index != VTYPE_UINT) {
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't load with '%q' index", vtype_to_qstr(vtype_index));
}
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
// TODO optimise to use thumb ldrb r1, [r2, r3]
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD8_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+index)
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index)
break;
}
case VTYPE_PTR32: {
// pointer to word-size memory
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_LOAD32_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+4*index)
break;
}
default:
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't load from '%q'", vtype_to_qstr(vtype_base));
}
}
emit_post_push_reg(emit, VTYPE_INT, REG_RET);
}
}
// Pop the value on top of the emitter stack and store it into the local
// variable numbered local_num, then record or verify the local's type.
//   qst       - name of the local, used only in the type-mismatch message
//   local_num - index of the local variable being written
STATIC void emit_native_store_fast(emit_t *emit, qstr qst, mp_uint_t local_num) {
    vtype_kind_t vtype;
    if (local_num < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit)) {
        // pop straight into the register listed for this local in reg_local_table
        emit_pre_pop_reg(emit, &vtype, reg_local_table[local_num]);
    } else {
        // pop into a temporary, then write it to the local's slot
        emit_pre_pop_reg(emit, &vtype, REG_TEMP0);
        emit_native_mov_state_reg(emit, LOCAL_IDX_LOCAL_VAR(emit, local_num), REG_TEMP0);
    }
    emit_post(emit);

    // check types
    if (emit->local_vtype[local_num] == VTYPE_UNBOUND) {
        // first time this local is assigned, so give it a type of the object stored in it
        emit->local_vtype[local_num] = vtype;
    } else if (emit->local_vtype[local_num] != vtype) {
        // type of local is not the same as object stored in it
        EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
            "local '%q' has type '%q' but source is '%q'",
            qst, vtype_to_qstr(emit->local_vtype[local_num]), vtype_to_qstr(vtype));
    }
}
// Store the value on top of the emitter stack through the local numbered
// local_num: the local is loaded as a base pointer and the value is written
// at offset 1 from it (presumably the payload slot of a closure cell -- confirm
// against the cell object layout).
//   qst       - name of the variable (debug output and passed to load_fast)
//   local_num - index of the local that holds the base object
STATIC void emit_native_store_deref(emit_t *emit, qstr qst, mp_uint_t local_num) {
    DEBUG_printf("store_deref(%s, " UINT_FMT ")\n", qstr_str(qst), local_num);
    // make sure both temporaries are available before they are used below
    need_reg_single(emit, REG_TEMP0, 0);
    need_reg_single(emit, REG_TEMP1, 0);
    // push the local, then pop it again into a register to use as the base
    emit_native_load_fast(emit, qst, local_num);
    vtype_kind_t vtype;
    int reg_base = REG_TEMP0;
    emit_pre_pop_reg_flexible(emit, &vtype, &reg_base, -1, -1);
    // pop the value to store, keeping it out of the base register
    int reg_src = REG_TEMP1;
    emit_pre_pop_reg_flexible(emit, &vtype, &reg_src, reg_base, reg_base);
    ASM_STORE_REG_REG_OFFSET(emit->as, reg_src, reg_base, 1);
    emit_post(emit);
}
// Dispatch a local-variable store: MP_EMIT_IDOP_LOCAL_FAST goes to the fast
// store, any other kind goes to the deref store.
STATIC void emit_native_store_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind) {
    if (kind != MP_EMIT_IDOP_LOCAL_FAST) {
        emit_native_store_deref(emit, qst, local_num);
        return;
    }
    emit_native_store_fast(emit, qst, local_num);
}
// Pop the value on top of the emitter stack and store it under the name qst
// by calling the runtime function MP_F_STORE_NAME + kind.
//   qst  - name to store to (passed as arg1)
//   kind - MP_EMIT_IDOP_GLOBAL_NAME or MP_EMIT_IDOP_GLOBAL_GLOBAL; the static
//          asserts below check that adding it to MP_F_STORE_NAME selects
//          MP_F_STORE_NAME or MP_F_STORE_GLOBAL respectively
STATIC void emit_native_store_global(emit_t *emit, qstr qst, int kind) {
    MP_STATIC_ASSERT(MP_F_STORE_NAME + MP_EMIT_IDOP_GLOBAL_NAME == MP_F_STORE_NAME);
    MP_STATIC_ASSERT(MP_F_STORE_NAME + MP_EMIT_IDOP_GLOBAL_GLOBAL == MP_F_STORE_GLOBAL);
    if (kind == MP_EMIT_IDOP_GLOBAL_NAME) {
        // mp_store_name, but needs conversion of object (maybe have mp_viper_store_name(obj, type))
        vtype_kind_t vtype;
        emit_pre_pop_reg(emit, &vtype, REG_ARG_2);
        assert(vtype == VTYPE_PYOBJ);
    } else {
        vtype_kind_t vtype = peek_vtype(emit, 0);
        if (vtype == VTYPE_PYOBJ) {
            // already an object: pop it directly into arg2
            emit_pre_pop_reg(emit, &vtype, REG_ARG_2);
        } else {
            // native value: convert it to an object first, result ends up in arg2
            emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
            emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, vtype, REG_ARG_2); // arg2 = type
            ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
        }
    }
    emit_call_with_qstr_arg(emit, MP_F_STORE_NAME + kind, qst, REG_ARG_1); // arg1 = name
    emit_post(emit);
}
// Emit a call to MP_F_STORE_ATTR for attribute qst.  Two entries are popped
// from the emitter stack: the first into arg1 (base object) and the second
// into arg3 (value); both must be Python objects.
STATIC void emit_native_store_attr(emit_t *emit, qstr qst) {
    vtype_kind_t base_vtype;
    vtype_kind_t value_vtype;

    // arg1 = base, arg3 = value
    emit_pre_pop_reg_reg(emit, &base_vtype, REG_ARG_1, &value_vtype, REG_ARG_3);
    assert(base_vtype == VTYPE_PYOBJ);
    assert(value_vtype == VTYPE_PYOBJ);

    // arg2 = attribute name
    emit_call_with_qstr_arg(emit, MP_F_STORE_ATTR, qst, REG_ARG_2);
    emit_post(emit);
}
// Emit code for a subscript store, base[index] = value.
// The three operands are taken from the emitter stack (popped in the order
// index, base, value).  If the base is a Python object the generic
// MP_F_OBJ_SUBSCR runtime call is used; otherwise this is a viper store
// through a typed pointer (VTYPE_PTR8/PTR16/PTR32), with separate code paths
// for an immediate integer index and for an index held in a register.
STATIC void emit_native_store_subscr(emit_t *emit) {
DEBUG_printf("store_subscr\n");
// need to compile: base[index] = value
// pop: index, base, value
// optimise case where index is an immediate
vtype_kind_t vtype_base = peek_vtype(emit, 1);
if (vtype_base == VTYPE_PYOBJ) {
// standard Python subscr
vtype_kind_t vtype_index = peek_vtype(emit, 0);
vtype_kind_t vtype_value = peek_vtype(emit, 2);
if (vtype_index != VTYPE_PYOBJ || vtype_value != VTYPE_PYOBJ) {
// need to implicitly convert non-objects to objects
// TODO do this properly
emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_1, 3);
adjust_stack(emit, 3);
}
// arg2 = index, arg1 = base, arg3 = value
emit_pre_pop_reg_reg_reg(emit, &vtype_index, REG_ARG_2, &vtype_base, REG_ARG_1, &vtype_value, REG_ARG_3);
emit_call(emit, MP_F_OBJ_SUBSCR);
} else {
// viper store
// TODO The different machine architectures have very different
// capabilities and requirements for stores, so probably best to
// write a completely separate store-optimiser for each one.
stack_info_t *top = peek_stack(emit, 0);
if (top->vtype == VTYPE_INT && top->kind == STACK_IMM) {
// index is an immediate
mp_int_t index_value = top->data.u_imm;
emit_pre_pop_discard(emit); // discard index
vtype_kind_t vtype_value;
int reg_base = REG_ARG_1;
int reg_index = REG_ARG_2;
int reg_value = REG_ARG_3;
// pop base, keeping it out of the index and value registers
emit_pre_pop_reg_flexible(emit, &vtype_base, &reg_base, reg_index, reg_value);
#if N_X86
// special case: x86 needs byte stores to be from lower 4 regs (REG_ARG_3 is EDX)
emit_pre_pop_reg(emit, &vtype_value, reg_value);
#else
// pop value, keeping it out of the base and index registers
emit_pre_pop_reg_flexible(emit, &vtype_value, &reg_value, reg_base, reg_index);
#endif
// only bool/int/uint values may be stored through a viper pointer
if (vtype_value != VTYPE_BOOL && vtype_value != VTYPE_INT && vtype_value != VTYPE_UINT) {
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't store '%q'", vtype_to_qstr(vtype_value));
}
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
// TODO optimise to use thumb strb r1, [r2, r3]
if (index_value != 0) {
// index is non-zero
#if N_THUMB
// indices 1..31 are emitted as a single store with an immediate offset
if (index_value > 0 && index_value < 32) {
asm_thumb_strb_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
break;
}
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value);
#if N_ARM
// ARM build: store with base and index registers, then leave the
// function (nothing follows the switch on this path, so this behaves
// like break); the generic code below is unreachable in this build
asm_arm_strb_reg_reg_reg(emit->as, reg_value, reg_base, reg_index);
return;
#endif
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base
reg_base = reg_index;
}
ASM_STORE8_REG_REG(emit->as, reg_value, reg_base); // store value to (base+index)
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
if (index_value != 0) {
// index is a non-zero immediate
#if N_THUMB
// indices 1..31 are emitted as a single store with an immediate offset
if (index_value > 0 && index_value < 32) {
asm_thumb_strh_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
break;
}
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 1);
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 2*index to base
reg_base = reg_index;
}
ASM_STORE16_REG_REG(emit->as, reg_value, reg_base); // store value to (base+2*index)
break;
}
case VTYPE_PTR32: {
// pointer to 32-bit memory
if (index_value != 0) {
// index is a non-zero immediate
#if N_THUMB
// indices 1..31 are emitted as a single store with an immediate offset
if (index_value > 0 && index_value < 32) {
asm_thumb_str_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
break;
}
#endif
#if N_ARM
// ARM build: the unscaled index is loaded and passed to the
// register-indexed store (presumably the instruction scales it --
// confirm in asm_arm_str_reg_reg_reg); returning here behaves like
// break because nothing follows the switch on this path
ASM_MOV_REG_IMM(emit->as, reg_index, index_value);
asm_arm_str_reg_reg_reg(emit->as, reg_value, reg_base, reg_index);
return;
#endif
ASM_MOV_REG_IMM(emit->as, reg_index, index_value << 2);
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base
reg_base = reg_index;
}
ASM_STORE32_REG_REG(emit->as, reg_value, reg_base); // store value to (base+4*index)
break;
}
default:
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't store to '%q'", vtype_to_qstr(vtype_base));
}
} else {
// index is not an immediate
vtype_kind_t vtype_index, vtype_value;
int reg_index = REG_ARG_2;
int reg_value = REG_ARG_3;
// pop index (kept out of REG_ARG_1 and the value register), then base into REG_ARG_1
emit_pre_pop_reg_flexible(emit, &vtype_index, &reg_index, REG_ARG_1, reg_value);
emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1);
if (vtype_index != VTYPE_INT && vtype_index != VTYPE_UINT) {
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't store with '%q' index", vtype_to_qstr(vtype_index));
}
#if N_X86
// special case: x86 needs byte stores to be from lower 4 regs (REG_ARG_3 is EDX)
emit_pre_pop_reg(emit, &vtype_value, reg_value);
#else
// pop value, keeping it out of the base and index registers
emit_pre_pop_reg_flexible(emit, &vtype_value, &reg_value, REG_ARG_1, reg_index);
#endif
// only bool/int/uint values may be stored through a viper pointer
if (vtype_value != VTYPE_BOOL && vtype_value != VTYPE_INT && vtype_value != VTYPE_UINT) {
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't store '%q'", vtype_to_qstr(vtype_value));
}
switch (vtype_base) {
case VTYPE_PTR8: {
// pointer to 8-bit memory
// TODO optimise to use thumb strb r1, [r2, r3]
#if N_ARM
// ARM build uses a register-indexed store; the generic code below is unreachable
asm_arm_strb_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#endif
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE8_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+index)
break;
}
case VTYPE_PTR16: {
// pointer to 16-bit memory
#if N_ARM
// ARM build uses a register-indexed store; the generic code below is unreachable
asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#endif
// the index is added twice so that REG_ARG_1 becomes base+2*index
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE16_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+2*index)
break;
}
case VTYPE_PTR32: {
// pointer to 32-bit memory
#if N_ARM
// ARM build uses a register-indexed store; the generic code below is unreachable
asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
break;
#endif
// the index is added four times so that REG_ARG_1 becomes base+4*index
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
ASM_STORE32_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+4*index)
break;
}
default:
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't store to '%q'", vtype_to_qstr(vtype_base));
}
}
}
}
// Emit code for deleting a local variable.
// For MP_EMIT_IDOP_LOCAL_FAST the local is simply overwritten with None (see
// the TODO below); for any other kind nothing is emitted yet.
//   qst       - name of the local
//   local_num - index of the local
//   kind      - MP_EMIT_IDOP_LOCAL_FAST or another local kind
STATIC void emit_native_delete_local(emit_t *emit, qstr qst, mp_uint_t local_num, int kind) {
    if (kind == MP_EMIT_IDOP_LOCAL_FAST) {
        // TODO: This is not compliant implementation. We could use MP_OBJ_SENTINEL
        // to mark deleted vars but then every var would need to be checked on
        // each access. Very inefficient, so just set value to None to enable GC.
        emit_native_load_const_tok(emit, MP_TOKEN_KW_NONE);
        emit_native_store_fast(emit, qst, local_num);
    } else {
        // TODO implement me!
    }
}
// Emit a call that deletes the name qst.  The runtime function is selected
// as MP_F_DELETE_NAME + kind; the static asserts verify that this yields
// MP_F_DELETE_NAME for MP_EMIT_IDOP_GLOBAL_NAME and MP_F_DELETE_GLOBAL for
// MP_EMIT_IDOP_GLOBAL_GLOBAL.
STATIC void emit_native_delete_global(emit_t *emit, qstr qst, int kind) {
    MP_STATIC_ASSERT(MP_F_DELETE_NAME + MP_EMIT_IDOP_GLOBAL_NAME == MP_F_DELETE_NAME);
    MP_STATIC_ASSERT(MP_F_DELETE_NAME + MP_EMIT_IDOP_GLOBAL_GLOBAL == MP_F_DELETE_GLOBAL);

    emit_native_pre(emit);
    // arg1 = name to delete
    emit_call_with_qstr_arg(emit, MP_F_DELETE_NAME + kind, qst, REG_ARG_1);
    emit_post(emit);
}
// Emit code to delete attribute qst of the object popped from the emitter
// stack.  Deletion is done through MP_F_STORE_ATTR with a zeroed value
// register.
STATIC void emit_native_delete_attr(emit_t *emit, qstr qst) {
    vtype_kind_t base_vtype;

    // arg1 = base
    emit_pre_pop_reg(emit, &base_vtype, REG_ARG_1);
    assert(base_vtype == VTYPE_PYOBJ);

    // arg3 = value (null for delete)
    ASM_XOR_REG_REG(emit->as, REG_ARG_3, REG_ARG_3);

    // arg2 = attribute name
    emit_call_with_qstr_arg(emit, MP_F_STORE_ATTR, qst, REG_ARG_2);
    emit_post(emit);
}
// Emit code for deleting a subscript: pops index (arg2) and base (arg1), both
// of which must be Python objects, and calls MP_F_OBJ_SUBSCR with
// MP_OBJ_NULL as the third argument.
STATIC void emit_native_delete_subscr(emit_t *emit) {
    vtype_kind_t index_vtype;
    vtype_kind_t base_vtype;

    // index, base
    emit_pre_pop_reg_reg(emit, &index_vtype, REG_ARG_2, &base_vtype, REG_ARG_1);
    assert(index_vtype == VTYPE_PYOBJ);
    assert(base_vtype == VTYPE_PYOBJ);

    emit_call_with_imm_arg(emit, MP_F_OBJ_SUBSCR, (mp_uint_t)MP_OBJ_NULL, REG_ARG_3);
}
// Dispatch a subscript operation by kind: load, store, or (for any other
// value) delete.
STATIC void emit_native_subscr(emit_t *emit, int kind) {
    switch (kind) {
        case MP_EMIT_SUBSCR_LOAD:
            emit_native_load_subscr(emit);
            break;
        case MP_EMIT_SUBSCR_STORE:
            emit_native_store_subscr(emit);
            break;
        default:
            emit_native_delete_subscr(emit);
            break;
    }
}
// Dispatch an attribute operation on qst by kind: load, store, or (for any
// other value) delete.
STATIC void emit_native_attr(emit_t *emit, qstr qst, int kind) {
    switch (kind) {
        case MP_EMIT_ATTR_LOAD:
            emit_native_load_attr(emit, qst);
            break;
        case MP_EMIT_ATTR_STORE:
            emit_native_store_attr(emit, qst);
            break;
        default:
            emit_native_delete_attr(emit, qst);
            break;
    }
}
// Duplicate the top entry of the emitter stack: pop it into a register
// (REG_TEMP0 is requested, but the flexible pop may pick another) and push
// that register twice with the same type.
STATIC void emit_native_dup_top(emit_t *emit) {
    DEBUG_printf("dup_top\n");

    int top_reg = REG_TEMP0;
    vtype_kind_t top_vtype;
    emit_pre_pop_reg_flexible(emit, &top_vtype, &top_reg, -1, -1);

    emit_post_push_reg_reg(emit, top_vtype, top_reg, top_vtype, top_reg);
}
// Duplicate the top two entries of the emitter stack: pop them into
// REG_TEMP0 and REG_TEMP1, then push the pair back twice in the same order.
STATIC void emit_native_dup_top_two(emit_t *emit) {
    vtype_kind_t first_vtype;  // type of the entry popped into REG_TEMP0
    vtype_kind_t second_vtype; // type of the entry popped into REG_TEMP1

    emit_pre_pop_reg_reg(emit, &first_vtype, REG_TEMP0, &second_vtype, REG_TEMP1);
    emit_post_push_reg_reg_reg_reg(emit,
        second_vtype, REG_TEMP1, first_vtype, REG_TEMP0,
        second_vtype, REG_TEMP1, first_vtype, REG_TEMP0);
}
// Drop the top entry of the emitter stack without using its value.
STATIC void emit_native_pop_top(emit_t *emit) {
    DEBUG_printf("pop_top\n");

    emit_pre_pop_discard(emit);
    emit_post(emit);
}
// Swap the top two entries of the emitter stack: pop them into REG_TEMP0 and
// REG_TEMP1 and push them back in the opposite order.
STATIC void emit_native_rot_two(emit_t *emit) {
    DEBUG_printf("rot_two\n");

    vtype_kind_t first_vtype;  // type of the entry popped into REG_TEMP0
    vtype_kind_t second_vtype; // type of the entry popped into REG_TEMP1
    emit_pre_pop_reg_reg(emit, &first_vtype, REG_TEMP0, &second_vtype, REG_TEMP1);

    emit_post_push_reg_reg(emit, first_vtype, REG_TEMP0, second_vtype, REG_TEMP1);
}
// Rotate the top three entries of the emitter stack: pop them into
// REG_TEMP0, REG_TEMP1 and REG_TEMP2, then push TEMP0, TEMP2, TEMP1 in that
// order.
STATIC void emit_native_rot_three(emit_t *emit) {
    DEBUG_printf("rot_three\n");

    vtype_kind_t first_vtype;  // type of the entry popped into REG_TEMP0
    vtype_kind_t second_vtype; // type of the entry popped into REG_TEMP1
    vtype_kind_t third_vtype;  // type of the entry popped into REG_TEMP2
    emit_pre_pop_reg_reg_reg(emit,
        &first_vtype, REG_TEMP0, &second_vtype, REG_TEMP1, &third_vtype, REG_TEMP2);

    emit_post_push_reg_reg_reg(emit,
        first_vtype, REG_TEMP0, third_vtype, REG_TEMP2, second_vtype, REG_TEMP1);
}
// Emit an unconditional jump to label.
STATIC void emit_native_jump(emit_t *emit, mp_uint_t label) {
    DEBUG_printf("jump(label=" UINT_FMT ")\n", label);

    emit_native_pre(emit);

    // control leaves this point, so the stack must be committed first
    need_stack_settled(emit);
    ASM_JUMP(emit->as, label);

    emit_post(emit);
}
// Shared implementation of the conditional jumps.
//   cond  - jump when the tested value is non-zero (true) or zero (false)
//   label - jump target
//   pop   - when false, the tested entry is kept on the emitter stack while
//           the jump is emitted and only dropped afterwards
// A Python object is tested via MP_F_OBJ_IS_TRUE; a native value is tested
// directly and must be of type bool, int or uint.
STATIC void emit_native_jump_helper(emit_t *emit, bool cond, mp_uint_t label, bool pop) {
    const bool keep_on_stack = !pop;
    vtype_kind_t vtype = peek_vtype(emit, 0);

    if (vtype != VTYPE_PYOBJ) {
        // native value: it goes straight into the register that gets tested
        emit_pre_pop_reg(emit, &vtype, REG_RET);
        if (keep_on_stack) {
            adjust_stack(emit, 1);
        }
        if (vtype != VTYPE_BOOL && vtype != VTYPE_INT && vtype != VTYPE_UINT) {
            EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                "can't implicitly convert '%q' to 'bool'", vtype_to_qstr(vtype));
        }
    } else {
        // Python object: ask the runtime for its truth value
        emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
        if (keep_on_stack) {
            adjust_stack(emit, 1);
        }
        emit_call(emit, MP_F_OBJ_IS_TRUE);
    }

    // For non-pop need to save the vtype so that emit_native_adjust_stack_size
    // can use it.  This is a bit of a hack.
    if (keep_on_stack) {
        emit->saved_stack_vtype = vtype;
    }

    // the jump may be taken, so commit the stack before emitting it
    need_stack_settled(emit);

    // emit the conditional jump itself
    if (!cond) {
        ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, label, vtype == VTYPE_PYOBJ);
    } else {
        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, label, vtype == VTYPE_PYOBJ);
    }

    // on the fall-through path the kept entry is dropped
    if (keep_on_stack) {
        adjust_stack(emit, -1);
    }
    emit_post(emit);
}
// Conditional jump that always pops the tested value: delegates to
// emit_native_jump_helper with pop set to true.
STATIC void emit_native_pop_jump_if(emit_t *emit, bool cond, mp_uint_t label) {
    DEBUG_printf("pop_jump_if(cond=%u, label=" UINT_FMT ")\n", cond, label);

    const bool pop_value = true;
    emit_native_jump_helper(emit, cond, label, pop_value);
}
// Conditional jump that keeps the tested value on the stack while the jump
// is emitted: delegates to emit_native_jump_helper with pop set to false.
STATIC void emit_native_jump_if_or_pop(emit_t *emit, bool cond, mp_uint_t label) {
    DEBUG_printf("jump_if_or_pop(cond=%u, label=" UINT_FMT ")\n", cond, label);

    const bool pop_value = false;
    emit_native_jump_helper(emit, cond, label, pop_value);
}
// Emit a jump to label that leaves except_depth levels of exception handling.
//   label        - jump target; the MP_EMIT_BREAK_FROM_FOR flag bit is masked
//                  off before the value is used as a label
//   except_depth - number of exception-stack entries, counted from the top of
//                  emit->exc_stack, that the jump leaves
// If any of those entries is an active finally handler, the handlers are
// chained through their unwind_label fields and the jump goes to the
// innermost one; otherwise the exception handler PC is restored and the jump
// goes straight to label.
STATIC void emit_native_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_depth) {
if (except_depth > 0) {
exc_stack_entry_t *first_finally = NULL;
exc_stack_entry_t *prev_finally = NULL;
// start at the top exception-stack entry and walk down except_depth entries
// NOTE(review): if except_depth equals exc_stack_size, e ends up one element
// before the start of the array (the case tested by "e < emit->exc_stack"
// below); forming such a pointer is formally undefined in ISO C.
exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size - 1];
for (; except_depth > 0; --except_depth, --e) {
if (e->is_finally && e->is_active) {
// Found an active finally handler
if (first_finally == NULL) {
first_finally = e;
}
if (prev_finally != NULL) {
// Mark prev finally as needed to unwind a jump
prev_finally->unwind_label = e->label;
}
prev_finally = e;
}
}
if (prev_finally == NULL) {
// No finally, handle the jump ourselves
// First, restore the exception handler address for the jump
if (e < emit->exc_stack) {
// walked past the bottom of the exception stack: store zero as the handler PC
ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
} else {
// e is the first entry not being left: its label becomes the handler PC
ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
}
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
} else {
// Last finally should do our jump for us
// Mark finally as needing to decide the type of jump
prev_finally->unwind_label = UNWIND_LABEL_DO_FINAL_UNWIND;
// store the address of the final target in the unwind slot
ASM_MOV_REG_PCREL(emit->as, REG_RET, label & ~MP_EMIT_BREAK_FROM_FOR);
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_RET);
// Cancel any active exception (see also emit_native_pop_except_jump)
emit_native_mov_reg_const(emit, REG_RET, MP_F_CONST_NONE_OBJ);
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_RET);
// Jump to the innermost active finally
label = first_finally->label;
}
}
// label is either the original target or the innermost active finally
emit_native_jump(emit, label & ~MP_EMIT_BREAK_FROM_FOR);
}
// Emit the entry sequence of a with statement.
// On entry the context manager is on top of the emitter stack.  Its __exit__
// method is loaded and kept on the stack, __enter__ is loaded and called, and
// the result is pushed.  An exception-stack entry for label is then pushed
// and the __enter__ result is duplicated.
//   label - label passed to emit_native_push_exc_stack for this with block
STATIC void emit_native_setup_with(emit_t *emit, mp_uint_t label) {
    // the context manager is on the top of the stack
    // stack: (..., ctx_mgr)

    // get __exit__ method
    vtype_kind_t vtype;
    emit_access_stack(emit, 1, &vtype, REG_ARG_1); // arg1 = ctx_mgr
    assert(vtype == VTYPE_PYOBJ);
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
    emit_call_with_qstr_arg(emit, MP_F_LOAD_METHOD, MP_QSTR___exit__, REG_ARG_2);
    // stack: (..., ctx_mgr, __exit__, self)

    // remove ctx_mgr from underneath the loaded method, keeping it in arg1
    emit_pre_pop_reg(emit, &vtype, REG_ARG_3); // self
    emit_pre_pop_reg(emit, &vtype, REG_ARG_2); // __exit__
    emit_pre_pop_reg(emit, &vtype, REG_ARG_1); // ctx_mgr
    emit_post_push_reg(emit, vtype, REG_ARG_2); // __exit__
    emit_post_push_reg(emit, vtype, REG_ARG_3); // self
    // stack: (..., __exit__, self)
    // REG_ARG_1=ctx_mgr

    // get __enter__ method
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
    emit_call_with_qstr_arg(emit, MP_F_LOAD_METHOD, MP_QSTR___enter__, REG_ARG_2); // arg2 = method name
    // stack: (..., __exit__, self, __enter__, self)

    // call __enter__ method
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, 2); // pointer to items, including meth and self
    emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW, 0, REG_ARG_1, 0, REG_ARG_2);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); // push return value of __enter__
    // stack: (..., __exit__, self, as_value)

    // need to commit stack because we may jump elsewhere
    need_stack_settled(emit);
    emit_native_push_exc_stack(emit, label, true);

    emit_native_dup_top(emit);
    // stack: (..., __exit__, self, as_value, as_value)
}
// Emit the setup for an exception-handling block.
//   label - label of the block's handler
//   kind  - MP_EMIT_SETUP_BLOCK_WITH delegates to emit_native_setup_with; any
//           other kind pushes an exception-stack entry, flagged as a finally
//           handler when kind is MP_EMIT_SETUP_BLOCK_FINALLY
STATIC void emit_native_setup_block(emit_t *emit, mp_uint_t label, int kind) {
    if (kind == MP_EMIT_SETUP_BLOCK_WITH) {
        emit_native_setup_with(emit, label);
    } else {
        // Set up except and finally
        emit_native_pre(emit);
        need_stack_settled(emit);
        emit_native_push_exc_stack(emit, label, kind == MP_EMIT_SETUP_BLOCK_FINALLY);
        emit_post(emit);
    }
}
STATIC void emit_native_with_cleanup(emit_t *emit, mp_uint_t label) {
// Note: 3 labels are reserved for this function, starting at *emit->label_slot
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// stack: (..., __exit__, self, as_value)
emit_native_pre(emit);
emit_native_leave_exc_stack(emit, false);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
adjust_stack(emit, -1);
// stack: (..., __exit__, self)
// Label for case where __exit__ is called from an unwind jump
emit_native_label_assign(emit, *emit->label_slot + 2);
// call __exit__
emit_post_push_imm(emit, VTYPE_PTR_NONE, 0);
emit_post_push_imm(emit, VTYPE_PTR_NONE, 0);
emit_post_push_imm(emit, VTYPE_PTR_NONE, 0);
emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, 5);
emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW, 3, REG_ARG_1, 0, REG_ARG_2);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Replace exc with None and finish
emit_native_jump(emit, *emit->label_slot);
// nlr_catch
// Don't use emit_native_label_assign because this isn't a real finally label
mp_asm_base_label_assign(&emit->as->base, label);
// Leave with's exception handler
emit_native_leave_exc_stack(emit, true);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Adjust stack counter for: __exit__, self (implicitly discard as_value which is above self)
emit_native_adjust_stack_size(emit, 2);
// stack: (..., __exit__, self)
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit)); // get exc
// Check if exc is None and jump to non-exc handler if it is
emit_native_mov_reg_const(emit, REG_ARG_2, MP_F_CONST_NONE_OBJ);
ASM_JUMP_IF_REG_EQ(emit->as, REG_ARG_1, REG_ARG_2, *emit->label_slot + 2);
ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_2, REG_ARG_1, 0); // get type(exc)
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_ARG_2); // push type(exc)
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_ARG_1); // push exc value
emit_post_push_imm(emit, VTYPE_PTR_NONE, 0); // traceback info
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Stack: (..., __exit__, self, type(exc), exc, traceback)
// call __exit__ method
emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, 5);
emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW, 3, REG_ARG_1, 0, REG_ARG_2);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Stack: (...)
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// If REG_RET is true then we need to replace exception with None (swallow exception)
if (REG_ARG_1 != REG_RET) {
ASM_MOV_REG_REG(emit->as, REG_ARG_1, REG_RET);
}
emit_call(emit, MP_F_OBJ_IS_TRUE);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, *emit->label_slot + 1, true);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
// Replace exception with None
emit_native_label_assign(emit, *emit->label_slot);
emit_native_mov_reg_const(emit, REG_TEMP0, MP_F_CONST_NONE_OBJ);
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_TEMP0);
// end of with cleanup nlr_catch block
py/emitnative: Optimise and improve exception handling in native code. Prior to this patch, native code would use a full nlr_buf_t for each exception handler (try-except, try-finally, with). For nested exception handlers this would use a lot of C stack and be rather inefficient. This patch changes how exceptions are handled in native code by setting up only a single nlr_buf_t context for the entire function, and then manages a state machine (using the PC) to work out which exception handler to run when an exception is raised by an nlr_jump. This keeps the C stack usage at a constant level regardless of the depth of Python exception blocks. The patch also fixes an existing bug when local variables are written to within an exception handler, then their value was incorrectly restored if an exception was raised (since the nlr_jump would restore register values, back to the point of the nlr_push). And it also gets nested try-finally+with working with the viper emitter. Broadly speaking, efficiency of executing native code that doesn't use any exception blocks is unchanged, and emitted code size is only slightly increased for such function. C stack usage of all native functions is either equal or less than before. Emitted code size for native functions that use exception blocks is increased by roughly 10% (due in part to fixing of above-mentioned bugs). But, most importantly, this patch allows to implement more Python features in native code, like unwind jumps and yielding from within nested exception blocks.
2018-08-16 04:56:36 +01:00
emit_native_label_assign(emit, *emit->label_slot + 1);
// Exception is in nlr_buf.ret_val slot
}
// Emit the code that terminates a finally block.
//
// logic of the emitted code:
//     exc = saved exception value (the LOCAL_IDX_EXC_VAL local)
//     if exc == None: pass
//     else: raise exc
// the check if exc is None is done in the MP_F_NATIVE_RAISE stub
//
// After that, if the exception-stack entry popped for this finally has an
// unwind label in use, code is emitted to continue a pending unwind.
STATIC void emit_native_end_finally(emit_t *emit) {
    emit_native_pre(emit);
    ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
    emit_call(emit, MP_F_NATIVE_RAISE);

    // Get state for this finally and see if we need to unwind
    exc_stack_entry_t *e = emit_native_pop_exc_stack(emit);
    if (e->unwind_label != UNWIND_LABEL_UNUSED) {
        // Load the saved unwind value; zero means skip the unwind jump below
        ASM_MOV_REG_LOCAL(emit->as, REG_RET, LOCAL_IDX_EXC_HANDLER_UNWIND(emit));
        ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, *emit->label_slot, false);
        if (e->unwind_label == UNWIND_LABEL_DO_FINAL_UNWIND) {
            // Final unwind: jump to the address held in the loaded value
            ASM_JUMP_REG(emit->as, REG_RET);
        } else {
            // Otherwise continue at this entry's unwind label
            emit_native_jump(emit, e->unwind_label);
        }
        emit_native_label_assign(emit, *emit->label_slot);
    }

    emit_post(emit);
}
// Emit a call to MP_F_NATIVE_GETITER for the Python object on top of the stack.
//
// use_stack: when true, arg 2 is a pointer to MP_OBJ_ITER_BUF_NSLOTS stack
//            slots obtained via emit_get_stack_pointer_to_reg_for_push;
//            when false, arg 2 is 0 and the returned object is pushed.
STATIC void emit_native_get_iter(emit_t *emit, bool use_stack) {
    // perhaps the difficult one, as we want to rewrite for loops using native code
    // in cases where we iterate over a Python object, can we use normal runtime calls?

    vtype_kind_t iterable_vtype;
    emit_pre_pop_reg(emit, &iterable_vtype, REG_ARG_1);
    assert(iterable_vtype == VTYPE_PYOBJ);

    // Set up the second argument: either a stack buffer or 0
    if (use_stack) {
        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_2, MP_OBJ_ITER_BUF_NSLOTS);
    } else {
        // mp_getiter will allocate the iter_buf on the heap
        ASM_MOV_REG_IMM(emit->as, REG_ARG_2, 0);
    }

    emit_call(emit, MP_F_NATIVE_GETITER);

    // Only the non-stack variant pushes the returned value
    if (!use_stack) {
        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
    }
}
// Emit one iteration step of a for loop.
//
// Calls MP_F_NATIVE_ITERNEXT with a pointer to the MP_OBJ_ITER_BUF_NSLOTS
// iterator slots, jumps to `label` when the result equals
// MP_OBJ_STOP_ITERATION, and otherwise pushes the result as a Python object.
STATIC void emit_native_for_iter(emit_t *emit, mp_uint_t label) {
    emit_native_pre(emit);

    // Take the address of the iterator slots, then restore the stack size
    // that the "for_pop" helper accounted for
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_1, MP_OBJ_ITER_BUF_NSLOTS);
    adjust_stack(emit, MP_OBJ_ITER_BUF_NSLOTS);

    emit_call(emit, MP_F_NATIVE_ITERNEXT);

    #if !MICROPY_DEBUG_MP_OBJ_SENTINELS
    // The stop marker is zero, so a plain zero test is enough
    MP_STATIC_ASSERT(MP_OBJ_STOP_ITERATION == 0);
    ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, label, false);
    #else
    // Compare against the explicit stop marker value
    ASM_MOV_REG_IMM(emit->as, REG_TEMP1, (mp_uint_t)MP_OBJ_STOP_ITERATION);
    ASM_JUMP_IF_REG_EQ(emit->as, REG_RET, REG_TEMP1, label);
    #endif

    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Finish a for loop by removing the MP_OBJ_ITER_BUF_NSLOTS iterator slots
// from the emitter's stack accounting.
STATIC void emit_native_for_iter_end(emit_t *emit) {
    emit_native_pre(emit);

    // adjust stack counter (we get here from for_iter ending, which popped the value for us)
    adjust_stack(emit, -MP_OBJ_ITER_BUF_NSLOTS);

    emit_post(emit);
}
// Emit the code that leaves an exception context and jumps to `label`.
//
// within_exc_handler: when true, the saved exception value local is
//                     overwritten with None before the jump; when false,
//                     emit_native_leave_exc_stack(emit, false) is emitted
//                     instead.
STATIC void emit_native_pop_except_jump(emit_t *emit, mp_uint_t label, bool within_exc_handler) {
    if (within_exc_handler) {
        // Cancel any active exception so subsequent handlers don't see it
        emit_native_mov_reg_const(emit, REG_TEMP0, MP_F_CONST_NONE_OBJ);
        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_TEMP0);
    } else {
        emit_native_leave_exc_stack(emit, false);
    }
    emit_native_jump(emit, label);
}
// Emit a unary operation on the value at the top of the stack.
//
// Python objects are handled by calling MP_F_UNARY_OP with `op` as the first
// argument and the operand as the second.  Any other vtype produces a viper
// type error.
STATIC void emit_native_unary_op(emit_t *emit, mp_unary_op_t op) {
    vtype_kind_t operand_vtype;
    emit_pre_pop_reg(emit, &operand_vtype, REG_ARG_2);

    if (operand_vtype != VTYPE_PYOBJ) {
        // Operand was popped above; adjust the stack size by one, as the
        // object path does by pushing its result
        adjust_stack(emit, 1);
        EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
            "unary op %q not implemented", mp_unary_op_method_name[op]);
    } else {
        emit_call_with_imm_arg(emit, MP_F_UNARY_OP, op, REG_ARG_1);
        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
    }
}
// Emit code for a binary operation on the two topmost stack entries
// (lhs is one below the top, rhs is the top).
//
// Three cases are distinguished by the peeked vtypes:
//  - both VTYPE_INT: the operation is emitted inline, or dispatched to a
//    helper for floor-divide and modulo; comparisons push a VTYPE_BOOL
//  - both VTYPE_PYOBJ: MP_F_BINARY_OP is called ("not in" and "is not" are
//    emitted as "in" / "is" followed by MP_UNARY_OP_NOT)
//  - anything else: a viper type error is recorded
STATIC void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) {
// NOTE(review): op is an enum formatted with UINT_FMT; confirm the types match when debug printing is enabled
DEBUG_printf("binary_op(" UINT_FMT ")\n", op);
vtype_kind_t vtype_lhs = peek_vtype(emit, 1);
vtype_kind_t vtype_rhs = peek_vtype(emit, 0);
if (vtype_lhs == VTYPE_INT && vtype_rhs == VTYPE_INT) {
// for integers, inplace and normal ops are equivalent, so use just normal ops
if (MP_BINARY_OP_INPLACE_OR <= op && op <= MP_BINARY_OP_INPLACE_POWER) {
op += MP_BINARY_OP_OR - MP_BINARY_OP_INPLACE_OR;
}
#if N_X64 || N_X86
// special cases for x86 and shifting
// (the rhs, ie the shift amount, is popped into RCX/ECX and the lhs into REG_RET)
if (op == MP_BINARY_OP_LSHIFT || op == MP_BINARY_OP_RSHIFT) {
#if N_X64
emit_pre_pop_reg_reg(emit, &vtype_rhs, ASM_X64_REG_RCX, &vtype_lhs, REG_RET);
#else
emit_pre_pop_reg_reg(emit, &vtype_rhs, ASM_X86_REG_ECX, &vtype_lhs, REG_RET);
#endif
if (op == MP_BINARY_OP_LSHIFT) {
ASM_LSL_REG(emit->as, REG_RET);
} else {
ASM_ASR_REG(emit->as, REG_RET);
}
emit_post_push_reg(emit, VTYPE_INT, REG_RET);
return;
}
#endif
// special cases for floor-divide and modulo because we dispatch to helper functions
if (op == MP_BINARY_OP_FLOOR_DIVIDE || op == MP_BINARY_OP_MODULO) {
emit_pre_pop_reg_reg(emit, &vtype_rhs, REG_ARG_2, &vtype_lhs, REG_ARG_1);
if (op == MP_BINARY_OP_FLOOR_DIVIDE) {
emit_call(emit, MP_F_SMALL_INT_FLOOR_DIVIDE);
} else {
emit_call(emit, MP_F_SMALL_INT_MODULO);
}
emit_post_push_reg(emit, VTYPE_INT, REG_RET);
return;
}
// general case: rhs goes in reg_rhs (REG_ARG_3 is the initial choice passed
// to the flexible pop), lhs goes in REG_ARG_2 which also receives the result
int reg_rhs = REG_ARG_3;
emit_pre_pop_reg_flexible(emit, &vtype_rhs, &reg_rhs, REG_RET, REG_ARG_2);
emit_pre_pop_reg(emit, &vtype_lhs, REG_ARG_2);
// the empty "if (0)" lets every real case below, including the ones
// guarded by #if, be written uniformly as "} else if (...)"
if (0) {
// dummy
#if !(N_X64 || N_X86)
} else if (op == MP_BINARY_OP_LSHIFT) {
ASM_LSL_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_RSHIFT) {
ASM_ASR_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
#endif
} else if (op == MP_BINARY_OP_OR) {
ASM_OR_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_XOR) {
ASM_XOR_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_AND) {
ASM_AND_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_ADD) {
ASM_ADD_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_SUBTRACT) {
ASM_SUB_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (op == MP_BINARY_OP_MULTIPLY) {
ASM_MUL_REG_REG(emit->as, REG_ARG_2, reg_rhs);
emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
} else if (MP_BINARY_OP_LESS <= op && op <= MP_BINARY_OP_NOT_EQUAL) {
// comparison ops are (in enum order):
// MP_BINARY_OP_LESS
// MP_BINARY_OP_MORE
// MP_BINARY_OP_EQUAL
// MP_BINARY_OP_LESS_EQUAL
// MP_BINARY_OP_MORE_EQUAL
// MP_BINARY_OP_NOT_EQUAL
// the per-architecture tables below are indexed by (op - MP_BINARY_OP_LESS)
// and so must stay in this order
need_reg_single(emit, REG_RET, 0);
#if N_X64
asm_x64_xor_r64_r64(emit->as, REG_RET, REG_RET);
asm_x64_cmp_r64_with_r64(emit->as, reg_rhs, REG_ARG_2);
static byte ops[6] = {
ASM_X64_CC_JL,
ASM_X64_CC_JG,
ASM_X64_CC_JE,
ASM_X64_CC_JLE,
ASM_X64_CC_JGE,
ASM_X64_CC_JNE,
};
asm_x64_setcc_r8(emit->as, ops[op - MP_BINARY_OP_LESS], REG_RET);
#elif N_X86
asm_x86_xor_r32_r32(emit->as, REG_RET, REG_RET);
asm_x86_cmp_r32_with_r32(emit->as, reg_rhs, REG_ARG_2);
static byte ops[6] = {
ASM_X86_CC_JL,
ASM_X86_CC_JG,
ASM_X86_CC_JE,
ASM_X86_CC_JLE,
ASM_X86_CC_JGE,
ASM_X86_CC_JNE,
};
asm_x86_setcc_r8(emit->as, ops[op - MP_BINARY_OP_LESS], REG_RET);
#elif N_THUMB
asm_thumb_cmp_rlo_rlo(emit->as, REG_ARG_2, reg_rhs);
// only GE, GT and EQ conditions are used; ret[] holds the value moved by
// the first instruction after the ITE op and ret[] ^ 1 the value moved by
// the second, so e.g. LESS is emitted as "GE ? 0 : 1"
static uint16_t ops[6] = {
ASM_THUMB_OP_ITE_GE,
ASM_THUMB_OP_ITE_GT,
ASM_THUMB_OP_ITE_EQ,
ASM_THUMB_OP_ITE_GT,
ASM_THUMB_OP_ITE_GE,
ASM_THUMB_OP_ITE_EQ,
};
static byte ret[6] = { 0, 1, 1, 0, 1, 0, };
asm_thumb_op16(emit->as, ops[op - MP_BINARY_OP_LESS]);
asm_thumb_mov_rlo_i8(emit->as, REG_RET, ret[op - MP_BINARY_OP_LESS]);
asm_thumb_mov_rlo_i8(emit->as, REG_RET, ret[op - MP_BINARY_OP_LESS] ^ 1);
#elif N_ARM
asm_arm_cmp_reg_reg(emit->as, REG_ARG_2, reg_rhs);
static uint ccs[6] = {
ASM_ARM_CC_LT,
ASM_ARM_CC_GT,
ASM_ARM_CC_EQ,
ASM_ARM_CC_LE,
ASM_ARM_CC_GE,
ASM_ARM_CC_NE,
};
asm_arm_setcc_reg(emit->as, REG_RET, ccs[op - MP_BINARY_OP_LESS]);
#elif N_XTENSA
// bit 0x80 in an entry is a local marker meaning "swap the operands";
// it is masked off before the condition code is passed on
static uint8_t ccs[6] = {
ASM_XTENSA_CC_LT,
0x80 | ASM_XTENSA_CC_LT, // for GT we'll swap args
ASM_XTENSA_CC_EQ,
0x80 | ASM_XTENSA_CC_GE, // for LE we'll swap args
ASM_XTENSA_CC_GE,
ASM_XTENSA_CC_NE,
};
uint8_t cc = ccs[op - MP_BINARY_OP_LESS];
if ((cc & 0x80) == 0) {
asm_xtensa_setcc_reg_reg_reg(emit->as, cc, REG_RET, REG_ARG_2, reg_rhs);
} else {
asm_xtensa_setcc_reg_reg_reg(emit->as, cc & ~0x80, REG_RET, reg_rhs, REG_ARG_2);
}
#else
#error not implemented
#endif
emit_post_push_reg(emit, VTYPE_BOOL, REG_RET);
} else {
// TODO other ops not yet implemented
// both operands were popped above; adjust the stack size by one, as
// the implemented cases do by pushing a result
adjust_stack(emit, 1);
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"binary op %q not implemented", mp_binary_op_method_name[op]);
}
} else if (vtype_lhs == VTYPE_PYOBJ && vtype_rhs == VTYPE_PYOBJ) {
emit_pre_pop_reg_reg(emit, &vtype_rhs, REG_ARG_3, &vtype_lhs, REG_ARG_2);
// "not in" and "is not" are emitted as the positive op plus a logical not
bool invert = false;
if (op == MP_BINARY_OP_NOT_IN) {
invert = true;
op = MP_BINARY_OP_IN;
} else if (op == MP_BINARY_OP_IS_NOT) {
invert = true;
op = MP_BINARY_OP_IS;
}
emit_call_with_imm_arg(emit, MP_F_BINARY_OP, op, REG_ARG_1);
if (invert) {
// feed the result of the binary op into MP_F_UNARY_OP as its operand
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
emit_call_with_imm_arg(emit, MP_F_UNARY_OP, MP_UNARY_OP_NOT, REG_ARG_1);
}
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
} else {
// the operands were only peeked, not popped; adjust the stack size by -1,
// the same net change as two pops and one push in the other cases
adjust_stack(emit, -1);
EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
"can't do binary op between '%q' and '%q'",
vtype_to_qstr(vtype_lhs), vtype_to_qstr(vtype_rhs));
}
}
#if MICROPY_PY_BUILTINS_SLICE
// Defined further down; emit_native_build delegates slice construction to it.
STATIC void emit_native_build_slice(emit_t *emit, mp_uint_t n_args);
#endif

// Emit code to build a tuple, list, map or set (or a slice, when enabled).
//
// n_args: passed to the runtime helper as its first argument
// kind:   one of the MP_EMIT_BUILD_xxx values; the runtime helper is selected
//         as MP_F_BUILD_TUPLE + kind
STATIC void emit_native_build(emit_t *emit, mp_uint_t n_args, int kind) {
    // for viper: call runtime, with types of args
    //   if wrapped in byte_array, or something, allocates memory and fills it

    // The helper index arithmetic below relies on these offsets lining up
    MP_STATIC_ASSERT(MP_F_BUILD_TUPLE + MP_EMIT_BUILD_TUPLE == MP_F_BUILD_TUPLE);
    MP_STATIC_ASSERT(MP_F_BUILD_TUPLE + MP_EMIT_BUILD_LIST == MP_F_BUILD_LIST);
    MP_STATIC_ASSERT(MP_F_BUILD_TUPLE + MP_EMIT_BUILD_MAP == MP_F_BUILD_MAP);
    MP_STATIC_ASSERT(MP_F_BUILD_TUPLE + MP_EMIT_BUILD_SET == MP_F_BUILD_SET);

    #if MICROPY_PY_BUILTINS_SLICE
    if (kind == MP_EMIT_BUILD_SLICE) {
        emit_native_build_slice(emit, n_args);
        return;
    }
    #endif

    emit_native_pre(emit);

    // Tuples, lists and sets take their items from the stack; maps do not
    const bool takes_items = kind == MP_EMIT_BUILD_TUPLE
        || kind == MP_EMIT_BUILD_LIST
        || kind == MP_EMIT_BUILD_SET;
    if (takes_items) {
        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items
    }

    emit_call_with_imm_arg(emit, MP_F_BUILD_TUPLE + kind, n_args, REG_ARG_1);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); // new tuple/list/map/set
}
// Emit a call to MP_F_STORE_MAP using the top three stack entries
// (key on top, then value, then map) and push the returned map.
STATIC void emit_native_store_map(emit_t *emit) {
    vtype_kind_t key_vtype;
    vtype_kind_t value_vtype;
    vtype_kind_t map_vtype;

    // Pop order is key, value, map; they land in arg 2, arg 3 and arg 1
    emit_pre_pop_reg_reg_reg(emit, &key_vtype, REG_ARG_2, &value_vtype, REG_ARG_3, &map_vtype, REG_ARG_1);
    assert(key_vtype == VTYPE_PYOBJ);
    assert(value_vtype == VTYPE_PYOBJ);
    assert(map_vtype == VTYPE_PYOBJ);

    emit_call(emit, MP_F_STORE_MAP);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); // map
}
#if MICROPY_PY_BUILTINS_SLICE
// Emit a call to MP_F_NEW_SLICE and push the resulting object.
//
// n_args: 2 (start, stop on the stack; step is None) or
//         3 (start, stop, step on the stack).  All must be Python objects.
STATIC void emit_native_build_slice(emit_t *emit, mp_uint_t n_args) {
    // n_args is an mp_uint_t, so print it with UINT_FMT rather than %d
    DEBUG_printf("build_slice " UINT_FMT "\n", n_args);
    if (n_args == 2) {
        vtype_kind_t vtype_start, vtype_stop;
        emit_pre_pop_reg_reg(emit, &vtype_stop, REG_ARG_2, &vtype_start, REG_ARG_1); // arg1 = start, arg2 = stop
        assert(vtype_start == VTYPE_PYOBJ);
        assert(vtype_stop == VTYPE_PYOBJ);
        emit_native_mov_reg_const(emit, REG_ARG_3, MP_F_CONST_NONE_OBJ); // arg3 = step
    } else {
        assert(n_args == 3);
        vtype_kind_t vtype_start, vtype_stop, vtype_step;
        emit_pre_pop_reg_reg_reg(emit, &vtype_step, REG_ARG_3, &vtype_stop, REG_ARG_2, &vtype_start, REG_ARG_1); // arg1 = start, arg2 = stop, arg3 = step
        assert(vtype_start == VTYPE_PYOBJ);
        assert(vtype_stop == VTYPE_PYOBJ);
        assert(vtype_step == VTYPE_PYOBJ);
    }
    emit_call(emit, MP_F_NEW_SLICE);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
#endif
// Emit the store of one comprehension result into its collection.
//
// kind:             SCOPE_LIST_COMP, SCOPE_SET_COMP (when sets are enabled),
//                   or anything else, which is treated as a dict comprehension
// collection_index: stack position passed to emit_access_stack to load the
//                   collection into arg 1
STATIC void emit_native_store_comp(emit_t *emit, scope_kind_t kind, mp_uint_t collection_index) {
    mp_fun_kind_t store_fun;

    if (kind == SCOPE_LIST_COMP) {
        vtype_kind_t item_vtype;
        emit_pre_pop_reg(emit, &item_vtype, REG_ARG_2);
        assert(item_vtype == VTYPE_PYOBJ);
        store_fun = MP_F_LIST_APPEND;
    #if MICROPY_PY_BUILTINS_SET
    } else if (kind == SCOPE_SET_COMP) {
        vtype_kind_t item_vtype;
        emit_pre_pop_reg(emit, &item_vtype, REG_ARG_2);
        assert(item_vtype == VTYPE_PYOBJ);
        store_fun = MP_F_STORE_SET;
    #endif
    } else {
        // SCOPE_DICT_COMP
        vtype_kind_t key_vtype;
        vtype_kind_t value_vtype;
        emit_pre_pop_reg_reg(emit, &key_vtype, REG_ARG_2, &value_vtype, REG_ARG_3);
        assert(key_vtype == VTYPE_PYOBJ);
        assert(value_vtype == VTYPE_PYOBJ);
        store_fun = MP_F_STORE_MAP;
    }

    // The collection stays on the stack; it is only read into arg 1
    vtype_kind_t collection_vtype;
    emit_access_stack(emit, collection_index, &collection_vtype, REG_ARG_1);
    assert(collection_vtype == VTYPE_PYOBJ);

    emit_call(emit, store_fun);
    emit_post(emit);
}
// Emit a call to MP_F_UNPACK_SEQUENCE for the Python object on top of the
// stack, with n_args stack slots reserved as the destination.
STATIC void emit_native_unpack_sequence(emit_t *emit, mp_uint_t n_args) {
    // n_args is an mp_uint_t, so print it with UINT_FMT rather than %d
    DEBUG_printf("unpack_sequence " UINT_FMT "\n", n_args);
    vtype_kind_t vtype_base;
    emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = seq
    assert(vtype_base == VTYPE_PYOBJ);
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, n_args); // arg3 = dest ptr
    emit_call_with_imm_arg(emit, MP_F_UNPACK_SEQUENCE, n_args, REG_ARG_2); // arg2 = n_args
}
// Emit a call to MP_F_UNPACK_EX for the Python object on top of the stack,
// with n_left + n_right + 1 stack slots reserved as the destination.
STATIC void emit_native_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right) {
    // both counts are mp_uint_t, so print them with UINT_FMT rather than %d
    DEBUG_printf("unpack_ex " UINT_FMT " " UINT_FMT "\n", n_left, n_right);
    vtype_kind_t vtype_base;
    emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = seq
    assert(vtype_base == VTYPE_PYOBJ);
    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, n_left + n_right + 1); // arg3 = dest ptr
    emit_call_with_imm_arg(emit, MP_F_UNPACK_EX, n_left | (n_right << 8), REG_ARG_2); // arg2 = n_left | (n_right << 8)
}
// Emit a call to MP_F_MAKE_FUNCTION_FROM_RAW_CODE for scope->raw_code and
// push the resulting object.
//
// When there are no defaults, args 2 and 3 are MP_OBJ_NULL; otherwise they
// are popped from the stack (dict of keyword defaults on top, tuple of
// positional defaults below it).
STATIC void emit_native_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
    // call runtime, with type info for args, or don't support dict/default params, or only support Python objects for them
    emit_native_pre(emit);

    if (n_pos_defaults != 0 || n_kw_defaults != 0) {
        vtype_kind_t defaults_tuple_vtype;
        vtype_kind_t defaults_dict_vtype;
        emit_pre_pop_reg_reg(emit, &defaults_dict_vtype, REG_ARG_3, &defaults_tuple_vtype, REG_ARG_2);
        assert(defaults_tuple_vtype == VTYPE_PYOBJ);
        assert(defaults_dict_vtype == VTYPE_PYOBJ);
        need_reg_all(emit);
    } else {
        need_reg_all(emit);
        ASM_MOV_REG_IMM(emit->as, REG_ARG_2, (mp_uint_t)MP_OBJ_NULL);
        ASM_MOV_REG_IMM(emit->as, REG_ARG_3, (mp_uint_t)MP_OBJ_NULL);
    }

    emit_load_reg_with_raw_code(emit, REG_ARG_1, scope->raw_code);
    ASM_CALL_IND(emit->as, MP_F_MAKE_FUNCTION_FROM_RAW_CODE);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit a call to MP_F_MAKE_CLOSURE_FROM_RAW_CODE for scope->raw_code and push
// the resulting object.
//
// Arg 3 points at the stack items consumed by the call and arg 2 carries
// n_closed_over.  When any defaults are present, two extra stack items are
// consumed and bit 0x100 is set in arg 2.
STATIC void emit_native_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
    emit_native_pre(emit);

    mp_uint_t n_stack_items = n_closed_over;
    mp_uint_t closure_arg = n_closed_over;
    if (n_pos_defaults != 0 || n_kw_defaults != 0) {
        n_stack_items = n_closed_over + 2;
        closure_arg = 0x100 | n_closed_over;
    }

    emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, n_stack_items);
    ASM_MOV_REG_IMM(emit->as, REG_ARG_2, closure_arg);

    emit_load_reg_with_raw_code(emit, REG_ARG_1, scope->raw_code);
    ASM_CALL_IND(emit->as, MP_F_MAKE_CLOSURE_FROM_RAW_CODE);
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
}
// Emit a function call.
//
// The callee sits on the stack below n_positional positional arguments and
// n_keyword (key, value) pairs.  Two kinds of callee are handled:
//  - VTYPE_BUILTIN_CAST: a viper cast of exactly one positional argument;
//    Python objects are converted via MP_F_CONVERT_OBJ_TO_NATIVE, native
//    values just have their vtype replaced
//  - VTYPE_PYOBJ: a runtime call, via MP_F_CALL_METHOD_N_KW_VAR when
//    star_flags is non-zero, else via MP_F_NATIVE_CALL_FUNCTION_N_KW
STATIC void emit_native_call_function(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags) {
    DEBUG_printf("call_function(n_pos=" UINT_FMT ", n_kw=" UINT_FMT ", star_flags=" UINT_FMT ")\n", n_positional, n_keyword, star_flags);

    // TODO: in viper mode, call special runtime routine with type info for args,
    // and wanted type info for return, to remove need for boxing/unboxing

    emit_native_pre(emit);
    vtype_kind_t vtype_fun = peek_vtype(emit, n_positional + 2 * n_keyword);
    if (vtype_fun == VTYPE_BUILTIN_CAST) {
        // casting operator
        assert(n_positional == 1 && n_keyword == 0);
        assert(!star_flags);
        // the target type of the cast is stored as the immediate of the callee entry
        vtype_kind_t vtype_cast = peek_stack(emit, 1)->data.u_imm;
        // report the cast target (vtype_fun is always VTYPE_BUILTIN_CAST here)
        DEBUG_printf(" cast to %d\n", (int)vtype_cast);
        switch (peek_vtype(emit, 0)) {
            case VTYPE_PYOBJ: {
                vtype_kind_t vtype;
                emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
                emit_pre_pop_discard(emit);
                emit_call_with_imm_arg(emit, MP_F_CONVERT_OBJ_TO_NATIVE, vtype_cast, REG_ARG_2); // arg2 = type
                emit_post_push_reg(emit, vtype_cast, REG_RET);
                break;
            }
            case VTYPE_BOOL:
            case VTYPE_INT:
            case VTYPE_UINT:
            case VTYPE_PTR:
            case VTYPE_PTR8:
            case VTYPE_PTR16:
            case VTYPE_PTR32:
            case VTYPE_PTR_NONE:
                // native value: drop the callee entry and relabel the value
                emit_fold_stack_top(emit, REG_ARG_1);
                emit_post_top_set_vtype(emit, vtype_cast);
                break;
            default:
                // this can happen when casting a cast: int(int)
                mp_raise_NotImplementedError("casting");
        }
    } else {
        assert(vtype_fun == VTYPE_PYOBJ);
        if (star_flags) {
            emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, n_positional + 2 * n_keyword + 3); // pointer to args
            emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW_VAR, 0, REG_ARG_1, n_positional | (n_keyword << 8), REG_ARG_2);
            emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
        } else {
            if (n_positional != 0 || n_keyword != 0) {
                emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, n_positional + 2 * n_keyword); // pointer to args
            }
            emit_pre_pop_reg(emit, &vtype_fun, REG_ARG_1); // the function
            emit_call_with_imm_arg(emit, MP_F_NATIVE_CALL_FUNCTION_N_KW, n_positional | (n_keyword << 8), REG_ARG_2);
            emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
        }
    }
}
// Emit code for a method call.  REG_ARG_3 is loaded with a pointer to the
// stack items being consumed and the matching runtime helper is called; the
// result in REG_RET is pushed as a VTYPE_PYOBJ.
//  - star_flags != 0: MP_F_CALL_METHOD_N_KW_VAR is called with 1 in REG_ARG_1
//    and the packed counts n_positional | (n_keyword << 8) in REG_ARG_2.
//  - star_flags == 0: MP_F_CALL_METHOD_N_KW is called with n_positional in
//    REG_ARG_1 and n_keyword in REG_ARG_2.
STATIC void emit_native_call_method(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_keyword, mp_uint_t star_flags) {
    // Run the common per-operation prologue on both paths.  Previously the
    // star_flags path skipped it, unlike every other call emitter here.
    emit_native_pre(emit);

    if (star_flags) {
        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, n_positional + 2 * n_keyword + 4); // pointer to args
        emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW_VAR, 1, REG_ARG_1, n_positional | (n_keyword << 8), REG_ARG_2);
        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
    } else {
        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, 2 + n_positional + 2 * n_keyword); // pointer to items, including meth and self
        emit_call_with_2_imm_args(emit, MP_F_CALL_METHOD_N_KW, n_positional, REG_ARG_1, n_keyword, REG_ARG_2);
        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
    }
}
// Emit code for a return statement.
//  - In a generator scope the stack pointer to the return value is saved in
//    the code state, MP_VM_RETURN_NORMAL is stored in the return-value slot,
//    and control unwinds to the exit label.
//  - In viper mode the top of stack is checked against the declared return
//    type; a native value is converted with MP_F_CONVERT_NATIVE_TO_OBJ.
//  - Otherwise the top of stack (a Python object) is popped into REG_RET.
// In all cases the emitter records that the last emitted item was a return.
STATIC void emit_native_return_value(emit_t *emit) {
    DEBUG_printf("return_value\n");

    if (emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR) {
        // Save pointer to current stack position for caller to access return value
        emit_get_stack_pointer_to_reg_for_pop(emit, REG_TEMP0, 1);
        emit_native_mov_state_reg(emit, offsetof(mp_code_state_t, sp) / sizeof(uintptr_t), REG_TEMP0);

        // Put return type in return value slot
        ASM_MOV_REG_IMM(emit->as, REG_TEMP0, MP_VM_RETURN_NORMAL);
        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_RET_VAL(emit), REG_TEMP0);

        // Do the unwinding jump to get to the return handler
        emit_native_unwind_jump(emit, emit->exit_label, emit->exc_stack_size);
        emit->last_emit_was_return_value = true;
        return;
    }

    if (emit->do_viper_types) {
        // The declared viper return type is stored in the upper scope flags
        vtype_kind_t return_vtype = emit->scope->scope_flags >> MP_SCOPE_FLAG_VIPERRET_POS;
        if (peek_vtype(emit, 0) == VTYPE_PTR_NONE) {
            // Returning None: either the None object, or 0 for a native type
            emit_pre_pop_discard(emit);
            if (return_vtype == VTYPE_PYOBJ) {
                emit_native_mov_reg_const(emit, REG_RET, MP_F_CONST_NONE_OBJ);
            } else {
                ASM_MOV_REG_IMM(emit->as, REG_ARG_1, 0);
            }
        } else {
            // A native value goes in REG_ARG_1 so it is the first argument
            // of the conversion call below; an object goes straight in REG_RET
            vtype_kind_t vtype;
            emit_pre_pop_reg(emit, &vtype, return_vtype == VTYPE_PYOBJ ? REG_RET : REG_ARG_1);
            if (vtype != return_vtype) {
                EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                    "return expected '%q' but got '%q'",
                    vtype_to_qstr(return_vtype), vtype_to_qstr(vtype));
            }
        }
        if (return_vtype != VTYPE_PYOBJ) {
            // arg2 = native type to convert from
            emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, return_vtype, REG_ARG_2);
        }
    } else {
        vtype_kind_t vtype;
        emit_pre_pop_reg(emit, &vtype, REG_RET);
        assert(vtype == VTYPE_PYOBJ);
    }

    if (NEED_GLOBAL_EXC_HANDLER(emit)) {
        // Save return value for the global exception handler to use
        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_RET_VAL(emit), REG_RET);
    }

    emit_native_unwind_jump(emit, emit->exit_label, emit->exc_stack_size);
    emit->last_emit_was_return_value = true;
}
// Emit code for a raise statement with exactly one argument: the object on
// top of the stack is loaded into REG_ARG_1 and MP_F_NATIVE_RAISE is called.
// A viper type error is reported if that item is not a Python object.
STATIC void emit_native_raise_varargs(emit_t *emit, mp_uint_t n_args) {
    // Only the single-argument form is handled here
    (void)n_args;
    assert(n_args == 1);

    // arg1 = object to raise
    vtype_kind_t raised_vtype;
    emit_pre_pop_reg(emit, &raised_vtype, REG_ARG_1);

    if (raised_vtype != VTYPE_PYOBJ) {
        EMIT_NATIVE_VIPER_TYPE_ERROR(emit, "must raise an object");
    }

    // TODO probably make this 1 call to the runtime (which could even call convert, native_raise(obj, type))
    emit_call(emit, MP_F_NATIVE_RAISE);
}
// Emit code for a yield (kind == MP_EMIT_YIELD_VALUE) or a yield-from
// (any other kind, i.e. MP_EMIT_YIELD_FROM).  Not supported in viper mode.
// The scope is flagged as a generator.  The emitted code saves the stack
// pointer and a re-entry PC in the state, stores MP_VM_RETURN_YIELD in the
// return-value slot and jumps to the exit label.  On re-entry it restores the
// innermost active exception handler and then either checks for an injected
// value (yield) or calls MP_F_NATIVE_YIELD_FROM in a loop (yield from).
STATIC void emit_native_yield(emit_t *emit, int kind) {
    // Note: 1 (yield) or 3 (yield from) labels are reserved for this function, starting at *emit->label_slot

    if (emit->do_viper_types) {
        mp_raise_NotImplementedError("native yield");
    }
    emit->scope->scope_flags |= MP_SCOPE_FLAG_GENERATOR;

    need_stack_settled(emit);

    if (kind == MP_EMIT_YIELD_FROM) {

        // Top of yield-from loop, conceptually implementing:
        //     for item in generator:
        //         yield item

        // Jump to start of loop
        emit_native_jump(emit, *emit->label_slot + 2);

        // Label for top of loop
        emit_native_label_assign(emit, *emit->label_slot + 1);
    }

    // Save pointer to current stack position for caller to access yielded value
    emit_get_stack_pointer_to_reg_for_pop(emit, REG_TEMP0, 1);
    emit_native_mov_state_reg(emit, offsetof(mp_code_state_t, sp) / sizeof(uintptr_t), REG_TEMP0);

    // Put return type in return value slot
    ASM_MOV_REG_IMM(emit->as, REG_TEMP0, MP_VM_RETURN_YIELD);
    ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_RET_VAL(emit), REG_TEMP0);

    // Save re-entry PC
    ASM_MOV_REG_PCREL(emit->as, REG_TEMP0, *emit->label_slot);
    emit_native_mov_state_reg(emit, LOCAL_IDX_GEN_PC(emit), REG_TEMP0);

    // Jump to exit handler
    ASM_JUMP(emit->as, emit->exit_label);

    // Label re-entry point
    mp_asm_base_label_assign(&emit->as->base, *emit->label_slot);

    // Re-open any active exception handler
    if (emit->exc_stack_size > 0) {
        // Find innermost active exception handler, to restore as current handler
        exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size - 1];
        for (; e >= emit->exc_stack; --e) {
            if (e->is_active) {
                // Found active handler, get its PC
                ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
                ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
                // Stop at the first (innermost) match.  Continuing the scan
                // would emit further stores for outer handlers, and the
                // outermost one would end up as the current handler.
                break;
            }
        }
    }

    emit_native_adjust_stack_size(emit, 1); // send_value

    if (kind == MP_EMIT_YIELD_VALUE) {
        // Check LOCAL_IDX_EXC_VAL for any injected value
        ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
        emit_call(emit, MP_F_NATIVE_RAISE);
    } else {
        // Label loop entry
        emit_native_label_assign(emit, *emit->label_slot + 2);

        // Get the next item from the delegate generator
        vtype_kind_t vtype;
        emit_pre_pop_reg(emit, &vtype, REG_ARG_2); // send_value
        emit_access_stack(emit, 1, &vtype, REG_ARG_1); // generator
        ASM_MOV_REG_LOCAL(emit->as, REG_ARG_3, LOCAL_IDX_EXC_VAL(emit)); // throw_value
        emit_post_push_reg(emit, VTYPE_PYOBJ, REG_ARG_3);
        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, 1); // ret_value
        emit_call(emit, MP_F_NATIVE_YIELD_FROM);

        // If returned non-zero then generator continues
        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, *emit->label_slot + 1, true);

        // Pop exhausted gen, replace with ret_value
        emit_native_adjust_stack_size(emit, 1); // ret_value
        emit_fold_stack_top(emit, REG_ARG_1);
    }
}
// Emit code for the start of an except handler: the current exception
// handler is left, then the value stored in the LOCAL_IDX_EXC_VAL slot is
// loaded and pushed on the stack as a Python object.
STATIC void emit_native_start_except_handler(emit_t *emit) {
    // Protected block has finished so leave the current exception handler
    emit_native_leave_exc_stack(emit, true);

    // Get and push nlr_buf.ret_val
    ASM_MOV_REG_LOCAL(emit->as, REG_TEMP0, LOCAL_IDX_EXC_VAL(emit));
    emit_post_push_reg(emit, VTYPE_PYOBJ, REG_TEMP0);
}
// Emit code for the end of an except handler: drops one item (the exception)
// from the emitter's stack bookkeeping.
STATIC void emit_native_end_except_handler(emit_t *emit) {
    adjust_stack(emit, -1); // pop the exception (end_finally didn't use it)
}
// Table of the native emitter's entry points, exported under the name
// produced by EXPORT_FUN(method_table).
// NOTE: this is a positional initializer, so the order of the entries (and of
// the three brace-enclosed local/global groups) must match the member order
// of emit_method_table_t, which is declared outside this file chunk.
const emit_method_table_t EXPORT_FUN(method_table) = {
// Pass management and bookkeeping
emit_native_start_pass,
emit_native_end_pass,
emit_native_last_emit_was_return_value,
emit_native_adjust_stack_size,
emit_native_set_source_line,
// Load of an identifier: local, global
{
emit_native_load_local,
emit_native_load_global,
},
// Store to an identifier: local, global
{
emit_native_store_local,
emit_native_store_global,
},
// Delete of an identifier: local, global
{
emit_native_delete_local,
emit_native_delete_global,
},
// Remaining per-operation emitters
emit_native_label_assign,
emit_native_import,
emit_native_load_const_tok,
emit_native_load_const_small_int,
emit_native_load_const_str,
emit_native_load_const_obj,
emit_native_load_null,
emit_native_load_method,
emit_native_load_build_class,
emit_native_subscr,
emit_native_attr,
emit_native_dup_top,
emit_native_dup_top_two,
emit_native_pop_top,
emit_native_rot_two,
emit_native_rot_three,
emit_native_jump,
emit_native_pop_jump_if,
emit_native_jump_if_or_pop,
emit_native_unwind_jump,
emit_native_setup_block,
emit_native_with_cleanup,
emit_native_end_finally,
emit_native_get_iter,
emit_native_for_iter,
emit_native_for_iter_end,
emit_native_pop_except_jump,
emit_native_unary_op,
emit_native_binary_op,
emit_native_build,
emit_native_store_map,
emit_native_store_comp,
emit_native_unpack_sequence,
emit_native_unpack_ex,
emit_native_make_function,
emit_native_make_closure,
emit_native_call_function,
emit_native_call_method,
emit_native_return_value,
emit_native_raise_varargs,
emit_native_yield,
emit_native_start_except_handler,
emit_native_end_except_handler,
};
#endif