From eb9221cbaea02d214029255d519ff38385d8ff83 Mon Sep 17 00:00:00 2001
From: Yen-Fu Chen
Date: Mon, 11 Mar 2024 20:01:34 +0800
Subject: [PATCH] Avoid generating machine code repeatedly for the same basic block

Originally, every execution path had its own copy of machine code, so a
basic block that had already been translated on one execution path was
translated again on every other path that reached it. To use the code
cache space more efficiently, we make the table that records translated
blocks global, allowing every execution path to share the same machine
code. With this change, each basic block has exactly one copy of machine
code in the code cache.

For example, the code cache previously stored 1,926,471 bytes (1.9 MB)
of machine code for AES. After this change, it stores only 182,730 bytes
(0.18 MB).
---
 src/jit.c | 50 +++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/src/jit.c b/src/jit.c
index ae815b7da..f5a0ea78f 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -56,7 +56,8 @@
 #define JIT_OP_MOD_REG (JIT_CLS_ALU | JIT_SRC_REG | 0x90)
 
 #define STACK_SIZE 512
-#define MAX_INSNS 1024
+#define MAX_JUMPS 1024
+#define MAX_INSNS 65536
 #if defined(__x86_64__)
 #define JUMP_LOC jump_loc + 2
 /* Special values for target_pc in struct jump */
@@ -353,7 +354,7 @@ static inline void emit_jump_target_address(struct jit_state *state,
                                             int32_t target_pc)
 {
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_INSNS);
+    assert(state->n_jumps < MAX_JUMPS);
     jump->offset_loc = state->offset;
     jump->target_pc = target_pc;
     emit4(state, 0);
@@ -555,7 +556,7 @@ static inline void emit_jump_target_offset(struct jit_state *state,
                                            uint32_t jump_state_offset)
 {
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_INSNS);
+    assert(state->n_jumps < MAX_JUMPS);
     jump->offset_loc = jump_loc;
     jump->target_offset = jump_state_offset;
 }
@@ -933,7 +934,7 @@ static inline void emit_jmp(struct jit_state *state, uint32_t target_pc)
     emit_jump_target_address(state, target_pc);
 #elif defined(__aarch64__)
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_INSNS);
+    assert(state->n_jumps < MAX_JUMPS);
     jump->offset_loc = state->offset;
     jump->target_pc = target_pc;
     emit_a64(state, UBR_B);
@@ -1531,29 +1532,30 @@ static void resolve_jumps(struct jit_state *state)
     }
 }
 
+set_t set;
+
 static void translate_chained_block(struct jit_state *state,
                                     riscv_t *rv,
-                                    block_t *block,
-                                    set_t *set)
+                                    block_t *block)
 {
-    if (set_has(set, block->pc_start))
+    if (set_has(&set, block->pc_start))
         return;
 
-    set_add(set, block->pc_start);
+    set_add(&set, block->pc_start);
     offset_map_insert(state, block->pc_start);
     translate(state, rv, block);
     rv_insn_t *ir = block->ir_tail;
-    if (ir->branch_untaken && !set_has(set, ir->branch_untaken->pc)) {
+    if (ir->branch_untaken && !set_has(&set, ir->branch_untaken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_untaken->pc, false);
         if (block1->translatable)
-            translate_chained_block(state, rv, block1, set);
+            translate_chained_block(state, rv, block1);
     }
-    if (ir->branch_taken && !set_has(set, ir->branch_taken->pc)) {
+    if (ir->branch_taken && !set_has(&set, ir->branch_taken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_taken->pc, false);
         if (block1->translatable)
-            translate_chained_block(state, rv, block1, set);
+            translate_chained_block(state, rv, block1);
     }
     branch_history_table_t *bt = ir->branch_table;
     if (bt) {
@@ -1565,11 +1567,11 @@ static void translate_chained_block(struct jit_state *state,
                 max_idx = i;
         }
         if (bt->PC[max_idx] && bt->times[max_idx] >= 256 &&
-            !set_has(set, bt->PC[max_idx])) {
+            !set_has(&set, bt->PC[max_idx])) {
             block_t *block1 =
                 cache_get(rv->block_cache, bt->PC[max_idx], false);
             if (block1 && block1->translatable)
-                translate_chained_block(state, rv, block1, set);
+                translate_chained_block(state, rv, block1);
         }
     }
 }
@@ -1577,14 +1579,18 @@
 uint32_t jit_translate(riscv_t *rv, block_t *block)
 {
     struct jit_state *state = rv->jit_state;
-    memset(state->offset_map, 0, MAX_INSNS * sizeof(struct offset_map));
-    memset(state->jumps, 0, MAX_INSNS * sizeof(struct jump));
-    state->n_insn = 0;
+    if (set_has(&set, block->pc_start)) {
+        for (int i = 0; i < state->n_insn; i++) {
+            if (block->pc_start == state->offset_map[i].pc) {
+                return state->offset_map[i].offset;
+            }
+        }
+        __UNREACHABLE;
+    }
+    memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
     state->n_jumps = 0;
     uint32_t entry_loc = state->offset;
-    set_t set;
-    set_reset(&set);
-    translate_chained_block(&(*state), rv, block, &set);
+    translate_chained_block(state, rv, block);
     if (state->offset == state->size) {
         printf("Target buffer too small\n");
         goto out;
@@ -1607,10 +1613,12 @@ struct jit_state *jit_state_init(size_t size)
 #endif
                       ,
                       -1, 0);
+    state->n_insn = 0;
 
     assert(state->buf != MAP_FAILED);
+    set_reset(&set);
     prepare_translate(state);
     state->offset_map = calloc(MAX_INSNS, sizeof(struct offset_map));
-    state->jumps = calloc(MAX_INSNS, sizeof(struct jump));
+    state->jumps = calloc(MAX_JUMPS, sizeof(struct jump));
     return state;
 }
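
Note: the standalone sketch below is not part of the patch; it only
illustrates the sharing scheme the patch implements. The set_t here is a
simplified linear-array stand-in (not the project's real set_t), and
translate_block(), code_size, and cache_offset are hypothetical
simplifications of jit_translate() and the code cache cursor.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_INSNS 65536

/* Simplified stand-in for set_t: a linear array is enough to
 * demonstrate the idea, although the real implementation is faster. */
typedef struct {
    uint32_t pc[MAX_INSNS];
    int n;
} set_t;

static set_t set; /* global, so every execution path shares it */

static bool set_has(const set_t *s, uint32_t pc)
{
    for (int i = 0; i < s->n; i++)
        if (s->pc[i] == pc)
            return true;
    return false;
}

static void set_add(set_t *s, uint32_t pc)
{
    s->pc[s->n++] = pc;
}

struct offset_map {
    uint32_t pc;     /* guest PC at the start of the basic block */
    uint32_t offset; /* where its machine code begins in the code cache */
};

static struct offset_map offset_map[MAX_INSNS];
static int n_insn;
static uint32_t cache_offset; /* next free byte of the code cache */

/* Hypothetical stand-in for jit_translate(): if the block was already
 * translated on any earlier path, return the recorded offset instead
 * of emitting a second copy of its machine code. */
static uint32_t translate_block(uint32_t pc, uint32_t code_size)
{
    if (set_has(&set, pc)) {
        for (int i = 0; i < n_insn; i++)
            if (offset_map[i].pc == pc)
                return offset_map[i].offset;
        assert(!"translated block missing from offset map");
    }
    uint32_t entry = cache_offset;
    set_add(&set, pc);
    offset_map[n_insn].pc = pc;
    offset_map[n_insn].offset = entry;
    n_insn++;
    cache_offset += code_size; /* a real JIT would emit code here */
    return entry;
}

int main(void)
{
    /* Two execution paths reach the same basic block at PC 0x100c. */
    uint32_t first = translate_block(0x100c, 40);
    uint32_t second = translate_block(0x100c, 40);
    printf("first=%u second=%u bytes used=%u\n", first, second, cache_offset);
    return 0; /* prints first=0 second=0 bytes used=40: one copy only */
}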