Skip to content

Commit

Permalink
Avoid generating machine code repeatedly for the same basic block
Browse files Browse the repository at this point in the history
Originally, every execution path had its own machine code. Therefore,
even if some basic blocks had been traced in other execution paths, we
still generated a new copy of machine code. To use the code cache space
more efficiently, we modified the recorded table to be global, allowing
every execution path to share the machine code. This modification
ensures that every basic block has only one copy of machine code, thus
saving code cache space.

For example, the code cache previously stored 1,926,471 bytes (1.9 MB) of
machine code for AES. After the modification, the code cache stores only
182,730 bytes (0.18 MB) of machine code.
  • Loading branch information
qwe661234 committed Mar 11, 2024
1 parent 6877b22 commit eb9221c
Showing 1 changed file with 29 additions and 21 deletions.
50 changes: 29 additions & 21 deletions src/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
#define JIT_OP_MOD_REG (JIT_CLS_ALU | JIT_SRC_REG | 0x90)

#define STACK_SIZE 512
#define MAX_INSNS 1024
#define MAX_JUMPS 1024
#define MAX_INSNS 65536
#if defined(__x86_64__)
#define JUMP_LOC jump_loc + 2
/* Special values for target_pc in struct jump */
Expand Down Expand Up @@ -353,7 +354,7 @@ static inline void emit_jump_target_address(struct jit_state *state,
int32_t target_pc)
{
struct jump *jump = &state->jumps[state->n_jumps++];
assert(state->n_jumps < MAX_INSNS);
assert(state->n_jumps < MAX_JUMPS);
jump->offset_loc = state->offset;
jump->target_pc = target_pc;
emit4(state, 0);
Expand Down Expand Up @@ -555,7 +556,7 @@ static inline void emit_jump_target_offset(struct jit_state *state,
uint32_t jump_state_offset)
{
struct jump *jump = &state->jumps[state->n_jumps++];
assert(state->n_jumps < MAX_INSNS);
assert(state->n_jumps < MAX_JUMPS);
jump->offset_loc = jump_loc;
jump->target_offset = jump_state_offset;
}
Expand Down Expand Up @@ -933,7 +934,7 @@ static inline void emit_jmp(struct jit_state *state, uint32_t target_pc)
emit_jump_target_address(state, target_pc);
#elif defined(__aarch64__)
struct jump *jump = &state->jumps[state->n_jumps++];
assert(state->n_jumps < MAX_INSNS);
assert(state->n_jumps < MAX_JUMPS);
jump->offset_loc = state->offset;
jump->target_pc = target_pc;
emit_a64(state, UBR_B);
Expand Down Expand Up @@ -1531,29 +1532,30 @@ static void resolve_jumps(struct jit_state *state)
}
}

set_t set;

static void translate_chained_block(struct jit_state *state,
riscv_t *rv,
block_t *block,
set_t *set)
block_t *block)
{
if (set_has(set, block->pc_start))
if (set_has(&set, block->pc_start))
return;

set_add(set, block->pc_start);
set_add(&set, block->pc_start);
offset_map_insert(state, block->pc_start);
translate(state, rv, block);
rv_insn_t *ir = block->ir_tail;
if (ir->branch_untaken && !set_has(set, ir->branch_untaken->pc)) {
if (ir->branch_untaken && !set_has(&set, ir->branch_untaken->pc)) {
block_t *block1 =
cache_get(rv->block_cache, ir->branch_untaken->pc, false);
if (block1->translatable)
translate_chained_block(state, rv, block1, set);
translate_chained_block(state, rv, block1);
}
if (ir->branch_taken && !set_has(set, ir->branch_taken->pc)) {
if (ir->branch_taken && !set_has(&set, ir->branch_taken->pc)) {
block_t *block1 =
cache_get(rv->block_cache, ir->branch_taken->pc, false);
if (block1->translatable)
translate_chained_block(state, rv, block1, set);
translate_chained_block(state, rv, block1);
}
branch_history_table_t *bt = ir->branch_table;
if (bt) {
Expand All @@ -1565,26 +1567,30 @@ static void translate_chained_block(struct jit_state *state,
max_idx = i;
}
if (bt->PC[max_idx] && bt->times[max_idx] >= 256 &&
!set_has(set, bt->PC[max_idx])) {
!set_has(&set, bt->PC[max_idx])) {
block_t *block1 =
cache_get(rv->block_cache, bt->PC[max_idx], false);
if (block1 && block1->translatable)
translate_chained_block(state, rv, block1, set);
translate_chained_block(state, rv, block1);
}
}
}

uint32_t jit_translate(riscv_t *rv, block_t *block)
{
struct jit_state *state = rv->jit_state;
memset(state->offset_map, 0, MAX_INSNS * sizeof(struct offset_map));
memset(state->jumps, 0, MAX_INSNS * sizeof(struct jump));
state->n_insn = 0;
if (set_has(&set, block->pc_start)) {
for (int i = 0; i < state->n_insn; i++) {
if (block->pc_start == state->offset_map[i].pc) {
return state->offset_map[i].offset;
}
}
__UNREACHABLE;
}
memset(state->jumps, 0, 1024 * sizeof(struct jump));
state->n_jumps = 0;
uint32_t entry_loc = state->offset;
set_t set;
set_reset(&set);
translate_chained_block(&(*state), rv, block, &set);
translate_chained_block(&(*state), rv, block);
if (state->offset == state->size) {
printf("Target buffer too small\n");
goto out;
Expand All @@ -1607,10 +1613,12 @@ struct jit_state *jit_state_init(size_t size)
#endif
,
-1, 0);
state->n_insn = 0;
assert(state->buf != MAP_FAILED);
set_reset(&set);
prepare_translate(state);
state->offset_map = calloc(MAX_INSNS, sizeof(struct offset_map));
state->jumps = calloc(MAX_INSNS, sizeof(struct jump));
state->jumps = calloc(MAX_JUMPS, sizeof(struct jump));
return state;
}

Expand Down

0 comments on commit eb9221c

Please sign in to comment.