Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Macro operation fusion: sequential shift instructions #237

Merged
merged 1 commit into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ enum {
/* Compact record describing one instruction inside a fused IR.
 *
 * An array of these is allocated for a fused instruction and filled by
 * memcpy()-ing the first sizeof(opcode_fuse_t) bytes of each original
 * rv_insn_t into it. The fused shift handler later casts an element back to
 * 'const rv_insn_t *' and reads imm, rd, rs1 and opcode through that pointer.
 *
 * NOTE(review): both uses only work if the field order and types here match
 * the leading fields of rv_insn_t (not visible in this hunk) -- confirm
 * before adding, removing or reordering members.
 */
typedef struct {
/* immediate operand; the shift handlers use only its lower 5 bits */
int32_t imm;
/* register numbers; rd and rs1 are used as indices into the register file */
uint8_t rd, rs1, rs2;
/* original opcode of the instruction, used to select the operation when the
 * record is executed as part of a fused sequence
 */
uint8_t opcode;
} opcode_fuse_t;

typedef struct rv_insn {
Expand Down
48 changes: 43 additions & 5 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ static uint32_t last_pc = 0;
_(fuse3) \
_(fuse4) \
_(fuse5) \
_(fuse6)
_(fuse6) \
_(fuse7)

enum {
rv_insn_fuse0 = N_RV_INSNS,
Expand Down Expand Up @@ -500,6 +501,20 @@ static bool do_fuse6(riscv_t *rv, const rv_insn_t *ir)
MUST_TAIL return next->impl(rv, next);
}

/* Fused run of shift-immediate instructions: execute the ir->imm2 records
 * stored in ir->fuse back to back, then continue with the next IR.
 * Returns true when there is no next IR to chain to; otherwise the result of
 * the next handler is returned through a tail call.
 */
static bool do_fuse7(riscv_t *rv, const rv_insn_t *ir)
{
    opcode_fuse_t *shifts = ir->fuse;

    /* the cycle counter advances by the number of fused instructions */
    rv->csr_cycle += ir->imm2;

    int idx = 0;
    while (idx < ir->imm2) {
        /* each record is handed to shift_func viewed as an rv_insn_t */
        shift_func(rv, (const rv_insn_t *) (shifts + idx));
        idx++;
    }

    /* step PC past every instruction covered by this fused IR */
    rv->PC += ir->imm2 * ir->insn_len;

    if (unlikely(RVOP_NO_NEXT(ir)))
        return true;

    const rv_insn_t *following = ir->next;
    MUST_TAIL return following->impl(rv, following);
}

/* clang-format off */
static const void *dispatch_table[] = {
/* RV32 instructions */
Expand Down Expand Up @@ -608,9 +623,8 @@ static void block_translate(riscv_t *rv, block_map_t *map, block_t *block)
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t)); \
ir->impl = dispatch_table[ir->opcode]; \
next_ir = tmp_ir; \
for (int j = 1; j < count; j++, next_ir = next_ir->next) { \
for (int j = 1; j < count; j++, next_ir = next_ir->next) \
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
} \
remove_next_nth_ir(rv, ir, block, count - 1); \
}

Expand Down Expand Up @@ -986,9 +1000,8 @@ static void match_pattern(riscv_t *rv, block_t *block)
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));
ir->impl = dispatch_table[ir->opcode];
next_ir = ir->next;
for (int j = 1; j < count; j++, next_ir = next_ir->next) {
for (int j = 1; j < count; j++, next_ir = next_ir->next)
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t));
}
remove_next_nth_ir(rv, ir, block, count - 1);
}
break;
Expand All @@ -1005,6 +1018,31 @@ static void match_pattern(riscv_t *rv, block_t *block)
break;
/* TODO: mixture of SW and LW */
/* TODO: reorder instructions to match pattern */
case rv_insn_slli:
count = 1;
next_ir = ir->next;
while (1) {
if (next_ir->opcode != rv_insn_slli &&
next_ir->opcode != rv_insn_srli &&
next_ir->opcode != rv_insn_srai)
break;
count++;
if (next_ir->tailcall)
break;
next_ir = next_ir->next;
}
if (count > 1) {
ir->fuse = malloc(count * sizeof(opcode_fuse_t));
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));
ir->opcode = rv_insn_fuse7;
ir->imm2 = count;
ir->impl = dispatch_table[ir->opcode];
next_ir = ir->next;
for (int j = 1; j < count; j++, next_ir = next_ir->next)
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t));
remove_next_nth_ir(rv, ir, block, count - 1);
}
break;
}
}
}
Expand Down
21 changes: 18 additions & 3 deletions src/rv32_template.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,20 +172,35 @@ RVOP(ori, { rv->X[ir->rd] = rv->X[ir->rs1] | ir->imm; })
*/
RVOP(andi, { rv->X[ir->rd] = rv->X[ir->rs1] & ir->imm; })

/* Execute one shift-immediate instruction (SLLI, SRLI or SRAI).
 *
 * The operation is selected by ir->opcode; the shift amount is the lower
 * 5 bits of ir->imm, the source is X[rs1] and the result is written to X[rd].
 * Any other opcode leaves the register file untouched.
 *
 * Shared by the individual slli/srli/srai handlers and by the fused
 * multi-shift handler.
 */
FORCE_INLINE void shift_func(riscv_t *rv, const rv_insn_t *ir)
{
    /* masking to 5 bits keeps the shift count in the range 0..31 */
    const uint32_t shamt = ir->imm & 0x1f;

    switch (ir->opcode) {
    case rv_insn_slli:
        rv->X[ir->rd] = rv->X[ir->rs1] << shamt;
        break;
    case rv_insn_srli:
        rv->X[ir->rd] = rv->X[ir->rs1] >> shamt;
        break;
    case rv_insn_srai:
        /* Right-shifting a negative signed value is implementation-defined
         * in C; this relies on the compiler performing an arithmetic
         * (sign-extending) shift.
         */
        rv->X[ir->rd] = ((int32_t) rv->X[ir->rs1]) >> shamt;
        break;
    default:
        /* not a shift-immediate opcode: nothing to do */
        break;
    }
}

/* SLLI performs logical left shift on the value in register rs1 by the shift
* amount held in the lower 5 bits of the immediate.
*/
RVOP(slli, { rv->X[ir->rd] = rv->X[ir->rs1] << (ir->imm & 0x1f); })
RVOP(slli, { shift_func(rv, ir); })

/* SRLI performs logical right shift on the value in register rs1 by the shift
* amount held in the lower 5 bits of the immediate.
*/
RVOP(srli, { rv->X[ir->rd] = rv->X[ir->rs1] >> (ir->imm & 0x1f); })
RVOP(srli, { shift_func(rv, ir); })

/* SRAI performs arithmetic right shift on the value in register rs1 by the
* shift amount held in the lower 5 bits of the immediate.
*/
RVOP(srai, { rv->X[ir->rd] = ((int32_t) rv->X[ir->rs1]) >> (ir->imm & 0x1f); })
RVOP(srai, { shift_func(rv, ir); })

/* ADD */
RVOP(add, {
Expand Down