From c629f5dc2661dab0dfe74076ba5188b4d6a7c866 Mon Sep 17 00:00:00 2001
From: Stephen Dolan
Date: Wed, 2 Jul 2014 21:54:48 +0100
Subject: [PATCH] Move tail call optimisations to compile.c.

Needs more testing.
---
 compile.c | 21 +++++++++++++---
 execute.c | 72 -------------------------------------------------------
 2 files changed, 18 insertions(+), 75 deletions(-)

diff --git a/compile.c b/compile.c
index 597fa6f145..48527675b2 100644
--- a/compile.c
+++ b/compile.c
@@ -74,6 +74,7 @@ static inst* inst_new(opcode op) {
   i->subfn = gen_noop();
   i->arglist = gen_noop();
   i->source = UNKNOWN_LOCATION;
+  i->compiled = 0;
   return i;
 }
 
@@ -474,6 +475,12 @@ static int count_cfunctions(block b) {
 }
 
+static int returns_immediately(inst* i) {
+  while (i->op == JUMP)
+    i = i->imm.target;
+  return i->op == RET;
+}
+
 // Expands call instructions into a calling sequence
 static int expand_call_arglist(struct locfile* locations, block* b) {
   int errors = 0;
   block ret = gen_noop();
@@ -498,6 +505,12 @@ static int expand_call_arglist(struct locfile* locations, block* b) {
     case CLOSURE_CREATE:
     case CLOSURE_PARAM: {
       block callargs = gen_noop();
+      /* A call is a tail call if:
+           a) the next instruction is a RET or a chain of unconditional JUMPs
+              that ends in a RET, and
+           b) none of the closures -callee included- are in the current bytecode block. */
+      int is_tail_call = b->first && returns_immediately(b->first);
+      if (!curr->bound_by->compiled) is_tail_call = 0;
       for (inst* i; (i = block_take(&curr->arglist));) {
         assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
         block b = inst_block(i);
@@ -511,10 +524,12 @@ static int expand_call_arglist(struct locfile* locations, block* b) {
           block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
           break;
         }
+        if (!i->bound_by->compiled) is_tail_call = 0;
         actual_args++;
       }
       curr->imm.intval = actual_args;
       curr->arglist = callargs;
+      if (is_tail_call) curr->op = TAIL_CALL_JQ;
 
       if (curr->bound_by->op == CLOSURE_CREATE) {
         for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
@@ -559,13 +574,13 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
   int pos = 0;
   int var_frame_idx = 0;
   bc->nsubfunctions = 0;
-  errors += expand_call_arglist(locations, &b);
   b = BLOCK(b, gen_op_simple(RET));
+  errors += expand_call_arglist(locations, &b);
   jv localnames = jv_array();
   for (inst* curr = b.first; curr; curr = curr->next) {
     if (!curr->next) assert(curr == b.last);
     int length = opcode_describe(curr->op)->length;
-    if (curr->op == CALL_JQ) {
+    if (curr->op == CALL_JQ || curr->op == TAIL_CALL_JQ) {
       for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
         length += 2;
       }
@@ -639,7 +654,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
       assert(!curr->arglist.first);
       code[pos++] = (uint16_t)curr->imm.intval;
       code[pos++] = curr->bound_by->imm.intval;
-    } else if (curr->op == CALL_JQ) {
+    } else if (curr->op == CALL_JQ || curr->op == TAIL_CALL_JQ) {
       assert(curr->bound_by->op == CLOSURE_CREATE ||
              curr->bound_by->op == CLOSURE_PARAM);
       code[pos++] = (uint16_t)curr->imm.intval;
diff --git a/execute.c b/execute.c
index 860e46b790..f0700ba35f 100644
--- a/execute.c
+++ b/execute.c
@@ -796,76 +796,6 @@ void jq_teardown(jq_state **jq) {
   jv_mem_free(old_jq);
 }
 
-static int ret_follows(uint16_t *pc) {
-  if (*pc == RET)
-    return 1;
-  if (*pc++ != JUMP)
-    return 0;
-  return ret_follows(pc + *pc + 1); // FIXME, might be ironic
-}
-
-/*
- * Look for tail calls that can be optimized: tail calls with no
- * references left to the current frame.
- *
- * We're staring at this bytecode layout:
- *
- *   CALL_JQ
- *   <nclosures>
- *   <callee closure>     (2 units)
- *   <nclosures closures> (2 units each)
- *   <next instruction>
- *
- * A closure is:
- *
- *   <level>  (a relative frame count chased via the current frame's env)
- *   <index>  (an index of a subfunction or closure in that frame)
- *
- * We're looking for:
- *
- * a) the next instruction is a RET or a chain of unconditional JUMPs
- *    that ends in a RET, and
- *
- * b) none of the closures -callee included- have level == 0.
- */
-static uint16_t tail_call_analyze(uint16_t *pc) {
-  assert(*pc == CALL_JQ);
-  pc++;
-  // + 1 for the callee closure
-  for (uint16_t nclosures = *pc++ + 1; nclosures > 0; pc++, nclosures--) {
-    if (*pc++ == 0)
-      return CALL_JQ;
-  }
-  if (ret_follows(pc))
-    return TAIL_CALL_JQ;
-  return CALL_JQ;
-}
-
-static struct bytecode *optimize_code(struct bytecode *bc) {
-  uint16_t *pc = bc->code;
-  // FIXME: Don't mutate bc->code...
-  while (pc < bc->code + bc->codelen) {
-    switch (*pc) {
-    case CALL_JQ:
-      *pc = tail_call_analyze(pc);
-      break;
-
-    // Other bytecode optimizations here. A peephole optimizer would
-    // fit right in.
-    default: break;
-    }
-    pc += bytecode_operation_length(pc);
-  }
-  return bc;
-}
-
-static struct bytecode *optimize(struct bytecode *bc) {
-  for (int i=0; i<bc->nsubfunctions; i++) {
-    bc->subfunctions[i] = optimize(bc->subfunctions[i]);
-  }
-  return optimize_code(bc);
-}
-
 int jq_compile_args(jq_state *jq, const char* str, jv args) {
   jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
   assert(jv_get_kind(args) == JV_KIND_ARRAY);
@@ -903,8 +833,6 @@ int jq_compile_args(jq_state *jq, const char* str, jv args) {
     fprintf(stderr, "%s\n", jv_string_value(s));
     jv_free(s);
   }
-  if (jq->bc)
-    jq->bc = optimize(jq->bc);
   locfile_free(&locations);
   return jq->bc != NULL;
 }
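Note for readers: the compile-time test added above mirrors the runtime analysis this patch deletes from execute.c. A CALL_JQ becomes a TAIL_CALL_JQ only when (a) control falls straight through to a RET, possibly via a chain of unconditional JUMPs, and (b) every closure involved, callee included, is bound in an already-compiled block, so the call leaves no live reference to the current frame. This is also why compile() now appends the final RET before expand_call_arglist() runs: the last call of a program needs a RET behind it to detect.

Below is a minimal, self-contained sketch of the JUMP-chasing half of the check. The opcode enum, inst struct, and target field are simplified stand-ins for jq's real definitions (where the jump destination lives in imm.target), not the actual jq types:

/* Hypothetical, simplified stand-ins for jq's opcode and inst types. */
#include <assert.h>
#include <stddef.h>

typedef enum { JUMP, RET, DUP } opcode;

typedef struct inst {
  opcode op;
  struct inst* target;  /* JUMP destination; plays the role of imm.target */
} inst;

/* Same shape as the patch's returns_immediately(): chase unconditional
   JUMPs and report whether the chain ends at a RET. */
static int returns_immediately(inst* i) {
  while (i->op == JUMP)
    i = i->target;
  return i->op == RET;
}

int main(void) {
  inst ret  = { RET,  NULL };
  inst jmp2 = { JUMP, &ret  };
  inst jmp1 = { JUMP, &jmp2 };
  inst dup  = { DUP,  NULL };

  assert(returns_immediately(&ret));   /* RET directly: tail position */
  assert(returns_immediately(&jmp1));  /* JUMP -> JUMP -> RET: tail position */
  assert(!returns_immediately(&dup));  /* anything else: not a tail call */
  return 0;
}

Like the helper in the patch, the sketch assumes chains of unconditional jumps are acyclic; bytecode containing a JUMP cycle would make this walk loop forever.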