diff --git a/oyente/symExec.py b/oyente/symExec.py index 4576dbac..2da35e77 100755 --- a/oyente/symExec.py +++ b/oyente/symExec.py @@ -40,7 +40,10 @@ def initGlobalVars(): visited_pcs = set() global results - results = {} + results = { + "evm_code_coverage": "", "callstack": "", "concurrency": "", + "time_dependency": "", "reentrancy": "", "assertion_failure": "" + } # capturing the last statement of each basic block global end_ins_dict @@ -133,116 +136,6 @@ def compare_storage_and_gas_unit_test(global_state, analysis): test_status = unit_test.compare_with_symExec_result(global_state, analysis) exit(test_status) -def handler(signum, frame): - if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: - exit(TIME_OUT) - detect_bugs() - raise Exception("timeout") - -def detect_bugs(): - global results - global source_map - global visited_pcs - global global_problematic_pcs - - percentage_of_opcodes_covered = float(len(visited_pcs)) / len(instructions.keys()) * 100 - log.info("\t EVM code coverage: \t %s%%", round(percentage_of_opcodes_covered, 1)) - results["perct_evm_covered"] = str(round(percentage_of_opcodes_covered, 1)) - - log.debug("Checking for Callstack attack...") - run_callstack_attack() - - if global_params.REPORT_MODE: - rfile.write(str(total_no_of_paths) + "\n") - detect_money_concurrency() - detect_time_dependency() - stop = time.time() - if global_params.REPORT_MODE: - rfile.write(str(stop-start)) - rfile.close() - if global_params.DATA_FLOW: - detect_data_concurrency() - detect_data_money_concurrency() - log.debug("Results for Reentrancy Bug: " + str(reentrancy_all_paths)) - reentrancy_bug_found = any([v for sublist in reentrancy_all_paths for v in sublist]) - if not isTesting(): - s = "" - if reentrancy_bug_found and source_map != None: - pcs = global_problematic_pcs["reentrancy_bug"] - pcs = [pc for pc in pcs if source_map.find_source_code(pc)] - pcs = source_map.reduce_same_position_pcs(pcs) - s = source_map.to_str(pcs, "Reentrancy bug") - results["reentrancy"] = s - s = "\t Reentrancy bug: \t True" + s if s else "\t Reentrancy bug: \t False" - log.info(s) - - if global_params.CHECK_ASSERTIONS: - if source_map == None: - raise("Assertion checks need a Source Map") - pcs = [pc for pc in global_problematic_pcs["assertion_failure"] if "assert" in source_map.find_source_code(pc)] - pcs = source_map.reduce_same_position_pcs(pcs) - - if not isTesting(): - s = source_map.to_str(pcs, "Assertion failure") - results["assertion_failure"] = s - s = "\t Assertion failure: \t True" + s if s else "\t Assertion failure: \t False" - log.info(s) - - if global_params.WEB: - results_for_web() - -def main(contract, contract_sol, _source_map = None): - global c_name - global c_name_sol - global source_map - - c_name = contract - c_name_sol = contract_sol - source_map = _source_map - - check_unit_test_file() - initGlobalVars() - set_cur_file(c_name[4:] if len(c_name) > 5 else c_name) - start = time.time() - signal.signal(signal.SIGALRM, handler) - if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: - global_params.GLOBAL_TIMEOUT = global_params.GLOBAL_TIMEOUT_TEST - signal.alarm(global_params.GLOBAL_TIMEOUT) - atexit.register(closing_message) - - log.info("Running, please wait...") - - if not isTesting(): - log.info("\t============ Results ===========") - - try: - build_cfg_and_analyze() - log.debug("Done Symbolic execution") - except Exception as e: - if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: - log.exception(e) - exit(EXCEPTION) - traceback.print_exc() - raise e - signal.alarm(0) - - detect_bugs() - - -def results_for_web(): - global results - results["cname"] = source_map.get_cname().split(":")[1] - print "======= results =======" - print json.dumps(results) - -def closing_message(): - log.info("\t====== Analysis Completed ======") - if global_params.STORE_RESULT: - result_file = c_name + '.json' - with open(result_file, 'w') as of: - of.write(json.dumps(results, indent=1)) - log.info("Wrote results to %s.", result_file) - def change_format(): with open(c_name) as disasm_file: file_contents = disasm_file.readlines() @@ -276,8 +169,9 @@ def change_format(): with open(c_name, 'w') as disasm_file: disasm_file.write("\n".join(file_contents)) - def build_cfg_and_analyze(): + global source_map + change_format() with open(c_name, 'r') as disasm_file: disasm_file.readline() # Remove first line @@ -288,149 +182,67 @@ def build_cfg_and_analyze(): full_sym_exec() # jump targets are constructed on the fly -# Detect if a money flow depends on the timestamp -def detect_time_dependency(): - global results - global source_map +def print_cfg(): + for block in vertices.values(): + block.display() + log.debug(str(edges)) - TIMESTAMP_VAR = "IH_s" - is_dependant = False - pcs = [] - if global_params.PRINT_PATHS: - log.info("ALL PATH CONDITIONS") - for i, cond in enumerate(path_conditions): - if global_params.PRINT_PATHS: - log.info("PATH " + str(i + 1) + ": " + str(cond)) - for j, expr in enumerate(cond): - if is_expr(expr): - if TIMESTAMP_VAR in str(expr) and j in global_problematic_pcs["time_dependency_bug"][i]: - pcs.append(global_problematic_pcs["time_dependency_bug"][i][j]) - is_dependant = True - continue - if not isTesting(): - s = "" - if source_map != None: - pcs = [pc for pc in pcs if source_map.find_source_code(pc)] - pcs = source_map.reduce_same_position_pcs(pcs) - s = source_map.to_str(pcs, "Time dependency bug") - results["time_dependency"] = s - s = "\t Time dependency bug: \t True" + s if s else "\t Time dependency bug: \t False" - log.info(s) +def mapping_push_instruction(current_line_content, current_ins_address, idx, positions, length): + global source_map - if global_params.REPORT_MODE: - file_name = c_name.split("/")[len(c_name.split("/"))-1].split(".")[0] - report_file = file_name + '.report' - with open(report_file, 'w') as rfile: - if is_dependant: - rfile.write("yes\n") + instr_value = current_line_content.split(" ")[1] + while (idx < length): + name = positions[idx]['name'] + if name.startswith("tag"): + idx += 1 + else: + value = positions[idx]['value'] + if name.startswith("PUSH"): + if name == "PUSH": + if int(value, 16) == int(instr_value, 16): + source_map.set_instr_positions(current_ins_address, idx) + idx += 1 + break; + else: + raise Exception("Source map error") + else: + source_map.set_instr_positions(current_ins_address, idx) + idx += 1 + break; else: - rfile.write("no\n") + raise Exception("Source map error") + return idx - -# detect if two paths send money to different people -def detect_money_concurrency(): - global results +def mapping_non_push_instruction(current_line_content, current_ins_address, idx, positions, length): global source_map - n = len(money_flow_all_paths) - for i in range(n): - log.debug("Path " + str(i) + ": " + str(money_flow_all_paths[i])) - log.debug(all_gs[i]) - i = 0 - false_positive = [] - concurrency_paths = [] - pcs = [] - for flow in money_flow_all_paths: - i += 1 - if len(flow) == 1: - continue # pass all flows which do not do anything with money - for j in range(i, n): - jflow = money_flow_all_paths[j] - if len(jflow) == 1: - continue - if is_diff(flow, jflow): - pcs = global_problematic_pcs["money_concurrency_bug"][j] - concurrency_paths.append([i-1, j]) - if global_params.CHECK_CONCURRENCY_FP and \ - is_false_positive(i-1, j, all_gs, path_conditions) and \ - is_false_positive(j, i-1, all_gs, path_conditions): - false_positive.append([i-1, j]) - - # if PRINT_MODE: print "All false positive cases: ", false_positive - log.debug("Concurrency in paths: ") - if not isTesting(): - s = "" - if source_map != None: - pcs = [pc for pc in pcs if source_map.find_source_code(pc)] - pcs = source_map.reduce_same_position_pcs(pcs) - s = source_map.to_str(pcs, "Money concurrency bug") - results["concurrency"] = s - s = "\t Money concurrency bug: True" + s if s else "\t Money concurrency bug: False" - log.info(s) - if global_params.REPORT_MODE: - rfile.write("number of path: " + str(n) + "\n") - # number of FP detected - rfile.write(str(len(false_positive)) + "\n") - rfile.write(str(false_positive) + "\n") - # number of total races - rfile.write(str(len(concurrency_paths)) + "\n") - # all the races - rfile.write(str(concurrency_paths) + "\n") - - -# Detect if there is data concurrency in two different flows. -# e.g. if a flow modifies a value stored in the storage address and -# the other one reads that value in its execution -def detect_data_concurrency(): - sload_flows = data_flow_all_paths[0] - sstore_flows = data_flow_all_paths[1] - concurrency_addr = [] - for sflow in sstore_flows: - for addr in sflow: - for lflow in sload_flows: - if addr in lflow: - if not addr in concurrency_addr: - concurrency_addr.append(addr) - break - log.debug("data concurrency in storage " + str(concurrency_addr)) - -# Detect if any change in a storage address will result in a different -# flow of money. Currently I implement this detection by -# considering if a path condition contains -# a variable which is a storage address. -def detect_data_money_concurrency(): - n = len(money_flow_all_paths) - sstore_flows = data_flow_all_paths[1] - concurrency_addr = [] - for i in range(n): - cond = path_conditions[i] - list_vars = [] - for expr in cond: - list_vars += get_vars(expr) - set_vars = set(i.decl().name() for i in list_vars) - for sflow in sstore_flows: - for addr in sflow: - var_name = gen.gen_owner_store_var(addr) - if var_name in set_vars: - concurrency_addr.append(var_name) - log.debug("Concurrency in data that affects money flow: " + str(set(concurrency_addr))) - - -def print_cfg(): - for block in vertices.values(): - block.display() - log.debug(str(edges)) - + instr_name = current_line_content.split(" ")[0] + while (idx < length): + name = positions[idx]['name'] + if name.startswith("tag"): + idx += 1 + else: + if name == instr_name or name == "INVALID" and instr_name == "ASSERTFAIL" or name == "KECCAK256" and instr_name == "SHA3" or name == "SELFDESTRUCT" and instr_name == "SUICIDE": + source_map.set_instr_positions(current_ins_address, idx) + idx += 1 + break; + else: + raise Exception("Source map error") + return idx # 1. Parse the disassembled file # 2. Then identify each basic block (i.e. one-in, one-out) # 3. Store them in vertices def collect_vertices(tokens): + global source_map + if source_map: + idx = 0 + positions = source_map.get_positions() + length = len(positions) global end_ins_dict global instructions global jump_type - global source_map current_ins_address = 0 last_ins_address = 0 @@ -439,11 +251,6 @@ def collect_vertices(tokens): current_line_content = "" wait_for_push = False is_new_block = False - count = 0 - positions = [] - if source_map != None: - positions = source_map.get_positions() - length = len(positions) for tok_type, tok_string, (srow, scol), _, line_number in tokens: if wait_for_push is True: @@ -453,27 +260,7 @@ def collect_vertices(tokens): is_new_line = True current_line_content += push_val + ' ' instructions[current_ins_address] = current_line_content - instr_name, instr_value, _ = current_line_content.split(" ") - while (count < length): - name = positions[count]['name'] - if name.startswith("tag"): - count += 1 - else: - value = positions[count]['value'] - if name.startswith("PUSH"): - if name == "PUSH": - if int(value, 16) == int(instr_value, 16): - source_map.set_instr_positions(current_ins_address, count) - count += 1 - break; - else: - raise Exception("Source map error") - else: - source_map.set_instr_positions(current_ins_address, count) - count += 1 - break; - else: - raise Exception("Source map error") + idx = mapping_push_instruction(current_line_content, current_ins_address, idx, positions, length) if source_map else None log.debug(current_line_content) current_line_content = "" wait_for_push = False @@ -501,18 +288,7 @@ def collect_vertices(tokens): is_new_line = True log.debug(current_line_content) instructions[current_ins_address] = current_line_content - instr_name = current_line_content.split(" ")[0] - while (count < length): - name = positions[count]['name'] - if name.startswith("tag"): - count += 1 - else: - if name == instr_name or name == "INVALID" and instr_name == "ASSERTFAIL" or name == "KECCAK256" and instr_name == "SHA3" or name == "SELFDESTRUCT" and instr_name == "SUICIDE": - source_map.set_instr_positions(current_ins_address, count) - count += 1 - break; - else: - raise Exception("Source map error") + idx = mapping_non_push_instruction(current_line_content, current_ins_address, idx, positions, length) if source_map else None current_line_content = "" continue elif tok_type == NAME: @@ -2033,6 +1809,137 @@ def sym_exec_ins(start, instr, stack, mem, memory, global_state, sha3_list, path except: log.debug("Error: Debugging states") +# Detect if a money flow depends on the timestamp +def detect_time_dependency(): + global results + global source_map + + TIMESTAMP_VAR = "IH_s" + is_dependant = False + pcs = [] + if global_params.PRINT_PATHS: + log.info("ALL PATH CONDITIONS") + for i, cond in enumerate(path_conditions): + if global_params.PRINT_PATHS: + log.info("PATH " + str(i + 1) + ": " + str(cond)) + for j, expr in enumerate(cond): + if is_expr(expr): + if TIMESTAMP_VAR in str(expr) and j in global_problematic_pcs["time_dependency_bug"][i]: + pcs.append(global_problematic_pcs["time_dependency_bug"][i][j]) + is_dependant = True + continue + + if source_map: + pcs = [pc for pc in pcs if source_map.find_source_code(pc)] + pcs = source_map.reduce_same_position_pcs(pcs) + s = source_map.to_str(pcs, "Time dependency bug") + results["time_dependency"] = s + s = "\t Time dependency bug: \t True" + s if s else "\t Time dependency bug: \t False" + log.info(s) + else: + log.info("\t Timedependency bug: \t %s", bool(pcs)) + + if global_params.REPORT_MODE: + file_name = c_name.split("/")[len(c_name.split("/"))-1].split(".")[0] + report_file = file_name + '.report' + with open(report_file, 'w') as rfile: + if is_dependant: + rfile.write("yes\n") + else: + rfile.write("no\n") + + +# detect if two paths send money to different people +def detect_money_concurrency(): + global results + global source_map + + n = len(money_flow_all_paths) + for i in range(n): + log.debug("Path " + str(i) + ": " + str(money_flow_all_paths[i])) + log.debug(all_gs[i]) + i = 0 + false_positive = [] + concurrency_paths = [] + pcs = [] + for flow in money_flow_all_paths: + i += 1 + if len(flow) == 1: + continue # pass all flows which do not do anything with money + for j in range(i, n): + jflow = money_flow_all_paths[j] + if len(jflow) == 1: + continue + if is_diff(flow, jflow): + pcs = global_problematic_pcs["money_concurrency_bug"][j] + concurrency_paths.append([i-1, j]) + if global_params.CHECK_CONCURRENCY_FP and \ + is_false_positive(i-1, j, all_gs, path_conditions) and \ + is_false_positive(j, i-1, all_gs, path_conditions): + false_positive.append([i-1, j]) + + # if PRINT_MODE: print "All false positive cases: ", false_positive + log.debug("Concurrency in paths: ") + if source_map: + pcs = [pc for pc in pcs if source_map.find_source_code(pc)] + pcs = source_map.reduce_same_position_pcs(pcs) + s = source_map.to_str(pcs, "Money concurrency bug") + results["concurrency"] = s + s = "\t Money concurrency bug: True" + s if s else "\t Money concurrency bug: False" + log.info(s) + else: + log.info("\t Money concurrency bug: %s", bool(pcs)) + + if global_params.REPORT_MODE: + rfile.write("number of path: " + str(n) + "\n") + # number of FP detected + rfile.write(str(len(false_positive)) + "\n") + rfile.write(str(false_positive) + "\n") + # number of total races + rfile.write(str(len(concurrency_paths)) + "\n") + # all the races + rfile.write(str(concurrency_paths) + "\n") + + +# Detect if there is data concurrency in two different flows. +# e.g. if a flow modifies a value stored in the storage address and +# the other one reads that value in its execution +def detect_data_concurrency(): + sload_flows = data_flow_all_paths[0] + sstore_flows = data_flow_all_paths[1] + concurrency_addr = [] + for sflow in sstore_flows: + for addr in sflow: + for lflow in sload_flows: + if addr in lflow: + if not addr in concurrency_addr: + concurrency_addr.append(addr) + break + log.debug("data concurrency in storage " + str(concurrency_addr)) + +# Detect if any change in a storage address will result in a different +# flow of money. Currently I implement this detection by +# considering if a path condition contains +# a variable which is a storage address. +def detect_data_money_concurrency(): + n = len(money_flow_all_paths) + sstore_flows = data_flow_all_paths[1] + concurrency_addr = [] + for i in range(n): + cond = path_conditions[i] + list_vars = [] + for expr in cond: + list_vars += get_vars(expr) + set_vars = set(i.decl().name() for i in list_vars) + for sflow in sstore_flows: + for addr in sflow: + var_name = gen.gen_owner_store_var(addr) + if var_name in set_vars: + concurrency_addr.append(var_name) + log.debug("Concurrency in data that affects money flow: " + str(set(concurrency_addr))) + + + def check_callstack_attack(disasm): problematic_instructions = ['CALL', 'CALLCODE'] pcs = [] @@ -2069,16 +1976,141 @@ def run_callstack_attack(): instr_pattern = r"([\d]+) ([A-Z]+)([\d]+)?(?: => 0x)?(\S+)?" instr = re.findall(instr_pattern, disasm_data) pcs = check_callstack_attack(instr) - result = True if pcs else False - if not isTesting(): - s = "" - if source_map != None: - pcs = [pc for pc in pcs if source_map.find_source_code(pc)] - pcs = source_map.reduce_same_position_pcs(pcs) - s = source_map.to_str(pcs, "Callstack bug") + if source_map: + pcs = [pc for pc in pcs if source_map.find_source_code(pc)] + pcs = source_map.reduce_same_position_pcs(pcs) + s = source_map.to_str(pcs, "Callstack bug") results["callstack"] = s s = "\t Callstack bug: \t True" + s if s else "\t Callstack bug: \t False" log.info(s) + else: + log.info("\t Callstack bug: \t %s", bool(pcs)) + +def detect_reentrancy(): + reentrancy_bug_found = any([v for sublist in reentrancy_all_paths for v in sublist]) + if source_map: + pcs = global_problematic_pcs["reentrancy_bug"] + pcs = [pc for pc in pcs if source_map.find_source_code(pc)] + pcs = source_map.reduce_same_position_pcs(pcs) + s = source_map.to_str(pcs, "Reentrancy bug") + results["reentrancy"] = s + s = "\t Reentrancy bug: \t True" + s if s else "\t Reentrancy bug: \t False" + log.info(s) + else: + log.info("\t Reentrancy bug: \t %s", reentrancy_bug_found) + +def detect_assertion_failure(): + pcs = [pc for pc in global_problematic_pcs["assertion_failure"] if "assert" in source_map.find_source_code(pc)] + pcs = source_map.reduce_same_position_pcs(pcs) + + s = source_map.to_str(pcs, "Assertion failure") + results["assertion_failure"] = s + s = "\t Assertion failure: \t True" + s if s else "\t Assertion failure: \t False" + log.info(s) + +def detect_bugs(): + if isTesting(): + return + + global results + global source_map + global visited_pcs + global global_problematic_pcs + + if instructions: + evm_code_coverage = float(len(visited_pcs)) / len(instructions.keys()) * 100 + log.info("\t EVM code coverage: \t %s%%", round(evm_code_coverage, 1)) + results["evm_code_coverage"] = str(round(evm_code_coverage, 1)) + + log.debug("Checking for Callstack attack...") + run_callstack_attack() + + if global_params.REPORT_MODE: + rfile.write(str(total_no_of_paths) + "\n") + + detect_money_concurrency() + detect_time_dependency() + + stop = time.time() + if global_params.REPORT_MODE: + rfile.write(str(stop-start)) + rfile.close() + if global_params.DATA_FLOW: + detect_data_concurrency() + detect_data_money_concurrency() + + log.debug("Results for Reentrancy Bug: " + str(reentrancy_all_paths)) + detect_reentrancy() + + if global_params.CHECK_ASSERTIONS: + if source_map: + detect_assertion_failure() + else: + raise("Assertion checks need a Source Map") + else: + log.info("\t EVM code coverage: \t 0/0") + results["evm_code_coverage"] = "0/0" + + if global_params.WEB: + results_for_web() + +def closing_message(): + log.info("\t====== Analysis Completed ======") + if global_params.STORE_RESULT: + result_file = c_name + '.json' + with open(result_file, 'w') as of: + of.write(json.dumps(results, indent=1)) + log.info("Wrote results to %s.", result_file) + +def handler(signum, frame): + if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: + exit(TIME_OUT) + detect_bugs() + raise Exception("timeout") + +def results_for_web(): + global results + results["cname"] = source_map.get_cname().split(":")[1] + print "======= results =======" + print json.dumps(results) + +def main(contract, contract_sol, _source_map = None): + global c_name + global c_name_sol + global source_map + + c_name = contract + c_name_sol = contract_sol + source_map = _source_map + + check_unit_test_file() + initGlobalVars() + set_cur_file(c_name[4:] if len(c_name) > 5 else c_name) + start = time.time() + signal.signal(signal.SIGALRM, handler) + if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: + global_params.GLOBAL_TIMEOUT = global_params.GLOBAL_TIMEOUT_TEST + signal.alarm(global_params.GLOBAL_TIMEOUT) + atexit.register(closing_message) + + log.info("Running, please wait...") + + if not isTesting(): + log.info("\t============ Results ===========") + + try: + build_cfg_and_analyze() + log.debug("Done Symbolic execution") + except Exception as e: + if global_params.UNIT_TEST == 2 or global_params.UNIT_TEST == 3: + log.exception(e) + exit(EXCEPTION) + traceback.print_exc() + raise e + signal.alarm(0) + + detect_bugs() + if __name__ == '__main__': main(sys.argv[1]) diff --git a/web/app/assets/javascripts/src/app/oyente-analyzer.js b/web/app/assets/javascripts/src/app/oyente-analyzer.js index 211b09b6..00c1b600 100644 --- a/web/app/assets/javascripts/src/app/oyente-analyzer.js +++ b/web/app/assets/javascripts/src/app/oyente-analyzer.js @@ -74,26 +74,40 @@ function Analyzer () {