diff --git a/orion.py b/orion.py
index e1dd655..d3d9c34 100644
--- a/orion.py
+++ b/orion.py
@@ -32,11 +32,9 @@ def cli(max_content_width=120):
 @click.option("--uuid", default="", help="UUID to use as base for comparisons")
 @click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
-@click.option(
-    "--output-path", default="output.csv", help="Path to save the output csv file"
-)
+@click.option("--output-path", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
-@click.option("--hunter-analyze", is_flag=True, help="run hunter analyze")
+@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze")
 @click.option(
     "-o",
     "--output",
@@ -76,8 +74,8 @@ def orion(**kwargs):
         benchmarkIndex=test['benchmarkIndex']
         uuid = kwargs["uuid"]
         baseline = kwargs["baseline"]
-        index = "ospst-perf-scale-ci-*"
-        match = Matcher(index=index,
+        fingerprint_index = test["index"]
+        match = Matcher(index=fingerprint_index,
                         level=level, ES_URL=ES_URL, verify_certs=False)
         if uuid == "":
             metadata = orion_funcs.get_metadata(test, logger)
@@ -95,20 +93,23 @@ def orion(**kwargs):
         else:
             uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
             uuids.append(uuid)
-        buildUrls = orion_funcs.get_build_urls(index, uuids,match)
+        buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids,match)
 
-        index=benchmarkIndex
+        fingerprint_index=benchmarkIndex
         if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] :
             ids = uuids
         else:
             if baseline == "":
-                runs = match.match_kube_burner(uuids, index)
+                runs = match.match_kube_burner(uuids, fingerprint_index)
                 ids = match.filter_runs(runs, runs)
             else:
                 ids = uuids
 
-        metrics = test["metrics"]
-        dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
+        dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger)
+
+        for i, df in enumerate(dataframe_list):
+            if i != 0 and ('timestamp' in df.columns):
+                dataframe_list[i] = df.drop(columns=['timestamp'])
 
         for i, df in enumerate(dataframe_list):
             if i != 0:
@@ -122,13 +123,13 @@ def orion(**kwargs):
             shortener = pyshorteners.Shortener()
             merged_df["buildUrl"] = merged_df["uuid"].apply(
                 lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop
-        csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
+        csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv"
         match.save_results(
             merged_df, csv_file_path=csv_name
         )
 
         if kwargs["hunter_analyze"]:
-            orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"])
+            orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"])
 
 
 if __name__ == "__main__":
diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py
index 8c24352..57a2b19 100644
--- a/utils/orion_funcs.py
+++ b/utils/orion_funcs.py
@@ -24,15 +24,19 @@ def run_hunter_analyze(merged_df, test, output):
     """
     merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"])
     merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9
-    metrics = {column: Metric(1, 1.0)
-               for column in merged_df.columns
-               if column not in ["uuid","timestamp","buildUrl"]}
-    data = {column: merged_df[column]
-            for column in merged_df.columns
-            if column not in ["uuid","timestamp","buildUrl"]}
+    metrics = {
+        column: Metric(1, 1.0)
+        for column in merged_df.columns
+        if column not in ["uuid","timestamp","buildUrl"]
+    }
+    data = {
+        column: merged_df[column]
+        for column in merged_df.columns
+        if column not in ["uuid","timestamp","buildUrl"]
+    }
     attributes={column: merged_df[column]
                 for column in merged_df.columns if column in ["uuid","buildUrl"]}
-    series=Series(
+    series = Series(
         test_name=test["name"],
         branch=None,
         time=list(merged_df["timestamp"]),
@@ -69,7 +73,7 @@ def parse_json_output(merged_df, change_points_by_metric):
     for index, entry in enumerate(df_json):
         entry["metrics"] = {
             key: {"value": entry.pop(key), "percentage_change": 0}
-            for key in entry.keys() - {"uuid", "timestamp"}
+            for key in entry.keys() - {"uuid", "timestamp", "buildUrl"}
         }
         entry["is_changepoint"] = False
 
@@ -112,10 +116,9 @@ def get_metric_data(ids, index, metrics, match, logger):
                 agg_value = metric["agg"]["value"]
                 agg_type = metric["agg"]["agg_type"]
                 agg_name = agg_value + "_" + agg_type
-                cpu_df = match.convert_to_df(cpu, columns=["uuid","timestamp", agg_name])
-                cpu_df = cpu_df.rename(
-                    columns={agg_name: metric_name+ "_" + agg_name}
-                )
+                cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name])
+                cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first')
+                cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type})
                 dataframe_list.append(cpu_df)
                 logger.debug(cpu_df)
 
@@ -131,6 +134,9 @@ def get_metric_data(ids, index, metrics, match, logger):
                 podl_df = match.convert_to_df(
                     podl, columns=["uuid", "timestamp", metric_of_interest]
                 )
+                podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first')
+                podl_df = podl_df.rename(columns={metric_of_interest:
+                                         metric_name + "_" + metric_of_interest})
                 dataframe_list.append(podl_df)
                 logger.debug(podl_df)
             except Exception as e: # pylint: disable=broad-exception-caught
@@ -156,6 +162,22 @@ def get_metadata(test, logger):
     logger.debug("metadata" + str(metadata))
     return metadata
 
+def get_build_urls(index, uuids,match):
+    """Gets metadata of the run from each test
+    to get the build url
+
+    Args:
+        uuids (list): str list of uuid to find build urls of
+        match: the fmatch instance
+
+
+    Returns:
+        dict: dictionary of the metadata
+    """
+
+    test = match.getResults("",uuids,index,{})
+    buildUrls = {run["uuid"]: run["buildUrl"] for run in test}
+    return buildUrls
 
 def get_build_urls(index, uuids,match):
     """Gets metadata of the run from each test