From 354e0ca9f6d3121245cea078907895be5adf735c Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Mon, 20 May 2024 17:11:49 -0400
Subject: [PATCH] rebased, handled duplicates, changed table column names

Signed-off-by: Shashank Reddy Boyapally
---
 examples/small-scale-cluster-density.yaml |  2 +
 orion.py                                  | 48 ++++++++++++++---------
 utils/orion_funcs.py                      | 37 ++++++++++++-----
 3 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/examples/small-scale-cluster-density.yaml b/examples/small-scale-cluster-density.yaml
index 703f1b5..c2b13b3 100644
--- a/examples/small-scale-cluster-density.yaml
+++ b/examples/small-scale-cluster-density.yaml
@@ -1,5 +1,7 @@
 tests :
   - name : aws-small-scale-cluster-density-v2
+    index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
diff --git a/orion.py b/orion.py
index 956cfe6..064046e 100644
--- a/orion.py
+++ b/orion.py
@@ -4,10 +4,12 @@
 # pylint: disable = import-error

 import sys
+import warnings
 from functools import reduce
 import logging
 import os
 import re
+import pyshorteners

 import click
 import pandas as pd
@@ -15,6 +17,7 @@
 from fmatch.matcher import Matcher
 from utils import orion_funcs

+warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")

 @click.group()
 # pylint: disable=unused-argument
@@ -24,16 +27,14 @@ def cli(max_content_width=120):
     """


-# pylint: disable=too-many-locals
+# pylint: disable=too-many-locals, too-many-statements
 @click.command()
 @click.option("--uuid", default="", help="UUID to use as base for comparisons")
 @click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
-@click.option(
-    "--output-path", default="output.csv", help="Path to save the output csv file"
-)
+@click.option("--output-path", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
 @click.option("--hunter-analyze", is_flag=True, help="run hunter analyze")
 @click.option(
     "-o",
     "--output",
@@ -68,11 +69,14 @@ def orion(**kwargs):
     else:
         logger.error("ES_SERVER environment variable/config variable not set")
         sys.exit(1)
-
+    shortener = pyshorteners.Shortener()
     for test in data["tests"]:
+        benchmarkIndex = test["benchmarkIndex"]
         uuid = kwargs["uuid"]
         baseline = kwargs["baseline"]
-        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
+        fingerprint_index = test["index"]
+        match = Matcher(index=fingerprint_index,
+                        level=level, ES_URL=ES_URL, verify_certs=False)
         if uuid == "":
             metadata = orion_funcs.get_metadata(test, logger)
         else:
@@ -80,42 +84,48 @@ def orion(**kwargs):

         logger.info("The test %s has started", test["name"])
         if baseline == "":
-            uuids = match.get_uuid_by_metadata(metadata)
+            runs = match.get_uuid_by_metadata(metadata)
+            uuids = [run["uuid"] for run in runs]
+            buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
             if len(uuids) == 0:
                 logging.info("No UUID present for given metadata")
                 sys.exit()
         else:
             uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
             uuids.append(uuid)
-        if metadata["benchmark.keyword"] == "k8s-netperf" :
-            index = "k8s-netperf"
-            ids = uuids
-        elif metadata["benchmark.keyword"] == "ingress-perf":
-            index = "ingress-performance"
+            buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids, match)
+
+        fingerprint_index = benchmarkIndex
+        if metadata["benchmark.keyword"] in ["ingress-perf", "k8s-netperf"]:
             ids = uuids
         else:
-            index = "ripsaw-kube-burner"
             if baseline == "":
-                runs = match.match_kube_burner(uuids)
+                runs = match.match_kube_burner(uuids, fingerprint_index)
                 ids = match.filter_runs(runs, runs)
             else:
                 ids = uuids
-
         metrics = test["metrics"]
-        dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
+        dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger)
+
+        for i, df in enumerate(dataframe_list):
+            if i != 0 and "timestamp" in df.columns:
+                dataframe_list[i] = df.drop(columns=["timestamp"])

         merged_df = reduce(
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
-        csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
+        merged_df["buildUrl"] = merged_df["uuid"].apply(
+            lambda uuid: shortener.tinyurl.short(buildUrls[uuid]))  # pylint: disable=cell-var-from-loop
+        csv_name = kwargs["output_path"].split(".")[0] + "-" + test["name"] + ".csv"
         match.save_results(
             merged_df, csv_file_path=csv_name
         )

     if kwargs["hunter_analyze"]:
-        orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"])
+        orion_funcs.run_hunter_analyze(merged_df, test, kwargs["output"])


 if __name__ == "__main__":
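A note on the timestamp handling above: every per-metric dataframe now carries its own "timestamp" column, so the new loop drops it from all but the first frame before the reduce, otherwise pd.merge would emit timestamp_x/timestamp_y suffixes. A minimal standalone sketch of that step, with made-up uuids and metric column names (not part of the patch):

    from functools import reduce
    import pandas as pd

    dataframe_list = [
        pd.DataFrame({"uuid": ["a", "b"], "timestamp": [1, 2], "podReadyLatency_P99": [9.1, 9.4]}),
        pd.DataFrame({"uuid": ["a", "b"], "timestamp": [1, 2], "apiserverCPU_avg": [0.41, 0.52]}),
    ]
    # Keep the first dataframe's timestamp; drop it from the rest so the
    # inner merge on "uuid" yields a single timestamp column.
    for i, df in enumerate(dataframe_list):
        if i != 0 and "timestamp" in df.columns:
            dataframe_list[i] = df.drop(columns=["timestamp"])
    merged_df = reduce(
        lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
        dataframe_list,
    )
    print(merged_df.columns.tolist())
    # ['uuid', 'timestamp', 'podReadyLatency_P99', 'apiserverCPU_avg']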
metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] : ids = uuids else: - index = "ripsaw-kube-burner" if baseline == "": - runs = match.match_kube_burner(uuids) + runs = match.match_kube_burner(uuids, fingerprint_index) ids = match.filter_runs(runs, runs) else: ids = uuids - metrics = test["metrics"] - dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger) + dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger) + + for i, df in enumerate(dataframe_list): + if i != 0 and ('timestamp' in df.columns): + dataframe_list[i] = df.drop(columns=['timestamp']) merged_df = reduce( lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, ) - csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv" + shortener = pyshorteners.Shortener() + merged_df["buildUrl"] = merged_df["uuid"].apply( + lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop + csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv" match.save_results( merged_df, csv_file_path=csv_name ) if kwargs["hunter_analyze"]: - orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"]) + orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"]) if __name__ == "__main__": diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py index f074e9a..995a2ff 100644 --- a/utils/orion_funcs.py +++ b/utils/orion_funcs.py @@ -27,16 +27,15 @@ def run_hunter_analyze(merged_df, test, output): metrics = { column: Metric(1, 1.0) for column in merged_df.columns - if column not in ["uuid", "timestamp"] + if column not in ["uuid","timestamp","buildUrl"] } data = { column: merged_df[column] for column in merged_df.columns - if column not in ["uuid", "timestamp"] - } - attributes = { - column: merged_df[column] for column in merged_df.columns if column in ["uuid"] + if column not in ["uuid","timestamp","buildUrl"] } + attributes={column: merged_df[column] + for column in merged_df.columns if column in ["uuid","buildUrl"]} series = Series( test_name=test["name"], branch=None, @@ -74,7 +73,7 @@ def parse_json_output(merged_df, change_points_by_metric): for index, entry in enumerate(df_json): entry["metrics"] = { key: {"value": entry.pop(key), "percentage_change": 0} - for key in entry.keys() - {"uuid", "timestamp"} + for key in entry.keys() - {"uuid", "timestamp", "buildUrl"} } entry["is_changepoint"] = False @@ -117,8 +116,9 @@ def get_metric_data(ids, index, metrics, match, logger): agg_value = metric["agg"]["value"] agg_type = metric["agg"]["agg_type"] agg_name = agg_value + "_" + agg_type - cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) - cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) + cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name]) + cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first') + cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type}) dataframe_list.append(cpu_df) logger.debug(cpu_df) @@ -134,6 +134,9 @@ def get_metric_data(ids, index, metrics, match, logger): podl_df = match.convert_to_df( podl, columns=["uuid", "timestamp", metric_of_interest] ) + podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first') + podl_df = podl_df.rename(columns={metric_of_interest: + metric_name + "_" + metric_of_interest}) dataframe_list.append(podl_df) logger.debug(podl_df) except Exception as e: # pylint: disable=broad-exception-caught @@ -159,6 +162,22 @@ def get_metadata(test, logger): 
logger.debug("metadata" + str(metadata)) return metadata +def get_build_urls(index, uuids,match): + """Gets metadata of the run from each test + to get the build url + + Args: + uuids (list): str list of uuid to find build urls of + match: the fmatch instance + + + Returns: + dict: dictionary of the metadata + """ + + test = match.getResults("",uuids,index,{}) + buildUrls = {run["uuid"]: run["buildUrl"] for run in test} + return buildUrls def filter_metadata(uuid,match,logger): """Gets metadata of the run from each test @@ -220,7 +239,7 @@ def set_logging(level, logger): handler = logging.StreamHandler(sys.stdout) handler.setLevel(level) formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + "%(asctime)s [%(name)s:%(filename)s:%(lineno)d] %(levelname)s: %(message)s" ) handler.setFormatter(formatter) logger.addHandler(handler)