From 354e0ca9f6d3121245cea078907895be5adf735c Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Mon, 20 May 2024 17:11:49 -0400
Subject: [PATCH] rebased, handled duplicates, changed table column names

Signed-off-by: Shashank Reddy Boyapally
---
 examples/small-scale-cluster-density.yaml |  2 +
 orion.py                                  | 48 ++++++++++++++---------
 utils/orion_funcs.py                      | 37 ++++++++++++-----
 3 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/examples/small-scale-cluster-density.yaml b/examples/small-scale-cluster-density.yaml
index 703f1b5..c2b13b3 100644
--- a/examples/small-scale-cluster-density.yaml
+++ b/examples/small-scale-cluster-density.yaml
@@ -1,5 +1,7 @@
 tests :
   - name : aws-small-scale-cluster-density-v2
+    index: ospst-perf-scale-ci-*
+    benchmarkIndex: ospst-ripsaw-kube-burner*
     metadata:
       platform: AWS
       masterNodesType: m6a.xlarge
diff --git a/orion.py b/orion.py
index 956cfe6..064046e 100644
--- a/orion.py
+++ b/orion.py
@@ -4,10 +4,12 @@
 # pylint: disable = import-error

 import sys
+import warnings
 from functools import reduce
 import logging
 import os
 import re
+import pyshorteners

 import click
 import pandas as pd
@@ -15,6 +17,7 @@
 from fmatch.matcher import Matcher
 from utils import orion_funcs

+warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")

 @click.group()
 # pylint: disable=unused-argument
@@ -24,16 +27,14 @@ def cli(max_content_width=120):
     """


-# pylint: disable=too-many-locals
+# pylint: disable=too-many-locals, too-many-statements
 @click.command()
 @click.option("--uuid", default="", help="UUID to use as base for comparisons")
 @click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
-@click.option(
-    "--output-path", default="output.csv", help="Path to save the output csv file"
-)
+@click.option("--output-path", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
 @click.option("--hunter-analyze", is_flag=True, help="run hunter analyze")
 @click.option(
     "-o",
     "--output",
@@ -68,11 +69,14 @@ def orion(**kwargs):
     else:
         logger.error("ES_SERVER environment variable/config variable not set")
         sys.exit(1)
-
+    shortener = pyshorteners.Shortener()
     for test in data["tests"]:
+        benchmarkIndex = test["benchmarkIndex"]
         uuid = kwargs["uuid"]
         baseline = kwargs["baseline"]
-        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
+        fingerprint_index = test["index"]
+        match = Matcher(index=fingerprint_index,
+                        level=level, ES_URL=ES_URL, verify_certs=False)
         if uuid == "":
             metadata = orion_funcs.get_metadata(test, logger)
         else:
@@ -80,42 +84,48 @@ def orion(**kwargs):

         logger.info("The test %s has started", test["name"])
         if baseline == "":
-            uuids = match.get_uuid_by_metadata(metadata)
+            runs = match.get_uuid_by_metadata(metadata)
+            uuids = [run["uuid"] for run in runs]
+            buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
             if len(uuids) == 0:
                 logging.info("No UUID present for given metadata")
                 sys.exit()
         else:
             uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
             uuids.append(uuid)
-        if metadata["benchmark.keyword"] == "k8s-netperf" :
-            index = "k8s-netperf"
-            ids = uuids
-        elif metadata["benchmark.keyword"] == "ingress-perf":
-            index = "ingress-performance"
+            buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids, match)
+
+        fingerprint_index = benchmarkIndex
+        if metadata["benchmark.keyword"] in ["ingress-perf", "k8s-netperf"]:
             ids = uuids
         else:
-            index = "ripsaw-kube-burner"
             if baseline == "":
-                runs = match.match_kube_burner(uuids)
+                runs = match.match_kube_burner(uuids, fingerprint_index)
                 ids = match.filter_runs(runs, runs)
             else:
                 ids = uuids
-
         metrics = test["metrics"]
-        dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
+        dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger)
+
+        for i, df in enumerate(dataframe_list):
+            if i != 0 and "timestamp" in df.columns:
+                dataframe_list[i] = df.drop(columns=["timestamp"])

         merged_df = reduce(
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
-        csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
+        merged_df["buildUrl"] = merged_df["uuid"].apply(
+            lambda uuid: shortener.tinyurl.short(buildUrls[uuid]))  # pylint: disable=cell-var-from-loop
+        csv_name = kwargs["output_path"].split(".")[0] + "-" + test["name"] + ".csv"
         match.save_results(
             merged_df, csv_file_path=csv_name
         )

     if kwargs["hunter_analyze"]:
-        orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"])
+        orion_funcs.run_hunter_analyze(merged_df, test, kwargs["output"])


 if __name__ == "__main__":
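A note on the timestamp handling above: every per-metric dataframe now carries its own "timestamp" column, so the new loop drops it from all but the first frame before the reduce, otherwise pd.merge would emit timestamp_x/timestamp_y suffixes. A minimal standalone sketch of that step, with made-up uuids and metric column names (not part of the patch):

    from functools import reduce
    import pandas as pd

    dataframe_list = [
        pd.DataFrame({"uuid": ["a", "b"], "timestamp": [1, 2], "podReadyLatency_P99": [9.1, 9.4]}),
        pd.DataFrame({"uuid": ["a", "b"], "timestamp": [1, 2], "apiserverCPU_avg": [0.41, 0.52]}),
    ]
    # Keep the first dataframe's timestamp; drop it from the rest so the
    # inner merge on "uuid" yields a single timestamp column.
    for i, df in enumerate(dataframe_list):
        if i != 0 and "timestamp" in df.columns:
            dataframe_list[i] = df.drop(columns=["timestamp"])
    merged_df = reduce(
        lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
        dataframe_list,
    )
    print(merged_df.columns.tolist())
    # ['uuid', 'timestamp', 'podReadyLatency_P99', 'apiserverCPU_avg']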
metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] : ids = uuids else: - index = "ripsaw-kube-burner" if baseline == "": - runs = match.match_kube_burner(uuids) + runs = match.match_kube_burner(uuids, fingerprint_index) ids = match.filter_runs(runs, runs) else: ids = uuids - metrics = test["metrics"] - dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger) + dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger) + + for i, df in enumerate(dataframe_list): + if i != 0 and ('timestamp' in df.columns): + dataframe_list[i] = df.drop(columns=['timestamp']) merged_df = reduce( lambda left, right: pd.merge(left, right, on="uuid", how="inner"), dataframe_list, ) - csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv" + shortener = pyshorteners.Shortener() + merged_df["buildUrl"] = merged_df["uuid"].apply( + lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop + csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv" match.save_results( merged_df, csv_file_path=csv_name ) if kwargs["hunter_analyze"]: - orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"]) + orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"]) if __name__ == "__main__": diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py index f074e9a..995a2ff 100644 --- a/utils/orion_funcs.py +++ b/utils/orion_funcs.py @@ -27,16 +27,15 @@ def run_hunter_analyze(merged_df, test, output): metrics = { column: Metric(1, 1.0) for column in merged_df.columns - if column not in ["uuid", "timestamp"] + if column not in ["uuid","timestamp","buildUrl"] } data = { column: merged_df[column] for column in merged_df.columns - if column not in ["uuid", "timestamp"] - } - attributes = { - column: merged_df[column] for column in merged_df.columns if column in ["uuid"] + if column not in ["uuid","timestamp","buildUrl"] } + attributes={column: merged_df[column] + for column in merged_df.columns if column in ["uuid","buildUrl"]} series = Series( test_name=test["name"], branch=None, @@ -74,7 +73,7 @@ def parse_json_output(merged_df, change_points_by_metric): for index, entry in enumerate(df_json): entry["metrics"] = { key: {"value": entry.pop(key), "percentage_change": 0} - for key in entry.keys() - {"uuid", "timestamp"} + for key in entry.keys() - {"uuid", "timestamp", "buildUrl"} } entry["is_changepoint"] = False @@ -117,8 +116,9 @@ def get_metric_data(ids, index, metrics, match, logger): agg_value = metric["agg"]["value"] agg_type = metric["agg"]["agg_type"] agg_name = agg_value + "_" + agg_type - cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name]) - cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name}) + cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name]) + cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first') + cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type}) dataframe_list.append(cpu_df) logger.debug(cpu_df) @@ -134,6 +134,9 @@ def get_metric_data(ids, index, metrics, match, logger): podl_df = match.convert_to_df( podl, columns=["uuid", "timestamp", metric_of_interest] ) + podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first') + podl_df = podl_df.rename(columns={metric_of_interest: + metric_name + "_" + metric_of_interest}) dataframe_list.append(podl_df) logger.debug(podl_df) except Exception as e: # pylint: disable=broad-exception-caught @@ -159,6 +162,22 @@ def get_metadata(test, logger): 
logger.debug("metadata" + str(metadata)) return metadata +def get_build_urls(index, uuids,match): + """Gets metadata of the run from each test + to get the build url + + Args: + uuids (list): str list of uuid to find build urls of + match: the fmatch instance + + + Returns: + dict: dictionary of the metadata + """ + + test = match.getResults("",uuids,index,{}) + buildUrls = {run["uuid"]: run["buildUrl"] for run in test} + return buildUrls def filter_metadata(uuid,match,logger): """Gets metadata of the run from each test @@ -220,7 +239,7 @@ def set_logging(level, logger): handler = logging.StreamHandler(sys.stdout) handler.setLevel(level) formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + "%(asctime)s [%(name)s:%(filename)s:%(lineno)d] %(levelname)s: %(message)s" ) handler.setFormatter(formatter) logger.addHandler(handler)