Skip to content

Commit

Permalink
rebased, handled duplicates, changed table column names
Browse files Browse the repository at this point in the history
Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
  • Loading branch information
shashank-boyapally committed May 20, 2024
1 parent 12374e8 commit 354e0ca
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 28 deletions.
2 changes: 2 additions & 0 deletions examples/small-scale-cluster-density.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
tests :
- name : aws-small-scale-cluster-density-v2
index: ospst-perf-scale-ci-*
benchmarkIndex: ospst-ripsaw-kube-burner*
metadata:
platform: AWS
masterNodesType: m6a.xlarge
Expand Down
48 changes: 29 additions & 19 deletions orion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@

# pylint: disable = import-error
import sys
import warnings
from functools import reduce
import logging
import os
import re
import pyshorteners

import click
import pandas as pd

from fmatch.matcher import Matcher
from utils import orion_funcs

warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")

@click.group()
# pylint: disable=unused-argument
Expand All @@ -24,16 +27,14 @@ def cli(max_content_width=120):
"""


# pylint: disable=too-many-locals
# pylint: disable=too-many-locals, too-many-statements
@click.command()
@click.option("--uuid", default="", help="UUID to use as base for comparisons")
@click.option("--baseline", default="", help="Baseline UUID(s) to to compare against uuid")
@click.option("--config", default="config.yaml", help="Path to the configuration file")
@click.option(
"--output-path", default="output.csv", help="Path to save the output csv file"
)
@click.option("--output-path", default="output.csv", help="Path to save the output csv file")
@click.option("--debug", is_flag=True, help="log level ")
@click.option("--hunter-analyze", is_flag=True, help="run hunter analyze")
@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze")
@click.option(
"-o",
"--output",
Expand Down Expand Up @@ -68,54 +69,63 @@ def orion(**kwargs):
else:
logger.error("ES_SERVER environment variable/config variable not set")
sys.exit(1)

shortener = pyshorteners.Shortener()
for test in data["tests"]:
benchmarkIndex=test['benchmarkIndex']
uuid = kwargs["uuid"]
baseline = kwargs["baseline"]
match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
fingerprint_index = test["index"]
match = Matcher(index=fingerprint_index,
level=level, ES_URL=ES_URL, verify_certs=False)
if uuid == "":
metadata = orion_funcs.get_metadata(test, logger)
else:
metadata = orion_funcs.filter_metadata(uuid,match,logger)

logger.info("The test %s has started", test["name"])
if baseline == "":
uuids = match.get_uuid_by_metadata(metadata)
runs = match.get_uuid_by_metadata(metadata)
uuids = [run["uuid"] for run in runs]
buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
if len(uuids) == 0:
logging.info("No UUID present for given metadata")
sys.exit()
else:
uuids = [uuid for uuid in re.split(' |,',baseline) if uuid]
uuids.append(uuid)
if metadata["benchmark.keyword"] == "k8s-netperf" :
index = "k8s-netperf"
ids = uuids
elif metadata["benchmark.keyword"] == "ingress-perf":
index = "ingress-performance"
buildUrls = orion_funcs.get_build_urls(fingerprint_index, uuids,match)

fingerprint_index=benchmarkIndex
if metadata["benchmark.keyword"] in ["ingress-perf","k8s-netperf"] :
ids = uuids
else:
index = "ripsaw-kube-burner"
if baseline == "":
runs = match.match_kube_burner(uuids)
runs = match.match_kube_burner(uuids, fingerprint_index)
ids = match.filter_runs(runs, runs)
else:
ids = uuids

metrics = test["metrics"]
dataframe_list = orion_funcs.get_metric_data(ids, index, metrics, match, logger)
dataframe_list = orion_funcs.get_metric_data(ids, fingerprint_index, metrics, match, logger)

for i, df in enumerate(dataframe_list):
if i != 0 and ('timestamp' in df.columns):
dataframe_list[i] = df.drop(columns=['timestamp'])

merged_df = reduce(
lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
dataframe_list,
)

csv_name = kwargs["output"].split(".")[0]+"-"+test['name']+".csv"
shortener = pyshorteners.Shortener()
merged_df["buildUrl"] = merged_df["uuid"].apply(
lambda uuid: shortener.tinyurl.short(buildUrls[uuid])) #pylint: disable = cell-var-from-loop
csv_name = kwargs["output_path"].split(".")[0]+"-"+test['name']+".csv"
match.save_results(
merged_df, csv_file_path=csv_name
)

if kwargs["hunter_analyze"]:
orion_funcs.run_hunter_analyze(merged_df,test, kwargs["output"])
orion_funcs.run_hunter_analyze(merged_df,test,kwargs["output"])


if __name__ == "__main__":
Expand Down
37 changes: 28 additions & 9 deletions utils/orion_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ def run_hunter_analyze(merged_df, test, output):
metrics = {
column: Metric(1, 1.0)
for column in merged_df.columns
if column not in ["uuid", "timestamp"]
if column not in ["uuid","timestamp","buildUrl"]
}
data = {
column: merged_df[column]
for column in merged_df.columns
if column not in ["uuid", "timestamp"]
}
attributes = {
column: merged_df[column] for column in merged_df.columns if column in ["uuid"]
if column not in ["uuid","timestamp","buildUrl"]
}
attributes={column: merged_df[column]
for column in merged_df.columns if column in ["uuid","buildUrl"]}
series = Series(
test_name=test["name"],
branch=None,
Expand Down Expand Up @@ -74,7 +73,7 @@ def parse_json_output(merged_df, change_points_by_metric):
for index, entry in enumerate(df_json):
entry["metrics"] = {
key: {"value": entry.pop(key), "percentage_change": 0}
for key in entry.keys() - {"uuid", "timestamp"}
for key in entry.keys() - {"uuid", "timestamp", "buildUrl"}
}
entry["is_changepoint"] = False

Expand Down Expand Up @@ -117,8 +116,9 @@ def get_metric_data(ids, index, metrics, match, logger):
agg_value = metric["agg"]["value"]
agg_type = metric["agg"]["agg_type"]
agg_name = agg_value + "_" + agg_type
cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_name})
cpu_df = match.convert_to_df(cpu, columns=["uuid", "timestamp", agg_name])
cpu_df= cpu_df.drop_duplicates(subset=['uuid'],keep='first')
cpu_df = cpu_df.rename(columns={agg_name: metric_name + "_" + agg_type})
dataframe_list.append(cpu_df)
logger.debug(cpu_df)

Expand All @@ -134,6 +134,9 @@ def get_metric_data(ids, index, metrics, match, logger):
podl_df = match.convert_to_df(
podl, columns=["uuid", "timestamp", metric_of_interest]
)
podl_df= podl_df.drop_duplicates(subset=['uuid'],keep='first')
podl_df = podl_df.rename(columns={metric_of_interest:
metric_name + "_" + metric_of_interest})
dataframe_list.append(podl_df)
logger.debug(podl_df)
except Exception as e: # pylint: disable=broad-exception-caught
Expand All @@ -159,6 +162,22 @@ def get_metadata(test, logger):
logger.debug("metadata" + str(metadata))
return metadata

def get_build_urls(index, uuids, match):
    """Look up the build URL for each of the given runs.

    Queries the given index for the run documents matching *uuids* and
    extracts each run's ``buildUrl`` field.

    Args:
        index (str): name of the index to search the runs in
        uuids (list): list of run uuid strings to find build urls of
        match: the fmatch Matcher instance used for querying

    Returns:
        dict: mapping of run uuid -> buildUrl
    """
    # Empty first argument means no single-uuid filter; {} means no extra metadata filters.
    runs = match.getResults("", uuids, index, {})
    return {run["uuid"]: run["buildUrl"] for run in runs}

def filter_metadata(uuid,match,logger):
"""Gets metadata of the run from each test
Expand Down Expand Up @@ -220,7 +239,7 @@ def set_logging(level, logger):
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(level)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
"%(asctime)s [%(name)s:%(filename)s:%(lineno)d] %(levelname)s: %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
Expand Down

0 comments on commit 354e0ca

Please sign in to comment.