From 69901958920e77bd979355dfcc6654e50e67a3b6 Mon Sep 17 00:00:00 2001 From: Jordan Maynor Date: Thu, 26 Sep 2024 15:45:01 -0500 Subject: [PATCH] Added tenacity retrying to spotlight --- clope/spotlight/spotlight.py | 109 +++++++++++++++++++---------------- requirements.txt | 1 + tests/test.py | 29 ++++++---- 3 files changed, 77 insertions(+), 62 deletions(-) diff --git a/clope/spotlight/spotlight.py b/clope/spotlight/spotlight.py index 6e2986c..bb3c766 100644 --- a/clope/spotlight/spotlight.py +++ b/clope/spotlight/spotlight.py @@ -9,8 +9,21 @@ import pandas import requests +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) +@retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=1, min=4, max=10), + retry=retry_if_exception_type( + (requests.exceptions.ConnectionError, requests.exceptions.Timeout) + ), +) def run_report( report_id: str, params: List[Tuple[str, str]] = None, dtype: dict = None ) -> pandas.DataFrame: @@ -19,41 +32,34 @@ def run_report( Uses Basic authentication with username and password. Returns a pandas dataframe of the report data. - Takes two optional parameters: - - params: List of tuples to pass as parameters in the GET request. Usually date ranges. - - dtype: Dictionary of column names and data types to cast columns to. + :param report_id: The ID of the report to run. + :param params: A list of tuples to pass as parameters in the GET request. Usually date ranges. + :param dtype: Dictionary of column names and data types to cast columns to. """ # Check for environment variables - # Required if "CLO_USERNAME" not in os.environ: raise Exception("CLO_USERNAME environment variable not set") - clo_username = os.environ["CLO_USERNAME"] if "CLO_PASSWORD" not in os.environ: raise Exception("CLO_PASSWORD environment variable not set") - clo_password = os.environ["CLO_PASSWORD"] - # Optional - if "CLO_BASE_URL" not in os.environ: - clo_base_url = "https://api.mycantaloupe.com" - else: - clo_base_url = os.environ["CLO_BASE_URL"] - if "CLO_ARCHIVE_FILES" not in os.environ: - clo_archive_files = False - else: - clo_archive_files = os.environ["CLO_ARCHIVE_FILES"].lower() == "true" if params is None: params = [] params.append(("ReportId", report_id)) - response = requests.get( - clo_base_url + "/Reports/Run", - auth=(clo_username, clo_password), - params=params, - ) - - if response.status_code != 200: - print("Error, could not run report: ", response.content) - raise Exception("Error, could not run report", response.content) + try: + response = requests.get( + os.environ.get("CLO_BASE_URL", "https://api.mycantaloupe.com") + + "/Reports/Run", + auth=(os.environ["CLO_USERNAME"], os.environ["CLO_PASSWORD"]), + params=params, + ) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + raise Exception( + f"Error, could not run report: {e.response.status_code} - {e.response.content}" + ) + except requests.exceptions.RequestException as e: + raise Exception(f"Error, could not run report: {e}") excel_data = response.content @@ -62,41 +68,42 @@ def run_report( with open(f"report{report_id}.xlsx", "wb") as f: f.write(excel_data) except Exception as e: - print("Error saving excel file: ", e) - exit(1) + raise Exception(f"Error saving excel file {e}") try: report_df = pandas.read_excel( f"report{report_id}.xlsx", sheet_name="Report", dtype=dtype ) except Exception as e: - print("Error reading excel file: ", e) - raise Exception("Error reading excel file", e) + raise Exception(f"Error reading excel file: {e}") # Delete temp excel file if len(report_df) > 0: - if clo_archive_files: - try: - new_dir = os.path.join( - os.getcwd(), "Archive", datetime.now().strftime("%Y-%m-%d") - ) - os.makedirs(new_dir, exist_ok=True) - shutil.move( - f"report{report_id}.xlsx", - os.path.join(new_dir, f"report{report_id}.xlsx"), - ) - except Exception as e: - print("Error moving excel file: ", e) - raise Exception("Error moving excel file", e) - else: - try: - os.remove(f"report{report_id}.xlsx") - except Exception as e: - print("Error deleting excel file: ", e) - raise Exception("Error deleting excel file", e) - - # Print message if no data returned - if len(report_df) == 0: - print("No data returned from report") + _handle_temp_file(report_id) return report_df + + +def _handle_temp_file(report_id: str): + """ + Helper function that gets called after the report is run. + """ + if os.environ.get("CLO_ARCHIVE_FILES", "false").lower() == "true": + try: + new_dir = os.path.join( + os.getcwd(), "Archive", datetime.now().strftime("%Y-%m-%d") + ) + os.makedirs(new_dir, exist_ok=True) + shutil.move( + f"report{report_id}.xlsx", + os.path.join(new_dir, f"report{report_id}.xlsx"), + ) + except Exception as e: + print("Error moving excel file: ", e) + raise Exception("Error moving excel file", e) + else: + try: + os.remove(f"report{report_id}.xlsx") + except Exception as e: + print("Error deleting excel file: ", e) + raise Exception("Error deleting excel file", e) diff --git a/requirements.txt b/requirements.txt index d41ea6a..acb98df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pandas pyarrow openpyxl snowflake-connector-python[pandas] +tenacity diff --git a/tests/test.py b/tests/test.py index 91ae1c2..3fc3d6b 100644 --- a/tests/test.py +++ b/tests/test.py @@ -3,19 +3,26 @@ sys.path.insert(0, "") -from clope import get_machine_alerts_fact +from clope import get_machine_alerts_fact, run_report from clope.snow.dates import date_to_datekey, datekey_to_date +from dotenv import load_dotenv -alerts = get_machine_alerts_fact( - effective_date_range=( - date_to_datekey(datetime.now() - timedelta(days=1)), - date_to_datekey(datetime.now()), - ), - added_date_range=( - datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - - timedelta(days=1), - datetime.now(), - ), +load_dotenv() + +# alerts = get_machine_alerts_fact( +# effective_date_range=( +# date_to_datekey(datetime.now() - timedelta(days=1)), +# date_to_datekey(datetime.now()), +# ), +# added_date_range=( +# datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) +# - timedelta(days=1), +# datetime.now(), +# ), +# ) + +df = run_report( + "24128", [("filter3", "2024-09-06T00:00:00Z"), ("filter3", "2024-09-06T23:59:59Z")] ) pass