diff --git a/ads/opctl/operator/lowcode/common/utils.py b/ads/opctl/operator/lowcode/common/utils.py
index 8fb3f5a23..3578b7df3 100644
--- a/ads/opctl/operator/lowcode/common/utils.py
+++ b/ads/opctl/operator/lowcode/common/utils.py
@@ -11,6 +11,7 @@
 import tempfile
 from typing import List, Union
 
+import cloudpickle
 import fsspec
 import oracledb
 import pandas as pd
@@ -126,7 +127,26 @@ def load_data(data_spec, storage_options=None, **kwargs):
     return data
 
 
+def _safe_write(fn, **kwargs):
+    try:
+        return fn(**kwargs)
+    except Exception:
+        logger.warning(f'Failed to write file {kwargs.get("filename", "UNKNOWN")}')
+
+
 def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
+    return _safe_write(
+        fn=_write_data,
+        data=data,
+        filename=filename,
+        format=format,
+        storage_options=storage_options,
+        index=index,
+        **kwargs,
+    )
+
+
+def _write_data(data, filename, format, storage_options=None, index=False, **kwargs):
     disable_print()
     if not format:
         _, format = os.path.splitext(filename)
@@ -143,11 +163,24 @@ def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
 
 
 def write_json(json_dict, filename, storage_options=None):
+    return _safe_write(
+        fn=_write_json,
+        json_dict=json_dict,
+        filename=filename,
+        storage_options=storage_options,
+    )
+
+
+def _write_json(json_dict, filename, storage_options=None):
     with fsspec.open(filename, mode="w", **storage_options) as f:
         f.write(json.dumps(json_dict))
 
 
 def write_simple_json(data, path):
+    return _safe_write(fn=_write_simple_json, data=data, path=path)
+
+
+def _write_simple_json(data, path):
     if ObjectStorageDetails.is_oci_path(path):
         storage_options = default_signer()
     else:
@@ -156,6 +189,60 @@
         json.dump(data, f, indent=4)
 
 
+def write_file(local_filename, remote_filename, storage_options, **kwargs):
+    return _safe_write(
+        fn=_write_file,
+        local_filename=local_filename,
+        remote_filename=remote_filename,
+        storage_options=storage_options,
+        **kwargs,
+    )
+
+
+def _write_file(local_filename, remote_filename, storage_options, **kwargs):
+    with open(local_filename) as f1:
+        with fsspec.open(
+            remote_filename,
+            "w",
+            **storage_options,
+        ) as f2:
+            f2.write(f1.read())
+
+
+def load_pkl(filepath):
+    return _safe_write(fn=_load_pkl, filepath=filepath)
+
+
+def _load_pkl(filepath):
+    storage_options = {}
+    if ObjectStorageDetails.is_oci_path(filepath):
+        storage_options = default_signer()
+
+    with fsspec.open(filepath, "rb", **storage_options) as f:
+        return cloudpickle.load(f)
+    return None
+
+
+def write_pkl(obj, filename, output_dir, storage_options):
+    return _safe_write(
+        fn=_write_pkl,
+        obj=obj,
+        filename=filename,
+        output_dir=output_dir,
+        storage_options=storage_options,
+    )
+
+
+def _write_pkl(obj, filename, output_dir, storage_options):
+    pkl_path = os.path.join(output_dir, filename)
+    with fsspec.open(
+        pkl_path,
+        "wb",
+        **storage_options,
+    ) as f:
+        cloudpickle.dump(obj, f)
+
+
 def merge_category_columns(data, target_category_columns):
     result = data.apply(
         lambda x: "__".join([str(x[col]) for col in target_category_columns]), axis=1
@@ -290,4 +377,8 @@ def disable_print():
 
 # Restore
 def enable_print():
+    try:
+        sys.stdout.close()
+    except Exception:
+        pass
     sys.stdout = sys.__stdout__
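Reviewer note: the `_safe_write` helper introduced above changes failure semantics for every writer in this module (and for `load_pkl`, which reuses the same wrapper): an I/O error is downgraded to a warning and the call returns `None` instead of raising. A minimal sketch of the resulting behavior; the bucket path and payload are invented for illustration:

    from ads.opctl.operator.lowcode.common.utils import load_pkl, write_pkl

    # If the remote write fails (missing bucket, bad credentials, ...),
    # _safe_write logs "Failed to write file model.pkl" and returns None.
    write_pkl(
        obj={"example": 1},
        filename="model.pkl",
        output_dir="oci://bucket@namespace/outputs",  # hypothetical path
        storage_options={},
    )

    # Reads go through the same wrapper, so a missing artifact also yields None.
    model = load_pkl("oci://bucket@namespace/outputs/model.pkl")
    if model is None:
        print("artifact unavailable; continuing without it")

Callers that depend on a return value must therefore be prepared to handle `None`.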
diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py
index c3e37ffe0..0517e1b2a 100644
--- a/ads/opctl/operator/lowcode/forecast/model/automlx.py
+++ b/ads/opctl/operator/lowcode/forecast/model/automlx.py
@@ -38,6 +38,7 @@ def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
         super().__init__(config, datasets)
         self.global_explanation = {}
         self.local_explanation = {}
+        self.explainability_kwargs = {}
 
     def set_kwargs(self):
         model_kwargs_cleaned = self.spec.model_kwargs
@@ -54,6 +55,9 @@ def set_kwargs(self):
             self.spec.preprocessing.enabled
             or model_kwargs_cleaned.get("preprocessing", True)
         )
+        sample_ratio = model_kwargs_cleaned.pop("sample_to_feature_ratio", None)
+        if sample_ratio is not None:
+            self.explainability_kwargs = {"sample_to_feature_ratio": sample_ratio}
         return model_kwargs_cleaned, time_budget
 
     def preprocess(self, data, series_id):  # TODO: re-use self.le for explanations
@@ -445,6 +449,7 @@
                 else None,
                 pd.DataFrame(data_i[self.spec.target_column]),
                 task="forecasting",
+                **self.explainability_kwargs,
             )
 
             # Generate explanations for the forecast
@@ -518,7 +523,9 @@ def get_validation_score_and_metric(self, model):
         model_params = model.selected_model_params_
         if len(trials) > 0:
             score_col = [col for col in trials.columns if "Score" in col][0]
-            validation_score = trials[trials.Hyperparameters == model_params][score_col].iloc[0]
+            validation_score = trials[trials.Hyperparameters == model_params][
+                score_col
+            ].iloc[0]
         else:
             validation_score = 0
         return -1 * validation_score
@@ -531,8 +538,12 @@ def generate_train_metrics(self) -> pd.DataFrame:
         for s_id in self.forecast_output.list_series_ids():
             try:
                 metrics = {self.spec.metric.upper(): self.models[s_id]["score"]}
-                metrics_df = pd.DataFrame.from_dict(metrics, orient="index", columns=[s_id])
-                logger.warning("AutoMLX failed to generate training metrics. Recovering validation loss instead")
+                metrics_df = pd.DataFrame.from_dict(
+                    metrics, orient="index", columns=[s_id]
+                )
+                logger.warning(
+                    "AutoMLX failed to generate training metrics. Recovering validation loss instead"
+                )
                 total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
             except Exception as e:
                 logger.debug(
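Reviewer note: the new `sample_to_feature_ratio` key is popped out of `model_kwargs` in `set_kwargs()` so the AutoMLX trainer never sees it, and is replayed only when the explainer is built in `explain_model()`. A sketch of the flow, assuming the key arrives through the operator spec's `model_kwargs` (values invented):

    # Hypothetical model_kwargs as parsed from the operator YAML spec.
    model_kwargs = {"time_budget": 60, "sample_to_feature_ratio": 10}

    explainability_kwargs = {}
    sample_ratio = model_kwargs.pop("sample_to_feature_ratio", None)
    if sample_ratio is not None:
        explainability_kwargs = {"sample_to_feature_ratio": sample_ratio}

    # Trainer kwargs stay clean; explain_model() later splats
    # **explainability_kwargs into the explainer call.
    assert "sample_to_feature_ratio" not in model_kwargs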
diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py
index e36efadf7..d5b225519 100644
--- a/ads/opctl/operator/lowcode/forecast/model/base_model.py
+++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -11,7 +11,6 @@
 from abc import ABC, abstractmethod
 from typing import Tuple
 
-import fsspec
 import numpy as np
 import pandas as pd
 import report_creator as rc
@@ -25,10 +24,13 @@
     disable_print,
     enable_print,
     human_time_friendly,
+    load_pkl,
     merged_category_column_name,
     seconds_to_datetime,
     write_data,
+    write_file,
     write_json,
+    write_pkl,
 )
 from ads.opctl.operator.lowcode.forecast.utils import (
     _build_metrics_df,
@@ -38,8 +40,6 @@
     evaluate_train_metrics,
     get_auto_select_plot,
     get_forecast_plots,
-    load_pkl,
-    write_pkl,
 )
 
 from ..const import (
@@ -493,13 +493,11 @@
             enable_print()
 
             report_path = os.path.join(unique_output_dir, self.spec.report_filename)
-            with open(report_local_path) as f1:
-                with fsspec.open(
-                    report_path,
-                    "w",
-                    **storage_options,
-                ) as f2:
-                    f2.write(f1.read())
+            write_file(
+                local_filename=report_local_path,
+                remote_filename=report_path,
+                storage_options=storage_options,
+            )
 
             # forecast csv report
             # todo: add test data into forecast.csv
@@ -576,7 +574,9 @@
             # Round to 4 decimal places before writing
             global_expl_rounded = self.formatted_global_explanation.copy()
             global_expl_rounded = global_expl_rounded.apply(
-                lambda col: np.round(col, 4) if np.issubdtype(col.dtype, np.number) else col
+                lambda col: np.round(col, 4)
+                if np.issubdtype(col.dtype, np.number)
+                else col
             )
             if self.spec.generate_explanation_files:
                 write_data(
@@ -598,7 +598,9 @@
             # Round to 4 decimal places before writing
             local_expl_rounded = self.formatted_local_explanation.copy()
             local_expl_rounded = local_expl_rounded.apply(
-                lambda col: np.round(col, 4) if np.issubdtype(col.dtype, np.number) else col
+                lambda col: np.round(col, 4)
+                if np.issubdtype(col.dtype, np.number)
+                else col
             )
             if self.spec.generate_explanation_files:
                 write_data(
diff --git a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
index 91a533b17..f3a7a33f6 100644
--- a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
+++ b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
@@ -19,12 +19,10 @@
 from ads.opctl.operator.lowcode.common.utils import (
     disable_print,
     enable_print,
-)
-from ads.opctl.operator.lowcode.forecast.utils import (
-    _select_plot_list,
     load_pkl,
     write_pkl,
 )
+from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list
 
 from ..const import DEFAULT_TRIALS, SupportedModels
 from ..operator_config import ForecastOperatorConfig
@@ -159,20 +157,18 @@ def _train_model(self, i, s_id, df, model_kwargs):
             upper_bound=self.get_horizon(forecast[upper_bound_col_name]).values,
             lower_bound=self.get_horizon(forecast[lower_bound_col_name]).values,
         )
-        core_columns = set(forecast.columns) - set(
-            [
-                "y",
-                "yhat1",
-                upper_bound_col_name,
-                lower_bound_col_name,
-                "future_regressors_additive",
-                "future_regressors_multiplicative",
-            ]
-        )
+        core_columns = set(forecast.columns) - {
+            "y",
+            "yhat1",
+            upper_bound_col_name,
+            lower_bound_col_name,
+            "future_regressors_additive",
+            "future_regressors_multiplicative",
+        }
         exog_variables = set(
             filter(lambda x: x.startswith("future_regressor_"), list(core_columns))
         )
-        combine_terms = list(core_columns - exog_variables - set(["ds"]))
+        combine_terms = list(core_columns - exog_variables - {"ds"})
         temp_df = (
             forecast[list(core_columns)]
             .rename({"ds": "Date"}, axis=1)
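Reviewer note: for readers skimming the reflowed rounding lambda in `_save_report` above: `DataFrame.apply` passes each column in as a Series, so only numeric columns are rounded and label or category columns pass through untouched. A self-contained illustration with an invented frame:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"feature": ["a", "b"], "importance": [0.123456, 0.654321]})
    rounded = df.apply(
        lambda col: np.round(col, 4) if np.issubdtype(col.dtype, np.number) else col
    )
    print(rounded)  # 'importance' rounded to 4 decimals; 'feature' unchanged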
diff --git a/ads/opctl/operator/lowcode/forecast/utils.py b/ads/opctl/operator/lowcode/forecast/utils.py
index deb90099f..8db98cc98 100644
--- a/ads/opctl/operator/lowcode/forecast/utils.py
+++ b/ads/opctl/operator/lowcode/forecast/utils.py
@@ -1,14 +1,12 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import logging
 import os
 from typing import Set
 
-import cloudpickle
-import fsspec
 import numpy as np
 import pandas as pd
 import report_creator as rc
@@ -21,7 +19,6 @@
     r2_score,
 )
 
-from ads.common.object_storage_details import ObjectStorageDetails
 from ads.dataset.label_encoder import DataFrameLabelEncoder
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.forecast.const import ForecastOutputColumns
@@ -170,26 +167,6 @@ def _build_metrics_per_horizon(
     return metrics_df
 
 
-def load_pkl(filepath):
-    storage_options = {}
-    if ObjectStorageDetails.is_oci_path(filepath):
-        storage_options = default_signer()
-
-    with fsspec.open(filepath, "rb", **storage_options) as f:
-        return cloudpickle.load(f)
-    return None
-
-
-def write_pkl(obj, filename, output_dir, storage_options):
-    pkl_path = os.path.join(output_dir, filename)
-    with fsspec.open(
-        pkl_path,
-        "wb",
-        **storage_options,
-    ) as f:
-        cloudpickle.dump(obj, f)
-
-
 def _build_metrics_df(y_true, y_pred, series_id):
     if len(y_true) == 0 or len(y_pred) == 0:
         return pd.DataFrame()
@@ -251,7 +228,10 @@ def evaluate_train_metrics(output):
 
 
 def _select_plot_list(fn, series_ids, target_category_column):
-    blocks = [rc.Widget(fn(s_id=s_id), label=s_id if target_category_column else None) for s_id in series_ids]
+    blocks = [
+        rc.Widget(fn(s_id=s_id), label=s_id if target_category_column else None)
+        for s_id in series_ids
+    ]
     return rc.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
 
 
@@ -264,8 +244,10 @@
     back_test_csv_columns = backtest_results.columns.tolist()
     back_test_column = "backtest"
     metric_column = "metric"
-    models = [x for x in back_test_csv_columns if x not in [back_test_column, metric_column]]
-    for i, column in enumerate(models):
+    models = [
+        x for x in back_test_csv_columns if x not in [back_test_column, metric_column]
+    ]
+    for column in models:
         fig.add_trace(
             go.Scatter(
                 x=backtest_results[back_test_column],
@@ -283,7 +265,7 @@ def get_forecast_plots(
     horizon,
     test_data=None,
     ci_interval_width=0.95,
-    target_category_column=None
+    target_category_column=None,
 ):
     def plot_forecast_plotly(s_id):
         fig = go.Figure()
@@ -380,7 +362,9 @@ def plot_forecast_plotly(s_id):
         )
         return fig
 
-    return _select_plot_list(plot_forecast_plotly, forecast_output.list_series_ids(), target_category_column)
+    return _select_plot_list(
+        plot_forecast_plotly, forecast_output.list_series_ids(), target_category_column
+    )
 
 
 def convert_target(target: str, target_col: str):
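Reviewer note: the reformatted `_select_plot_list` is behavior-preserving: it builds one `rc.Widget` per series (labelled only when a `target_category_column` is set) and wraps the widgets in an `rc.Select` tab group only when there is more than one series. A usage sketch, assuming `report_creator` and `plotly` are installed; the series ids and plot function are invented:

    import plotly.graph_objects as go
    import report_creator as rc

    def make_plot(s_id):
        return go.Figure()  # stand-in for plot_forecast_plotly(s_id)

    series_ids = ["store_A", "store_B"]  # hypothetical
    blocks = [rc.Widget(make_plot(s_id=s_id), label=s_id) for s_id in series_ids]
    # One series -> the bare widget; several series -> a tabbed Select.
    widget = rc.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]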
diff --git a/pyproject.toml b/pyproject.toml
index f76e206cf..914d8b3ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,14 +21,15 @@ build-backend = "flit_core.buildapi"
 
 # Required
 name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
-version = "2.13.8"
+version = "2.13.9rc1"
 
 # Optional
 description = "Oracle Accelerated Data Science SDK"
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.8"
-license = { file = "LICENSE.txt" }
 authors = [{ name = "Oracle Data Science" }]
+license = "UPL-1.0"
+license-files = ["LICENSE.txt"]
 keywords = [
     "Oracle Cloud Infrastructure",
     "OCI",
@@ -39,11 +40,15 @@ keywords = [
     "Data Science",
     "Cloud",
     "Oracle",
+    "GenAI",
+    "Generative AI",
+    "Forecast",
+    "Anomaly",
+    "Document Understanding",
 ]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: Universal Permissive License (UPL)",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
diff --git a/setup.py b/setup.py
index c16a79ded..ffbbb4f12 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
 
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 ### File setup.py obsolete and must not be used. Please update pyproject.toml instead.
diff --git a/tests/operators/forecast/test_errors.py b/tests/operators/forecast/test_errors.py
index dd13356a4..a7dae481c 100644
--- a/tests/operators/forecast/test_errors.py
+++ b/tests/operators/forecast/test_errors.py
@@ -947,6 +947,38 @@ def test_prophet_floor_cap(operator_setup, model):
     ), "`max` not obeyed in prophet"
 
 
+def _check_results_obj(results):
+    assert not results.get_forecast().empty
+    assert not results.get_metrics().empty
+    assert not results.get_global_explanations().empty
+    assert not results.get_local_explanations().empty
+
+
+def _check_no_skippable_files(yaml_i, check_report=True):
+    files = os.listdir(yaml_i["spec"]["output_directory"]["url"])
+
+    if "errors.json" in files:
+        with open(
+            os.path.join(yaml_i["spec"]["output_directory"]["url"], "errors.json")
+        ) as f:
+            assert False, f"Failed due to errors.json being created: {f.read()}"
+    if check_report:
+        assert "report.html" in files, "Failed to generate report"
+
+    assert (
+        "forecast.csv" not in files
+    ), "Generated forecast file, but `generate_forecast_file` was set False"
+    assert (
+        "metrics.csv" not in files
+    ), "Generated metrics file, but `generate_metrics_file` was set False"
+    assert (
+        "local_explanations.csv" not in files
+    ), "Generated local explanations file, but `generate_explanation_files` was set False"
+    assert (
+        "global_explanations.csv" not in files
+    ), "Generated global explanations file, but `generate_explanation_files` was set False"
+
+
 @pytest.mark.parametrize("model", ["prophet"])
 def test_generate_files(operator_setup, model):
     yaml_i = TEMPLATE_YAML.copy()
@@ -969,35 +1001,15 @@ def test_generate_files(operator_setup, model):
     yaml_i["spec"]["additional_data"]["data"] = df_add
     operator_config = ForecastOperatorConfig.from_dict(yaml_i)
     results = operate(operator_config)
-    files = os.listdir(yaml_i["spec"]["output_directory"]["url"])
-    if "errors.json" in files:
-        with open(
-            os.path.join(yaml_i["spec"]["output_directory"]["url"], "errors.json")
-        ) as f:
-            assert False, f"Failed due to errors.json being created: {f.read()}"
-    assert "report.html" in files, "Failed to generate report"
-    assert (
-        "forecast.csv" not in files
-    ), "Generated forecast file, but `generate_forecast_file` was set False"
-    assert (
-        "metrics.csv" not in files
-    ), "Generated metrics file, but `generate_metrics_file` was set False"
-    assert (
-        "local_explanations.csv" not in files
-    ), "Generated metrics file, but `generate_explanation_files` was set False"
-    assert (
-        "global_explanations.csv" not in files
-    ), "Generated metrics file, but `generate_explanation_files` was set False"
-    assert not results.get_forecast().empty
-    assert not results.get_metrics().empty
-    assert not results.get_global_explanations().empty
-    assert not results.get_local_explanations().empty
+    _check_results_obj(results)
+    _check_no_skippable_files(yaml_i)
     yaml_i["spec"].pop("generate_explanation_files")
     yaml_i["spec"].pop("generate_forecast_file")
     yaml_i["spec"].pop("generate_metrics_file")
     operator_config = ForecastOperatorConfig.from_dict(yaml_i)
     results = operate(operator_config)
+    _check_results_obj(results)
     files = os.listdir(yaml_i["spec"]["output_directory"]["url"])
     if "errors.json" in files:
         with open(
@@ -1011,6 +1023,12 @@ def test_generate_files(operator_setup, model):
     assert "local_explanation.csv" in files, "Failed to generated local expl file"
     assert "global_explanation.csv" in files, "Failed to generated global expl file"
 
+    # Test that the results object is still generated when writing report.html fails
+    yaml_i["spec"]["output_directory"]["url"] = "s3://test@test/test_dir"
+    operator_config = ForecastOperatorConfig.from_dict(yaml_i)
+    results = operate(operator_config)
+    _check_results_obj(results)
+
 
 if __name__ == "__main__":
     pass
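Reviewer note: the extracted test helpers keep the happy-path assertions in one place, and the `check_report=True` default leaves room for callers that expect the report upload to fail (such as the new S3 case above) to skip the `report.html` assertion. A minimal usage sketch; the output directory here is a freshly created empty temp dir, invented for illustration:

    import tempfile

    out_dir = tempfile.mkdtemp()  # empty dir: no skippable files, no report yet
    yaml_i = {"spec": {"output_directory": {"url": out_dir}}}  # minimal stand-in spec
    _check_no_skippable_files(yaml_i, check_report=False)  # passes: nothing generated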