Skip to content
This repository has been archived by the owner on Oct 7, 2023. It is now read-only.

Commit

Permalink
feature(Baseline): added all baseline models from fold (#42)
Browse files Browse the repository at this point in the history
* feature(Baseline): added all baseline models from `fold`

* Create test-baselines.yaml
  • Loading branch information
almostintuitive authored Mar 23, 2023
1 parent 2730f22 commit f1fac2d
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 6 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/test-baselines.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: runs the baseline-model test module on every push.
name: test-baselines

on: push

jobs:

  run-tests:
    runs-on: ubuntu-latest

    steps:
      - name: checkout
        uses: actions/checkout@v3

      - name: setup-python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps it a string; an unquoted 3.10 would be
          # parsed as the float 3.1 and select the wrong interpreter.
          python-version: "3.9"

      - name: install-dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ".[tests]"
      - name: run-tests
        # -s disables output capturing; --durations 0 reports every test's runtime.
        run: pytest tests/test_baselines.py -s --durations 0
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<p align="center">
<a href="https://codecov.io/gh/dream-faster/fold-models" ><img src="https://codecov.io/gh/dream-faster/fold-models/branch/main/graph/badge.svg?token=Z7I2XSF188"/></a>
<a href="https://github.com/dream-faster/fold-models/actions/workflows/tests.yaml"><img alt="Tests" src="https://github.com/dream-faster/fold-models/actions/workflows/tests.yaml/badge.svg"/></a>
<a href="https://github.com/dream-faster/fold-models/actions/workflows/tests-statsforecast.yaml"><img alt="Tests" src="https://github.com/dream-faster/fold-models/actions/workflows/tests-statsforecast.yaml/badge.svg"/></a>
<a href="https://discord.gg/EKJQgfuBpE"><img alt="Discord Community" src="https://img.shields.io/badge/Discord-%235865F2.svg?logo=discord&logoColor=white"></a>
</p>

Expand All @@ -12,8 +12,8 @@
</a>
<h3 align="center"><b>FOLD-MODELS</b><br> <i>(/fold models/)</i></h3>
<p align="center">
<b>Wrappers for 3rd party time series and tabular ML models.
<br/> To be used with <a href='https://github.com/dream-faster/fold'>Fold. </a> </b><br>
<b>Baseline models, wrappers for 3rd party models.
<br/>To be used with <a href='https://github.com/dream-faster/fold'>Fold.</a> </b><br>
<br/>
<!-- <a href="https://dream-faster.github.io/fold-models/"><strong>Explore the docs »</strong></a> -->
</p>
Expand All @@ -24,7 +24,7 @@

| | | |
| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :------------ | ---------------------------------------------------: |
| <img alt='Statsforecast Logo' src='https://raw.githubusercontent.com/Nixtla/neuralforecast/main/nbs/imgs_indx/logo_mid.png' width=160> | StatsForecast | [GitHub](https://github.com/Nixtla/statsforecast) |
| <img alt='XGBoost Logo' src='https://camo.githubusercontent.com/0ea6e7814dd771f740509bbb668d251d485a6e21f12e287be7cc2275e0eab1d1/68747470733a2f2f7867626f6f73742e61692f696d616765732f6c6f676f2f7867626f6f73742d6c6f676f2e737667' width=160> | XGBoost | [GitHub](https://github.com/dmlc/xgboost) |
| <img alt='Sktime Logo' src='https://github.com/sktime/sktime/raw/main/docs/source/images/sktime-logo.jpg?raw=true' width=160> | Sktime | [GitHub](https://github.com/sktime/sktime) |
| <img alt='Statsforecast Logo' src='https://raw.githubusercontent.com/Nixtla/neuralforecast/main/nbs/imgs_indx/logo_mid.png' height=64> | StatsForecast | [GitHub](https://github.com/Nixtla/statsforecast) |
| <img alt='XGBoost Logo' src='https://camo.githubusercontent.com/0ea6e7814dd771f740509bbb668d251d485a6e21f12e287be7cc2275e0eab1d1/68747470733a2f2f7867626f6f73742e61692f696d616765732f6c6f676f2f7867626f6f73742d6c6f676f2e737667' height=64> | XGBoost | [GitHub](https://github.com/dmlc/xgboost) |
| <img alt='Sktime Logo' src='https://github.com/sktime/sktime/raw/main/docs/source/images/sktime-logo.jpg?raw=true' height=64> | Sktime | [GitHub](https://github.com/sktime/sktime) |
| <img alt='Statsmodels Logo' src='https://github.com/statsmodels/statsmodels/raw/main/docs/source/images/statsmodels-logo-v2-horizontal.svg' width=160> | Statsmodels | [GitHub](https://github.com/statsmodels/statsmodels) |
86 changes: 86 additions & 0 deletions src/fold_models/baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from __future__ import annotations

from typing import Union

import pandas as pd
from fold.models.base import Model
from fold.transformations.base import fit_noop


class Naive(Model):
    """
    Baseline forecaster that repeats the most recent target value it has seen.

    Declared as an online model with `memory_size=1`; the remembered targets
    are read from `self._state.memory_y`.
    """

    name = "Naive"
    properties = Model.Properties(memory_size=1, mode=Model.Properties.Mode.online)

    def predict(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the last remembered target, labelled with the last index entry of `X`."""
        # Online mode: X is expected to contain exactly one row, so a single
        # value indexed by that row is emitted.
        latest_value = self._state.memory_y.iloc[-1].squeeze()
        target_index = X.index[-1:]
        return pd.Series(latest_value, index=target_index)

    def predict_in_sample(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the remembered targets lagged by one step."""
        lagged = self._state.memory_y.shift(1)
        return lagged

    # Both training hooks are bound to fold's `fit_noop` helper.
    fit = fit_noop
    update = fit_noop


class NaiveSeasonal(Model):
    """
    A model that predicts the last value seen in the same season.

    The prediction is the target observed `seasonal_length` steps earlier,
    read from `self._state.memory_y`.

    Args:
        seasonal_length: number of steps in one season; must be greater than 1.

    Raises:
        ValueError: if `seasonal_length` is not greater than 1.
    """

    name = "NaiveSeasonal"

    def __init__(self, seasonal_length: int) -> None:
        # Explicit raise rather than `assert`: assertions are stripped when
        # Python runs with -O, which would silently disable this validation.
        if not seasonal_length > 1:
            raise ValueError("seasonal_length must be greater than 1")
        self.seasonal_length = seasonal_length
        # The requested memory spans exactly one season so that `predict`
        # can look back `seasonal_length` steps.
        self.properties = Model.Properties(
            mode=Model.Properties.Mode.online,
            memory_size=seasonal_length,
            _internal_supports_minibatch_backtesting=True,
        )

    def predict(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the target from one season ago, labelled with the last index entry of `X`."""
        # it's an online transformation, so len(X) is expected to always be 1
        return pd.Series(
            self._state.memory_y.iloc[-self.seasonal_length].squeeze(),
            index=X.index[-1:None],
        )

    def predict_in_sample(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the remembered targets lagged by one full season."""
        return self._state.memory_y.shift(self.seasonal_length)

    # Both training hooks are bound to fold's `fit_noop` helper.
    fit = fit_noop
    update = fit


class MovingAverage(Model):
    """
    A model that predicts the mean of the last values seen.

    The prediction is the mean of the most recent `window_size` targets,
    read from `self._state.memory_y`.

    Args:
        window_size: number of most recent observations to average;
            must be at least 1.

    Raises:
        ValueError: if `window_size` is smaller than 1.
    """

    name = "MovingAverage"

    def __init__(self, window_size: int) -> None:
        # A zero or negative window would make the `[-window_size:]` slice in
        # `predict` select an unintended range, so reject it up front.
        if window_size < 1:
            raise ValueError("window_size must be at least 1")
        self.window_size = window_size
        self.properties = Model.Properties(
            mode=Model.Properties.Mode.online,
            memory_size=window_size,
            _internal_supports_minibatch_backtesting=True,
        )

    def predict(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the mean of the last `window_size` targets, labelled with the last index entry of `X`."""
        # it's an online transformation, so len(X) is expected to always be 1.
        # `.iloc` makes the slice explicitly positional, whatever the index
        # type, and matches how the sibling baseline models access memory.
        recent = self._state.memory_y.iloc[-self.window_size :]
        return pd.Series(recent.mean(), index=X.index[-1:None])

    def predict_in_sample(self, X: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
        """Return the rolling mean of the targets, lagged by one step."""
        # shift(1) first, so each row's average only uses earlier observations
        return self._state.memory_y.shift(1).rolling(self.window_size).mean()

    # Both training hooks are bound to fold's `fit_noop` helper.
    fit = fit_noop
    update = fit
78 changes: 78 additions & 0 deletions tests/test_baselines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import numpy as np
from fold.loop import backtest, train
from fold.splitters import ExpandingWindowSplitter
from fold.transformations.columns import OnlyPredictions
from fold.transformations.dev import Test
from fold.utils.tests import generate_sine_wave_data

from fold_models.baseline import MovingAverage, Naive, NaiveSeasonal


def test_baseline_naive() -> None:
    """Naive must predict the previous observation for every out-of-sample step."""
    # 120 points at "M" frequency, 10 cycles
    X, y = generate_sine_wave_data(cycles=10, length=120, freq="M")

    def check_if_not_nan(x):
        # Runs as the fit hook of the `Test` step placed after the model.
        assert not x.isna().squeeze().any()

    splitter = ExpandingWindowSplitter(initial_train_window=0.2, step=0.1)
    pipeline = [
        Naive(),
        Test(fit_func=check_if_not_nan, transform_func=lambda X: X),
        OnlyPredictions(),
    ]
    trained_pipelines = train(pipeline, X, y, splitter)
    pred = backtest(trained_pipelines, X, y, splitter)

    # every prediction should equal the target one step earlier
    one_step_lag = y.shift(1)[pred.index]
    assert (pred.squeeze() == one_step_lag).all()

    # should return non-NaN predictions for all the out-of-sample sets
    expected_length = 120 * 0.8
    assert len(pred) == expected_length


def test_baseline_naive_seasonal() -> None:
    """NaiveSeasonal with a 12-step season must track the periodic sine wave."""
    # create a sine wave with yearly seasonality (10 cycles over 120 "M" steps)
    X, y = generate_sine_wave_data(cycles=10, length=120, freq="M")

    def check_if_not_nan(x):
        # Runs as the fit hook of the `Test` step placed after the model.
        assert not x.isna().squeeze().any()

    splitter = ExpandingWindowSplitter(initial_train_window=0.2, step=0.1)
    pipeline = [
        NaiveSeasonal(seasonal_length=12),
        Test(fit_func=check_if_not_nan, transform_func=lambda X: X),
        OnlyPredictions(),
    ]
    trained_pipelines = train(pipeline, X, y, splitter)
    pred = backtest(trained_pipelines, X, y, splitter)

    # last year's value should match this year's value, with the sine wave we generated
    matches = np.isclose(pred.squeeze(), y[pred.index], atol=0.02)
    assert matches.all()

    # should return non-NaN predictions for all the out-of-sample sets
    expected_length = 120 * 0.8
    assert len(pred) == expected_length


def test_baseline_mean() -> None:
    """MovingAverage(12) must match the lagged 12-step rolling mean of the target."""
    X, y = generate_sine_wave_data(cycles=10, length=400)

    def check_if_not_nan(x):
        # Runs as the fit hook of the `Test` step placed after the model.
        assert not x.isna().squeeze().any()

    splitter = ExpandingWindowSplitter(initial_train_window=0.2, step=0.1)
    pipeline = [
        MovingAverage(window_size=12),
        Test(fit_func=check_if_not_nan, transform_func=lambda X: X),
        OnlyPredictions(),
    ]
    trained_pipelines = train(pipeline, X, y, splitter)
    pred = backtest(trained_pipelines, X, y, splitter)

    # reference: mean of the 12 observations preceding each predicted step
    reference = y.shift(1).rolling(12).mean()[pred.index]
    matches = np.isclose(reference, pred.squeeze(), atol=0.01)
    assert matches.all()

    # should return non-NaN predictions for all the out-of-sample sets
    expected_length = 400 * 0.8
    assert len(pred) == expected_length

0 comments on commit f1fac2d

Please sign in to comment.