Documentation Index
Fetch the complete documentation index at: https://nixtlaverse.nixtla.io/llms.txt
Use this file to discover all available pages before exploring further.
import numpy as np
import pandas as pd
from fastcore.test import test_fail
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PowerTransformer
from utilsforecast.processing import counts_by_id

from mlforecast import MLForecast
from mlforecast.grouped_array import GroupedArray
from mlforecast.target_transforms import Differences, GlobalSklearnTransformer
from mlforecast.utils import generate_daily_series
Bases: ABC
Base class used for target transformations.
Differences
Bases: _BaseGroupedArrayTargetTransform
Subtracts previous values of the series. Can be used to remove trend or seasonality.
# Build a GroupedArray out of ten synthetic daily series of varying length.
series = generate_daily_series(10, min_length=50, max_length=100)
diffs = Differences([1, 2, 5])
id_counts = counts_by_id(series, 'unique_id')
# Cumulative per-id counts with a leading 0, passed as the group boundaries.
indptr = np.append(0, id_counts['counts'].cumsum())
ga = GroupedArray(series['y'].values, indptr)

# differences are applied correctly
transformed = diffs.fit_transform(ga)
# store_fitted has not been enabled yet, so no fitted state is expected here.
assert diffs.fitted_ == []
expected = series.copy()
# Reference implementation: chain the differences with a grouped pandas shift.
for d in diffs.differences:
    expected['y'] -= expected.groupby('unique_id', observed=True)['y'].shift(d)
np.testing.assert_allclose(transformed.data, expected['y'].values)

# fitted differences are restored correctly
diffs.store_fitted = True
transformed = diffs.fit_transform(ga)
# Only compare positions where the differenced values are not NaN.
keep_mask = ~np.isnan(transformed.data)
restored = diffs.inverse_transform_fitted(transformed)
np.testing.assert_allclose(ga.data[keep_mask], restored.data[keep_mask])

# test transform
# One new observation per series (ten groups of size one).
new_ga = GroupedArray(np.random.rand(10), np.arange(11))
# Snapshot the stored tails that each difference will subtract, before
# `update` mutates them.
prev_orig = [
    scaler.tails_[::d].copy()
    for scaler, d in zip(diffs.scalers_, diffs.differences)
]
expected = new_ga.data - np.add.reduce(prev_orig)
updates = diffs.update(new_ga)
np.testing.assert_allclose(expected, updates.data)
# After the update each scaler's tails must end with the values it was fed.
np.testing.assert_allclose(diffs.scalers_[0].tails_, new_ga.data)
np.testing.assert_allclose(
    diffs.scalers_[1].tails_[1::2],
    new_ga.data - prev_orig[0],
)
np.testing.assert_allclose(
    diffs.scalers_[2].tails_[4::5],
    new_ga.data - np.add.reduce(prev_orig[:2]),
)

# variable sizes
diff1 = Differences([1])
# Two series, of lengths 3 and 7.
ga = GroupedArray(np.arange(10), np.array([0, 3, 10]))
diff1.fit_transform(ga)
# The update brings 1 new value for the first series and 3 for the second.
new_ga = GroupedArray(np.arange(4), np.array([0, 1, 4]))
updates = diff1.update(new_ga)
np.testing.assert_allclose(updates.data, np.array([0 - 2, 1 - 9, 2 - 1, 3 - 2]))
np.testing.assert_allclose(diff1.scalers_[0].tails_, np.array([0, 3]))

# short series
# The first series has only 2 observations; fitting must fail with a message
# containing "[0]" (presumably the index of the offending series).
ga = GroupedArray(np.arange(20), np.array([0, 2, 20]))
test_fail(lambda: diffs.fit_transform(ga), contains="[0]")

# stack
diffs = Differences([1, 2, 5])
ga = GroupedArray(series['y'].values, indptr)
diffs.fit_transform(ga)
stacked = Differences.stack([diffs, diffs])
# Stacking a transform with itself should repeat every scaler's tails twice.
for stacked_scaler, scaler in zip(stacked.scalers_, diffs.scalers_):
    np.testing.assert_allclose(
        stacked_scaler.tails_,
        np.tile(scaler.tails_, 2),
    )
AutoDifferences
AutoDifferences(max_diffs)
Bases: _BaseGroupedArrayTargetTransform
Find and apply the optimal number of differences to each series.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| max_diffs | int | Maximum number of differences to apply. | required |
inverse_transform_fitted(ga)
Inverse transform fitted values.
Reverses the differencing transformation by reconstructing the original
values from the differenced fitted values. This is used when fitted=True
to restore the fitted predictions to the original scale.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| ga | GroupedArray | GroupedArray containing the differenced fitted values. | required |
Returns:
| Type | Description |
|---|---|
| GroupedArray | GroupedArray with fitted values in the original scale. |
Raises:
| Type | Description |
|---|---|
| ValueError | If fitted differences are smaller than provided target. |
AutoSeasonalDifferences
AutoSeasonalDifferences(season_length, max_diffs, n_seasons=10)
Bases: AutoDifferences
Find and apply the optimal number of seasonal differences to each group.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| season_length | int | Length of the seasonal period. | required |
| max_diffs | int | Maximum number of differences to apply. | required |
| n_seasons | int | Number of seasons to use to determine the number of differences. Defaults to 10. If None will use all samples, otherwise season_length * n_seasons samples will be used for the test. Smaller values will be faster but could be less accurate. | 10 |
inverse_transform_fitted(ga)
Inverse transform fitted values.
Reverses the differencing transformation by reconstructing the original
values from the differenced fitted values. This is used when fitted=True
to restore the fitted predictions to the original scale.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| ga | GroupedArray | GroupedArray containing the differenced fitted values. | required |
Returns:
| Type | Description |
|---|---|
| GroupedArray | GroupedArray with fitted values in the original scale. |
Raises:
| Type | Description |
|---|---|
| ValueError | If fitted differences are smaller than provided target. |
AutoSeasonalityAndDifferences
AutoSeasonalityAndDifferences(max_season_length, max_diffs, n_seasons=10)
Bases: AutoDifferences
Find the length of the seasonal period and apply the optimal number of differences to each group.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| max_season_length | int | Maximum length of the seasonal period. | required |
| max_diffs | int | Maximum number of differences to apply. | required |
| n_seasons | int | Number of seasons to use to determine the number of differences. Defaults to 10. If None will use all samples, otherwise max_season_length * n_seasons samples will be used for the test. Smaller values will be faster but could be less accurate. | 10 |
Raises:
| Type | Description |
|---|---|
| ValueError | If any series has fewer than max_diffs + 4 observations. This ensures that after differencing, there are at least 4 observations remaining for STL decomposition (minimum 2 periods × minimum period of 2). |
inverse_transform_fitted(ga)
Inverse transform fitted values.
Reverses the differencing transformation by reconstructing the original
values from the differenced fitted values. This is used when fitted=True
to restore the fitted predictions to the original scale.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| ga | GroupedArray | GroupedArray containing the differenced fitted values. | required |
Returns:
| Type | Description |
|---|---|
| GroupedArray | GroupedArray with fitted values in the original scale. |
Raises:
| Type | Description |
|---|---|
| ValueError | If fitted differences are smaller than provided target. |
def test_scaler(sc, series):
    """Run round-trip consistency checks on a local scaler.

    Checks, in order, that:
      * ``inverse_transform`` undoes ``fit_transform``;
      * ``update`` on the same data reproduces the fitted transformation;
      * a scaler restricted with ``take`` inverts the matching subset;
      * ``stack([sc, sc])`` tiles the fitted statistics twice.

    Args:
        sc: Scaler under test. It is fitted by this function.
        series: Data frame with ``unique_id`` and ``y`` columns. Groups 0 and
            7 are selected with ``take``, so at least eight series are
            presumably required.

    Raises:
        AssertionError: If any of the checks fails.
    """
    counts = counts_by_id(series, 'unique_id')['counts']
    boundaries = np.append(0, counts.cumsum())
    grouped = GroupedArray(series['y'].values, boundaries)

    # fit + inverse must give back the original values
    scaled = sc.fit_transform(grouped)
    recovered = sc.inverse_transform(scaled)
    np.testing.assert_allclose(recovered.data, grouped.data)

    # updating with the same data must match the fitted transformation
    rescaled = sc.update(grouped)
    np.testing.assert_allclose(scaled.data, rescaled.data)

    # a subset of the scaler must invert the matching subset of the data
    selected = [0, 7]
    grouped_subset = grouped.take(selected)
    scaled_subset = scaled.take(selected)
    scaler_subset = sc.take(selected)
    np.testing.assert_allclose(
        scaler_subset.inverse_transform(scaled_subset).data,
        grouped_subset.data,
    )

    # stacking the scaler with itself repeats its statistics row-wise
    doubled = sc.stack([sc, sc])
    np.testing.assert_allclose(
        doubled.scaler_.stats_,
        np.tile(sc.scaler_.stats_, (2, 1)),
    )
LocalStandardScaler
Bases: _BaseLocalScaler
Standardizes each series by subtracting its mean and dividing by its standard deviation.
LocalMinMaxScaler
Bases: _BaseLocalScaler
Scales each series to be in the [0, 1] interval.
LocalRobustScaler
Bases: _BaseLocalScaler
Scaler robust to outliers.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| scale | str | Statistic to use for scaling. Can be either ‘iqr’ (Inter Quartile Range) or ‘mad’ (Median Absolute Deviation). Defaults to ‘iqr’. | required |
LocalBoxCox
Bases: _BaseLocalScaler
Finds the optimum lambda for each series and applies the Box-Cox transformation.
GlobalSklearnTransformer(transformer)
Bases: BaseTargetTransform
Applies the same scikit-learn transformer to all series.
# need this import in order for isinstance to work
from mlforecast.target_transforms import Differences as ExportedDifferences

# Target pipeline: a single Box-Cox transformer shared by all series,
# followed by a first difference.
sk_boxcox = PowerTransformer(method='box-cox', standardize=False)
boxcox_global = GlobalSklearnTransformer(sk_boxcox)
single_difference = ExportedDifferences([1])
series = generate_daily_series(10)
fcst = MLForecast(
    models=[LinearRegression(), HistGradientBoostingRegressor()],
    freq='D',
    lags=[1, 2],
    target_transforms=[boxcox_global, single_difference],
)
prep = fcst.preprocess(series, dropna=False)

# Rebuild the expected target by hand: Box-Cox over the whole `y` column,
# then a first difference within each series, dropping the resulting NaNs.
boxcox_y = pd.Series(
    sk_boxcox.fit_transform(series[['y']])[:, 0],
    index=series['unique_id'],
)
per_series_diff = boxcox_y.groupby('unique_id', observed=True).diff()
expected = per_series_diff.dropna().values
np.testing.assert_allclose(prep['y'].values, expected)

# Fit both models and forecast 5 steps ahead.
preds = fcst.fit(series).predict(5)