import numpy as np
import pandas as pd
from nbdev import show_doc

from mlforecast.lag_transforms import ExpandingMean
from mlforecast.utils import generate_daily_series

Setup

# Seeded generator so the synthetic prices are reproducible.
rng = np.random.RandomState(0)
series = generate_daily_series(100, equal_ends=True)

# First and last timestamp of every series.
starts_ends = (
    series
    .groupby('unique_id', observed=True, as_index=False)['ds']
    .agg(['min', 'max'])
)

# One price frame per series, running 14 days past its last timestamp.
frames = []
for row in starts_ends.itertuples():
    last_date = row.max + 14 * pd.offsets.Day()
    dates = pd.date_range(row.min, last_date)
    frames.append(
        pd.DataFrame(
            {
                'ds': dates,
                'price': rng.rand(dates.size),
                'unique_id': row.Index,
            }
        )
    )
prices = pd.concat(frames)

# Second exogenous column: the price scaled by fresh uniform noise.
prices['price2'] = prices['price'] * rng.rand(prices.shape[0])
prices.head()
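|   | ds         | price    | unique_id | price2   |
|---|------------|----------|-----------|----------|
| 0 | 2000-10-05 | 0.548814 | 0         | 0.345011 |
| 1 | 2000-10-06 | 0.715189 | 0         | 0.445598 |
| 2 | 2000-10-07 | 0.602763 | 0         | 0.165147 |
| 3 | 2000-10-08 | 0.544883 | 0         | 0.041373 |
| 4 | 2000-10-09 | 0.423655 | 0         | 0.391577 |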

source

transform_exog

 transform_exog (df:~DFType, lags:Optional[Iterable[int]]=None, lag_transf
                 orms:Optional[Dict[int,List[Union[Callable,Tuple[Callable
                 ,Any]]]]]=None, id_col:str='unique_id',
                 time_col:str='ds', num_threads:int=1)

Compute lag features for dynamic exogenous regressors.

|                | Type     | Default   | Details |
|----------------|----------|-----------|---------|
| df             | DFType   |           | Dataframe with ids, times and values for the exogenous regressors. |
| lags           | Optional | None      | Lags of the target to use as features. |
| lag_transforms | Optional | None      | Mapping of target lags to their transformations. |
| id_col         | str      | unique_id | Column that identifies each series. |
| time_col       | str      | ds        | Column that identifies each timestep, its values can be timestamps or integers. |
| num_threads    | int      | 1         | Number of threads to use when computing the features. |
| **Returns**    | DFType   |           | Original DataFrame with the computed features |

# Lags 1 and 2 of each exogenous column, plus an expanding mean over lag 1.
exog_lags = [1, 2]
exog_lag_transforms = {1: [ExpandingMean()]}
transformed = transform_exog(
    prices, lags=exog_lags, lag_transforms=exog_lag_transforms
)
transformed.head()
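|   | ds         | price    | unique_id | price2   | price_lag1 | price_lag2 | price_expanding_mean_lag1 | price2_lag1 | price2_lag2 | price2_expanding_mean_lag1 |
|---|------------|----------|-----------|----------|------------|------------|---------------------------|-------------|-------------|----------------------------|
| 0 | 2000-10-05 | 0.548814 | 0         | 0.345011 | NaN        | NaN        | NaN                       | NaN         | NaN         | NaN                        |
| 1 | 2000-10-06 | 0.715189 | 0         | 0.445598 | 0.548814   | NaN        | 0.548814                  | 0.345011    | NaN         | 0.345011                   |
| 2 | 2000-10-07 | 0.602763 | 0         | 0.165147 | 0.715189   | 0.548814   | 0.632001                  | 0.445598    | 0.345011    | 0.395304                   |
| 3 | 2000-10-08 | 0.544883 | 0         | 0.041373 | 0.602763   | 0.715189   | 0.622255                  | 0.165147    | 0.445598    | 0.318585                   |
| 4 | 2000-10-09 | 0.423655 | 0         | 0.391577 | 0.544883   | 0.602763   | 0.602912                  | 0.041373    | 0.165147    | 0.249282                   |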
import polars as pl
# Same feature configuration on a polars frame, computed with two threads.
prices_pl = pl.from_pandas(prices)
polars_feature_kwargs = {
    'lags': [1, 2],
    'lag_transforms': {1: [ExpandingMean()]},
    'num_threads': 2,
}
transformed_pl = transform_exog(prices_pl, **polars_feature_kwargs)
transformed_pl.head()
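| ds                  | price    | unique_id | price2   | price_lag1 | price_lag2 | price_expanding_mean_lag1 | price2_lag1 | price2_lag2 | price2_expanding_mean_lag1 |
|---------------------|----------|-----------|----------|------------|------------|---------------------------|-------------|-------------|----------------------------|
| datetime[ns]        | f64      | i64       | f64      | f64        | f64        | f64                       | f64         | f64         | f64                        |
| 2000-10-05 00:00:00 | 0.548814 | 0         | 0.345011 | NaN        | NaN        | NaN                       | NaN         | NaN         | NaN                        |
| 2000-10-06 00:00:00 | 0.715189 | 0         | 0.445598 | 0.548814   | NaN        | 0.548814                  | 0.345011    | NaN         | 0.345011                   |
| 2000-10-07 00:00:00 | 0.602763 | 0         | 0.165147 | 0.715189   | 0.548814   | 0.632001                  | 0.445598    | 0.345011    | 0.395304                   |
| 2000-10-08 00:00:00 | 0.544883 | 0         | 0.041373 | 0.602763   | 0.715189   | 0.622255                  | 0.165147    | 0.445598    | 0.318585                   |
| 2000-10-09 00:00:00 | 0.423655 | 0         | 0.391577 | 0.544883   | 0.602763   | 0.602912                  | 0.041373    | 0.165147    | 0.249282                   |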