import numpy as np
import pandas as pd
from nbdev import show_doc
from window_ops.expanding import expanding_mean

from mlforecast.utils import generate_daily_series

Setup

# Build a toy table of dynamic exogenous regressors (`prices`): one row per
# series and day, covering each series' observed date range plus 14 extra days.
rng = np.random.RandomState(0)  # fixed seed so the rendered output is reproducible
series = generate_daily_series(100, equal_ends=True)
# Keep `unique_id` as the index, because the loop below reads it back through
# `r.Index`. The reducers are named with strings: passing the builtins
# `min`/`max` to `agg` is deprecated in recent pandas releases.
# `observed=True` is a no-op for non-categorical ids and skips unused
# categories otherwise (they would yield NaT bounds).
starts_ends = series.groupby('unique_id', observed=True)['ds'].agg(['min', 'max'])
prices = []
for r in starts_ends.itertuples():
    # Extend past the last observed date by 14 days.
    dates = pd.date_range(r.min, r.max + 14 * pd.offsets.Day())
    df = pd.DataFrame({'ds': dates, 'price': rng.rand(dates.size)})
    df['unique_id'] = r.Index
    prices.append(df)
prices = pd.concat(prices)
# Second regressor: the first one scaled by an independent uniform draw.
prices['price2'] = prices['price'] * rng.rand(prices.shape[0])
prices.head()
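|   | ds         | price    | unique_id | price2   |
|---|------------|----------|-----------|----------|
| 0 | 2000-10-05 | 0.548814 | id_00     | 0.345011 |
| 1 | 2000-10-06 | 0.715189 | id_00     | 0.445598 |
| 2 | 2000-10-07 | 0.602763 | id_00     | 0.165147 |
| 3 | 2000-10-08 | 0.544883 | id_00     | 0.041373 |
| 4 | 2000-10-09 | 0.423655 | id_00     | 0.391577 |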

source

transform_exog

 transform_exog
                 (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFrame],
                 lags:Optional[Iterable[int]]=None,
                 lag_transforms:Optional[Dict[int,List[Union[Callable,Tuple[Callable,Any]]]]]=None,
                 id_col:str='unique_id',
                 time_col:str='ds', num_threads:int=1)

Compute lag features for dynamic exogenous regressors.

|                | Type     | Default   | Details |
|----------------|----------|-----------|---------|
| df             | Union    |           | Dataframe with ids, times and values for the exogenous regressors. |
| lags           | Optional | None      | Lags of the exogenous regressors to use as features. |
| lag_transforms | Optional | None      | Mapping of regressor lags to their transformations. |
| id_col         | str      | unique_id | Column that identifies each series. |
| time_col       | str      | ds        | Column that identifies each timestep; its values can be timestamps or integers. |
| num_threads    | int      | 1         | Number of threads to use when computing the features. |
| **Returns**    | Union    |           | Original DataFrame with the computed features. |
# For each regressor column in `prices`, add its lag-1 and lag-2 values plus
# the expanding mean of the lag-1 values.
transformed = transform_exog(prices, lags=[1, 2], lag_transforms={1: [expanding_mean]})
transformed.head()
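|   | ds         | price    | unique_id | price2   | price_lag1 | price_lag2 | price_expanding_mean_lag1 | price2_lag1 | price2_lag2 | price2_expanding_mean_lag1 |
|---|------------|----------|-----------|----------|------------|------------|---------------------------|-------------|-------------|----------------------------|
| 0 | 2000-10-05 | 0.548814 | id_00     | 0.345011 | NaN        | NaN        | NaN                       | NaN         | NaN         | NaN                        |
| 1 | 2000-10-06 | 0.715189 | id_00     | 0.445598 | 0.548814   | NaN        | 0.548814                  | 0.345011    | NaN         | 0.345011                   |
| 2 | 2000-10-07 | 0.602763 | id_00     | 0.165147 | 0.715189   | 0.548814   | 0.632001                  | 0.445598    | 0.345011    | 0.395304                   |
| 3 | 2000-10-08 | 0.544883 | id_00     | 0.041373 | 0.602763   | 0.715189   | 0.622255                  | 0.165147    | 0.445598    | 0.318585                   |
| 4 | 2000-10-09 | 0.423655 | id_00     | 0.391577 | 0.544883   | 0.602763   | 0.602912                  | 0.041373    | 0.165147    | 0.249282                   |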
import polars as pl

# Same feature spec as the pandas call, applied to a polars copy of `prices`
# and computed with two threads.
prices_pl = pl.from_pandas(prices)
transformed_pl = transform_exog(
    prices_pl, lags=[1, 2], lag_transforms={1: [expanding_mean]}, num_threads=2
)
transformed_pl.head()
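| ds                  | price    | unique_id | price2   | price_lag1 | price_lag2 | price_expanding_mean_lag1 | price2_lag1 | price2_lag2 | price2_expanding_mean_lag1 |
|---------------------|----------|-----------|----------|------------|------------|---------------------------|-------------|-------------|----------------------------|
| datetime[ns]        | f64      | str       | f64      | f64        | f64        | f64                       | f64         | f64         | f64                        |
| 2000-10-05 00:00:00 | 0.548814 | "id_00"   | 0.345011 | NaN        | NaN        | NaN                       | NaN         | NaN         | NaN                        |
| 2000-10-06 00:00:00 | 0.715189 | "id_00"   | 0.445598 | 0.548814   | NaN        | 0.548814                  | 0.345011    | NaN         | 0.345011                   |
| 2000-10-07 00:00:00 | 0.602763 | "id_00"   | 0.165147 | 0.715189   | 0.548814   | 0.632001                  | 0.445598    | 0.345011    | 0.395304                   |
| 2000-10-08 00:00:00 | 0.544883 | "id_00"   | 0.041373 | 0.602763   | 0.715189   | 0.622255                  | 0.165147    | 0.445598    | 0.318585                   |
| 2000-10-09 00:00:00 | 0.423655 | "id_00"   | 0.391577 | 0.544883   | 0.602763   | 0.602912                  | 0.041373    | 0.165147    | 0.249282                   |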