This notebook offers a step-by-step guide to creating a hierarchical forecasting pipeline.

In the pipeline we will use the HierarchicalForecast and StatsForecast core classes to create base predictions, reconcile them, and evaluate them.

We will use the TourismL dataset, which summarizes a large Australian national visitor survey.

Outline 1. Installing Packages 2. Prepare TourismL dataset - Read and aggregate - StatsForecast’s Base Predictions 3. Reconcile 4. Evaluate

1. Installing HierarchicalForecast

We assume you have StatsForecast and HierarchicalForecast already installed, if not check this guide for instructions on how to install HierarchicalForecast.

!pip install hierarchicalforecast statsforecast datasetsforecast
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import BottomUp, TopDown, MinTrace, ERM

from hierarchicalforecast.utils import is_strictly_hierarchical
from hierarchicalforecast.utils import HierarchicalPlot, CodeTimer

from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo

2. Preparing TourismL Dataset

2.1 Read Hierarchical Dataset

# Available groups: ['Labour', 'Traffic', 'TourismSmall', 'TourismLarge', 'Wiki2']
dataset = 'TourismSmall' # 'TourismLarge'
verbose = True
intervals_method = 'bootstrap'
# Probability levels for the prediction intervals (0, 2, ..., 98).
LEVEL = np.arange(0, 100, 2)
with CodeTimer('Read and Parse data   ', verbose):
    print(f'{dataset}')
    # exist_ok avoids the race-prone exists()-then-makedirs pattern.
    os.makedirs('./data', exist_ok=True)

    dataset_info = HierarchicalInfo[dataset]
    Y_df, S_df, tags = HierarchicalData.load(directory=f'./data/{dataset}', group=dataset)
    Y_df['ds'] = pd.to_datetime(Y_df['ds'])

    # Train/Test Splits: the last `horizon` observations of each series
    # form the test set; everything before them is used for training.
    horizon = dataset_info.horizon
    seasonality = dataset_info.seasonality
    Y_test_df = Y_df.groupby('unique_id', as_index=False).tail(horizon)
    Y_train_df = Y_df.drop(Y_test_df.index)
    # Expose the series identifier as a regular column of the summing matrix.
    S_df = S_df.reset_index(names="unique_id")
TourismSmall
Code block 'Read and Parse data   ' took:   0.01112 seconds
dataset_info.seasonality
4
# Inspect the hierarchy's aggregation structure by plotting the summing matrix.
hplot = HierarchicalPlot(
    S=S_df,
    tags=tags,
)
hplot.plot_summing_matrix()

Y_train_df
unique_iddsy
0total1998-03-3184503
1total1998-06-3065312
2total1998-09-3072753
3total1998-12-3170880
4total1999-03-3186893
3191nt-oth-noncity2003-12-31132
3192nt-oth-noncity2004-03-3112
3193nt-oth-noncity2004-06-3040
3194nt-oth-noncity2004-09-30186
3195nt-oth-noncity2004-12-31144

2.2 StatsForecast’s Base Predictions

This cell computes the base predictions Y_hat_df for all the series in Y_df using StatsForecast’s AutoARIMA. Additionally we obtain insample predictions Y_fitted_df for the methods that require them.

with CodeTimer('Fit/Predict Model     ', verbose):
    # Cache base forecasts on disk to avoid unnecessary AutoARIMA recomputation.
    yhat_file = f'./data/{dataset}/Y_hat.csv'
    yfitted_file = f'./data/{dataset}/Y_fitted.csv'

    # Require both cache files; a partial cache would crash on the second read.
    if os.path.exists(yhat_file) and os.path.exists(yfitted_file):
        Y_hat_df = pd.read_csv(yhat_file, parse_dates=['ds'])
        Y_fitted_df = pd.read_csv(yfitted_file, parse_dates=['ds'])

    else:
        fcst = StatsForecast(
            models=[AutoARIMA(season_length=seasonality)],
            # fallback_model takes a single model instance, not a list.
            fallback_model=Naive(),
            freq=dataset_info.freq,
            n_jobs=-1
        )
        # fitted=True stores in-sample values, needed by some reconcilers.
        Y_hat_df = fcst.forecast(df=Y_train_df, h=horizon, fitted=True, level=LEVEL)
        Y_fitted_df = fcst.forecast_fitted_values()
        Y_hat_df.to_csv(yhat_file, index=False)
        Y_fitted_df.to_csv(yfitted_file, index=False)

3. Reconcile Predictions

with CodeTimer('Reconcile Predictions ', verbose):
    # Numeric summing matrix and, per hierarchy level, the row positions of
    # its member series — the inputs of the strict-hierarchy check.
    S_values = S_df.drop(columns="unique_id").values.astype(np.float32)
    tag_positions = {
        level_name: S_df["unique_id"].isin(members).values.nonzero()[0]
        for level_name, members in tags.items()
    }

    if is_strictly_hierarchical(S=S_values, tags=tag_positions):
        # Top-down methods are only applicable to strictly hierarchical data.
        reconcilers = [
            BottomUp(),
            TopDown(method='average_proportions'),
            TopDown(method='proportion_averages'),
            MinTrace(method='ols'),
            MinTrace(method='wls_var'),
            MinTrace(method='mint_shrink'),
            ERM(method='closed'),
        ]
    else:
        reconcilers = [
            BottomUp(),
            MinTrace(method='ols'),
            MinTrace(method='wls_var'),
            MinTrace(method='mint_shrink'),
            ERM(method='closed'),
        ]

    hrec = HierarchicalReconciliation(reconcilers=reconcilers)
    Y_rec_df = hrec.bootstrap_reconcile(
        Y_hat_df=Y_hat_df,
        Y_df=Y_fitted_df,
        S_df=S_df,
        tags=tags,
        level=LEVEL,
        intervals_method=intervals_method,
        num_samples=10,
        num_seeds=10,
    )

    # Attach observed test values for downstream evaluation.
    Y_rec_df = Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'], how="left")
Code block 'Reconcile Predictions ' took:   7.62321 seconds

Qualitative evaluation of parsed quantiles

# Average reconciled forecasts across bootstrap seeds for one series and
# overlay the parsed quantile columns against the observed values.
unique_id = "total"
series_rows = Y_rec_df[Y_rec_df["unique_id"] == unique_id]
plot_df = series_rows.groupby(["unique_id", "ds"], as_index=False).mean()
for quantile_col in hrec.level_names['AutoARIMA/BottomUp']:
    plt.plot(plot_df["ds"], plot_df[quantile_col], color="orange")
plt.plot(plot_df["ds"], plot_df["y"], label="True")
plt.title(f"AutoARIMA/BottomUp - {unique_id}")
plt.legend()

4. Evaluation

from utilsforecast.losses import scaled_crps, msse
from hierarchicalforecast.evaluation import evaluate
from functools import partial
with CodeTimer('Evaluate Models CRPS and MSSE ', verbose):
    # Hoist the reconciled-model column names; they are reused several times.
    model_names = list(hrec.level_names.keys())
    metrics_seeds = []
    # Evaluate each bootstrap seed separately so we can report mean ± std.
    for seed in Y_rec_df.seed.unique():
        df_seed = Y_rec_df.query("seed == @seed")
        metrics_seed = evaluate(df=df_seed,
                                tags=tags,
                                # Use the dataset's seasonality instead of a
                                # hard-coded 4 so other datasets work too.
                                metrics=[scaled_crps,
                                         partial(msse, seasonality=seasonality)],
                                models=model_names,
                                level=LEVEL,
                                train_df=Y_train_df,
                                )
        metrics_seed['seed'] = seed
        metrics_seeds.append(metrics_seed)
    metrics_seeds = pd.concat(metrics_seeds)

    # Aggregate across seeds: mean and standard deviation per level/metric.
    metrics_mean = metrics_seeds.groupby(["level", "metric"], as_index=False).mean()
    metrics_std = metrics_seeds.groupby(["level", "metric"], as_index=False).std()

    # Format each cell as "mean±std" for a compact comparison table.
    results = (metrics_mean[model_names].round(3).astype(str)
               + "±" + metrics_std[model_names].round(4).astype(str))
    results.insert(0, "metric", metrics_mean["metric"])
    results.insert(0, "level", metrics_mean["level"])

results.sort_values(by=["metric", "level"])
Code block 'Evaluate Models CRPS and MSSE ' took:   4.46367 seconds
levelmetricAutoARIMA/BottomUpAutoARIMA/TopDown_method-average_proportionsAutoARIMA/TopDown_method-proportion_averagesAutoARIMA/MinTrace_method-olsAutoARIMA/MinTrace_method-wls_varAutoARIMA/MinTrace_method-mint_shrinkAutoARIMA/ERM_method-closed_lambda_reg-0.01
0Countrymsse1.777±0.02.488±0.02.488±0.02.752±0.02.569±0.02.775±0.03.427±0.0
2Country/Purposemsse1.726±0.03.181±0.03.169±0.02.184±0.01.876±0.01.96±0.03.067±0.0
4Country/Purpose/Statemsse0.881±0.01.657±0.01.652±0.00.98±0.00.857±0.00.867±0.01.559±0.0
6Country/Purpose/State/CityNonCitymsse0.95±0.01.271±0.01.269±0.01.033±0.00.903±0.00.912±0.01.635±0.0
8Overallmsse0.973±0.01.492±0.01.488±0.01.087±0.00.951±0.00.966±0.01.695±0.0
1Countryscaled_crps0.043±0.00090.048±0.00060.048±0.00060.05±0.00060.051±0.00060.053±0.00060.054±0.0009
3Country/Purposescaled_crps0.077±0.0010.114±0.00030.112±0.00040.09±0.00130.087±0.00090.089±0.00090.106±0.0013
5Country/Purpose/Statescaled_crps0.165±0.00090.249±0.00040.247±0.00040.18±0.00180.169±0.00090.169±0.00080.231±0.0021
7Country/Purpose/State/CityNonCityscaled_crps0.218±0.00130.289±0.00040.286±0.00040.228±0.00180.217±0.00130.218±0.00110.302±0.0033
9Overallscaled_crps0.193±0.00110.266±0.00040.263±0.00040.205±0.00170.194±0.00110.195±0.00090.268±0.0027

References