from statsforecast.utils import generate_series
series = generate_series(5, min_length=50, max_length=50, equal_ends=True, n_static_features=1)
series.head()
   unique_id         ds           y  static_0
0          0 2000-01-01   12.073897        43
1          0 2000-01-02   59.734166        43
2          0 2000-01-03  101.260794        43
3          0 2000-01-04  143.987430        43
4          0 2000-01-05  185.320406        43
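The generated data follows Nixtla's long format: one row per (unique_id, ds) pair, a target column y, and one static feature column. A quick sanity check (illustrative, not part of the original example) confirms the shape:

# Five series, each with exactly 50 daily observations
print(series["unique_id"].nunique())                # 5
print(series.groupby("unique_id").size().unique())  # [50]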

For the next part, mlflow and mlflavors are needed. Install them with:

pip install mlflow mlflavors

Model Logging

import pandas as pd
import requests

import mlflow
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
from statsforecast.utils import generate_series

import mlflavors
ARTIFACT_PATH = "model"
DATA_PATH = "./data"
HORIZON = 7
LEVEL = [90]

with mlflow.start_run() as run:
    series = generate_series(5, min_length=50, max_length=50, equal_ends=True, n_static_features=1)
    
    # Hold out the last 7 days of each series for testing (43 + 7 = 50)
    train_df = series.groupby('unique_id').head(43)
    test_df = series.groupby('unique_id').tail(7)
    X_test = test_df.drop(columns=["y"])
    y_test = test_df[["y"]]

    models = [AutoARIMA(season_length=7)]

    sf = StatsForecast(df=train_df, models=models, freq="D", n_jobs=-1)

    sf.fit()

    # Evaluate model
    y_pred = sf.predict(h=HORIZON, X_df=X_test, level=LEVEL)["AutoARIMA"]

    metrics = {
        "mae": mean_absolute_error(y_test, y_pred),
        "mape": mean_absolute_percentage_error(y_test, y_pred),
    }

    print(f"Metrics: \n{metrics}")

    # Log metrics
    mlflow.log_metrics(metrics)

    # Log model using pickle serialization (default).
    mlflavors.statsforecast.log_model(
        statsforecast_model=sf,
        artifact_path=ARTIFACT_PATH,
        serialization_format="pickle",
    )
    model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)

print(f"\nMLflow run id:\n{run.info.run_id}")
Metrics: 
{'mae': 6.712853959225143, 'mape': 0.11719246764336884}
2023/10/20 23:45:36 WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: /var/folders/w2/91_v34nx0xs2npnl3zsl9tmm0000gn/T/tmpt4686vpu/model/model.pkl, flavor: statsforecast), fall back to return ['statsforecast==1.6.0']. Set logging level to DEBUG to see the full traceback.

MLflow run id:
0319bbd664424fcd88d6c532e3ecac77
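If you also want the run's parameters recorded alongside the metrics, the standard mlflow.log_params API can be called inside the same mlflow.start_run() block; a minimal sketch (the parameter names here are illustrative, not part of the original example):

# Inside the run context, next to mlflow.log_metrics(metrics)
mlflow.log_params({"season_length": 7, "horizon": HORIZON, "level": LEVEL[0]})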

Viewing Experiment

To view the newly created experiment and logged artifacts, open the MLflow UI:

mlflow ui
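By default the UI is served at http://127.0.0.1:5000. If that port is already in use (for example by the model server started later in this guide), it can be launched on another port:

mlflow ui --port 5001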

Loading Statsforecast Model

The statsforecast model can be loaded from MLflow using the mlflavors.statsforecast.load_model function and used to generate predictions.

loaded_model = mlflavors.statsforecast.load_model(model_uri=model_uri)
results = loaded_model.predict(h=HORIZON, X_df=X_test, level=LEVEL)
results.head()
                   ds   AutoARIMA  AutoARIMA-lo-90  AutoARIMA-hi-90
unique_id
0          2000-02-13   55.894432        44.343880        67.444984
0          2000-02-14   97.818054        86.267502       109.368607
0          2000-02-15  146.745422       135.194870       158.295975
0          2000-02-16  188.888336       177.337784       200.438904
0          2000-02-17  231.493637       219.943085       243.044189
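Since AutoARIMA forecasts are deterministic, the reloaded model should reproduce the in-memory model's output exactly; a quick round-trip check (illustrative, not part of the original example):

# Verify that serialization did not change the forecasts
expected = sf.predict(h=HORIZON, X_df=X_test, level=LEVEL)
pd.testing.assert_frame_equal(results, expected)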

Loading Model with pyfunc

Pyfunc is MLflow's generic model interface, which provides its own utilities for loading and saving models. The code below produces the same predictions as the previous example.

loaded_pyfunc = mlflavors.statsforecast.pyfunc.load_model(model_uri=model_uri)

# Convert the test data to a 2D numpy array so it can be passed to the
# pyfunc predict method inside a single-row pandas DataFrame configuration
X_test_array = X_test.to_numpy()

# Create configuration DataFrame
predict_conf = pd.DataFrame(
    [
        {
            "X": X_test_array,
            "X_cols": X_test.columns,
            "X_dtypes": list(X_test.dtypes),
            "h": HORIZON,
            "level": LEVEL,
        }
    ]
)


pyfunc_result = loaded_pyfunc.predict(predict_conf)
pyfunc_result.head()
                   ds   AutoARIMA  AutoARIMA-lo-90  AutoARIMA-hi-90
unique_id
0          2000-02-13   55.894432        44.343880        67.444984
0          2000-02-14   97.818054        86.267502       109.368607
0          2000-02-15  146.745422       135.194870       158.295975
0          2000-02-16  188.888336       177.337784       200.438904
0          2000-02-17  231.493637       219.943085       243.044189

Model Serving

This section illustrates serving the pyfunc flavor at a local REST API endpoint and subsequently requesting a prediction from the served model. To serve the model, run the command below, substituting the run id printed when the training code executed.

mlflow models serve -m runs:/<run_id>/model --env-manager local --host 127.0.0.1
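The server binds to port 5000 by default, which would clash with a running MLflow UI on the same port; if necessary, pick another port with -p and adjust the request URL below to match:

mlflow models serve -m runs:/<run_id>/model --env-manager local --host 127.0.0.1 -p 5001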

Once the server is up, the code below can be run to send a prediction request.

HORIZON = 7
LEVEL = [90, 95]

# Define local host and endpoint url
host = "127.0.0.1"
url = f"http://{host}:5000/invocations"

# Convert DateTime to string for JSON serialization
X_test_pyfunc = X_test.copy()
X_test_pyfunc["ds"] = X_test_pyfunc["ds"].dt.strftime(date_format="%Y-%m-%d")

# Convert to list for JSON serialization
X_test_list = X_test_pyfunc.to_numpy().tolist()

# Convert index to list of strings for JSON serialization
X_cols = list(X_test.columns)

# Convert dtypes to string for JSON serialization
X_dtypes = [str(dtype) for dtype in list(X_test.dtypes)]

predict_conf = pd.DataFrame(
    [
        {
            "X": X_test_list,
            "X_cols": X_cols,
            "X_dtypes": X_dtypes,
            "h": HORIZON,
            "level": LEVEL,
        }
    ]
)

# Create dictionary with pandas DataFrame in the split orientation
json_data = {"dataframe_split": predict_conf.to_dict(orient="split")}

# Score model
response = requests.post(url, json=json_data)
pd.DataFrame(response.json()['predictions']).head()
                    ds   AutoARIMA  AutoARIMA-lo-95  AutoARIMA-lo-90  AutoARIMA-hi-90  AutoARIMA-hi-95
0  2000-02-13T00:00:00   55.894432        42.131100        44.343880        67.444984        69.657768
1  2000-02-14T00:00:00   97.818054        84.054718        86.267502       109.368607       111.581390
2  2000-02-15T00:00:00  146.745422       132.982086       135.194870       158.295975       160.508759
3  2000-02-16T00:00:00  188.888336       175.125015       177.337784       200.438904       202.651672
4  2000-02-17T00:00:00  231.493637       217.730301       219.943085       243.044189       245.256973
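Note that the served model returns ds as ISO-8601 strings. For downstream use you would typically convert the JSON response back into a typed DataFrame (an illustrative post-processing step):

# Rebuild a DataFrame from the response and restore the datetime dtype
forecast = pd.DataFrame(response.json()["predictions"])
forecast["ds"] = pd.to_datetime(forecast["ds"])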