Upload 10 files
- src/__init__.py +38 -0
- src/data.py +31 -0
- src/fit_model.py +44 -0
- src/models/__init__.py +10 -0
- src/models/abstract.py +41 -0
- src/models/autogluon.py +76 -0
- src/models/autopytorch.py +76 -0
- src/models/deep.py +64 -0
- src/models/statsforecast.py +132 -0
- src/score.py +22 -0
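
The intended end-to-end flow, roughly: load a benchmark dataset, fit one of the registered models, and score the resulting forecasts. A minimal sketch (assuming src is importable from the repository root; the dataset name, model name, and the train/test convention below are illustrative assumptions, not part of this commit):

from src import SEASONALITY_MAP, fit_predict_with_model, load_dataset, score_predictions

# Pick a benchmark dataset and read the forecasting settings from its metadata
data = load_dataset("m3_monthly")
prediction_length = data.metadata.prediction_length
freq = data.metadata.freq
seasonality = SEASONALITY_MAP.get(freq, 1)

# Assumption: models are fit on data.train and forecast the prediction_length
# steps that form the held-out window at the end of data.test
predictions, info = fit_predict_with_model(
    model_name="autoets",
    dataset=data.train,
    prediction_length=prediction_length,
    freq=freq,
    seasonality=seasonality,
)

# Aggregate accuracy metrics over the held-out windows of data.test
metrics = score_predictions(
    dataset=data.test,
    predictions=predictions,
    prediction_length=prediction_length,
    seasonality=seasonality,
)
print(metrics["MASE"], info["run_time"])
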
src/__init__.py
ADDED
@@ -0,0 +1,38 @@
+from .data import load_dataset, SEASONALITY_MAP
+from .fit_model import fit_predict_with_model, MODEL_NAME_TO_CLASS
+from .score import score_predictions
+
+
+AVAILABLE_MODELS = list(MODEL_NAME_TO_CLASS.keys())
+
+AVAILABLE_DATASETS = [
+    "car_parts_without_missing",
+    "cif_2016",
+    "covid_deaths",
+    "electricity_hourly",
+    "electricity_weekly",
+    "fred_md",
+    "hospital",
+    "kaggle_web_traffic_weekly",
+    "kdd_cup_2018_without_missing",
+    "m1_monthly",
+    "m1_quarterly",
+    "m1_yearly",
+    "m3_monthly",
+    "m3_other",
+    "m3_quarterly",
+    "m3_yearly",
+    "m4_daily",
+    "m4_hourly",
+    "m4_weekly",
+    "m4_yearly",
+    "m4_monthly",
+    "m4_quarterly",
+    "nn5_daily_without_missing",
+    "nn5_weekly",
+    "pedestrian_counts",
+    "tourism_monthly",
+    "tourism_quarterly",
+    "tourism_yearly",
+    "uber_tlc_without_missing",
+]
src/data.py
ADDED
@@ -0,0 +1,31 @@
+import pandas as pd
+import copy
+from typing import Tuple
+from gluonts.dataset.common import TrainDatasets
+from gluonts.dataset.repository.datasets import get_dataset
+
+SEASONALITY_MAP = {
+    "Y": 1,
+    "Q": 4,
+    "M": 12,
+    "W": 1,
+    "D": 7,
+    "H": 24,
+}
+
+
+def fix_m3_other_start(ts: dict):
+    new_ts = copy.copy(ts)
+    new_ts["start"] = pd.Period("1750", freq="Y")
+    return new_ts
+
+
+def load_dataset(dataset_name) -> TrainDatasets:
+    data = get_dataset(dataset_name)
+    # m3_other provided by GluonTS has incorrect freq Q that should be replaced by Y
+    if dataset_name == "m3_other":
+        fixed_train = [fix_m3_other_start(ts) for ts in data.train]
+        fixed_test = [fix_m3_other_start(ts) for ts in data.test]
+        data = TrainDatasets(metadata=data.metadata, train=fixed_train, test=fixed_test)
+        data.metadata.freq = "Y"
+    return data
src/fit_model.py
ADDED
@@ -0,0 +1,44 @@
+from gluonts.dataset.common import Dataset
+
+from .models import (
+    AbstractPredictor,
+    AutoGluonPredictor,
+    AutoPyTorchPredictor,
+    DeepARPredictor,
+    TFTPredictor,
+    AutoARIMAPredictor,
+    AutoETSPredictor,
+    AutoThetaPredictor,
+    StatsEnsemblePredictor,
+)
+
+MODEL_NAME_TO_CLASS = {
+    "autogluon": AutoGluonPredictor,
+    "autopytorch": AutoPyTorchPredictor,
+    "deepar": DeepARPredictor,
+    "tft": TFTPredictor,
+    "autoarima": AutoARIMAPredictor,
+    "autoets": AutoETSPredictor,
+    "autotheta": AutoThetaPredictor,
+    "statsensemble": StatsEnsemblePredictor,
+}
+
+
+def fit_predict_with_model(
+    model_name: str,
+    dataset: Dataset,
+    prediction_length: int,
+    freq: str,
+    seasonality: int,
+    **model_kwargs,
+):
+    model_class = MODEL_NAME_TO_CLASS[model_name.lower()]
+    model: AbstractPredictor = model_class(
+        prediction_length=prediction_length,
+        freq=freq,
+        seasonality=seasonality,
+        **model_kwargs,
+    )
+    predictions = model.fit_predict(dataset)
+    info = {"run_time": model.get_runtime()}
+    return predictions, info
src/models/__init__.py
ADDED
@@ -0,0 +1,10 @@
+from .abstract import AbstractPredictor
+from .autogluon import AutoGluonPredictor
+from .autopytorch import AutoPyTorchPredictor
+from .deep import DeepARPredictor, TFTPredictor
+from .statsforecast import (
+    AutoARIMAPredictor,
+    AutoETSPredictor,
+    AutoThetaPredictor,
+    StatsEnsemblePredictor,
+)
src/models/abstract.py
ADDED
@@ -0,0 +1,41 @@
+from typing import List, Optional
+
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast
+
+
+class AbstractPredictor:
+    def __init__(
+        self,
+        prediction_length: int,
+        freq: str,
+        seasonality: int,
+        quantile_levels: Optional[List[float]] = None,
+    ):
+        self.prediction_length = prediction_length
+        self.freq = freq
+        self.seasonality = seasonality
+        self.quantile_levels = quantile_levels or [
+            0.1,
+            0.2,
+            0.3,
+            0.4,
+            0.5,
+            0.6,
+            0.7,
+            0.8,
+            0.9,
+        ]
+        self._runtime = None
+
+    def fit_predict(
+        self,
+        dataset: Dataset
+    ) -> List[Forecast]:
+        raise NotImplementedError
+
+    def save_runtime(self, time: float) -> None:
+        self._runtime = time
+
+    def get_runtime(self) -> float:
+        return self._runtime
src/models/autogluon.py
ADDED
@@ -0,0 +1,76 @@
+import time
+from typing import List, Optional
+import pandas as pd
+
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast, QuantileForecast
+
+from .abstract import AbstractPredictor
+
+
+class AutoGluonPredictor(AbstractPredictor):
+    def __init__(
+        self,
+        prediction_length: int,
+        freq: str,
+        seasonality: int,
+        time_limit: Optional[int] = None,
+        presets: str = "high_quality",
+        eval_metric: str = "MASE",
+        seed: int = 1,
+        enable_ensemble: bool = True,
+        hyperparameters: Optional[dict] = None,
+        **kwargs
+    ):
+        super().__init__(prediction_length, freq, seasonality)
+        self.presets = presets
+        self.eval_metric = eval_metric
+        self.time_limit = time_limit
+        self.seed = seed
+        self.enable_ensemble = enable_ensemble
+        self.hyperparameters = hyperparameters
+
+    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
+        from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
+
+        train_data = TimeSeriesDataFrame(dataset)
+        predictor = TimeSeriesPredictor(
+            prediction_length=self.prediction_length,
+            eval_metric=self.eval_metric,
+            eval_metric_seasonal_period=self.seasonality,
+            quantile_levels=self.quantile_levels,
+        )
+        start_time = time.time()
+        predictor.fit(
+            train_data,
+            time_limit=self.time_limit,
+            presets=self.presets,
+            random_seed=self.seed,
+            enable_ensemble=self.enable_ensemble,
+            hyperparameters=self.hyperparameters,
+        )
+        # Forecast the prediction_length steps after the end of each training series
+        predictions = predictor.predict(train_data)
+        self.save_runtime(time.time() - start_time)
+        # Drop the point forecast ("mean") so that only the quantile columns remain
+        return self._predictions_df_to_gluonts_forecast(
+            predictions_df=predictions.drop("mean", axis=1), dataset=dataset
+        )
+
+    def _predictions_df_to_gluonts_forecast(
+        self, predictions_df, dataset: Dataset
+    ) -> List[Forecast]:
+        agts_forecasts = [
+            f.droplevel("item_id")
+            for _, f in predictions_df.groupby(level="item_id", sort=False)
+        ]
+        forecast_list = []
+        for ts, f in zip(dataset, agts_forecasts):
+            item_id = ts["item_id"]
+            forecast_list.append(
+                QuantileForecast(
+                    forecast_arrays=f.values.T,
+                    forecast_keys=f.columns,
+                    start_date=pd.Period(f.index[0], freq=self.freq),
+                    item_id=item_id,
+                )
+            )
+        return forecast_list
src/models/autopytorch.py
ADDED
@@ -0,0 +1,76 @@
+import copy
+import multiprocessing as mp
+import time
+from typing import List
+
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast, SampleForecast
+
+from .abstract import AbstractPredictor
+
+
+class AutoPyTorchPredictor(AbstractPredictor):
+    def __init__(
+        self,
+        prediction_length: int,
+        freq: str,
+        seasonality: int,
+        time_limit: int = 6 * 60 * 60,
+        optimize_metric: str = "mean_MASE_forecasting",
+        seed: int = 1,
+        **kwargs
+    ):
+        super().__init__(prediction_length, freq, seasonality)
+        self.optimize_metric = optimize_metric
+        self.run_time = time_limit
+        self.seed = seed
+
+    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
+        from autoPyTorch.api.time_series_forecasting import TimeSeriesForecastingTask
+        from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes
+
+        y_train = [item["target"] for item in dataset]
+        start_times = [item["start"].to_timestamp(how="S") for item in dataset]
+
+        api = TimeSeriesForecastingTask(
+            seed=self.seed,
+            ensemble_size=20,
+            resampling_strategy=HoldoutValTypes.time_series_hold_out_validation,
+            resampling_strategy_args=None,
+        )
+        api.set_pipeline_options(early_stopping=20, torch_num_threads=mp.cpu_count())
+
+        fit_start_time = time.time()
+        api.search(
+            X_train=None,
+            y_train=copy.deepcopy(y_train),
+            optimize_metric=self.optimize_metric,
+            n_prediction_steps=self.prediction_length,
+            memory_limit=16 * 1024,
+            freq="1" + self.freq,
+            start_times=start_times,
+            normalize_y=False,
+            total_walltime_limit=self.run_time,
+            min_num_test_instances=1000,
+            budget_type="epochs",
+            max_budget=50,
+            min_budget=5,
+        )
+        # Skip refitting as this raises exceptions for all models as of v0.2.1
+        # refit_dataset = api.dataset.create_refit_set()
+        # api.refit(refit_dataset, 0)
+
+        # Predict for the test set
+        test_sets = api.dataset.generate_test_seqs()
+        predictions = api.predict(test_sets)
+        self.save_runtime(time.time() - fit_start_time)
+        forecast_list = []
+        for ts, pred in zip(dataset, predictions):
+            forecast_list.append(
+                SampleForecast(
+                    samples=pred[None],
+                    start_date=ts["start"] + len(ts["target"]),
+                    item_id=ts["item_id"],
+                )
+            )
+        return forecast_list
src/models/deep.py
ADDED
@@ -0,0 +1,64 @@
+import time
+from typing import List, Optional
+from datetime import timedelta
+
+from gluonts.dataset.split import split
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast
+from gluonts.torch.model.estimator import Estimator
+
+
+from .abstract import AbstractPredictor
+
+
+class GluonTSPredictor(AbstractPredictor):
+    def __init__(
+        self,
+        prediction_length: int,
+        freq: str,
+        seasonality: int,
+        time_limit: Optional[int] = None,
+        **kwargs,
+    ):
+        super().__init__(prediction_length, freq, seasonality)
+        self.time_limit = time_limit
+
+    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
+        estimator = self._get_estimator()
+        # Hold out the last prediction_length steps of each series for validation
+        train_data, _ = split(dataset, offset=-self.prediction_length)
+        fit_start_time = time.time()
+        predictor = estimator.train(training_data=train_data, validation_data=dataset)
+        predictions = predictor.predict(dataset)
+        self.save_runtime(time.time() - fit_start_time)
+        return predictions
+
+    def _get_estimator(self) -> Estimator:
+        raise NotImplementedError
+
+    def _get_trainer_kwargs(self):
+        from pytorch_lightning.callbacks import Timer
+
+        # Train until time limit
+        return {"max_epochs": 100_000, "callbacks": [Timer(timedelta(seconds=self.time_limit))]}
+
+
+class DeepARPredictor(GluonTSPredictor):
+    def _get_estimator(self) -> Estimator:
+        from gluonts.torch.model.deepar import DeepAREstimator
+
+        return DeepAREstimator(
+            freq=self.freq,
+            prediction_length=self.prediction_length,
+            trainer_kwargs=self._get_trainer_kwargs(),
+        )
+
+
+class TFTPredictor(GluonTSPredictor):
+    def _get_estimator(self) -> Estimator:
+        from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
+
+        return TemporalFusionTransformerEstimator(
+            freq=self.freq,
+            prediction_length=self.prediction_length,
+            trainer_kwargs=self._get_trainer_kwargs(),
+        )
src/models/statsforecast.py
ADDED
@@ -0,0 +1,132 @@
+import time
+from typing import List
+import pandas as pd
+
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast, QuantileForecast
+
+from .abstract import AbstractPredictor
+
+
+class StatsForecastPredictor(AbstractPredictor):
+    def __init__(self, prediction_length: int, freq: str, seasonality: int, **kwargs):
+        super().__init__(prediction_length, freq, seasonality)
+
+    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
+        from statsforecast import StatsForecast
+        from statsforecast.models import SeasonalNaive
+
+        df = self._to_statsforecast_df(dataset)
+        models = self._get_models()
+        predictor = StatsForecast(
+            df=df,
+            freq=self.freq,
+            models=models,
+            fallback_model=SeasonalNaive(season_length=self.seasonality),
+            n_jobs=-1,
+        )
+        start_time = time.time()
+        predictions_df = predictor.forecast(
+            h=self.prediction_length, level=[0, 20, 40, 60, 80]
+        )
+        self.save_runtime(time.time() - start_time)
+        return self._predictions_df_to_gluonts_forecast(
+            predictions_df, dataset, model_names=[str(m) for m in models]
+        )
+
+    def _predictions_df_to_gluonts_forecast(
+        self,
+        predictions_df: pd.DataFrame,
+        dataset: Dataset,
+        model_names: List[str],
+    ) -> List[Forecast]:
+        # Map a quantile level to the StatsForecast column suffix, e.g. 0.1 -> "-lo-80", 0.9 -> "-hi-80"
+        def quantile_to_suffix(q: float) -> str:
+            if q < 0.5:
+                prefix = "-lo-"
+                level = 100 - 200 * q
+            else:
+                prefix = "-hi-"
+                level = 200 * q - 100
+            return prefix + str(int(level))
+
+        # Convert StatsForecast output -> DataFrame with quantile_levels as columns
+        columns = {}
+        for q in self.quantile_levels:
+            suffix = quantile_to_suffix(q)
+            columns[str(q)] = predictions_df[[m + suffix for m in model_names]].median(
+                axis=1
+            )
+
+        # Convert quantiles DataFrame -> list of QuantileForecasts
+        forecast_df = pd.DataFrame(columns)
+        forecast_list = []
+        for ts in dataset:
+            item_id = ts["item_id"]
+            f = forecast_df.loc[item_id]
+            forecast_list.append(
+                QuantileForecast(
+                    forecast_arrays=f.values.T,
+                    forecast_keys=f.columns,
+                    start_date=pd.Period(
+                        predictions_df["ds"].loc[item_id].iloc[0], freq=self.freq
+                    ),
+                    item_id=item_id,
+                )
+            )
+        return forecast_list
+
+    def _to_statsforecast_df(self, dataset: Dataset) -> pd.DataFrame:
+        """Convert GluonTS Dataset to StatsForecast compatible DataFrame."""
+        dfs = []
+        for item in dataset:
+            target = item["target"]
+            timestamps = pd.date_range(
+                start=item["start"].to_timestamp(how="S"),
+                periods=len(target),
+                freq=self.freq,
+            )
+            df = pd.DataFrame(
+                {
+                    "unique_id": [item["item_id"]] * len(target),
+                    "ds": timestamps,
+                    "y": target,
+                }
+            )
+            dfs.append(df)
+        return pd.concat(dfs)
+
+
+class AutoARIMAPredictor(StatsForecastPredictor):
+    def _get_models(self):
+        from statsforecast.models import AutoARIMA
+
+        return [AutoARIMA(season_length=self.seasonality)]
+
+
+class AutoETSPredictor(StatsForecastPredictor):
+    def _get_models(self):
+        from statsforecast.models import AutoETS
+
+        return [AutoETS(season_length=self.seasonality)]
+
+
+class AutoThetaPredictor(StatsForecastPredictor):
+    def _get_models(self):
+        from statsforecast.models import AutoTheta
+
+        return [AutoTheta(season_length=self.seasonality)]
+
+
+class StatsEnsemblePredictor(StatsForecastPredictor):
+    def _get_models(self):
+        from statsforecast.models import (
+            AutoETS,
+            AutoARIMA,
+            AutoTheta,
+        )
+
+        return [
+            AutoETS(season_length=self.seasonality),
+            AutoTheta(season_length=self.seasonality),
+            AutoARIMA(season_length=self.seasonality),
+        ]
src/score.py
ADDED
@@ -0,0 +1,22 @@
+from typing import List
+
+from gluonts.dataset.split import split
+from gluonts.dataset.common import Dataset
+from gluonts.model.forecast import Forecast
+from gluonts.evaluation.backtest import _to_dataframe, Evaluator
+
+
+def score_predictions(
+    dataset: Dataset,
+    predictions: List[Forecast],
+    prediction_length: int,
+    seasonality: int,
+):
+    _, test_template = split(dataset, offset=-prediction_length)
+    test_data = test_template.generate_instances(prediction_length)
+    ts_iterator = map(_to_dataframe, test_data)
+    evaluator = Evaluator(
+        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], seasonality=seasonality
+    )
+    metrics, _ = evaluator(ts_iterator=ts_iterator, fcst_iterator=predictions)
+    return metrics