kashif HF staff committed on
Commit
45e60de
1 Parent(s): 2c1ba7b

Upload 10 files

Browse files
src/__init__.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .data import load_dataset, SEASONALITY_MAP
2
+ from .fit_model import fit_predict_with_model, MODEL_NAME_TO_CLASS
3
+ from .score import score_predictions
4
+
5
+
6
# Names accepted by `fit_predict_with_model`, in registry order.
AVAILABLE_MODELS = [*MODEL_NAME_TO_CLASS]
7
+
8
# Dataset names exposed by this package (passed to `load_dataset`).
AVAILABLE_DATASETS = [
    "car_parts_without_missing",
    "cif_2016",
    "covid_deaths",
    "electricity_hourly", "electricity_weekly",
    "fred_md",
    "hospital",
    "kaggle_web_traffic_weekly",
    "kdd_cup_2018_without_missing",
    "m1_monthly", "m1_quarterly", "m1_yearly",
    "m3_monthly", "m3_other", "m3_quarterly", "m3_yearly",
    "m4_daily", "m4_hourly", "m4_weekly", "m4_yearly", "m4_monthly", "m4_quarterly",
    "nn5_daily_without_missing", "nn5_weekly",
    "pedestrian_counts",
    "tourism_monthly", "tourism_quarterly", "tourism_yearly",
    "uber_tlc_without_missing",
]
src/data.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import copy
3
+ from typing import Tuple
4
+ from gluonts.dataset.common import TrainDatasets
5
+ from gluonts.dataset.repository.datasets import get_dataset
6
+
7
# Seasonal period (number of steps per season) keyed by frequency string.
SEASONALITY_MAP = dict(
    Y=1,
    Q=4,
    M=12,
    W=1,
    D=7,
    H=24,
)
15
+
16
+
17
def fix_m3_other_start(ts: dict):
    """Return a shallow copy of ``ts`` whose ``"start"`` is a yearly period.

    The input entry is left untouched; every other key of the copy refers to
    the same objects as the original entry.
    """
    patched = copy.copy(ts)
    # All series get the same placeholder start year, expressed with freq "Y".
    patched.update(start=pd.Period("1750", freq="Y"))
    return patched
21
+
22
+
23
def load_dataset(dataset_name) -> TrainDatasets:
    """Load ``dataset_name`` through GluonTS' ``get_dataset``.

    Every dataset is returned as-is except ``"m3_other"``, whose entries and
    metadata are patched to use yearly frequency.
    """
    data = get_dataset(dataset_name)
    if dataset_name != "m3_other":
        return data

    # m3_other provided by GluonTS has incorrect freq Q that should be replaced by Y
    patched = TrainDatasets(
        metadata=data.metadata,
        train=list(map(fix_m3_other_start, data.train)),
        test=list(map(fix_m3_other_start, data.test)),
    )
    patched.metadata.freq = "Y"
    return patched
src/fit_model.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gluonts.dataset.common import Dataset
2
+
3
+ from .models import (
4
+ AbstractPredictor,
5
+ AutoGluonPredictor,
6
+ AutoPyTorchPredictor,
7
+ DeepARPredictor,
8
+ TFTPredictor,
9
+ AutoARIMAPredictor,
10
+ AutoETSPredictor,
11
+ AutoThetaPredictor,
12
+ StatsEnsemblePredictor,
13
+ )
14
+
15
# Registry of lower-case model names -> predictor classes.
MODEL_NAME_TO_CLASS = dict(
    autogluon=AutoGluonPredictor,
    autopytorch=AutoPyTorchPredictor,
    deepar=DeepARPredictor,
    tft=TFTPredictor,
    autoarima=AutoARIMAPredictor,
    autoets=AutoETSPredictor,
    autotheta=AutoThetaPredictor,
    statsensemble=StatsEnsemblePredictor,
)
25
+
26
+
27
def fit_predict_with_model(
    model_name: str,
    dataset: Dataset,
    prediction_length: int,
    freq: str,
    seasonality: int,
    **model_kwargs,
):
    """Instantiate the model registered under ``model_name`` and run it.

    The name is matched case-insensitively against ``MODEL_NAME_TO_CLASS``;
    an unknown name raises ``KeyError``. Any ``model_kwargs`` are forwarded to
    the predictor's constructor.

    Returns:
        A ``(predictions, info)`` tuple where ``info`` is a dict with the
        single key ``"run_time"`` holding the predictor's reported runtime.
    """
    init_kwargs = dict(
        prediction_length=prediction_length,
        freq=freq,
        seasonality=seasonality,
        **model_kwargs,
    )
    predictor: AbstractPredictor = MODEL_NAME_TO_CLASS[model_name.lower()](**init_kwargs)
    forecasts = predictor.fit_predict(dataset)
    return forecasts, {"run_time": predictor.get_runtime()}
src/models/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .abstract import AbstractPredictor
2
+ from .autogluon import AutoGluonPredictor
3
+ from .autopytorch import AutoPyTorchPredictor
4
+ from .deep import DeepARPredictor, TFTPredictor
5
+ from .statsforecast import (
6
+ AutoARIMAPredictor,
7
+ AutoETSPredictor,
8
+ AutoThetaPredictor,
9
+ StatsEnsemblePredictor,
10
+ )
src/models/abstract.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+
3
+ from gluonts.dataset.common import Dataset
4
+ from gluonts.model.forecast import Forecast
5
+
6
+
7
class AbstractPredictor:
    """Common interface of the forecasting model wrappers.

    Subclasses implement :meth:`fit_predict` and record how long it took via
    :meth:`save_runtime`; callers read the value back with :meth:`get_runtime`.
    """

    # Quantile levels used when the caller does not supply any.
    DEFAULT_QUANTILE_LEVELS = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

    def __init__(
        self,
        prediction_length: int,
        freq: str,
        seasonality: int,
        quantile_levels: Optional[List[float]] = None,
    ):
        """Store the forecasting task configuration.

        Args:
            prediction_length: Number of time steps to forecast.
            freq: Frequency string of the time series.
            seasonality: Seasonal period passed on to the models.
            quantile_levels: Quantiles to predict. ``None`` or an empty list
                selects ``DEFAULT_QUANTILE_LEVELS``.
        """
        self.prediction_length = prediction_length
        self.freq = freq
        self.seasonality = seasonality
        # `or` (rather than an `is None` check) keeps the original behaviour of
        # also replacing an empty list with the defaults.
        self.quantile_levels = quantile_levels or list(self.DEFAULT_QUANTILE_LEVELS)
        self._runtime: Optional[float] = None

    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
        """Fit on ``dataset`` and return forecasts; must be overridden."""
        raise NotImplementedError

    def save_runtime(self, time: float) -> None:
        """Record the runtime (as measured by the subclass) of the last run."""
        self._runtime = time

    def get_runtime(self) -> Optional[float]:
        """Return the recorded runtime, or ``None`` if none was saved yet."""
        return self._runtime
src/models/autogluon.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import List, Optional
3
+ import pandas as pd
4
+
5
+ from gluonts.dataset.common import Dataset
6
+ from gluonts.model.forecast import Forecast, QuantileForecast
7
+
8
+ from .abstract import AbstractPredictor
9
+
10
+
11
class AutoGluonPredictor(AbstractPredictor):
    """Wrapper around ``autogluon.timeseries.TimeSeriesPredictor``."""

    def __init__(
        self,
        prediction_length: int,
        freq: str,
        seasonality: int,
        time_limit: Optional[int] = None,
        presets: str = "high_quality",
        eval_metric: str = "MASE",
        seed: int = 1,
        enable_ensemble: bool = True,
        hyperparameters: Optional[dict] = None,
        **kwargs
    ):
        """Store the settings forwarded to AutoGluon in :meth:`fit_predict`.

        ``time_limit``, ``presets``, ``seed`` (as ``random_seed``),
        ``enable_ensemble`` and ``hyperparameters`` are passed to
        ``TimeSeriesPredictor.fit``; ``eval_metric`` is passed to the
        ``TimeSeriesPredictor`` constructor. Additional ``kwargs`` are
        accepted and ignored.
        """
        super().__init__(prediction_length, freq, seasonality)
        self.presets = presets
        self.eval_metric = eval_metric
        self.time_limit = time_limit
        self.seed = seed
        self.enable_ensemble = enable_ensemble
        self.hyperparameters = hyperparameters

    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
        """Fit AutoGluon on ``dataset`` and forecast the next steps.

        The saved runtime covers ``fit`` plus ``predict``; building the
        ``TimeSeriesDataFrame`` and the predictor object is not timed.
        """
        from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

        train_data = TimeSeriesDataFrame(dataset)
        predictor = TimeSeriesPredictor(
            prediction_length=self.prediction_length,
            eval_metric=self.eval_metric,
            eval_metric_seasonal_period=self.seasonality,
            quantile_levels=self.quantile_levels,
        )
        start_time = time.time()
        predictor.fit(
            train_data,
            time_limit=self.time_limit,
            presets=self.presets,
            random_seed=self.seed,
            enable_ensemble=self.enable_ensemble,
            hyperparameters=self.hyperparameters,
        )
        predictions = predictor.predict(train_data)
        self.save_runtime(time.time() - start_time)
        # Only the quantile columns are converted; the "mean" column is dropped.
        return self._predictions_df_to_gluonts_forecast(
            predictions_df=predictions.drop("mean", axis=1), dataset=dataset
        )

    def _predictions_df_to_gluonts_forecast(
        self, predictions_df, dataset: Dataset
    ) -> List[Forecast]:
        """Convert a prediction frame into one ``QuantileForecast`` per series.

        ``predictions_df`` must carry an ``"item_id"`` index level. Forecasts
        are returned in the order of ``dataset``. Each dataset entry is matched
        to its forecast by ``item_id`` rather than by position, so a prediction
        frame whose item order differs from the dataset order cannot attach a
        forecast to the wrong series. A missing ``item_id`` raises ``KeyError``.
        """
        forecasts_by_item = {
            item_id: frame.droplevel("item_id")
            for item_id, frame in predictions_df.groupby(level="item_id", sort=False)
        }
        forecast_list = []
        for ts in dataset:
            item_id = ts["item_id"]
            f = forecasts_by_item[item_id]
            forecast_list.append(
                QuantileForecast(
                    # Rows of the array correspond to the columns (keys) of `f`.
                    forecast_arrays=f.values.T,
                    forecast_keys=f.columns,
                    start_date=pd.Period(f.index[0], freq=self.freq),
                    item_id=item_id,
                )
            )
        return forecast_list
src/models/autopytorch.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import multiprocessing as mp
3
+ import time
4
+ from typing import List
5
+
6
+ from gluonts.dataset.common import Dataset
7
+ from gluonts.model.forecast import Forecast, SampleForecast
8
+
9
+ from .abstract import AbstractPredictor
10
+
11
+
12
class AutoPyTorchPredictor(AbstractPredictor):
    """Wrapper around Auto-PyTorch's ``TimeSeriesForecastingTask``."""

    def __init__(
        self,
        prediction_length: int,
        freq: str,
        seasonality: int,
        time_limit: int = 6 * 60 * 60,
        optimize_metric: str = "mean_MASE_forecasting",
        seed: int = 1,
        **kwargs
    ):
        """Store the search settings; extra ``kwargs`` are accepted and ignored."""
        super().__init__(prediction_length, freq, seasonality)
        self.optimize_metric = optimize_metric
        # Wall-clock budget handed to `api.search` (not the measured runtime).
        self.run_time = time_limit
        self.seed = seed

    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
        """Run the Auto-PyTorch search on ``dataset`` and forecast each series.

        The saved runtime spans the search and the prediction call. Each
        forecast is a ``SampleForecast`` holding a single sample path.
        """
        from autoPyTorch.api.time_series_forecasting import TimeSeriesForecastingTask
        from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes

        y_train = [entry["target"] for entry in dataset]
        start_times = [entry["start"].to_timestamp(how="S") for entry in dataset]

        api = TimeSeriesForecastingTask(
            seed=self.seed,
            ensemble_size=20,
            resampling_strategy=HoldoutValTypes.time_series_hold_out_validation,
            resampling_strategy_args=None,
        )
        api.set_pipeline_options(early_stopping=20, torch_num_threads=mp.cpu_count())

        fit_start_time = time.time()
        search_kwargs = dict(
            X_train=None,
            # The search receives its own copy of the targets.
            y_train=copy.deepcopy(y_train),
            optimize_metric=self.optimize_metric,
            n_prediction_steps=self.prediction_length,
            memory_limit=16 * 1024,
            freq="1" + self.freq,
            start_times=start_times,
            normalize_y=False,
            total_walltime_limit=self.run_time,
            min_num_test_instances=1000,
            budget_type="epochs",
            max_budget=50,
            min_budget=5,
        )
        api.search(**search_kwargs)
        # Refitting (api.dataset.create_refit_set() followed by api.refit(...))
        # is deliberately skipped: it raises exceptions for all models as of
        # v0.2.1.

        # Predict for the test set
        predictions = api.predict(api.dataset.generate_test_seqs())
        self.save_runtime(time.time() - fit_start_time)

        return [
            SampleForecast(
                # Add a leading axis so the prediction is a single sample.
                samples=pred[None],
                # Forecast starts right after the last observed step.
                start_date=ts["start"] + len(ts["target"]),
                item_id=ts["item_id"],
            )
            for ts, pred in zip(dataset, predictions)
        ]
src/models/deep.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import List, Optional
3
+ from datetime import timedelta
4
+
5
+ from gluonts.dataset.split import split
6
+ from gluonts.dataset.common import Dataset
7
+ from gluonts.model.forecast import Forecast
8
+ from gluonts.torch.model.estimator import Estimator
9
+
10
+
11
+ from .abstract import AbstractPredictor
12
+
13
+
14
class GluonTSPredictor(AbstractPredictor):
    """Base wrapper for GluonTS (PyTorch) estimators.

    Subclasses supply the estimator through :meth:`_get_estimator`.
    """

    def __init__(
        self,
        prediction_length: int,
        freq: str,
        seasonality: int,
        time_limit: Optional[int] = None,
        **kwargs,
    ):
        """Store the task settings.

        Args:
            time_limit: Training time budget in seconds, or ``None`` for no
                explicit budget.
            **kwargs: Accepted and ignored.
        """
        super().__init__(prediction_length, freq, seasonality)
        self.time_limit = time_limit

    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
        """Train the estimator and forecast every series in ``dataset``.

        The last ``prediction_length`` steps of each series are cut off for
        training, while the full series serve as validation data. The saved
        runtime covers training and inference.
        """
        estimator = self._get_estimator()
        train_data, _ = split(dataset, offset=-self.prediction_length)
        fit_start_time = time.time()
        predictor = estimator.train(training_data=train_data, validation_data=dataset)
        # GluonTS' predict() hands back an iterator of forecasts. Materialise
        # it here so inference is finished before the runtime is saved and the
        # return value really is a list.
        predictions = list(predictor.predict(dataset))
        self.save_runtime(time.time() - fit_start_time)
        return predictions

    def _get_estimator(self) -> Estimator:
        """Build the GluonTS estimator; must be overridden."""
        raise NotImplementedError

    def _get_trainer_kwargs(self):
        """Return the ``trainer_kwargs`` handed to the estimator.

        With a ``time_limit``, training is capped by a ``Timer`` callback and
        the epoch limit is set high enough not to interfere. Without one, an
        empty dict is returned so the estimator's own trainer defaults apply.
        """
        if self.time_limit is None:
            # timedelta(seconds=None) raises TypeError, so no Timer is built.
            return {}

        from pytorch_lightning.callbacks import Timer

        # Train until time limit
        return {
            "max_epochs": 100_000,
            "callbacks": [Timer(timedelta(seconds=self.time_limit))],
        }
43
+
44
+
45
class DeepARPredictor(GluonTSPredictor):
    """GluonTS predictor backed by ``DeepAREstimator``."""

    def _get_estimator(self) -> Estimator:
        """Build a ``DeepAREstimator`` for this task's frequency and horizon."""
        from gluonts.torch.model.deepar import DeepAREstimator

        estimator_kwargs = dict(
            freq=self.freq,
            prediction_length=self.prediction_length,
            trainer_kwargs=self._get_trainer_kwargs(),
        )
        return DeepAREstimator(**estimator_kwargs)
54
+
55
+
56
class TFTPredictor(GluonTSPredictor):
    """GluonTS predictor backed by ``TemporalFusionTransformerEstimator``."""

    def _get_estimator(self) -> Estimator:
        """Build a TFT estimator for this task's frequency and horizon."""
        from gluonts.torch.model.tft import TemporalFusionTransformerEstimator

        estimator_kwargs = dict(
            freq=self.freq,
            prediction_length=self.prediction_length,
            trainer_kwargs=self._get_trainer_kwargs(),
        )
        return TemporalFusionTransformerEstimator(**estimator_kwargs)
src/models/statsforecast.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import List
3
+ import pandas as pd
4
+
5
+ from gluonts.dataset.common import Dataset
6
+ from gluonts.model.forecast import Forecast, QuantileForecast
7
+
8
+ from .abstract import AbstractPredictor
9
+
10
+
11
class StatsForecastPredictor(AbstractPredictor):
    """Base wrapper for models from the ``statsforecast`` package.

    Subclasses supply the models through :meth:`_get_models`. When several
    models are used, their quantile forecasts are combined with the median.
    """

    def __init__(self, prediction_length: int, freq: str, seasonality: int, **kwargs):
        """Store the task settings; extra ``kwargs`` are accepted and ignored."""
        super().__init__(prediction_length, freq, seasonality)

    def _get_models(self):
        """Return the list of statsforecast models; must be overridden."""
        raise NotImplementedError

    @staticmethod
    def _quantile_to_level(q: float) -> int:
        """Return the interval level (in percent) whose bound is quantile ``q``.

        Rounded rather than truncated, so floating-point error in ``200 * q``
        cannot shift the level down by one.
        """
        return int(round(abs(200 * q - 100)))

    @classmethod
    def _quantile_to_suffix(cls, q: float) -> str:
        """Return the column suffix (``-lo-<level>`` / ``-hi-<level>``) for ``q``."""
        side = "-lo-" if q < 0.5 else "-hi-"
        return side + str(cls._quantile_to_level(q))

    def fit_predict(self, dataset: Dataset) -> List[Forecast]:
        """Forecast every series in ``dataset`` with the configured models.

        ``SeasonalNaive`` is registered as the fallback model. Only the
        ``forecast`` call is included in the saved runtime.
        """
        from statsforecast import StatsForecast
        from statsforecast.models import SeasonalNaive

        df = self._to_statsforecast_df(dataset)
        models = self._get_models()
        predictor = StatsForecast(
            df=df,
            freq=self.freq,
            models=models,
            fallback_model=SeasonalNaive(season_length=self.seasonality),
            n_jobs=-1,
        )
        # Request exactly the interval levels needed for self.quantile_levels
        # ([0, 20, 40, 60, 80] for the default 0.1 ... 0.9).
        levels = sorted({self._quantile_to_level(q) for q in self.quantile_levels})
        start_time = time.time()
        predictions_df = predictor.forecast(h=self.prediction_length, level=levels)
        self.save_runtime(time.time() - start_time)
        return self._predictions_df_to_gluonts_forecast(
            predictions_df, dataset, model_names=[str(m) for m in models]
        )

    def _predictions_df_to_gluonts_forecast(
        self,
        predictions_df: pd.DataFrame,
        dataset: Dataset,
        model_names: List[str],
    ) -> List[Forecast]:
        """Convert StatsForecast output into one ``QuantileForecast`` per series.

        Args:
            predictions_df: Frame with a ``"ds"`` column and one
                ``<model><suffix>`` column per model and interval bound.
                Rows are looked up by ``item_id`` through the index, so the
                frame is assumed to be indexed by series id.
            dataset: Entries providing the ``"item_id"`` values, in output order.
            model_names: Column prefixes of the models to combine.
        """
        # Convert StatsForecast output -> DataFrame with quantile_levels as outputs
        columns = {}
        for q in self.quantile_levels:
            suffix = self._quantile_to_suffix(q)
            # Median across models; with a single model this is its own forecast.
            columns[str(q)] = predictions_df[[m + suffix for m in model_names]].median(
                axis=1
            )

        # Convert quantiles DataFrame -> list of QuantileForecasts
        forecast_df = pd.DataFrame(columns)
        forecast_list = []
        for ts in dataset:
            item_id = ts["item_id"]
            # List-based .loc always yields a DataFrame / Series, even when the
            # series has a single forecast row (prediction_length == 1).
            f = forecast_df.loc[[item_id]]
            first_timestamp = predictions_df["ds"].loc[[item_id]].iloc[0]
            forecast_list.append(
                QuantileForecast(
                    forecast_arrays=f.values.T,
                    forecast_keys=f.columns,
                    start_date=pd.Period(first_timestamp, freq=self.freq),
                    item_id=item_id,
                )
            )
        return forecast_list

    def _to_statsforecast_df(self, dataset: Dataset) -> pd.DataFrame:
        """Convert GluonTS Dataset to StatsForecast compatible DataFrame.

        The result has the columns ``unique_id``, ``ds`` and ``y`` with one
        row per observation; the per-series frames are concatenated as-is.
        """
        dfs = []
        for item in dataset:
            target = item["target"]
            timestamps = pd.date_range(
                start=item["start"].to_timestamp(how="S"),
                periods=len(target),
                freq=self.freq,
            )
            dfs.append(
                pd.DataFrame(
                    {
                        "unique_id": [item["item_id"]] * len(target),
                        "ds": timestamps,
                        "y": target,
                    }
                )
            )
        return pd.concat(dfs)
97
+
98
+
99
class AutoARIMAPredictor(StatsForecastPredictor):
    """StatsForecast predictor using a single ``AutoARIMA`` model."""

    def _get_models(self):
        """Return ``[AutoARIMA]`` configured with this task's seasonality."""
        from statsforecast import models as sf_models

        return [sf_models.AutoARIMA(season_length=self.seasonality)]
104
+
105
+
106
class AutoETSPredictor(StatsForecastPredictor):
    """StatsForecast predictor using a single ``AutoETS`` model."""

    def _get_models(self):
        """Return ``[AutoETS]`` configured with this task's seasonality."""
        from statsforecast import models as sf_models

        return [sf_models.AutoETS(season_length=self.seasonality)]
111
+
112
+
113
class AutoThetaPredictor(StatsForecastPredictor):
    """StatsForecast predictor using a single ``AutoTheta`` model."""

    def _get_models(self):
        """Return ``[AutoTheta]`` configured with this task's seasonality."""
        from statsforecast import models as sf_models

        return [sf_models.AutoTheta(season_length=self.seasonality)]
118
+
119
+
120
class StatsEnsemblePredictor(StatsForecastPredictor):
    """StatsForecast predictor combining AutoETS, AutoTheta and AutoARIMA."""

    def _get_models(self):
        """Return the three ensemble members, each with this task's seasonality."""
        from statsforecast import models as sf_models

        member_classes = (
            sf_models.AutoETS,
            sf_models.AutoTheta,
            sf_models.AutoARIMA,
        )
        return [
            model_cls(season_length=self.seasonality) for model_cls in member_classes
        ]
src/score.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from gluonts.dataset.split import split
4
+ from gluonts.dataset.common import Dataset
5
+ from gluonts.model.forecast import Forecast
6
+ from gluonts.evaluation.backtest import _to_dataframe, Evaluator
7
+
8
+
9
def score_predictions(
    dataset: Dataset,
    predictions: List[Forecast],
    prediction_length: int,
    seasonality: int,
):
    """Evaluate ``predictions`` against the last ``prediction_length`` steps.

    The dataset is split ``prediction_length`` steps before the end of each
    series and the resulting test instances are compared with the forecasts
    by a GluonTS ``Evaluator``.

    Returns:
        The first element of the evaluator's result (the aggregate metrics);
        the second element is discarded.
    """
    test_template = split(dataset, offset=-prediction_length)[1]
    test_instances = test_template.generate_instances(prediction_length)
    evaluator = Evaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        seasonality=seasonality,
    )
    agg_metrics, _ = evaluator(
        ts_iterator=(_to_dataframe(instance) for instance in test_instances),
        fcst_iterator=predictions,
    )
    return agg_metrics