Upload feature extractor
Browse files- feature_extraction_moment.py +91 -0
- preprocessor_config.json +6 -0
feature_extraction_moment.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Responsibilities of this FeatureExtractor
|
2 |
+
# - Convert time-series data given as a DataFrame, numpy array, or torch tensor into a torch tensor
|
3 |
+
# - input validation
|
4 |
+
|
5 |
+
from typing import List, Optional, Union
|
6 |
+
|
7 |
+
from pandas import DataFrame
|
8 |
+
import numpy as np
|
9 |
+
import torch
|
10 |
+
import tensorflow as tf
|
11 |
+
import jax.numpy as jnp
|
12 |
+
|
13 |
+
from transformers import FeatureExtractionMixin
|
14 |
+
from transformers import TensorType
|
15 |
+
from transformers import BatchFeature
|
16 |
+
from transformers.utils import logging
|
17 |
+
|
18 |
+
logger = logging.get_logger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
class MomentFeatureExtractor(FeatureExtractionMixin):
    """Turn raw time-series inputs into a batched 3-D array for a MOMENT model.

    Accepted inputs are a pandas ``DataFrame``, a ``numpy.ndarray``, a
    ``torch.Tensor``, or a list of any of these. The input is converted to a
    torch tensor, expanded to three dimensions, truncated along the last axis
    to at most ``_MAX_SEQ_LEN`` entries, and finally converted to the
    framework requested through ``return_tensors``.
    """

    # TODO: Ideally MOMENT's own tokenizer would be exposed here as a `ts_tokenizer`,
    # but it is embedded in the model itself.
    # refers: https://github.com/moment-timeseries-foundation-model/moment/blob/088b253a1138ac7e48a7efc9bf902336c9eec8d9/momentfm/models/moment.py#L105

    model_input_names = ["time_series_values", "input_mask"]

    # Longest sequence kept along the last axis; anything longer is truncated.
    _MAX_SEQ_LEN = 512

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``FeatureExtractionMixin``."""
        super().__init__(**kwargs)

    def __call__(
        self,
        time_series: Union[DataFrame, np.ndarray, torch.Tensor, List[DataFrame], List[np.ndarray], List[torch.Tensor]] = None,
        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
        torch_dtype: Optional[Union[str, torch.dtype]] = torch.float,
    ) -> BatchFeature:
        """Build the model inputs for one time series or a batch of them.

        Args:
            time_series: A single series or a list of series. When ``None``,
                the returned ``time_series_values`` entry is ``None``.
            return_tensors: Target framework: ``'pt'``, ``'np'``, ``'tf'`` or
                ``'jax'`` (or the matching ``TensorType`` member).
            torch_dtype: dtype used for the intermediate torch tensor, either a
                ``torch.dtype`` or its name (e.g. ``"float32"``).

        Returns:
            A ``BatchFeature`` with the single key ``"time_series_values"``.

        Raises:
            ValueError: On an unsupported ``return_tensors`` value, an unknown
                dtype name, or an input with 0 or more than 3 dimensions.
            TypeError: If an input is not a DataFrame, ndarray or Tensor.
        """
        time_series_values = None
        if time_series is not None:
            time_series_values = self._convert_time_series(time_series, return_tensors, torch_dtype)

        return BatchFeature(data={"time_series_values": time_series_values})

    @staticmethod
    def _resolve_torch_dtype(torch_dtype):
        """Map a dtype name such as ``"float32"`` or ``"torch.float32"`` to a ``torch.dtype``."""
        if not isinstance(torch_dtype, str):
            return torch_dtype

        prefix = "torch."
        name = torch_dtype[len(prefix):] if torch_dtype.startswith(prefix) else torch_dtype
        resolved = getattr(torch, name, None)
        if not isinstance(resolved, torch.dtype):
            raise ValueError(f"Unknown torch dtype string: {torch_dtype!r}")
        return resolved

    def _convert_time_series(self, time_series, return_tensors, torch_dtype):
        """Convert the input to a 3-D tensor and then to the requested framework.

        A 1-D input becomes ``(1, 1, L)``, a 2-D input becomes ``(1, C, L)``,
        and a 3-D input is used as is. A list input is stacked along a new
        leading axis, so all its elements must share one shape.
        """
        dtype = self._resolve_torch_dtype(torch_dtype)

        # Convert DataFrame / np.ndarray / torch.Tensor inputs to torch.Tensor.
        if isinstance(time_series, list):
            # Convert every element, then merge them into a single tensor.
            tensor = torch.stack([self._convert_to_tensor(ts, dtype) for ts in time_series])
        else:
            tensor = self._convert_to_tensor(time_series, dtype)

        # Check the number of dimensions and pad up to three.
        n_dims = tensor.dim()
        if n_dims > 3:
            raise ValueError("time_series_tensor must not have more than 3 dimensions")
        if n_dims == 0:
            raise ValueError("time_series_tensor must have at least 1 dimension")
        if n_dims == 2:
            tensor = tensor.unsqueeze(0)
        elif n_dims == 1:
            tensor = tensor.unsqueeze(0).unsqueeze(0)

        # Report the resulting layout. The last axis is the one truncated below.
        batch_size, n_channels, seq_len = tensor.shape
        logger.info(f"Batch size: {batch_size}, Number of channels: {n_channels}, Dimension of model: {seq_len}")

        # Limit the sequence length to the maximum.
        if seq_len > self._MAX_SEQ_LEN:
            tensor = tensor[:, :, : self._MAX_SEQ_LEN]
            logger.info(f"Sequence length has been truncated to {self._MAX_SEQ_LEN}.")

        # Convert to the framework selected by return_tensors.
        if return_tensors == 'pt' or return_tensors == TensorType.PYTORCH:
            return tensor

        if return_tensors == 'np' or return_tensors == TensorType.NUMPY:
            converter = None
        elif return_tensors == 'tf' or return_tensors == TensorType.TENSORFLOW:
            converter = tf.convert_to_tensor
        elif return_tensors == 'jax' or return_tensors == TensorType.JAX:
            converter = jnp.array
        else:
            raise ValueError("Unsupported return_tensors type")

        # detach()/cpu() keep the export working for tensors that track
        # gradients or live on an accelerator; plain .numpy() fails on those.
        array = tensor.detach().cpu().numpy()
        return array if converter is None else converter(array)

    def _convert_to_tensor(self, time_series, torch_dtype):
        """Convert one DataFrame, ndarray or Tensor into a ``torch.Tensor``.

        A DataFrame is transposed, so its columns become the leading axis.

        Raises:
            TypeError: If ``time_series`` is none of the supported types.
        """
        dtype = self._resolve_torch_dtype(torch_dtype)

        if isinstance(time_series, DataFrame):
            return torch.tensor(time_series.values, dtype=dtype).t()
        if isinstance(time_series, np.ndarray):
            return torch.tensor(time_series, dtype=dtype)
        if isinstance(time_series, torch.Tensor):
            # With no dtype requested there is nothing to cast.
            return time_series if dtype is None else time_series.to(dtype)

        raise TypeError(
            "time_series must be a pandas DataFrame, numpy ndarray or torch Tensor, "
            f"got {type(time_series).__name__}"
        )
|
preprocessor_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_map": {
|
3 |
+
"AutoFeatureExtractor": "feature_extraction_moment.MomentFeatureExtractor"
|
4 |
+
},
|
5 |
+
"feature_extractor_type": "MomentFeatureExtractor"
|
6 |
+
}
|