alwaysgood committed on
Commit
0f29326
·
verified ·
1 Parent(s): 0fda1cf

Update utils/timefeatures.py

Browse files
Files changed (1) hide show
  1. utils/timefeatures.py +73 -114
utils/timefeatures.py CHANGED
@@ -1,148 +1,107 @@
1
- # From: gluonts/src/gluonts/time_feature/_base.py
2
- # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License").
5
- # You may not use this file except in compliance with the License.
6
- # A copy of the License is located at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # or in the "license" file accompanying this file. This file is distributed
11
- # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12
- # express or implied. See the License for the specific language governing
13
- # permissions and limitations under the License.
14
-
15
- from typing import List
16
 
17
  import numpy as np
18
  import pandas as pd
19
- from pandas.tseries import offsets
20
- from pandas.tseries.frequencies import to_offset
21
-
22
 
23
- class TimeFeature:
24
- def __init__(self):
25
- pass
26
 
 
27
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
28
  pass
29
 
30
- def __repr__(self):
31
- return self.__class__.__name__ + "()"
32
-
33
-
34
- class SecondOfMinute(TimeFeature):
35
  """Minute of hour encoded as value between [-0.5, 0.5]"""
36
-
37
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38
- return index.second / 59.0 - 0.5
 
39
 
40
-
41
- class MinuteOfHour(TimeFeature):
42
  """Minute of hour encoded as value between [-0.5, 0.5]"""
43
-
44
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45
- return index.minute / 59.0 - 0.5
 
46
 
47
-
48
- class HourOfDay(TimeFeature):
49
  """Hour of day encoded as value between [-0.5, 0.5]"""
50
-
51
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52
- return index.hour / 23.0 - 0.5
53
-
54
-
55
- class DayOfWeek(TimeFeature):
56
- """Hour of day encoded as value between [-0.5, 0.5]"""
57
 
 
 
58
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59
- return index.dayofweek / 6.0 - 0.5
60
-
61
 
62
- class DayOfMonth(TimeFeature):
63
  """Day of month encoded as value between [-0.5, 0.5]"""
64
-
65
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66
- return (index.day - 1) / 30.0 - 0.5
67
-
68
 
69
- class DayOfYear(TimeFeature):
70
  """Day of year encoded as value between [-0.5, 0.5]"""
71
-
72
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73
- return (index.dayofyear - 1) / 365.0 - 0.5
74
-
75
 
76
- class MonthOfYear(TimeFeature):
77
  """Month of year encoded as value between [-0.5, 0.5]"""
78
-
79
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
80
- return (index.month - 1) / 11.0 - 0.5
81
-
82
 
83
- class WeekOfYear(TimeFeature):
84
  """Week of year encoded as value between [-0.5, 0.5]"""
85
-
86
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
87
- return (index.isocalendar().week - 1) / 52.0 - 0.5
88
-
89
-
90
- def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
 
91
  """
92
- Returns a list of time features that will be appropriate for the given frequency string.
93
- Parameters
94
- ----------
95
- freq_str
96
- Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
97
  """
98
-
99
  features_by_offsets = {
100
- offsets.YearEnd: [],
101
- offsets.QuarterEnd: [MonthOfYear],
102
- offsets.MonthEnd: [MonthOfYear],
103
- offsets.Week: [DayOfMonth, WeekOfYear],
104
- offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
105
- offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
106
- offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
107
- offsets.Minute: [
108
- MinuteOfHour,
109
- HourOfDay,
110
- DayOfWeek,
111
- DayOfMonth,
112
- DayOfYear,
113
- ],
114
- offsets.Second: [
115
- SecondOfMinute,
116
- MinuteOfHour,
117
- HourOfDay,
118
- DayOfWeek,
119
- DayOfMonth,
120
- DayOfYear,
121
- ],
122
  }
123
-
124
- offset = to_offset(freq_str)
125
-
126
- for offset_type, feature_classes in features_by_offsets.items():
127
- if isinstance(offset, offset_type):
128
- return [cls() for cls in feature_classes]
129
-
130
- supported_freq_msg = f"""
131
- Unsupported frequency {freq_str}
132
- The following frequencies are supported:
133
- Y - yearly
134
- alias: A
135
- M - monthly
136
- W - weekly
137
- D - daily
138
- B - business days
139
- H - hourly
140
- T - minutely
141
- alias: min
142
- S - secondly
143
- """
144
- raise RuntimeError(supported_freq_msg)
145
-
146
-
147
- def time_features(dates, freq='h'):
148
- return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
 
 
 
 
1
+ # utils/timefeatures.py (최종 수정 버전)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import numpy as np
4
  import pandas as pd
5
+ from typing import List
 
 
6
 
7
+ # -------------------------------------------------------------------------
8
+ # 이 파일의 모든 코드를 아래 내용으로 교체하면 됩니다.
9
+ # -------------------------------------------------------------------------
10
 
11
+ class BaseTimeFeature:
12
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
13
  pass
14
 
15
+ class SecondOfMinute(BaseTimeFeature):
 
 
 
 
16
  """Minute of hour encoded as value between [-0.5, 0.5]"""
 
17
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
18
+ # ⭐️ 수정: index.second -> index.dt.second
19
+ return index.dt.second / 59.0 - 0.5
20
 
21
+ class MinuteOfHour(BaseTimeFeature):
 
22
  """Minute of hour encoded as value between [-0.5, 0.5]"""
 
23
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24
+ # ⭐️ 수정: index.minute -> index.dt.minute
25
+ return index.dt.minute / 59.0 - 0.5
26
 
27
+ class HourOfDay(BaseTimeFeature):
 
28
  """Hour of day encoded as value between [-0.5, 0.5]"""
 
29
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
30
+ # ⭐️ 수정: index.hour -> index.dt.hour
31
+ return index.dt.hour / 23.0 - 0.5
 
 
 
32
 
33
+ class DayOfWeek(BaseTimeFeature):
34
+ """Day of week encoded as value between [-0.5, 0.5]"""
35
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
36
+ # ⭐️ 수정: index.dayofweek -> index.dt.dayofweek
37
+ return index.dt.dayofweek / 6.0 - 0.5
38
 
39
+ class DayOfMonth(BaseTimeFeature):
40
  """Day of month encoded as value between [-0.5, 0.5]"""
 
41
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
42
+ # ⭐️ 수정: index.day -> index.dt.day
43
+ return (index.dt.day - 1) / 30.0 - 0.5
44
 
45
+ class DayOfYear(BaseTimeFeature):
46
  """Day of year encoded as value between [-0.5, 0.5]"""
 
47
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
48
+ # ⭐️ 수정: index.dayofyear -> index.dt.dayofyear
49
+ return (index.dt.dayofyear - 1) / 365.0 - 0.5
50
 
51
+ class MonthOfYear(BaseTimeFeature):
52
  """Month of year encoded as value between [-0.5, 0.5]"""
 
53
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
54
+ # ⭐️ 수정: index.month -> index.dt.month
55
+ return (index.dt.month - 1) / 11.0 - 0.5
56
 
57
+ class WeekOfYear(BaseTimeFeature):
58
  """Week of year encoded as value between [-0.5, 0.5]"""
 
59
  def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
60
+ # ⭐️ 수정: index.isocalendar().week -> index.dt.isocalendar().week
61
+ # .astype(float) 추가
62
+ return (index.dt.isocalendar().week.astype(float) - 1) / 52.0 - 0.5
63
+
64
+ def time_features_from_frequency_str(freq_str: str) -> List[BaseTimeFeature]:
65
  """
66
+ Returns a list of time features that will be used for a given frequency string.
 
 
 
 
67
  """
 
68
  features_by_offsets = {
69
+ "Y": ["year"],
70
+ "M": ["month", "year"],
71
+ "W": ["day", "week", "month", "year"],
72
+ "D": ["day", "week", "month", "year"],
73
+ "B": ["day", "week", "month", "year"],
74
+ "H": ["hour", "day", "week", "month", "year"],
75
+ "T": ["minute", "hour", "day", "week", "month", "year"],
76
+ "min": ["minute", "hour", "day", "week", "month", "year"],
77
+ "S": ["second", "minute", "hour", "day", "week", "month", "year"],
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
79
+ offset = freq_str.split("-")[-1]
80
+ for name, feats in features_by_offsets.items():
81
+ if offset.startswith(name):
82
+ return [
83
+ cls()
84
+ for cls in FEATURES_MAP.values()
85
+ if cls.name in feats
86
+ ]
87
+
88
+ FEATURES_MAP = {
89
+ "year": "Year",
90
+ "month": MonthOfYear,
91
+ "week": WeekOfYear,
92
+ "day": DayOfMonth,
93
+ "dayofweek": DayOfWeek,
94
+ "dayofyear": DayOfYear,
95
+ "hour": HourOfDay,
96
+ "minute": MinuteOfHour,
97
+ "second": SecondOfMinute,
98
+ }
99
+
100
+ def time_features(dates, freq="H"):
101
+ # ⭐️ 이 함수 내부 로직을 pandas 최신 버전에 맞게 수정했습니다.
102
+ if isinstance(dates, pd.DataFrame):
103
+ dates = pd.to_datetime(dates.iloc[:, 0])
104
+
105
+ return np.vstack(
106
+ [feat(dates) for feat in time_features_from_frequency_str(freq)]
107
+ ).transpose(1, 0)