alwaysgood committed on
Commit 81397db · verified · 1 Parent(s): 5431a0f

Update inference.py

Files changed (1)
  1. inference.py +16 -57
inference.py CHANGED
@@ -5,9 +5,7 @@ import argparse
  import joblib
  import os
  from tqdm import tqdm
- import json # 👈 add the JSON library
-
- # ⭐️ Fix 1: explicitly add the import path to avoid path issues
  import sys
  sys.path.append('.')
 
@@ -15,9 +13,9 @@ from models import TimeXer
  from utils.metrics import metric
  from utils.timefeatures import time_features
 
- # --- 1. Argument parsing (unchanged) ---
  parser = argparse.ArgumentParser(description='Time Series Prediction')
- # ... (all argparse code identical to before) ...
  parser.add_argument('--checkpoint_path', type=str, required=True, help='Path to the model checkpoint file (.pth)')
  parser.add_argument('--scaler_path', type=str, required=True, help='Path to the saved scaler file (.gz)')
  parser.add_argument('--predict_input_file', type=str, default=None, help='[Mode 1] Path to the CSV file for single future prediction')
@@ -49,17 +47,9 @@ parser.add_argument('--freq', type=str, default='t', help='freq for time feature
  args = parser.parse_args()
 
 
-
- prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
- prediction = scaler.inverse_transform(prediction_padded)[:, -args.c_out:]
- else:
- prediction = scaler.inverse_transform(prediction_scaled)
-
- return prediction
-
-
- # --- 2. Shared function: load the model and scaler (unchanged) ---
  def load_model_and_scaler(args):
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  args.device = device
  model = TimeXer.Model(args).float().to(device)
@@ -75,39 +65,33 @@ def predict_future(args, model, scaler, device):
  df_input = pd.read_csv(args.predict_input_file)
  df_input['date'] = pd.to_datetime(df_input['date'])
 
- # ⭐️ Updated to the exact column names you provided
  cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
 
- # 1. Build the encoder input (x_enc)
  raw_input = df_input[cols_to_scale].tail(args.seq_len).values
  input_scaled = scaler.transform(raw_input)
  batch_x = torch.from_numpy(input_scaled).float().unsqueeze(0).to(device)
 
- # 2. Build the encoder time features (x_mark_enc)
  df_stamp_enc = df_input.tail(args.seq_len)[['date']].reset_index(drop=True)
  enc_mark = time_features(df_stamp_enc, timeenc=0, freq=args.freq)
  batch_x_mark = torch.from_numpy(enc_mark).float().unsqueeze(0).to(device)
 
- # 3. Build the decoder input (x_dec)
  dec_inp_label = input_scaled[-args.label_len:]
  dec_inp_pred = np.zeros([args.pred_len, args.enc_in])
  decoder_input = np.concatenate([dec_inp_label, dec_inp_pred], axis=0)
  batch_y = torch.from_numpy(decoder_input).float().unsqueeze(0).to(device)
 
- # 4. Build the decoder time features (x_mark_dec)
  last_date = df_stamp_enc['date'].iloc[-1]
- future_dates = pd.date_range(start=last_date, periods=args.pred_len + 1, freq='5T')[1:] # assumes 5-minute intervals
  df_stamp_dec = pd.DataFrame({'date': list(df_stamp_enc['date'].values[-args.label_len:]) + list(future_dates)})
  dec_mark = time_features(df_stamp_dec, timeenc=0, freq=args.freq)
  batch_y_mark = torch.from_numpy(dec_mark).float().unsqueeze(0).to(device)
 
- # 5. Run the model
  with torch.no_grad():
  outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
 
  prediction_scaled = outputs.detach().cpu().numpy()[0]
 
- # Restore the original scale
  if scaler.n_features_in_ > 1:
  padding = np.zeros((prediction_scaled.shape[0], scaler.n_features_in_ - args.c_out))
  prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
@@ -116,12 +100,13 @@ def predict_future(args, model, scaler, device):
  prediction = scaler.inverse_transform(prediction_scaled)
  return prediction
 
- # --- 4. Mode 2: full-period rolling evaluation function (⭐️⭐️⭐️ this function has been completed ⭐️⭐️⭐️) ---
  def evaluate_performance(args, model, scaler, device):
  df_eval = pd.read_csv(args.evaluate_file)
  df_eval['date'] = pd.to_datetime(df_eval['date'])
 
- # ⭐️ Updated to the exact column names you provided
  cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
  raw_data = df_eval[cols_to_scale].values
  data_scaled = scaler.transform(raw_data)
@@ -133,7 +118,6 @@ def evaluate_performance(args, model, scaler, device):
 
  num_samples = len(data_scaled) - args.seq_len - args.pred_len + 1
  for i in tqdm(range(num_samples), desc="Evaluating", file=sys.stderr):
- # 1. Build the encoder/decoder inputs (at every step)
  s_begin = i
  s_end = s_begin + args.seq_len
 
@@ -152,19 +136,16 @@ def evaluate_performance(args, model, scaler, device):
  dec_mark_pred = df_stamp[true_begin:true_end]
  batch_y_mark = np.concatenate([dec_mark_label, dec_mark_pred], axis=0)
 
- # Convert to tensors
  batch_x = torch.from_numpy(batch_x).float().unsqueeze(0).to(device)
  batch_x_mark = torch.from_numpy(batch_x_mark).float().unsqueeze(0).to(device)
  batch_y = torch.from_numpy(batch_y).float().unsqueeze(0).to(device)
  batch_y_mark = torch.from_numpy(batch_y_mark).float().unsqueeze(0).to(device)
 
- # 2. Run the model
  with torch.no_grad():
  outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
 
  pred_scaled = outputs.detach().cpu().numpy()[0]
 
- # 3. Restore the original scale
  if scaler.n_features_in_ > 1:
  padding = np.zeros((pred_scaled.shape[0], scaler.n_features_in_ - args.c_out))
  pred_padded = np.concatenate((padding, pred_scaled), axis=1)
@@ -179,45 +160,23 @@ def evaluate_performance(args, model, scaler, device):
 
  return np.array(preds_unscaled), np.array(trues_unscaled)
 
- # --- 5. Main logic (⭐️⭐️⭐️ this part was completely changed ⭐️⭐️⭐️) ---
  if __name__ == '__main__':
-
- final_output = {} # dictionary to hold the final result
-
  try:
  model, scaler, device = load_model_and_scaler(args)
-
  if args.predict_input_file:
  print("--- Running in Single Prediction Mode ---", file=sys.stderr)
  prediction = predict_future(args, model, scaler, device)
- final_output = {
- "status": "success",
- "mode": "single_prediction",
- "prediction": prediction.flatten().tolist()
- }
-
  elif args.evaluate_file:
  print("--- Running in Rolling Evaluation Mode ---", file=sys.stderr)
  eval_preds, eval_trues = evaluate_performance(args, model, scaler, device)
-
- # Compute the performance metrics
  mae, mse, _, _, _ = metric(eval_preds, eval_trues)
-
- final_output = {
- "status": "success",
- "mode": "rolling_evaluation",
- "mse": mse,
- "mae": mae,
- # Returning every prediction would be too large, so return only a sample or the necessary information
- "prediction_samples": [p.flatten().tolist() for p in eval_preds[:5]]
- }
-
  else:
- final_output = {"status": "error", "message": "No mode selected. Use --predict_input_file or --evaluate_file."}
-
  except Exception as e:
  final_output = {"status": "error", "message": str(e)}
-
- # Print the final result to standard output (stdout) as a JSON string.
- # app.py reads this output and uses it as the API response.
  print(json.dumps(final_output, indent=2))
 
  import joblib
  import os
  from tqdm import tqdm
+ import json
 
  import sys
  sys.path.append('.')
 
  from utils.metrics import metric
  from utils.timefeatures import time_features
 
+ # --- 1. Argument parsing ---
+ # ... (same as before, unchanged) ...
  parser = argparse.ArgumentParser(description='Time Series Prediction')
  parser.add_argument('--checkpoint_path', type=str, required=True, help='Path to the model checkpoint file (.pth)')
  parser.add_argument('--scaler_path', type=str, required=True, help='Path to the saved scaler file (.gz)')
  parser.add_argument('--predict_input_file', type=str, default=None, help='[Mode 1] Path to the CSV file for single future prediction')
 
  args = parser.parse_args()
 
 
+ # --- 2. Shared function: load the model and scaler ---
  def load_model_and_scaler(args):
+ # ... (same as before, unchanged) ...
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  args.device = device
  model = TimeXer.Model(args).float().to(device)
 
  df_input = pd.read_csv(args.predict_input_file)
  df_input['date'] = pd.to_datetime(df_input['date'])
 
  cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
 
  raw_input = df_input[cols_to_scale].tail(args.seq_len).values
  input_scaled = scaler.transform(raw_input)
  batch_x = torch.from_numpy(input_scaled).float().unsqueeze(0).to(device)
 
  df_stamp_enc = df_input.tail(args.seq_len)[['date']].reset_index(drop=True)
  enc_mark = time_features(df_stamp_enc, timeenc=0, freq=args.freq)
  batch_x_mark = torch.from_numpy(enc_mark).float().unsqueeze(0).to(device)
 
  dec_inp_label = input_scaled[-args.label_len:]
  dec_inp_pred = np.zeros([args.pred_len, args.enc_in])
  decoder_input = np.concatenate([dec_inp_label, dec_inp_pred], axis=0)
  batch_y = torch.from_numpy(decoder_input).float().unsqueeze(0).to(device)
 
  last_date = df_stamp_enc['date'].iloc[-1]
+ future_dates = pd.date_range(start=last_date, periods=args.pred_len + 1, freq='5T')[1:]
  df_stamp_dec = pd.DataFrame({'date': list(df_stamp_enc['date'].values[-args.label_len:]) + list(future_dates)})
  dec_mark = time_features(df_stamp_dec, timeenc=0, freq=args.freq)
  batch_y_mark = torch.from_numpy(dec_mark).float().unsqueeze(0).to(device)
 
  with torch.no_grad():
  outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
 
  prediction_scaled = outputs.detach().cpu().numpy()[0]
 
+ # ⭐️⭐️⭐️ Fixed the indentation of this block ⭐️⭐️⭐️
  if scaler.n_features_in_ > 1:
  padding = np.zeros((prediction_scaled.shape[0], scaler.n_features_in_ - args.c_out))
  prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
 
  prediction = scaler.inverse_transform(prediction_scaled)
  return prediction
 
+ # --- 4. Mode 2: full-period rolling evaluation function ---
  def evaluate_performance(args, model, scaler, device):
+ # ... (same as before, unchanged) ...
+ # ⭐️ Also reviewed and fixed the indentation inside this function.
  df_eval = pd.read_csv(args.evaluate_file)
  df_eval['date'] = pd.to_datetime(df_eval['date'])
 
  cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
  raw_data = df_eval[cols_to_scale].values
  data_scaled = scaler.transform(raw_data)
 
  num_samples = len(data_scaled) - args.seq_len - args.pred_len + 1
  for i in tqdm(range(num_samples), desc="Evaluating", file=sys.stderr):
  s_begin = i
  s_end = s_begin + args.seq_len
 
  dec_mark_pred = df_stamp[true_begin:true_end]
  batch_y_mark = np.concatenate([dec_mark_label, dec_mark_pred], axis=0)
 
  batch_x = torch.from_numpy(batch_x).float().unsqueeze(0).to(device)
  batch_x_mark = torch.from_numpy(batch_x_mark).float().unsqueeze(0).to(device)
  batch_y = torch.from_numpy(batch_y).float().unsqueeze(0).to(device)
  batch_y_mark = torch.from_numpy(batch_y_mark).float().unsqueeze(0).to(device)
 
  with torch.no_grad():
  outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
 
  pred_scaled = outputs.detach().cpu().numpy()[0]
 
  if scaler.n_features_in_ > 1:
  padding = np.zeros((pred_scaled.shape[0], scaler.n_features_in_ - args.c_out))
  pred_padded = np.concatenate((padding, pred_scaled), axis=1)
 
  return np.array(preds_unscaled), np.array(trues_unscaled)
 
+ # --- 5. Main logic ---
  if __name__ == '__main__':
+ # ... (same as before, unchanged) ...
+ final_output = {}
 
  try:
  model, scaler, device = load_model_and_scaler(args)
 
  if args.predict_input_file:
  print("--- Running in Single Prediction Mode ---", file=sys.stderr)
  prediction = predict_future(args, model, scaler, device)
+ final_output = {"status": "success", "mode": "single_prediction", "prediction": prediction.flatten().tolist()}
 
  elif args.evaluate_file:
  print("--- Running in Rolling Evaluation Mode ---", file=sys.stderr)
  eval_preds, eval_trues = evaluate_performance(args, model, scaler, device)
 
  mae, mse, _, _, _ = metric(eval_preds, eval_trues)
+ final_output = {"status": "success", "mode": "rolling_evaluation", "mse": mse, "mae": mae, "prediction_samples": [p.flatten().tolist() for p in eval_preds[:5]]}
 
  else:
+ final_output = {"status": "error", "message": "No mode selected."}
 
  except Exception as e:
  final_output = {"status": "error", "message": str(e)}
 
  print(json.dumps(final_output, indent=2))
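
Note on the scale-restoration block in the diff: the scaler was fit on all five columns in cols_to_scale, but the model only emits the last c_out target column(s) (here 'residual'), so the prediction is zero-padded back to the scaler's full width before inverse_transform and the target column(s) are sliced back out. A minimal, self-contained sketch of that trick follows, using synthetic data and a plain sklearn StandardScaler rather than the project's actual joblib-saved scaler:

import numpy as np
from sklearn.preprocessing import StandardScaler

# Stand-in for the saved scaler: fit on 5 features, as with cols_to_scale.
rng = np.random.default_rng(0)
scaler = StandardScaler().fit(rng.normal(size=(100, 5)))

pred_len, c_out = 12, 1
prediction_scaled = rng.normal(size=(pred_len, c_out))  # model output, still in scaled space

# Zero-pad the columns the model does not predict, inverse-transform the full
# matrix, then keep only the last c_out column(s); the padded columns are discarded.
padding = np.zeros((prediction_scaled.shape[0], scaler.n_features_in_ - c_out))
prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
prediction = scaler.inverse_transform(prediction_padded)[:, -c_out:]
print(prediction.shape)  # (12, 1), now in the original units of the target column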
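
For the rolling evaluation, the window count follows from num_samples = len(data_scaled) - args.seq_len - args.pred_len + 1: each sample needs seq_len history rows plus pred_len future rows inside the data. A toy sketch of that indexing (the sizes 200/96/12 and the variable names are illustrative, not the project's settings):

# Toy sizes: 200 rows of data, 96-step input window, 12-step forecast horizon.
n_rows, seq_len, pred_len = 200, 96, 12
num_samples = n_rows - seq_len - pred_len + 1   # 93 sliding windows

for i in range(num_samples):
    s_begin, s_end = i, i + seq_len           # encoder input rows [s_begin, s_end)
    t_begin, t_end = s_end, s_end + pred_len  # future rows compared against the forecast
    assert t_end <= n_rows                    # even the last window still fits in the data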
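
The closing comments say the JSON printed to stdout is read by app.py and used as the API response. A hedged sketch of such a caller is shown below; the file paths and the app.py context are assumptions, while the CLI flags and the JSON keys ("status", "prediction", "message") come from the diff itself:

import json
import subprocess

# Hypothetical paths: replace with the real checkpoint, scaler, and input CSV.
cmd = [
    "python", "inference.py",
    "--checkpoint_path", "checkpoints/model.pth",
    "--scaler_path", "checkpoints/scaler.gz",
    "--predict_input_file", "data/latest_window.csv",
]

# The script logs progress to stderr, so stdout carries only the JSON result.
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
result = json.loads(proc.stdout)

if result["status"] == "success":
    print("first predicted values:", result["prediction"][:5])
else:
    print("inference failed:", result["message"])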