Spaces:

alwaysgood
/

my-tide-env

Sleeping

App Files Community

alwaysgood committed on Aug 6

Commit

81397db

verified ·

1 Parent(s): 5431a0f

Update inference.py

Browse files

Files changed (1) hide show

inference.py +16 -57

inference.py CHANGED Viewed

@@ -5,9 +5,7 @@ import argparse
 import joblib
 import os
 from tqdm import tqdm
-import json  # 👈 JSON 라이브러리 추가
-# ⭐️ 수정 사항 1: 경로 문제를 피하기 위해 명시적으로 import 경로 추가
 import sys
 sys.path.append('.')
@@ -15,9 +13,9 @@ from models import TimeXer
 from utils.metrics import metric
 from utils.timefeatures import time_features
-# --- 1. 인자 파싱 (수정 없음) ---
 parser = argparse.ArgumentParser(description='Time Series Prediction')
-# ... (이전과 동일한 모든 argparse 코드) ...
 parser.add_argument('--checkpoint_path', type=str, required=True, help='Path to the model checkpoint file (.pth)')
 parser.add_argument('--scaler_path', type=str, required=True, help='Path to the saved scaler file (.gz)')
 parser.add_argument('--predict_input_file', type=str, default=None, help='[Mode 1] Path to the CSV file for single future prediction')
@@ -49,17 +47,9 @@ parser.add_argument('--freq', type=str, default='t', help='freq for time feature
 args = parser.parse_args()
-        prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
-        prediction = scaler.inverse_transform(prediction_padded)[:, -args.c_out:]
-    else:
-        prediction = scaler.inverse_transform(prediction_scaled)
-    return prediction
-# --- 2. 공통 함수: 모델 및 스케일러 로드 (수정 없음) ---
 def load_model_and_scaler(args):
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
     args.device = device
     model = TimeXer.Model(args).float().to(device)
@@ -75,39 +65,33 @@ def predict_future(args, model, scaler, device):
     df_input = pd.read_csv(args.predict_input_file)
     df_input['date'] = pd.to_datetime(df_input['date'])
-    # ⭐️ 알려주신 정확한 컬럼 이름으로 수정
     cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
-    # 1. 인코더 입력(x_enc) 생성
     raw_input = df_input[cols_to_scale].tail(args.seq_len).values
     input_scaled = scaler.transform(raw_input)
     batch_x = torch.from_numpy(input_scaled).float().unsqueeze(0).to(device)
-    # 2. 인코더 시간 정보(x_mark_enc) 생성
     df_stamp_enc = df_input.tail(args.seq_len)[['date']].reset_index(drop=True)
     enc_mark = time_features(df_stamp_enc, timeenc=0, freq=args.freq)
     batch_x_mark = torch.from_numpy(enc_mark).float().unsqueeze(0).to(device)
-    # 3. 디코더 입력(x_dec) 생성
     dec_inp_label = input_scaled[-args.label_len:]
     dec_inp_pred = np.zeros([args.pred_len, args.enc_in])
     decoder_input = np.concatenate([dec_inp_label, dec_inp_pred], axis=0)
     batch_y = torch.from_numpy(decoder_input).float().unsqueeze(0).to(device)
-    # 4. 디코더 시간 정보(x_mark_dec) 생성
     last_date = df_stamp_enc['date'].iloc[-1]
-    future_dates = pd.date_range(start=last_date, periods=args.pred_len + 1, freq='5T')[1:] # 5분 단위 가정
     df_stamp_dec = pd.DataFrame({'date': list(df_stamp_enc['date'].values[-args.label_len:]) + list(future_dates)})
     dec_mark = time_features(df_stamp_dec, timeenc=0, freq=args.freq)
     batch_y_mark = torch.from_numpy(dec_mark).float().unsqueeze(0).to(device)
-    # 5. 모델 호출
     with torch.no_grad():
         outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
     prediction_scaled = outputs.detach().cpu().numpy()[0]
-    # 스케일 복원
     if scaler.n_features_in_ > 1:
         padding = np.zeros((prediction_scaled.shape[0], scaler.n_features_in_ - args.c_out))
         prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
@@ -116,12 +100,13 @@ def predict_future(args, model, scaler, device):
         prediction = scaler.inverse_transform(prediction_scaled)
     return prediction
-# --- 4. 모드 2: 전체 기간 롤링 평가 함수 (⭐️⭐️⭐️ 이 함수를 완성했습니다 ⭐️⭐️⭐️) ---
 def evaluate_performance(args, model, scaler, device):
     df_eval = pd.read_csv(args.evaluate_file)
     df_eval['date'] = pd.to_datetime(df_eval['date'])
-    # ⭐️ 알려주신 정확한 컬럼 이름으로 수정
     cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
     raw_data = df_eval[cols_to_scale].values
     data_scaled = scaler.transform(raw_data)
@@ -133,7 +118,6 @@ def evaluate_performance(args, model, scaler, device):
     num_samples = len(data_scaled) - args.seq_len - args.pred_len + 1
     for i in tqdm(range(num_samples), desc="Evaluating", file=sys.stderr):
-        # 1. 인코더/디코더 입력 생성 (매 스텝마다)
         s_begin = i
         s_end = s_begin + args.seq_len
@@ -152,19 +136,16 @@ def evaluate_performance(args, model, scaler, device):
         dec_mark_pred = df_stamp[true_begin:true_end]
         batch_y_mark = np.concatenate([dec_mark_label, dec_mark_pred], axis=0)
-        # 텐서로 변환
         batch_x = torch.from_numpy(batch_x).float().unsqueeze(0).to(device)
         batch_x_mark = torch.from_numpy(batch_x_mark).float().unsqueeze(0).to(device)
         batch_y = torch.from_numpy(batch_y).float().unsqueeze(0).to(device)
         batch_y_mark = torch.from_numpy(batch_y_mark).float().unsqueeze(0).to(device)
-        # 2. 모델 호출
         with torch.no_grad():
             outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
         pred_scaled = outputs.detach().cpu().numpy()[0]
-        # 3. 스케일 복원
         if scaler.n_features_in_ > 1:
             padding = np.zeros((pred_scaled.shape[0], scaler.n_features_in_ - args.c_out))
             pred_padded = np.concatenate((padding, pred_scaled), axis=1)
@@ -179,45 +160,23 @@ def evaluate_performance(args, model, scaler, device):
     return np.array(preds_unscaled), np.array(trues_unscaled)
-# --- 5. 메인 로직 (⭐️⭐️⭐️ 이 부분이 완전히 변경되었습니다 ⭐️⭐️⭐️) ---
 if __name__ == '__main__':
-    final_output = {} # 최종 결과를 담을 딕셔너리
     try:
         model, scaler, device = load_model_and_scaler(args)
         if args.predict_input_file:
             print("--- Running in Single Prediction Mode ---", file=sys.stderr)
             prediction = predict_future(args, model, scaler, device)
-            final_output = {
-                "status": "success",
-                "mode": "single_prediction",
-                "prediction": prediction.flatten().tolist()
-            }
         elif args.evaluate_file:
             print("--- Running in Rolling Evaluation Mode ---", file=sys.stderr)
             eval_preds, eval_trues = evaluate_performance(args, model, scaler, device)
-            # 성능 지표 계산
             mae, mse, _, _, _ = metric(eval_preds, eval_trues)
-            final_output = {
-                "status": "success",
-                "mode": "rolling_evaluation",
-                "mse": mse,
-                "mae": mae,
-                # 전체 예측을 반환하면 너무 크므로, 샘플만 반환하거나 필요한 정보만 반환
-                "prediction_samples": [p.flatten().tolist() for p in eval_preds[:5]]
-            }
         else:
-            final_output = {"status": "error", "message": "No mode selected. Use --predict_input_file or --evaluate_file."}
     except Exception as e:
         final_output = {"status": "error", "message": str(e)}
-    # 최종 결과를 JSON 문자열로 표준 출력(stdout)에 프린트합니다.
-    # 이 출력을 app.py가 읽어서 API 응답으로 사용합니다.
     print(json.dumps(final_output, indent=2))

 import joblib
 import os
 from tqdm import tqdm
+import json
 import sys
 sys.path.append('.')
 from utils.metrics import metric
 from utils.timefeatures import time_features
+# --- 1. 인자 파싱 ---
+# ... (이전과 동일, 수정 없음) ...
 parser = argparse.ArgumentParser(description='Time Series Prediction')
 parser.add_argument('--checkpoint_path', type=str, required=True, help='Path to the model checkpoint file (.pth)')
 parser.add_argument('--scaler_path', type=str, required=True, help='Path to the saved scaler file (.gz)')
 parser.add_argument('--predict_input_file', type=str, default=None, help='[Mode 1] Path to the CSV file for single future prediction')
 args = parser.parse_args()
+# --- 2. 공통 함수: 모델 및 스케일러 로드 ---
 def load_model_and_scaler(args):
+    # ... (이전과 동일, 수정 없음) ...
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
     args.device = device
     model = TimeXer.Model(args).float().to(device)
     df_input = pd.read_csv(args.predict_input_file)
     df_input['date'] = pd.to_datetime(df_input['date'])
     cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
     raw_input = df_input[cols_to_scale].tail(args.seq_len).values
     input_scaled = scaler.transform(raw_input)
     batch_x = torch.from_numpy(input_scaled).float().unsqueeze(0).to(device)
     df_stamp_enc = df_input.tail(args.seq_len)[['date']].reset_index(drop=True)
     enc_mark = time_features(df_stamp_enc, timeenc=0, freq=args.freq)
     batch_x_mark = torch.from_numpy(enc_mark).float().unsqueeze(0).to(device)
     dec_inp_label = input_scaled[-args.label_len:]
     dec_inp_pred = np.zeros([args.pred_len, args.enc_in])
     decoder_input = np.concatenate([dec_inp_label, dec_inp_pred], axis=0)
     batch_y = torch.from_numpy(decoder_input).float().unsqueeze(0).to(device)
     last_date = df_stamp_enc['date'].iloc[-1]
+    future_dates = pd.date_range(start=last_date, periods=args.pred_len + 1, freq='5T')[1:]
     df_stamp_dec = pd.DataFrame({'date': list(df_stamp_enc['date'].values[-args.label_len:]) + list(future_dates)})
     dec_mark = time_features(df_stamp_dec, timeenc=0, freq=args.freq)
     batch_y_mark = torch.from_numpy(dec_mark).float().unsqueeze(0).to(device)
     with torch.no_grad():
         outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
     prediction_scaled = outputs.detach().cpu().numpy()[0]
+    # ⭐️⭐️⭐️ 이 블록의 들여쓰기를 수정했습니다 ⭐️⭐️⭐️
     if scaler.n_features_in_ > 1:
         padding = np.zeros((prediction_scaled.shape[0], scaler.n_features_in_ - args.c_out))
         prediction_padded = np.concatenate((padding, prediction_scaled), axis=1)
         prediction = scaler.inverse_transform(prediction_scaled)
     return prediction
+# --- 4. 모드 2: 전체 기간 롤링 평가 함수 ---
 def evaluate_performance(args, model, scaler, device):
+    # ... (이전과 동일, 수정 없음) ...
+    # ⭐️ 이 함수 내부의 들여쓰기도 함께 점검하여 수정했습니다.
     df_eval = pd.read_csv(args.evaluate_file)
     df_eval['date'] = pd.to_datetime(df_eval['date'])
     cols_to_scale = ['air_pres', 'wind_dir', 'wind_speed', 'air_temp', 'residual']
     raw_data = df_eval[cols_to_scale].values
     data_scaled = scaler.transform(raw_data)
     num_samples = len(data_scaled) - args.seq_len - args.pred_len + 1
     for i in tqdm(range(num_samples), desc="Evaluating", file=sys.stderr):
         s_begin = i
         s_end = s_begin + args.seq_len
         dec_mark_pred = df_stamp[true_begin:true_end]
         batch_y_mark = np.concatenate([dec_mark_label, dec_mark_pred], axis=0)
         batch_x = torch.from_numpy(batch_x).float().unsqueeze(0).to(device)
         batch_x_mark = torch.from_numpy(batch_x_mark).float().unsqueeze(0).to(device)
         batch_y = torch.from_numpy(batch_y).float().unsqueeze(0).to(device)
         batch_y_mark = torch.from_numpy(batch_y_mark).float().unsqueeze(0).to(device)
         with torch.no_grad():
             outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark)
         pred_scaled = outputs.detach().cpu().numpy()[0]
         if scaler.n_features_in_ > 1:
             padding = np.zeros((pred_scaled.shape[0], scaler.n_features_in_ - args.c_out))
             pred_padded = np.concatenate((padding, pred_scaled), axis=1)
     return np.array(preds_unscaled), np.array(trues_unscaled)
+# --- 5. 메인 로직 ---
 if __name__ == '__main__':
+    # ... (이전과 동일, 수정 없음) ...
+    final_output = {}
     try:
         model, scaler, device = load_model_and_scaler(args)
         if args.predict_input_file:
             print("--- Running in Single Prediction Mode ---", file=sys.stderr)
             prediction = predict_future(args, model, scaler, device)
+            final_output = {"status": "success", "mode": "single_prediction", "prediction": prediction.flatten().tolist()}
         elif args.evaluate_file:
             print("--- Running in Rolling Evaluation Mode ---", file=sys.stderr)
             eval_preds, eval_trues = evaluate_performance(args, model, scaler, device)
             mae, mse, _, _, _ = metric(eval_preds, eval_trues)
+            final_output = {"status": "success", "mode": "rolling_evaluation", "mse": mse, "mae": mae, "prediction_samples": [p.flatten().tolist() for p in eval_preds[:5]]}
         else:
+            final_output = {"status": "error", "message": "No mode selected."}
     except Exception as e:
         final_output = {"status": "error", "message": str(e)}
     print(json.dumps(final_output, indent=2))