first-commit
- README.md +139 -0
- config.json +21 -0
- configuration_time_rcd.py +104 -0
- model.safetensors +3 -0
- modeling_time_rcd.py +740 -0
- preprocessor_config.json +7 -0
- processing_time_rcd.py +196 -0
- requirements.txt +4 -0
README.md
ADDED
@@ -0,0 +1,139 @@
---
license: apache-2.0
tags:
- time-series
- anomaly-detection
- zero-shot
- pytorch
- transformers
library_name: transformers
pipeline_tag: time-series-classification
---

# Time-RCD: Zero-Shot Time Series Anomaly Detection

Time-RCD is a transformer-based model for zero-shot anomaly detection in time series data.

## Quick Start

```python
import numpy as np
import torch
from transformers import AutoModel, AutoProcessor

# Load model
model = AutoModel.from_pretrained(
    "thu-sail-lab/Time_RCD",
    trust_remote_code=True
)

# Load processor
processor = AutoProcessor.from_pretrained(
    "thu-sail-lab/Time_RCD",
    trust_remote_code=True
)

# Prepare data
data = np.random.randn(10000, 1)  # [n_samples, n_features]

# Process data into windows and attention masks
processed = processor(data, return_tensors="pt")

# Get anomaly scores
with torch.no_grad():
    outputs = model(**processed)
anomaly_scores = outputs.anomaly_scores.numpy()
```

## Model Details

- **Architecture:** Transformer encoder with patch embedding
- **Parameters:** ~5M parameters
- **Patch Size:** 4
- **Hidden Dimension:** 512
- **Projection Dimension:** 256
- **Layers:** 8 transformer layers
- **Attention Heads:** 8 heads

## Features

✅ **Zero-shot detection** - No training required
✅ **Multivariate support** - Handles multiple features
✅ **Flexible windows** - Configurable window sizes
✅ **Robust normalization** - Built-in preprocessing

## Usage Examples

### Basic Anomaly Detection

```python
import numpy as np
from transformers import AutoModel

model = AutoModel.from_pretrained("thu-sail-lab/Time_RCD", trust_remote_code=True)

# Your time series (n_samples, n_features)
data = np.random.randn(10000, 1)

# Get anomaly scores: zero_shot returns per-batch lists of scores and logits
scores, logits = model.zero_shot(data)
scores = np.concatenate([s.reshape(-1) for s in scores])

# Detect anomalies (e.g., top 5%)
threshold = np.percentile(scores, 95)
anomalies = scores > threshold
```

### With Custom Processing

```python
from transformers import AutoModel, AutoProcessor

model = AutoModel.from_pretrained("thu-sail-lab/Time_RCD", trust_remote_code=True)
processor = AutoProcessor.from_pretrained("thu-sail-lab/Time_RCD", trust_remote_code=True)

# Configure processor
processor.win_size = 5000
processor.normalize = True

# Process and detect
processed = processor(data, return_tensors="pt")
outputs = model(**processed)
```
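
When the processor splits a long series into non-overlapping windows, `outputs.anomaly_scores` has one row per window. A minimal sketch of mapping those rows back to one score per original time step, assuming the default `stride == win_size` and reusing `model`, `processed`, and `data` from above:

```python
import numpy as np
import torch

with torch.no_grad():
    outputs = model(**processed)

scores = outputs.anomaly_scores.numpy()      # (num_windows, win_size)
mask = processed["attention_mask"].numpy()   # (num_windows, win_size), False on padded steps

# Windows tile the (possibly padded) series in order, so flatten and drop padding
per_step_scores = scores.reshape(-1)[mask.reshape(-1)]
assert per_step_scores.shape[0] == len(data)
```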

## Configuration

| Parameter | Default | Description |
|-----------|---------|-------------|
| patch_size | 4 | Patch size for embedding |
| d_model | 512 | Model dimension |
| d_proj | 256 | Projection dimension |
| num_layers | 8 | Transformer layers |
| num_heads | 8 | Attention heads |
| use_rope | True | Rotary position embeddings |
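
The same values can be inspected from Python before inference. The snippet below is a sketch that reuses the repository id and `data` from the examples above; architectural fields must match the released weights, while the inference window and batch size can simply be passed to `zero_shot`:

```python
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("thu-sail-lab/Time_RCD", trust_remote_code=True)
print(config.patch_size, config.d_model, config.d_proj, config.num_layers, config.num_heads)

model = AutoModel.from_pretrained("thu-sail-lab/Time_RCD", trust_remote_code=True)
# win_size / batch_size are inference-time knobs and do not touch the weights
scores, logits = model.zero_shot(data, win_size=2000, batch_size=32)
```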

## Performance

Evaluated on various time series anomaly detection benchmarks.

## Limitations

- Requires sufficient data (> window size)
- Performance varies by domain
- High-dimensional data may need preprocessing

## Citation

```bibtex
@article{time-rcd,
  title={Time-RCD: Zero-Shot Time Series Anomaly Detection},
  author={Your Name},
  year={2025}
}
```

## License

Apache 2.0
config.json
ADDED
@@ -0,0 +1,21 @@
{
  "activation": "gelu",
  "architectures": [
    "Time_RCD"
  ],
  "batch_size": 64,
  "d_ff_dropout": 0.05,
  "d_model": 512,
  "d_proj": 256,
  "dropout": 0.1,
  "dtype": "float32",
  "max_seq_len": 512,
  "model_type": "time_rcd",
  "num_features": 1,
  "num_heads": 8,
  "num_layers": 8,
  "patch_size": 16,
  "transformers_version": "4.56.2",
  "use_rope": true,
  "win_size": 5000
}
configuration_time_rcd.py
ADDED
@@ -0,0 +1,104 @@
from typing import Dict, Any

from transformers.configuration_utils import PretrainedConfig


class TimeRCDConfig(PretrainedConfig):
    """
    Configuration class for the Time_RCD model.

    This is the configuration class to store the configuration of a [`Time_RCD`] model. It is used to
    instantiate a Time_RCD model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
    Read the documentation of [`PretrainedConfig`] for more information.

    Args:
        d_model (`int`, *optional*, defaults to 512):
            Dimension of model hidden states.
        d_proj (`int`, *optional*, defaults to 256):
            Dimension of the projection layer.
        patch_size (`int`, *optional*, defaults to 4):
            Size of time series patches.
        num_layers (`int`, *optional*, defaults to 8):
            Number of transformer layers.
        num_heads (`int`, *optional*, defaults to 8):
            Number of attention heads.
        d_ff_dropout (`float`, *optional*, defaults to 0.1):
            Dropout rate for feed-forward networks.
        use_rope (`bool`, *optional*, defaults to True):
            Whether to use Rotary Position Embedding.
        activation (`str`, *optional*, defaults to "gelu"):
            Activation function name.
        num_features (`int`, *optional*, defaults to 1):
            Number of input features in the time series.
        dropout (`float`, *optional*, defaults to 0.1):
            Dropout rate for the model.
        max_seq_len (`int`, *optional*, defaults to 512):
            Maximum sequence length.
        win_size (`int`, *optional*, defaults to 5000):
            Window size for inference.
        batch_size (`int`, *optional*, defaults to 64):
            Default batch size for inference.
    """

    model_type = "time_rcd"

    def __init__(
        self,
        d_model: int = 512,
        d_proj: int = 256,
        patch_size: int = 4,
        num_layers: int = 8,
        num_heads: int = 8,
        d_ff_dropout: float = 0.1,
        use_rope: bool = True,
        activation: str = "gelu",
        num_features: int = 1,
        dropout: float = 0.1,
        max_seq_len: int = 512,
        win_size: int = 5000,
        batch_size: int = 64,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.d_model = d_model
        self.d_proj = d_proj
        self.patch_size = patch_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_ff_dropout = d_ff_dropout
        self.use_rope = use_rope
        self.activation = activation
        self.num_features = num_features
        self.dropout = dropout
        self.max_seq_len = max_seq_len
        self.win_size = win_size
        self.batch_size = batch_size

    @classmethod
    def from_pretrained_config(cls, original_config_dict: Dict[str, Any]):
        """Convert from the original (pre-HuggingFace) configuration format."""
        return cls(
            d_model=original_config_dict.get("ts_config", {}).get("d_model", 512),
            d_proj=original_config_dict.get("ts_config", {}).get("d_proj", 256),
            patch_size=original_config_dict.get("ts_config", {}).get("patch_size", 16),
            num_layers=original_config_dict.get("ts_config", {}).get("num_layers", 8),
            num_heads=original_config_dict.get("ts_config", {}).get("num_heads", 8),
            d_ff_dropout=original_config_dict.get("ts_config", {}).get("d_ff_dropout", 0.1),
            use_rope=original_config_dict.get("ts_config", {}).get("use_rope", True),
            activation=original_config_dict.get("ts_config", {}).get("activation", "gelu"),
            num_features=original_config_dict.get("ts_config", {}).get("num_features", 1),
            dropout=original_config_dict.get("dropout", 0.1),
            max_seq_len=original_config_dict.get("max_seq_len", 512),
            win_size=original_config_dict.get("win_size", 5000),
            batch_size=original_config_dict.get("batch_size", 64),
        )


# Backward compatibility alias
AnomalyCLIPConfig = TimeRCDConfig
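
A hedged sketch of how `from_pretrained_config` is meant to be used; the nested `ts_config` dict is an assumed example of the original training-config format implied by the getters above:

```python
from configuration_time_rcd import TimeRCDConfig

# Hypothetical original-format config dict (structure assumed from the getters above)
original_cfg = {
    "ts_config": {"d_model": 512, "d_proj": 256, "patch_size": 16, "num_layers": 8},
    "win_size": 5000,
    "batch_size": 64,
}

config = TimeRCDConfig.from_pretrained_config(original_cfg)
print(config.patch_size, config.win_size)  # 16 5000
```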
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73a44d4a4fd9d1e47a878d1d454df30a6f597c154b39a8ced16b3c1095dcd2ac
size 148240612
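
The entry above is a Git LFS pointer, not the weights themselves. A small sketch, assuming the file has already been downloaded locally as `model.safetensors`, for checking the download against the recorded digest and size:

```python
import hashlib
import os

path = "model.safetensors"  # assumed local download path

digest = hashlib.sha256(open(path, "rb").read()).hexdigest()
assert os.path.getsize(path) == 148240612
assert digest == "73a44d4a4fd9d1e47a878d1d454df30a6f597c154b39a8ced16b3c1095dcd2ac"
```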
modeling_time_rcd.py
ADDED
@@ -0,0 +1,740 @@
"""
Time-RCD Model for HuggingFace Integration

This file contains a simplified Time_RCD model that:
1. Inherits directly from PreTrainedModel (no extra layers)
2. Matches the original Time_RCD implementation exactly
3. Can load from the original local checkpoint
4. Provides HuggingFace compatibility

The structure is:
    Time_RCD -> PreTrainedModel (single inheritance, clean & simple)
"""

import math
import os
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Try to import einops, fall back to a manual implementation if not available
try:
    from einops import rearrange
    HAS_EINOPS = True
except ImportError:
    HAS_EINOPS = False

    def rearrange(tensor, pattern):
        # Simple fallback for the specific pattern we use
        if pattern == "two num_heads -> two num_heads 1 1":
            return tensor.unsqueeze(-1).unsqueeze(-1)
        else:
            raise NotImplementedError(f"Pattern {pattern} not implemented in fallback")

from transformers import PreTrainedModel
from transformers.modeling_outputs import ModelOutput
from transformers.utils import logging

from .configuration_time_rcd import TimeRCDConfig

logger = logging.get_logger(__name__)


@dataclass
class TimeRCDOutput(ModelOutput):
    """
    Output for the Time_RCD model.

    Args:
        anomaly_scores (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Anomaly scores for each time step.
        anomaly_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, 2)`):
            Raw logits for anomaly classification.
        reconstruction (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_features)`):
            Reconstructed time series.
        embeddings (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_features, d_proj)`):
            Time series embeddings from the encoder.
    """
    anomaly_scores: Optional[torch.FloatTensor] = None
    anomaly_logits: Optional[torch.FloatTensor] = None
    reconstruction: Optional[torch.FloatTensor] = None
    embeddings: Optional[torch.FloatTensor] = None


class Time_RCD(PreTrainedModel):
    """
    Time-RCD Model for Time Series Anomaly Detection

    This is the main model class; it inherits directly from PreTrainedModel.
    It matches the original Time_RCD implementation structure exactly:
    - TimeSeriesEncoder for encoding
    - reconstruction_head for reconstruction
    - anomaly_head for anomaly detection

    No extra inheritance layers - clean and simple.
    """

    config_class = TimeRCDConfig
    base_model_prefix = "time_rcd"
    supports_gradient_checkpointing = True

    def __init__(self, config: TimeRCDConfig):
        super().__init__(config)
        self.config = config

        # Time series encoder (matches the original implementation)
        self.ts_encoder = TimeSeriesEncoder(
            d_model=config.d_model,
            d_proj=config.d_proj,
            patch_size=config.patch_size,
            num_layers=config.num_layers,
            num_heads=config.num_heads,
            d_ff_dropout=config.d_ff_dropout,
            use_rope=config.use_rope,
            num_features=config.num_features,
            activation=config.activation
        )

        # Reconstruction head (exactly like the original)
        self.reconstruction_head = nn.Sequential(
            nn.Linear(config.d_proj, config.d_proj * 4),
            nn.GELU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.d_proj * 4, config.d_proj * 4),
            nn.GELU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.d_proj * 4, 1)  # Output: (B, seq_len, num_features, 1)
        )

        # Anomaly detection head (exactly like the original)
        self.anomaly_head = nn.Sequential(
            nn.Linear(config.d_proj, config.d_proj // 2),
            nn.GELU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.d_proj // 2, 2)  # Binary classification: (B, seq_len, num_features, 2)
        )

        # Initialize weights
        self.post_init()

    def _init_weights(self, module):
        """Initialize the weights (standard HuggingFace pattern)."""
        if isinstance(module, nn.Linear):
            std = getattr(self.config, "initializer_range", 0.02)
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(
        self,
        time_series: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, TimeRCDOutput]:
        """
        Forward pass through the Time_RCD model.

        Args:
            time_series (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_features)`):
                Input time series data.
            attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Mask to avoid performing attention on padding token indices.
            return_dict (`bool`, *optional*):
                Whether to return a ModelOutput instead of a plain tuple.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        batch_size, seq_len, num_features = time_series.shape

        # Normalize time series (exactly like the original)
        time_series = (time_series - time_series.mean(dim=1, keepdim=True)) / (time_series.std(dim=1, keepdim=True) + 1e-8)

        # Get embeddings from the encoder
        embeddings = self.ts_encoder(time_series, attention_mask)  # (B, seq_len, num_features, d_proj)

        # Get reconstruction
        reconstruction = self.reconstruction_head(embeddings)  # (B, seq_len, num_features, 1)
        reconstruction = reconstruction.squeeze(-1)  # (B, seq_len, num_features)

        # Get anomaly predictions
        anomaly_logits = self.anomaly_head(embeddings)  # (B, seq_len, num_features, 2)
        anomaly_logits = torch.mean(anomaly_logits, dim=-2)  # Average over features: (B, seq_len, 2)
        anomaly_scores = F.softmax(anomaly_logits, dim=-1)[..., 1]  # Probability of anomaly: (B, seq_len)

        if not return_dict:
            return (anomaly_scores, anomaly_logits, reconstruction, embeddings)

        return TimeRCDOutput(
            anomaly_scores=anomaly_scores,
            anomaly_logits=anomaly_logits,
            reconstruction=reconstruction,
            embeddings=embeddings
        )

    def zero_shot(self, data: np.ndarray, batch_size: int = 64, win_size: int = 5000) -> tuple:
        """
        Zero-shot inference method matching the AnomalyCLIP structure.

        The model handles normalization internally, so no external processor is needed.
        This method only handles windowing for long sequences.

        Args:
            data: Input time series data of shape (n_samples, n_features) or (n_samples,)
            batch_size: Batch size for processing
            win_size: Window size for processing (only used if the data is longer than win_size)

        Returns:
            tuple: (scores, logits) where:
                - scores: list of anomaly score arrays per batch
                - logits: list of anomaly logit arrays per batch
        """
        import tqdm
        from torch.utils.data import DataLoader, TensorDataset

        self.eval()
        device = next(self.parameters()).device

        # Ensure numpy and 2D shape
        data = np.asarray(data)
        if data.ndim == 1:
            data = data.reshape(-1, 1)

        # Adjust window size if data is too short
        if len(data) <= win_size:
            win_size = len(data)

        # Create windows if data is longer than win_size
        windows = []
        masks = []

        if len(data) > win_size:
            # Create non-overlapping windows
            for i in range(0, len(data), win_size):
                window = data[i:i + win_size]
                valid_length = len(window)
                if valid_length < win_size:
                    # Pad the last window if needed; mask out the padded tail
                    padded = np.zeros((win_size, data.shape[1]))
                    padded[:valid_length] = window
                    window = padded
                    mask = np.zeros(win_size, dtype=bool)
                    mask[:valid_length] = True
                else:
                    mask = np.ones(win_size, dtype=bool)
                windows.append(window)
                masks.append(mask)
        else:
            # Single window
            windows.append(data)
            masks.append(np.ones(len(data), dtype=bool))

        # Convert to tensors
        time_series_windows = torch.tensor(np.array(windows), dtype=torch.float32)
        attention_masks = torch.tensor(np.array(masks), dtype=torch.bool)

        # Create dataloader
        dataset = TensorDataset(time_series_windows, attention_masks)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        loop = tqdm.tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
        scores = []
        logits = []

        with torch.no_grad():
            for i, (batch_ts, batch_mask) in loop:
                batch_ts = batch_ts.to(device)
                batch_mask = batch_mask.to(device)

                # Forward pass (the model normalizes internally)
                outputs = self(
                    time_series=batch_ts,
                    attention_mask=batch_mask,
                    return_dict=True
                )

                # Extract scores and logits
                anomaly_probs = outputs.anomaly_scores.cpu().numpy()  # (B, seq_len)
                anomaly_logits = outputs.anomaly_logits  # (B, seq_len, 2)
                logit_diff = anomaly_logits[..., 1] - anomaly_logits[..., 0]  # (B, seq_len)

                scores.append(anomaly_probs)
                logits.append(logit_diff.cpu().numpy())

        return scores, logits

    @classmethod
    def from_original_checkpoint(cls, checkpoint_path: str, config: Optional[TimeRCDConfig] = None):
        """
        Load the model from the original checkpoint format.

        Args:
            checkpoint_path: Path to the original .pth checkpoint file
            config: Model configuration (optional - auto-detected from the checkpoint if not provided)

        Returns:
            Loaded Time_RCD model
        """
        print(f"Loading Time_RCD from checkpoint: {checkpoint_path}")

        # Load checkpoint
        if not os.path.exists(checkpoint_path):
            raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")

        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        print(f"Checkpoint keys: {list(checkpoint.keys())}")

        # Auto-detect config from checkpoint if not provided
        if config is None:
            print("📋 Auto-detecting config from checkpoint...")
            if 'config' in checkpoint:
                ckpt_config = checkpoint['config']
                ts_config = ckpt_config.get('ts_config', {})

                config = TimeRCDConfig(
                    d_model=ts_config.get('d_model', 512),
                    d_proj=ts_config.get('d_proj', 256),
                    patch_size=ts_config.get('patch_size', 4),  # Important!
                    num_layers=ts_config.get('num_layers', 8),
                    num_heads=ts_config.get('num_heads', 8),
                    d_ff_dropout=ts_config.get('d_ff_dropout', 0.1),
                    use_rope=ts_config.get('use_rope', True),
                    activation=ts_config.get('activation', 'gelu'),
                    num_features=ts_config.get('num_features', 1),
                    max_seq_len=ckpt_config.get('max_seq_len', 512),
                    win_size=ckpt_config.get('win_size', 5000),
                    batch_size=ckpt_config.get('batch_size', 64),
                    dropout=0.1
                )
                print(f"✅ Auto-detected config: patch_size={config.patch_size}, d_model={config.d_model}, d_proj={config.d_proj}")
            else:
                print("⚠️ No config found in checkpoint, using defaults")
                config = TimeRCDConfig()

        # Create model
        model = cls(config)

        # Handle different checkpoint formats
        if 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        elif 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint

        # Remove 'module.' prefix if present (from DDP training)
        new_state_dict = {}
        for key, value in state_dict.items():
            if key.startswith('module.'):
                new_key = key[7:]  # Remove 'module.' prefix
            else:
                new_key = key
            new_state_dict[new_key] = value

        # Load state dict with flexible matching
        try:
            model.load_state_dict(new_state_dict, strict=False)
            print("✅ Successfully loaded checkpoint with flexible matching")
        except Exception as e:
            print(f"⚠️ Error loading state dict: {e}")
            print("Available checkpoint keys:", list(new_state_dict.keys())[:10])
            print("Model keys:", list(model.state_dict().keys())[:10])

        return model

    def save_pretrained(self, save_directory: str, **kwargs):
        """
        Save the model in HuggingFace format.

        This allows you to use .from_pretrained() later.
        """
        super().save_pretrained(save_directory, **kwargs)
        print(f"✅ Model saved to {save_directory}")
        print("You can now load it with:")
        print(f"model = Time_RCD.from_pretrained('{save_directory}')")


class TimeSeriesEncoder(nn.Module):
    """
    Time Series Encoder with PatchTST-like patching and RoPE.

    Args:
        d_model (int): Model dimension
        d_proj (int): Projection dimension
        patch_size (int): Size of each patch
        num_layers (int): Number of encoder layers
        num_heads (int): Number of attention heads
        d_ff_dropout (float): Dropout rate
        max_total_tokens (int): Maximum sequence length
        use_rope (bool): Use RoPE if True
        num_features (int): Number of features in the time series
        activation (str): "relu" or "gelu"

    Inputs:
        time_series (Tensor): Shape (batch_size, seq_len, num_features)
        mask (Tensor): Shape (batch_size, seq_len)

    Outputs:
        local_embeddings (Tensor): Shape (batch_size, seq_len, num_features, d_proj)
    """

    def __init__(self, d_model=2048, d_proj=512, patch_size=32, num_layers=6, num_heads=8,
                 d_ff_dropout=0.1, max_total_tokens=8192, use_rope=True, num_features=1,
                 activation="relu"):
        super().__init__()
        self.patch_size = patch_size
        self.d_model = d_model
        self.d_proj = d_proj
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_ff_dropout = d_ff_dropout
        self.max_total_tokens = max_total_tokens
        self.use_rope = use_rope
        self.num_features = num_features
        self.activation = activation

        # Patch embedding layer
        self.embedding_layer = nn.Linear(patch_size, d_model)

        if use_rope:
            # Initialize RoPE and custom encoder
            self.rope_embedder = RotaryEmbedding(d_model)
            self.transformer_encoder = CustomTransformerEncoder(
                d_model=d_model,
                nhead=num_heads,
                dim_feedforward=d_model * 4,
                dropout=d_ff_dropout,
                activation=activation,
                num_layers=num_layers,
                num_features=num_features
            )
        else:
            # Standard encoder without RoPE
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=num_heads,
                dim_feedforward=d_model * 4,
                dropout=d_ff_dropout,
                batch_first=True,
                activation=activation
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)

        # Output projection layers
        self.projection_layer = nn.Linear(d_model, patch_size * d_proj)
        self._init_parameters()

    def _init_parameters(self):
        for name, param in self.named_parameters():
            if 'weight' in name and 'linear' in name:
                if self.activation == "relu":
                    nn.init.kaiming_uniform_(param, nonlinearity='relu')
                elif self.activation == "gelu":
                    nn.init.kaiming_uniform_(param, nonlinearity='gelu')
            elif 'bias' in name:
                nn.init.constant_(param, 0.0)

    def forward(self, time_series, mask=None):
        """Forward pass to generate local embeddings."""
        if time_series.dim() == 2:
            time_series = time_series.unsqueeze(-1)
        device = time_series.device
        B, seq_len, num_features = time_series.size()
        assert num_features == self.num_features, f"Number of features mismatch with data: {num_features} vs param: {self.num_features}"

        # Create mask if not provided
        if mask is None:
            mask = torch.ones(B, seq_len, dtype=torch.bool, device=device)

        assert mask.size() == (B, seq_len), f"Mask shape mismatch: expected ({B}, {seq_len}), got {mask.size()}"

        # Pad sequence to be divisible by patch_size
        padded_length = math.ceil(seq_len / self.patch_size) * self.patch_size
        if padded_length > seq_len:
            pad_amount = padded_length - seq_len
            time_series = F.pad(time_series, (0, 0, 0, pad_amount), value=0)
            mask = F.pad(mask, (0, pad_amount), value=0)

        # Convert to patches
        num_patches = padded_length // self.patch_size
        total_length = num_patches * num_features
        patches = time_series.view(B, num_patches, self.patch_size, num_features)
        patches = patches.permute(0, 3, 1, 2).contiguous()  # (B, num_features, num_patches, patch_size)
        patches = patches.view(B, num_features * num_patches, self.patch_size)  # (B, L, patch_size)

        # Create feature IDs for patches
        feature_id = torch.arange(num_features, device=device).repeat_interleave(num_patches)  # (num_features * num_patches = L,)
        feature_id = feature_id.unsqueeze(0).expand(B, -1)  # (B, L)

        # Embed patches
        embedded_patches = self.embedding_layer(patches)  # (B, L, d_model)

        # Create patch-level mask
        mask = mask.view(B, num_patches, self.patch_size)
        patch_mask = mask.sum(dim=-1) > 0  # (B, num_patches)
        full_mask = patch_mask.unsqueeze(1).expand(-1, num_features, -1)  # (B, num_features, num_patches)
        full_mask = full_mask.reshape(B, num_features * num_patches)  # (B, L)

        # Generate RoPE frequencies if applicable
        if self.use_rope:
            freqs = self.rope_embedder(total_length).to(device)
        else:
            freqs = None

        # Encode sequence
        if num_features > 1:
            output = self.transformer_encoder(
                embedded_patches,
                freqs=freqs,
                src_id=feature_id,
                attn_mask=full_mask
            )
        else:
            output = self.transformer_encoder(
                embedded_patches,
                freqs=freqs,
                attn_mask=full_mask
            )

        # Extract and project local embeddings
        patch_embeddings = output  # (B, L, d_model)
        patch_proj = self.projection_layer(patch_embeddings)  # (B, L, patch_size * d_proj)
        local_embeddings = patch_proj.view(B, num_features, num_patches, self.patch_size, self.d_proj)
        local_embeddings = local_embeddings.permute(0, 2, 3, 1, 4)  # (B, num_patches, patch_size, num_features, d_proj)
        local_embeddings = local_embeddings.view(B, -1, num_features, self.d_proj)[:, :seq_len, :, :]  # (B, seq_len, num_features, d_proj)

        return local_embeddings


class CustomTransformerEncoder(nn.Module):
    """Stack of Transformer Encoder Layers."""

    def __init__(self, d_model, nhead, dim_feedforward, dropout, activation, num_layers, num_features):
        super().__init__()
        self.layers = nn.ModuleList([
            TransformerEncoderLayerWithRoPE(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                num_features=num_features
            ) for _ in range(num_layers)
        ])

    def forward(self, src, freqs, src_id=None, attn_mask=None):
        output = src
        for layer in self.layers:
            output = layer(output, freqs, src_id, attn_mask=attn_mask)
        return output


class TransformerEncoderLayerWithRoPE(nn.Module):
    """Transformer Encoder Layer with RoPE and RMSNorm."""

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu", num_features=1):
        super().__init__()
        self.self_attn = MultiheadAttentionWithRoPE(d_model, nhead, num_features)
        self.dropout = nn.Dropout(dropout)
        self.input_norm = RMSNorm(d_model)
        self.output_norm = RMSNorm(d_model)
        self.mlp = LlamaMLP(d_model, dim_feedforward)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, src, freqs, src_id=None, attn_mask=None):
        residual = src
        src = self.input_norm(src)
        src = self.self_attn(src, src, src, freqs, src_id, src_id, attn_mask=attn_mask)
        src = src + residual
        residual = src
        src = self.output_norm(src)
        src = self.mlp(src)
        src = residual + self.dropout2(src)
        return src


class RMSNorm(nn.Module):
    """Root Mean Square Normalization layer."""

    def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None:
        super().__init__()
        self.scale = nn.Parameter(torch.ones(size))
        self.eps = eps
        self.dim = dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        norm_x = x.to(torch.float32).pow(2).mean(dim=self.dim, keepdim=True)
        x_normed = x * torch.rsqrt(norm_x + self.eps)
        return (self.scale * x_normed).type_as(x)


class RotaryEmbedding(nn.Module):
    """Rotary Positional Embedding for injecting positional information."""

    def __init__(self, dim):
        super().__init__()
        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq)

    def forward(self, seq_len):
        t = torch.arange(seq_len, device=self.inv_freq.device).type_as(self.inv_freq)
        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
        return freqs  # Shape: (seq_len, dim // 2)


class BinaryAttentionBias(nn.Module):
    """Binary variate attention bias for multivariate time series."""

    def __init__(self, num_heads: int):
        super().__init__()
        self.num_heads = num_heads
        self.emd = nn.Embedding(2, num_heads)

    def forward(self, query_id: torch.Tensor, kv_id: torch.Tensor) -> torch.Tensor:
        ind = torch.eq(query_id.unsqueeze(-1), kv_id.unsqueeze(-2))
        ind = ind.unsqueeze(1)  # (batch_size, 1, q_len, kv_len)
        weight = rearrange(self.emd.weight, "two num_heads -> two num_heads 1 1")  # (2, num_heads, 1, 1)
        bias = ~ind * weight[:1] + ind * weight[1:]  # (batch_size, num_heads, q_len, kv_len)
        return bias


class MultiheadAttentionWithRoPE(nn.Module):
    """
    Multi-head Attention with Rotary Positional Encoding (RoPE), non-causal by default.

    Note: this module applies BinaryAttentionBias when feature IDs are provided
    (i.e., for multivariate inputs).
    """

    def __init__(self, embed_dim, num_heads, num_features):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.num_features = num_features
        assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads"

        # Linear projections for Q, K, V, and output
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=False)
        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=False)
        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=False)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=False)

        # Binary attention bias for multivariate time series
        if num_features > 1:
            self.binary_attention_bias = BinaryAttentionBias(num_heads)

    def apply_rope(self, x, freqs):
        """Apply Rotary Positional Encoding to the input tensor."""
        B, seq_len, embed_dim = x.shape
        assert embed_dim == self.embed_dim, "Embedding dimension mismatch"
        assert freqs.shape == (seq_len, embed_dim // 2), "freqs shape mismatch"

        # Reshape for rotation: split embed_dim into pairs
        x_ = x.view(B, seq_len, embed_dim // 2, 2)
        cos = freqs.cos().unsqueeze(0)  # (1, seq_len, embed_dim // 2)
        sin = freqs.sin().unsqueeze(0)  # (1, seq_len, embed_dim // 2)

        # Apply rotation to each pair
        x_rot = torch.stack(
            [
                x_[..., 0] * cos - x_[..., 1] * sin,
                x_[..., 0] * sin + x_[..., 1] * cos,
            ],
            dim=-1
        )
        return x_rot.view(B, seq_len, embed_dim)

    def forward(self, query, key, value, freqs, query_id=None, kv_id=None, attn_mask=None):
        """
        Forward pass for multi-head attention with RoPE.

        Args:
            query (Tensor): Shape (B, T, C)
            key (Tensor): Shape (B, T, C)
            value (Tensor): Shape (B, T, C)
            freqs (Tensor): RoPE frequencies, shape (T, embed_dim // 2)
            query_id (Tensor, optional): Shape (B, q_len), feature IDs for query
            kv_id (Tensor, optional): Shape (B, kv_len), feature IDs for key/value
            attn_mask (Tensor, optional): Shape (B, T), True for valid positions, False for padding.

        Returns:
            Tensor: Attention output, shape (B, T, C)
        """
        B, T, C = query.shape
        assert key.shape == (B, T, C) and value.shape == (B, T, C), "query, key, value shapes must match"

        # Project inputs to Q, K, V
        Q = self.q_proj(query)
        K = self.k_proj(key)
        V = self.v_proj(value)

        # Apply RoPE to Q and K
        Q_rot = self.apply_rope(Q, freqs)
        K_rot = self.apply_rope(K, freqs)

        # Reshape for multi-head attention
        Q_rot = Q_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2)  # (B, nh, T, hs)
        K_rot = K_rot.view(B, T, self.num_heads, self.head_dim).transpose(1, 2)  # (B, nh, T, hs)
        V = V.view(B, T, self.num_heads, self.head_dim).transpose(1, 2)  # (B, nh, T, hs)

        # Prepare attention mask for padding
        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(1).unsqueeze(2)  # (B, 1, 1, T)

        if query_id is not None and kv_id is not None:
            # Add binary attention bias
            attn_bias = self.binary_attention_bias(query_id, kv_id)  # (B, num_heads, q_len, kv_len)
            scores = torch.matmul(Q_rot, K_rot.transpose(-2, -1)) / math.sqrt(self.head_dim)  # (B, num_heads, q_len, kv_len)
            scores += attn_bias
            if attn_mask is not None:
                scores = scores.masked_fill(~attn_mask, float('-inf'))
            attn_weights = F.softmax(scores, dim=-1)  # (B, num_heads, q_len, kv_len)
            y = torch.matmul(attn_weights, V)  # (B, num_heads, q_len, hs)
        else:
            # Compute scaled dot-product attention (non-causal) without binary bias
            y = F.scaled_dot_product_attention(
                Q_rot, K_rot, V,
                attn_mask=attn_mask,
                is_causal=False  # Non-causal attention for encoder
            )  # (B, nh, T, hs)

        # Reshape and project output
        y = y.transpose(1, 2).contiguous().view(B, T, C)
        y = self.out_proj(y)
        return y


class LlamaMLP(nn.Module):
    def __init__(self, d_model, dim_feedforward=2048):
        super().__init__()
        self.hidden_size = d_model
        self.intermediate_size = dim_feedforward
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=True)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=True)
        self.act_fn = F.gelu

    def forward(self, x):
        down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
        return down_proj


# For backward compatibility, create aliases
TimeRCDModel = Time_RCD  # Alias for consistency
AnomalyCLIPModel = Time_RCD  # For existing code that uses this name
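
A short sketch of the conversion path the two helper methods above enable: loading an original `.pth` checkpoint and re-saving it in HuggingFace format. The checkpoint path and package name are placeholders; because the module imports its config relatively, it has to be importable as part of a package:

```python
# Hypothetical package name; modeling_time_rcd uses a relative import for TimeRCDConfig
from time_rcd.modeling_time_rcd import Time_RCD

# Placeholder path to an original training checkpoint
model = Time_RCD.from_original_checkpoint("checkpoints/time_rcd_original.pth")

# Re-save in HuggingFace format so from_pretrained() works afterwards
model.save_pretrained("time_rcd_hf")
reloaded = Time_RCD.from_pretrained("time_rcd_hf")
```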
preprocessor_config.json
ADDED
@@ -0,0 +1,7 @@
{
  "processor_type": "TimeRCDProcessor",
  "win_size": 5000,
  "stride": 5000,
  "normalize": true,
  "pad_to_multiple": true
}
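
These fields map one-to-one onto the `TimeRCDProcessor` constructor defined in `processing_time_rcd.py` below (`processor_type` is only metadata), so the equivalent processor can also be built by hand; a sketch:

```python
from processing_time_rcd import TimeRCDProcessor

processor = TimeRCDProcessor(win_size=5000, stride=5000, normalize=True, pad_to_multiple=True)
```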
processing_time_rcd.py
ADDED
@@ -0,0 +1,196 @@
"""
Time_RCD Processor for Time Series Preprocessing

This processor handles:
- Data windowing / sliding windows
- Normalization (per-feature z-score over the input series)
- Padding to window size multiples
- Creating attention masks

Usage:
    >>> from huggingface_time_rcd import TimeRCDProcessor
    >>> processor = TimeRCDProcessor(win_size=5000, normalize=True)
    >>> inputs = processor(time_series_data)
    >>> # inputs contains: {'time_series': tensor, 'attention_mask': tensor}
"""

import numpy as np
import torch
from typing import Optional, Dict, Any


class TimeRCDProcessor:
    """
    Processor for preparing time series data for the Time_RCD model.

    Mimics the AnomalyClipDataset preprocessing pipeline:
    - Creates sliding windows
    - Normalizes the series per feature (z-score normalization)
    - Pads to window_size multiples
    - Creates attention masks for padding

    Parameters
    ----------
    win_size : int, default=5000
        Window size for creating sliding windows
    stride : int, default=None
        Stride for sliding windows. If None, uses win_size (non-overlapping)
    normalize : bool, default=True
        Whether to z-score normalize the input (zero mean, unit variance per feature)
    pad_to_multiple : bool, default=True
        Whether to pad data to make its length a multiple of window_size
    """

    def __init__(
        self,
        win_size: int = 5000,
        stride: Optional[int] = None,
        normalize: bool = True,
        pad_to_multiple: bool = True,
    ):
        self.win_size = win_size
        self.stride = stride if stride is not None else win_size
        self.normalize = normalize
        self.pad_to_multiple = pad_to_multiple

    def __call__(
        self,
        time_series: np.ndarray,
        return_tensors: Optional[str] = "pt",
    ) -> Dict[str, Any]:
        """
        Preprocess time series data.

        Parameters
        ----------
        time_series : np.ndarray
            Input time series data of shape (n_samples, n_features) or (n_samples,)
        return_tensors : str, optional
            Type of tensors to return: "pt" (PyTorch) or None

        Returns
        -------
        dict
            Dictionary containing:
            - 'time_series': Processed time series windows
            - 'attention_mask': Attention masks indicating real vs padded data
        """
        # Ensure numpy array
        time_series = np.asarray(time_series)

        # Ensure 2D shape (N, C)
        if time_series.ndim == 1:
            time_series = time_series.reshape(-1, 1)

        original_length = time_series.shape[0]

        # Normalize if requested
        if self.normalize:
            time_series = self._normalize_data(time_series)

        # Pad to multiple if requested
        if self.pad_to_multiple:
            time_series, padding_mask = self._pad_data_to_multiple(time_series)
        else:
            padding_mask = np.ones(time_series.shape[0], dtype=bool)

        # Create windows
        windows, masks = self._create_windows(time_series, padding_mask)

        # Convert to tensors if requested
        if return_tensors == "pt":
            windows = torch.tensor(windows, dtype=torch.float32)
            masks = torch.tensor(masks, dtype=torch.bool)

        return {
            "time_series": windows,
            "attention_mask": masks
        }

    def _normalize_data(self, data: np.ndarray, epsilon: float = 1e-8) -> np.ndarray:
        """Normalize data using mean and standard deviation (per feature)."""
        mean = np.mean(data, axis=0)
        std = np.std(data, axis=0)
        std = np.where(std == 0, epsilon, std)
        return (data - mean) / std

    def _pad_data_to_multiple(self, data: np.ndarray) -> tuple:
        """
        Pad data to make its length a multiple of window_size.
        Returns padded data and padding mask.
        """
        data_length = data.shape[0]
        remainder = data_length % self.win_size

        if remainder == 0:
            # No padding needed
            padding_mask = np.ones(data_length, dtype=bool)
            return data, padding_mask

        # Calculate padding needed
        padding_length = self.win_size - remainder

        # Pad by repeating the last row
        last_row = data[-1:, :]
        padding_data = np.repeat(last_row, padding_length, axis=0)
        padded_data = np.vstack([data, padding_data])

        # Create padding mask: True for real data, False for padded data
        padding_mask = np.ones(data_length + padding_length, dtype=bool)
        padding_mask[data_length:] = False

        return padded_data, padding_mask

    def _create_windows(self, data: np.ndarray, padding_mask: np.ndarray) -> tuple:
        """
        Create sliding windows from time series data.
        Returns windows and corresponding masks.
        """
        windows = []
        masks = []

        for i in range(0, len(data) - self.win_size + 1, self.stride):
            window = data[i:i + self.win_size, :]
            mask = padding_mask[i:i + self.win_size]
            windows.append(window)
            masks.append(mask)

        return np.array(windows), np.array(masks)

    def save_pretrained(self, save_directory: str):
        """Save the processor configuration to a directory."""
        import json
        import os

        os.makedirs(save_directory, exist_ok=True)

        config = {
            "processor_type": "TimeRCDProcessor",
            "win_size": self.win_size,
            "stride": self.stride,
            "normalize": self.normalize,
            "pad_to_multiple": self.pad_to_multiple,
        }

        with open(os.path.join(save_directory, "preprocessor_config.json"), "w") as f:
            json.dump(config, f, indent=2)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str):
        """Load the processor from a saved configuration."""
        import json
        import os

        config_file = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")

        if not os.path.exists(config_file):
            raise FileNotFoundError(f"Preprocessor config not found at {config_file}")

        with open(config_file, "r") as f:
            config = json.load(f)

        # Remove processor_type from config
        config.pop("processor_type", None)

        return cls(**config)
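
A sketch of the processor in isolation, showing the shapes it produces when the series length is not an exact multiple of the window size (window size and data are illustrative):

```python
import numpy as np
from processing_time_rcd import TimeRCDProcessor

processor = TimeRCDProcessor(win_size=1000, stride=1000, normalize=True)

data = np.random.randn(2500, 1)               # 2.5 windows -> padded to 3000 steps
inputs = processor(data, return_tensors="pt")

print(inputs["time_series"].shape)            # torch.Size([3, 1000, 1])
print(inputs["attention_mask"].shape)         # torch.Size([3, 1000])
print(inputs["attention_mask"][-1].sum())     # tensor(500): real steps in the last window
```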
requirements.txt
ADDED
@@ -0,0 +1,4 @@
torch>=2.0.0
transformers>=4.30.0
numpy>=1.20.0
scikit-learn>=1.0.0