Trading-Benchmark-Model
A collection of models for portfolio construction.
import torch
import torch.nn as nn
from mamba_ssm import Mamba


class MambaTransformerSimple(nn.Module):
    """Mamba sequence encoder followed by a Transformer encoder.

    Expects input of shape [batch, time, stocks, features] and returns one
    score per stock, shape [batch, stocks].
    """

    def __init__(
        self,
        d_feat: int = 8,
        hidden_size: int = 64,
        num_layers: int = 1,
        dropout: float = 0.0,
        noise_level: float = 0.0,  # accepted for config compatibility; unused in this module
        d_state: int = 16,
        d_conv: int = 4,
        expand: int = 2,
        mask_type: str = "none",
    ) -> None:
        super().__init__()
        self.mask_type = mask_type
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            nhead=4,
            dim_feedforward=hidden_size * 4,
            dropout=dropout,
            activation="relu",
            batch_first=False,  # expects [seq_len, batch, hidden]
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=num_layers
        )
        self.input_proj = nn.Linear(d_feat, hidden_size)
        self.mamba = Mamba(
            d_model=hidden_size, d_state=d_state, d_conv=d_conv, expand=expand
        )
        self.mid_norm = nn.LayerNorm(hidden_size)
        self.out = nn.Sequential(
            nn.Linear(hidden_size, hidden_size), nn.GELU(), nn.Linear(hidden_size, 1)
        )

    def _generate_causal_mask(self, seq_len: int, device: torch.device) -> torch.Tensor:
        """Generate an additive causal attention mask (-inf strictly above the diagonal)."""
        mask = torch.triu(
            torch.ones(seq_len, seq_len, device=device) * float("-inf"), diagonal=1
        )
        return mask

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [batch, time, stocks, features]
        b, t, s, f = x.shape
        # Fold the stock dimension into the batch so each stock is an independent time series.
        x = x.permute(0, 2, 1, 3).reshape(b * s, t, f)
        x = self.input_proj(x)  # [b * s, t, h]
        mamba_out = self.mamba(x)  # [b * s, t, h]
        mamba_out = mamba_out.permute(1, 0, 2).contiguous()  # [t, b * s, h]
        mamba_out = self.mid_norm(mamba_out)
        if self.mask_type == "causal":
            mask = self._generate_causal_mask(t, x.device)
        else:
            mask = None
        tfm_out = self.transformer_encoder(mamba_out, mask=mask)  # [t, b * s, h]
        # Keep only the last time step and unfold the stock dimension.
        tfm_out = tfm_out[-1].reshape(b, s, -1)
        final_out = self.out(tfm_out).squeeze(-1)  # [b, s]
        return final_out
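
A minimal usage sketch. The batch, window, and universe sizes below are illustrative (not taken from the collection), and mamba_ssm's selective-scan kernels generally require a CUDA device:

# Illustrative forward pass: 4 samples, 60 trading days, 300 stocks, 8 features per stock.
device = torch.device("cuda")
model = MambaTransformerSimple(d_feat=8, hidden_size=64, num_layers=1, mask_type="causal").to(device)
x = torch.randn(4, 60, 300, 8, device=device)  # [batch, time, stocks, features]
scores = model(x)  # [4, 300]: one score per stock, usable for cross-sectional ranking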
Configuration (note that dropout is 0.1 here, overriding the 0.0 default in the code):
num_layers: 1
d_feat: 8
hidden_size: 64
d_state: 16
d_conv: 4
expand: 2
dropout: 0.1
noise_level: 0.0
mask_type: "none"
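
A sketch of wiring such a config into the constructor, assuming the values above are stored as YAML; the file path is hypothetical and PyYAML is assumed to be installed:

import yaml

# Hypothetical config file containing the key/value pairs listed above.
with open("configs/mamba_transformer_simple.yaml") as fh:
    cfg = yaml.safe_load(fh)

model = MambaTransformerSimple(**cfg)  # keys match the constructor arguments one-to-one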