# Provenance (from the Hugging Face Hub file view):
#   uploader: Azuremis
#   commit:   "Upload Two-Tower model" (cb576c3, verified)
#   size:     533 bytes as uploaded
# Run configuration for a two-tower retrieval model (per the repo and project
# names below). Nested sections use 2-space indentation; each section carries
# its own `type` key, so the nesting is required to keep those keys distinct.

batch_size: 256
checkpoint_dir: checkpoints
data: data/processed/classic_triplets.parquet
device: cpu

embedding:
  embedding_dim: 64
  type: lookup

encoder:
  arch: mean
  hidden_dim: 128
  tied_weights: true

epochs: 3

huggingface:
  private: false
  push_to_hub: true
  repo_id: mlx7-two-tower-retrieval

# Written with an explicit ".0" so YAML 1.1 loaders (e.g. PyYAML) resolve it
# as a float; a bare `1e-3` is loaded as the string "1e-3" by those parsers.
# NOTE(review): same value as optimizer.lr below; confirm which one the
# consumer actually reads.
learning_rate: 1.0e-3

loss:
  margin: 0.2
  type: triplet

# NOTE(review): same value as tokeniser.max_len below; confirm whether both
# are needed or one is derived from the other.
max_sequence_length: 64

optimizer:
  lr: 0.001
  type: adamw

tokeniser:
  max_len: 64
  type: char

use_wandb: true

wandb:
  entity: azuremis
  project: two-tower-retrieval