Mineral-7B / create_multiple_safetensors.py
import json
import os

import torch
from huggingface_hub import HfApi
from safetensors.torch import save_file
# INSERT YOUR TOKEN HERE
TOKEN = 'YOUR_TOKEN_HERE'
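# The token must have write access to the target repo for the uploads below.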
repo_id = 'Luke-Bergen/Mineral-7B'
print("Creating safetensors files for Mineral-7B (7B parameters)...")
print("This creates RANDOM weights - model won't work but structure is correct!\n")
# Shard 1: Embedding + Layers 0-10 (~4.8 GB)
print("Creating model-00001-of-00003.safetensors...")
shard1 = {
    'model.embed_tokens.weight': torch.randn(32000, 4096, dtype=torch.bfloat16),
}
# Layers 0-10
for i in range(11):
    shard1.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
save_file(shard1, 'model-00001-of-00003.safetensors')
print(f"✓ Shard 1 created ({len(shard1)} tensors)")
# Shard 2: Layers 11-21 (~4.8 GB)
print("Creating model-00002-of-00003.safetensors...")
shard2 = {}
for i in range(11, 22):
    shard2.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
save_file(shard2, 'model-00002-of-00003.safetensors')
print(f"✓ Shard 2 created ({len(shard2)} tensors)")
# Shard 3: Layers 22-31 + Vision + Memory + LM Head (~4.7 GB)
print("Creating model-00003-of-00003.safetensors...")
shard3 = {}
for i in range(22, 32):
    shard3.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
# Final layers
shard3.update({
    'model.norm.weight': torch.randn(4096, dtype=torch.bfloat16),
    'lm_head.weight': torch.randn(32000, 4096, dtype=torch.bfloat16),
    # Vision encoder
    'vision_encoder.embeddings.patch_embedding.weight': torch.randn(1024, 3, 14, 14, dtype=torch.bfloat16),
    'vision_encoder.embeddings.position_embedding.weight': torch.randn(257, 1024, dtype=torch.bfloat16),
    'vision_projection.weight': torch.randn(4096, 1024, dtype=torch.bfloat16),
    # Memory layers
    'memory_layers.0.memory_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
    'memory_layers.0.memory_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
})
save_file(shard3, 'model-00003-of-00003.safetensors')
print(f"✓ Shard 3 created ({len(shard3)} tensors)")
# Create index
print("\nCreating model.safetensors.index.json...")
weight_map = {}
for key in shard1.keys():
    weight_map[key] = "model-00001-of-00003.safetensors"
for key in shard2.keys():
    weight_map[key] = "model-00002-of-00003.safetensors"
for key in shard3.keys():
    weight_map[key] = "model-00003-of-00003.safetensors"
# total_size must be the byte count of all tensors; compute it from the shards
# instead of hardcoding it (bfloat16 = 2 bytes per parameter).
total_size = sum(
    t.numel() * t.element_size()
    for shard in (shard1, shard2, shard3)
    for t in shard.values()
)
index = {
    "metadata": {
        "total_size": total_size
    },
    "weight_map": weight_map
}
with open('model.safetensors.index.json', 'w') as f:
    json.dump(index, f, indent=2)
print("✓ Index created")
# Upload all files
print("\n" + "="*60)
print("Uploading to Hugging Face...")
print("="*60)
api = HfApi(token=TOKEN)
files = [
    'model-00001-of-00003.safetensors',
    'model-00002-of-00003.safetensors',
    'model-00003-of-00003.safetensors',
    'model.safetensors.index.json'
]
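# Note: each api.upload_file() call creates its own commit in the repo;
# huggingface_hub's upload_folder() could push all four files in one commit.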
for file in files:
    print(f"\nUploading {file}...")
    try:
        api.upload_file(
            path_or_fileobj=file,
            path_in_repo=file,
            repo_id=repo_id,
            repo_type='model'
        )
        print(f"✓ {file} uploaded successfully!")
    except Exception as e:
        print(f"✗ Error uploading {file}: {e}")
print("\n" + "="*60)
print(f"✓ All safetensors uploaded!")
print(f"View your model: https://huggingface.co/{repo_id}")
print("="*60)
print("\n⚠️ WARNING: These are RANDOM weights!")
print("The model structure is correct but it won't generate meaningful text.")
print("You need to train or fine-tune to get working weights.")
# Cleanup: remove the local shard and index files now that they are uploaded
for file in files:
    if os.path.exists(file):
        os.remove(file)
print("\n✓ Local files cleaned up")