|
|
import json
import os

import torch
from huggingface_hub import HfApi
from safetensors.torch import save_file
|
|
|
|
|
|
|
|
# Hugging Face access token.  The HF_TOKEN environment variable takes
# precedence so a real secret never has to be committed to this file.  When
# it is unset or empty, the original placeholder literal (German for
# "YOUR_TOKEN_HERE") is used, exactly as before.
TOKEN = os.environ.get('HF_TOKEN') or 'DEIN_TOKEN_HIER'

# Hub repository that receives the generated files.
repo_id = 'Luke-Bergen/Mineral-7B'

print("Creating safetensors files for Mineral-7B (7B parameters)...")
print("This creates RANDOM weights - model won't work but structure is correct!\n")
|
|
|
|
|
|
|
|
# ---- Shard 1 of 3: embedding table plus layers 0-10 ----
print("Creating model-00001-of-00003.safetensors...")

# (key suffix, tensor shape) for each per-layer weight, listed in the order
# the keys are inserted into the shard dict.
layer_shapes = (
    ('self_attn.q_proj.weight', (4096, 4096)),
    ('self_attn.k_proj.weight', (4096, 4096)),
    ('self_attn.v_proj.weight', (4096, 4096)),
    ('self_attn.o_proj.weight', (4096, 4096)),
    ('mlp.gate_proj.weight', (11008, 4096)),
    ('mlp.up_proj.weight', (11008, 4096)),
    ('mlp.down_proj.weight', (4096, 11008)),
    ('input_layernorm.weight', (4096,)),
    ('post_attention_layernorm.weight', (4096,)),
)

# The embedding tensor goes in first, followed by the per-layer weights.
shard1 = {}
shard1['model.embed_tokens.weight'] = torch.randn(32000, 4096, dtype=torch.bfloat16)

for i in range(11):
    for suffix, shape in layer_shapes:
        # Random bfloat16 values: only names and shapes are meaningful.
        shard1[f'model.layers.{i}.{suffix}'] = torch.randn(*shape, dtype=torch.bfloat16)

save_file(shard1, 'model-00001-of-00003.safetensors')
print(f"✓ Shard 1 created ({len(shard1)} tensors)")
|
|
|
|
|
|
|
|
# ---- Shard 2 of 3: layers 11-21 ----
print("Creating model-00002-of-00003.safetensors...")

# (key suffix, tensor shape) for each per-layer weight, in insertion order.
layer_shapes = (
    ('self_attn.q_proj.weight', (4096, 4096)),
    ('self_attn.k_proj.weight', (4096, 4096)),
    ('self_attn.v_proj.weight', (4096, 4096)),
    ('self_attn.o_proj.weight', (4096, 4096)),
    ('mlp.gate_proj.weight', (11008, 4096)),
    ('mlp.up_proj.weight', (11008, 4096)),
    ('mlp.down_proj.weight', (4096, 11008)),
    ('input_layernorm.weight', (4096,)),
    ('post_attention_layernorm.weight', (4096,)),
)

shard2 = {}
for i in range(11, 22):
    # Build one layer's worth of random bfloat16 tensors, then merge it in.
    shard2.update({
        f'model.layers.{i}.{suffix}': torch.randn(*shape, dtype=torch.bfloat16)
        for suffix, shape in layer_shapes
    })

save_file(shard2, 'model-00002-of-00003.safetensors')
print(f"✓ Shard 2 created ({len(shard2)} tensors)")
|
|
|
|
|
|
|
|
# ---- Shard 3 of 3: layers 22-31 plus the non-layer tensors ----
print("Creating model-00003-of-00003.safetensors...")

# (key suffix, tensor shape) for each per-layer weight, in insertion order.
layer_shapes = (
    ('self_attn.q_proj.weight', (4096, 4096)),
    ('self_attn.k_proj.weight', (4096, 4096)),
    ('self_attn.v_proj.weight', (4096, 4096)),
    ('self_attn.o_proj.weight', (4096, 4096)),
    ('mlp.gate_proj.weight', (11008, 4096)),
    ('mlp.up_proj.weight', (11008, 4096)),
    ('mlp.down_proj.weight', (4096, 11008)),
    ('input_layernorm.weight', (4096,)),
    ('post_attention_layernorm.weight', (4096,)),
)

# (full key, tensor shape) for the tensors appended after the last layer:
# final norm, output head, vision encoder/projection, and memory attention.
extra_shapes = (
    ('model.norm.weight', (4096,)),
    ('lm_head.weight', (32000, 4096)),
    ('vision_encoder.embeddings.patch_embedding.weight', (1024, 3, 14, 14)),
    ('vision_encoder.embeddings.position_embedding.weight', (257, 1024)),
    ('vision_projection.weight', (4096, 1024)),
    ('memory_layers.0.memory_attn.q_proj.weight', (4096, 4096)),
    ('memory_layers.0.memory_attn.k_proj.weight', (4096, 4096)),
)

shard3 = {}
for i in range(22, 32):
    shard3.update({
        f'model.layers.{i}.{suffix}': torch.randn(*shape, dtype=torch.bfloat16)
        for suffix, shape in layer_shapes
    })

# Same dtype and random initialisation as the layer weights.
shard3.update({
    key: torch.randn(*shape, dtype=torch.bfloat16)
    for key, shape in extra_shapes
})

save_file(shard3, 'model-00003-of-00003.safetensors')
print(f"✓ Shard 3 created ({len(shard3)} tensors)")
|
|
|
|
|
|
|
|
# ---- Sharded-checkpoint index: tensor name -> shard file ----
print("\nCreating model.safetensors.index.json...")

# Each in-memory shard paired with the file it was saved to, in shard order
# so the weight_map keys keep the same ordering as before.
shard_files = (
    (shard1, "model-00001-of-00003.safetensors"),
    (shard2, "model-00002-of-00003.safetensors"),
    (shard3, "model-00003-of-00003.safetensors"),
)

weight_map = {}
total_size = 0
for shard, shard_name in shard_files:
    for key, tensor in shard.items():
        weight_map[key] = shard_name
        # Derive the byte count from the tensors actually written so the
        # metadata cannot drift from the shard contents when a shape, dtype
        # or layer count changes.
        total_size += tensor.numel() * tensor.element_size()

index = {
    "metadata": {
        "total_size": total_size,
    },
    "weight_map": weight_map,
}

# Explicit encoding keeps the output independent of the platform default.
with open('model.safetensors.index.json', 'w', encoding='utf-8') as f:
    json.dump(index, f, indent=2)

print("✓ Index created")
|
|
|
|
|
|
|
|
# ---- Upload the generated files to the Hub, then remove local copies ----
print("\n" + "="*60)
print("Uploading to Hugging Face...")
print("="*60)

api = HfApi(token=TOKEN)

files = [
    'model-00001-of-00003.safetensors',
    'model-00002-of-00003.safetensors',
    'model-00003-of-00003.safetensors',
    'model.safetensors.index.json',
]

# Files whose upload raised.  Tracked so the summary below is truthful and
# so the cleanup step does not delete data that never reached the Hub.
failed = []

for file in files:
    print(f"\nUploading {file}...")
    try:
        api.upload_file(
            path_or_fileobj=file,
            path_in_repo=file,
            repo_id=repo_id,
            repo_type='model',
        )
    except Exception as e:
        # Best effort: report the failure and carry on with the other files.
        print(f"✗ Error uploading {file}: {e}")
        failed.append(file)
    else:
        print(f"✓ {file} uploaded successfully!")

print("\n" + "="*60)
if failed:
    print(f"✗ {len(failed)} of {len(files)} uploads failed: {', '.join(failed)}")
else:
    print("✓ All safetensors uploaded!")
print(f"View your model: https://huggingface.co/{repo_id}")
print("="*60)
print("\n⚠️ WARNING: These are RANDOM weights!")
print("The model structure is correct but it won't generate meaningful text.")
print("You need to train or fine-tune to get working weights.")

# Remove only the files that were uploaded; anything that failed is kept on
# disk so it can be retried without regenerating the shards.
for file in files:
    if file in failed:
        continue
    if os.path.exists(file):
        os.remove(file)

if failed:
    print(f"\n✓ Local files cleaned up (kept for retry: {', '.join(failed)})")
else:
    print("\n✓ Local files cleaned up")