Mineral-7B / create_multiple_safetensors.py
import json
import os

import torch
from huggingface_hub import HfApi
from safetensors.torch import save_file
# INSERT YOUR TOKEN HERE
TOKEN = 'YOUR_TOKEN_HERE'
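# The token must have write access to the target repo for the uploads below.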
repo_id = 'Luke-Bergen/Mineral-7B'
print("Creating safetensors files for Mineral-7B (7B parameters)...")
print("This creates RANDOM weights - model won't work but structure is correct!\n")
# Shard 1: Embedding + Layers 0-10 (~4.8 GB)
print("Creating model-00001-of-00003.safetensors...")
shard1 = {
    'model.embed_tokens.weight': torch.randn(32000, 4096, dtype=torch.bfloat16),
}
# Layers 0-10
for i in range(11):
    shard1.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
save_file(shard1, 'model-00001-of-00003.safetensors')
print(f"✓ Shard 1 created ({len(shard1)} tensors)")
# Shard 2: Layers 11-21 (~4.8 GB)
print("Creating model-00002-of-00003.safetensors...")
shard2 = {}
for i in range(11, 22):
    shard2.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
save_file(shard2, 'model-00002-of-00003.safetensors')
print(f"✓ Shard 2 created ({len(shard2)} tensors)")
# Shard 3: Layers 22-31 + Vision + Memory + LM Head (~4.7 GB)
print("Creating model-00003-of-00003.safetensors...")
shard3 = {}
for i in range(22, 32):
    shard3.update({
        f'model.layers.{i}.self_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.v_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.self_attn.o_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.gate_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.up_proj.weight': torch.randn(11008, 4096, dtype=torch.bfloat16),
        f'model.layers.{i}.mlp.down_proj.weight': torch.randn(4096, 11008, dtype=torch.bfloat16),
        f'model.layers.{i}.input_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
        f'model.layers.{i}.post_attention_layernorm.weight': torch.randn(4096, dtype=torch.bfloat16),
    })
# Final layers
shard3.update({
    'model.norm.weight': torch.randn(4096, dtype=torch.bfloat16),
    'lm_head.weight': torch.randn(32000, 4096, dtype=torch.bfloat16),
    # Vision encoder
    'vision_encoder.embeddings.patch_embedding.weight': torch.randn(1024, 3, 14, 14, dtype=torch.bfloat16),
    'vision_encoder.embeddings.position_embedding.weight': torch.randn(257, 1024, dtype=torch.bfloat16),
    'vision_projection.weight': torch.randn(4096, 1024, dtype=torch.bfloat16),
    # Memory layers
    'memory_layers.0.memory_attn.q_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
    'memory_layers.0.memory_attn.k_proj.weight': torch.randn(4096, 4096, dtype=torch.bfloat16),
})
save_file(shard3, 'model-00003-of-00003.safetensors')
print(f"✓ Shard 3 created ({len(shard3)} tensors)")
# Create index
print("\nCreating model.safetensors.index.json...")
weight_map = {}
for key in shard1.keys():
    weight_map[key] = "model-00001-of-00003.safetensors"
for key in shard2.keys():
    weight_map[key] = "model-00002-of-00003.safetensors"
for key in shard3.keys():
    weight_map[key] = "model-00003-of-00003.safetensors"
# total_size must be the byte count of all tensors; compute it from the shards
# instead of hardcoding it (bfloat16 = 2 bytes per parameter).
total_size = sum(
    t.numel() * t.element_size()
    for shard in (shard1, shard2, shard3)
    for t in shard.values()
)
index = {
    "metadata": {
        "total_size": total_size
    },
    "weight_map": weight_map
}
with open('model.safetensors.index.json', 'w') as f:
    json.dump(index, f, indent=2)
print("✓ Index created")
# Upload all files
print("\n" + "="*60)
print("Uploading to Hugging Face...")
print("="*60)
api = HfApi(token=TOKEN)
files = [
    'model-00001-of-00003.safetensors',
    'model-00002-of-00003.safetensors',
    'model-00003-of-00003.safetensors',
    'model.safetensors.index.json'
]
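# Note: each api.upload_file() call creates its own commit in the repo;
# huggingface_hub's upload_folder() could push all four files in one commit.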
for file in files:
    print(f"\nUploading {file}...")
    try:
        api.upload_file(
            path_or_fileobj=file,
            path_in_repo=file,
            repo_id=repo_id,
            repo_type='model'
        )
        print(f"✓ {file} uploaded successfully!")
    except Exception as e:
        print(f"✗ Error uploading {file}: {e}")
print("\n" + "="*60)
print(f"✓ All safetensors uploaded!")
print(f"View your model: https://huggingface.co/{repo_id}")
print("="*60)
print("\n⚠️ WARNING: These are RANDOM weights!")
print("The model structure is correct but it won't generate meaningful text.")
print("You need to train or fine-tune to get working weights.")
# Cleanup: remove the local shard and index files now that they are uploaded
for file in files:
    if os.path.exists(file):
        os.remove(file)
print("\n✓ Local files cleaned up")