import json

import torch
from datasets import load_dataset, Dataset, concatenate_datasets
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer

# ---------- 1. Load rubpy dataset ----------
with open("rubpy_full_dataset.json", encoding="utf-8") as f:
    rubpy_data = json.load(f)

rubpy_dataset = Dataset.from_list([
    {
        "text": f"""### Instruction:
{item['instruction']}

### Response:
{item['output']}"""
    }
    for item in rubpy_data
])

# ---------- 2. Load public code dataset ----------
public_dataset = load_dataset(
    "deepmind/code_contests",
    split="train"
)

def format_public_example(x):
    # code_contests stores solutions as parallel lists under "solutions";
    # take the first solution if one exists.
    solutions = x["solutions"]["solution"]
    solution = solutions[0] if solutions else ""
    # The Persian prompt below means: "Solve the programming problem:"
    return {
        "text": f"""### Instruction:
مسئله برنامه‌نویسی را حل کن:
{x['description']}

### Response:
{solution}"""
    }

# Drop the original columns so the features match rubpy_dataset
# when the two datasets are concatenated below.
public_dataset = public_dataset.map(
    format_public_example,
    remove_columns=public_dataset.column_names
)

# ---------- 3. Combine datasets ----------
# Sample as many public examples as there are rubpy examples to keep the mix balanced.
final_dataset = concatenate_datasets([
    rubpy_dataset,
    public_dataset.shuffle(seed=42).select(range(len(rubpy_dataset)))
])

# ---------- 4. Model ----------
MODEL_NAME = "Qwen/Qwen2.5-Coder-7B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto"
)

# ---------- 5. LoRA ----------
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# ---------- 6. Training ----------
training_args = TrainingArguments(
    output_dir="./rubpy-model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    bf16=True,
    report_to="none"
)

# Note: this uses the older trl SFTTrainer signature, where tokenizer,
# dataset_text_field, and max_seq_length are passed to the trainer directly.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=final_dataset,
    dataset_text_field="text",
    peft_config=lora_config,
    args=training_args,
    max_seq_length=2048
)

trainer.train()
trainer.save_model("./rubpy-model")
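
# ---------- 7. Quick inference check (optional sketch) ----------
# Not part of the training run above: a minimal, hedged sketch of loading the
# saved LoRA adapter and generating a completion, assuming the same
# "### Instruction / ### Response" format used for training. The example
# question is hypothetical; run this as a separate step so the base model is
# not loaded twice into GPU memory.
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    device_map="auto"
)
inference_model = PeftModel.from_pretrained(base_model, "./rubpy-model")

# Hypothetical example prompt, matching the training format.
prompt = """### Instruction:
How do I send a text message with rubpy?

### Response:
"""
inputs = tokenizer(prompt, return_tensors="pt").to(inference_model.device)
outputs = inference_model.generate(**inputs, max_new_tokens=256)

# Print only the newly generated tokens, not the echoed prompt.
print(tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:],
    skip_special_tokens=True
))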