Spaces:
Build error
Build error
ref code from novel-translation
Browse files
- .gitattributes +7 -0
- competition/01_EDA.ipynb +0 -0
- config/qwen2_0.5b_lora_sft.yaml +39 -0
- config/qwen2_1.5b_lora_sft.yaml +39 -0
- config/qwen2_7b_lora_sft.yaml +39 -0
- data/alpaca_mac.json +0 -0
- data/dataset_info.json +568 -0
- datasets/mgtv/dev.csv +3 -0
- datasets/mgtv/test_a.csv +3 -0
- datasets/mgtv/train.csv +3 -0
- llama-factory/config/qwen2_0.5b_lora_sft.yaml +39 -0
- llama-factory/config/qwen2_1.5b_lora_sft.yaml +39 -0
- llama-factory/config/qwen2_7b_lora_sft.yaml +39 -0
- llama-factory/data/alpaca_mac.json +0 -0
- llama-factory/data/dataset_info.json +568 -0
- notebooks/01_Finetune-Llama3-with-LLaMA-Factory.ipynb +1 -0
- novel-translation/00_Data_Analysis.ipynb +0 -0
- novel-translation/01_Qwen2-0.5B_Unsloth.ipynb +0 -0
- novel-translation/02_Qwen2-1.5B_Unsloth.ipynb +0 -0
- novel-translation/03_Qwen2-0.5B_1.5B-4bit.ipynb +0 -0
- novel-translation/04_tune-small-no-flash-attn.ipynb +0 -0
- novel-translation/05_tune-small-with-flash-attn.ipynb +0 -0
- novel-translation/06_tune-small-py3.11.ipynb +0 -0
- novel-translation/07_tune-lf-py3.11.ipynb +0 -0
- novel-translation/08_eval-lf-py3.11.ipynb +0 -0
- requirements.txt +2 -2
- results/mac-results-colab.csv +0 -0
- results/mac-results-colab.gsheet +3 -0
- results/mac-results_lf.csv +3 -0
- scripts/tune-lf.sh +8 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
datasets/mgtv/ filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
datasets/mgtv/dev.csv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
datasets/mgtv/test_a.csv filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
datasets/mgtv/train.csv filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
results/mac-results-colab.csv filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
results/mac-results-colab.gsheet filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
results/mac-results_lf.csv filter=lfs diff=lfs merge=lfs -text
|
competition/01_EDA.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
config/qwen2_0.5b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-0.5B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-0.5b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
config/qwen2_1.5b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-1.5b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
config/qwen2_7b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-7B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-7b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
data/alpaca_mac.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/dataset_info.json
ADDED
|
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpaca_mac": {
|
| 3 |
+
"file_name": "alpaca_mac.json"
|
| 4 |
+
},
|
| 5 |
+
"identity": {
|
| 6 |
+
"file_name": "identity.json"
|
| 7 |
+
},
|
| 8 |
+
"alpaca_en_demo": {
|
| 9 |
+
"file_name": "alpaca_en_demo.json"
|
| 10 |
+
},
|
| 11 |
+
"alpaca_zh_demo": {
|
| 12 |
+
"file_name": "alpaca_zh_demo.json"
|
| 13 |
+
},
|
| 14 |
+
"glaive_toolcall_en_demo": {
|
| 15 |
+
"file_name": "glaive_toolcall_en_demo.json",
|
| 16 |
+
"formatting": "sharegpt",
|
| 17 |
+
"columns": {
|
| 18 |
+
"messages": "conversations",
|
| 19 |
+
"tools": "tools"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"glaive_toolcall_zh_demo": {
|
| 23 |
+
"file_name": "glaive_toolcall_zh_demo.json",
|
| 24 |
+
"formatting": "sharegpt",
|
| 25 |
+
"columns": {
|
| 26 |
+
"messages": "conversations",
|
| 27 |
+
"tools": "tools"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"mllm_demo": {
|
| 31 |
+
"file_name": "mllm_demo.json",
|
| 32 |
+
"formatting": "sharegpt",
|
| 33 |
+
"columns": {
|
| 34 |
+
"messages": "messages",
|
| 35 |
+
"images": "images"
|
| 36 |
+
},
|
| 37 |
+
"tags": {
|
| 38 |
+
"role_tag": "role",
|
| 39 |
+
"content_tag": "content",
|
| 40 |
+
"user_tag": "user",
|
| 41 |
+
"assistant_tag": "assistant"
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"alpaca_en": {
|
| 45 |
+
"hf_hub_url": "llamafactory/alpaca_en",
|
| 46 |
+
"ms_hub_url": "llamafactory/alpaca_en"
|
| 47 |
+
},
|
| 48 |
+
"alpaca_zh": {
|
| 49 |
+
"hf_hub_url": "llamafactory/alpaca_zh",
|
| 50 |
+
"ms_hub_url": "llamafactory/alpaca_zh"
|
| 51 |
+
},
|
| 52 |
+
"alpaca_gpt4_en": {
|
| 53 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_en",
|
| 54 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_en"
|
| 55 |
+
},
|
| 56 |
+
"alpaca_gpt4_zh": {
|
| 57 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_zh",
|
| 58 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_zh"
|
| 59 |
+
},
|
| 60 |
+
"glaive_toolcall_en": {
|
| 61 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_en",
|
| 62 |
+
"formatting": "sharegpt",
|
| 63 |
+
"columns": {
|
| 64 |
+
"messages": "conversations",
|
| 65 |
+
"tools": "tools"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"glaive_toolcall_zh": {
|
| 69 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_zh",
|
| 70 |
+
"formatting": "sharegpt",
|
| 71 |
+
"columns": {
|
| 72 |
+
"messages": "conversations",
|
| 73 |
+
"tools": "tools"
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
"lima": {
|
| 77 |
+
"hf_hub_url": "llamafactory/lima",
|
| 78 |
+
"formatting": "sharegpt"
|
| 79 |
+
},
|
| 80 |
+
"guanaco": {
|
| 81 |
+
"hf_hub_url": "JosephusCheung/GuanacoDataset",
|
| 82 |
+
"ms_hub_url": "AI-ModelScope/GuanacoDataset"
|
| 83 |
+
},
|
| 84 |
+
"belle_2m": {
|
| 85 |
+
"hf_hub_url": "BelleGroup/train_2M_CN",
|
| 86 |
+
"ms_hub_url": "AI-ModelScope/train_2M_CN"
|
| 87 |
+
},
|
| 88 |
+
"belle_1m": {
|
| 89 |
+
"hf_hub_url": "BelleGroup/train_1M_CN",
|
| 90 |
+
"ms_hub_url": "AI-ModelScope/train_1M_CN"
|
| 91 |
+
},
|
| 92 |
+
"belle_0.5m": {
|
| 93 |
+
"hf_hub_url": "BelleGroup/train_0.5M_CN",
|
| 94 |
+
"ms_hub_url": "AI-ModelScope/train_0.5M_CN"
|
| 95 |
+
},
|
| 96 |
+
"belle_dialog": {
|
| 97 |
+
"hf_hub_url": "BelleGroup/generated_chat_0.4M",
|
| 98 |
+
"ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
|
| 99 |
+
},
|
| 100 |
+
"belle_math": {
|
| 101 |
+
"hf_hub_url": "BelleGroup/school_math_0.25M",
|
| 102 |
+
"ms_hub_url": "AI-ModelScope/school_math_0.25M"
|
| 103 |
+
},
|
| 104 |
+
"belle_multiturn": {
|
| 105 |
+
"script_url": "belle_multiturn",
|
| 106 |
+
"formatting": "sharegpt"
|
| 107 |
+
},
|
| 108 |
+
"ultra_chat": {
|
| 109 |
+
"script_url": "ultra_chat",
|
| 110 |
+
"formatting": "sharegpt"
|
| 111 |
+
},
|
| 112 |
+
"open_platypus": {
|
| 113 |
+
"hf_hub_url": "garage-bAInd/Open-Platypus",
|
| 114 |
+
"ms_hub_url": "AI-ModelScope/Open-Platypus"
|
| 115 |
+
},
|
| 116 |
+
"codealpaca": {
|
| 117 |
+
"hf_hub_url": "sahil2801/CodeAlpaca-20k",
|
| 118 |
+
"ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
|
| 119 |
+
},
|
| 120 |
+
"alpaca_cot": {
|
| 121 |
+
"hf_hub_url": "QingyiSi/Alpaca-CoT",
|
| 122 |
+
"ms_hub_url": "AI-ModelScope/Alpaca-CoT"
|
| 123 |
+
},
|
| 124 |
+
"openorca": {
|
| 125 |
+
"hf_hub_url": "Open-Orca/OpenOrca",
|
| 126 |
+
"ms_hub_url": "AI-ModelScope/OpenOrca",
|
| 127 |
+
"columns": {
|
| 128 |
+
"prompt": "question",
|
| 129 |
+
"response": "response",
|
| 130 |
+
"system": "system_prompt"
|
| 131 |
+
}
|
| 132 |
+
},
|
| 133 |
+
"slimorca": {
|
| 134 |
+
"hf_hub_url": "Open-Orca/SlimOrca",
|
| 135 |
+
"formatting": "sharegpt"
|
| 136 |
+
},
|
| 137 |
+
"mathinstruct": {
|
| 138 |
+
"hf_hub_url": "TIGER-Lab/MathInstruct",
|
| 139 |
+
"ms_hub_url": "AI-ModelScope/MathInstruct",
|
| 140 |
+
"columns": {
|
| 141 |
+
"prompt": "instruction",
|
| 142 |
+
"response": "output"
|
| 143 |
+
}
|
| 144 |
+
},
|
| 145 |
+
"firefly": {
|
| 146 |
+
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
| 147 |
+
"columns": {
|
| 148 |
+
"prompt": "input",
|
| 149 |
+
"response": "target"
|
| 150 |
+
}
|
| 151 |
+
},
|
| 152 |
+
"wikiqa": {
|
| 153 |
+
"hf_hub_url": "wiki_qa",
|
| 154 |
+
"columns": {
|
| 155 |
+
"prompt": "question",
|
| 156 |
+
"response": "answer"
|
| 157 |
+
}
|
| 158 |
+
},
|
| 159 |
+
"webqa": {
|
| 160 |
+
"hf_hub_url": "suolyer/webqa",
|
| 161 |
+
"ms_hub_url": "AI-ModelScope/webqa",
|
| 162 |
+
"columns": {
|
| 163 |
+
"prompt": "input",
|
| 164 |
+
"response": "output"
|
| 165 |
+
}
|
| 166 |
+
},
|
| 167 |
+
"webnovel": {
|
| 168 |
+
"hf_hub_url": "zxbsmk/webnovel_cn",
|
| 169 |
+
"ms_hub_url": "AI-ModelScope/webnovel_cn"
|
| 170 |
+
},
|
| 171 |
+
"nectar_sft": {
|
| 172 |
+
"hf_hub_url": "AstraMindAI/SFT-Nectar",
|
| 173 |
+
"ms_hub_url": "AI-ModelScope/SFT-Nectar"
|
| 174 |
+
},
|
| 175 |
+
"deepctrl": {
|
| 176 |
+
"ms_hub_url": "deepctrl/deepctrl-sft-data"
|
| 177 |
+
},
|
| 178 |
+
"adgen": {
|
| 179 |
+
"hf_hub_url": "HasturOfficial/adgen",
|
| 180 |
+
"ms_hub_url": "AI-ModelScope/adgen",
|
| 181 |
+
"columns": {
|
| 182 |
+
"prompt": "content",
|
| 183 |
+
"response": "summary"
|
| 184 |
+
}
|
| 185 |
+
},
|
| 186 |
+
"sharegpt_hyper": {
|
| 187 |
+
"hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
|
| 188 |
+
"formatting": "sharegpt"
|
| 189 |
+
},
|
| 190 |
+
"sharegpt4": {
|
| 191 |
+
"hf_hub_url": "shibing624/sharegpt_gpt4",
|
| 192 |
+
"ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
|
| 193 |
+
"formatting": "sharegpt"
|
| 194 |
+
},
|
| 195 |
+
"ultrachat_200k": {
|
| 196 |
+
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
|
| 197 |
+
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
|
| 198 |
+
"formatting": "sharegpt",
|
| 199 |
+
"columns": {
|
| 200 |
+
"messages": "messages"
|
| 201 |
+
},
|
| 202 |
+
"tags": {
|
| 203 |
+
"role_tag": "role",
|
| 204 |
+
"content_tag": "content",
|
| 205 |
+
"user_tag": "user",
|
| 206 |
+
"assistant_tag": "assistant"
|
| 207 |
+
}
|
| 208 |
+
},
|
| 209 |
+
"agent_instruct": {
|
| 210 |
+
"hf_hub_url": "THUDM/AgentInstruct",
|
| 211 |
+
"ms_hub_url": "ZhipuAI/AgentInstruct",
|
| 212 |
+
"formatting": "sharegpt"
|
| 213 |
+
},
|
| 214 |
+
"lmsys_chat": {
|
| 215 |
+
"hf_hub_url": "lmsys/lmsys-chat-1m",
|
| 216 |
+
"ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
|
| 217 |
+
"formatting": "sharegpt",
|
| 218 |
+
"columns": {
|
| 219 |
+
"messages": "conversation"
|
| 220 |
+
},
|
| 221 |
+
"tags": {
|
| 222 |
+
"role_tag": "role",
|
| 223 |
+
"content_tag": "content",
|
| 224 |
+
"user_tag": "human",
|
| 225 |
+
"assistant_tag": "assistant"
|
| 226 |
+
}
|
| 227 |
+
},
|
| 228 |
+
"evol_instruct": {
|
| 229 |
+
"hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
|
| 230 |
+
"ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
|
| 231 |
+
"formatting": "sharegpt"
|
| 232 |
+
},
|
| 233 |
+
"glaive_toolcall_100k": {
|
| 234 |
+
"hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
|
| 235 |
+
"formatting": "sharegpt",
|
| 236 |
+
"columns": {
|
| 237 |
+
"messages": "conversations",
|
| 238 |
+
"tools": "tools"
|
| 239 |
+
}
|
| 240 |
+
},
|
| 241 |
+
"cosmopedia": {
|
| 242 |
+
"hf_hub_url": "HuggingFaceTB/cosmopedia",
|
| 243 |
+
"columns": {
|
| 244 |
+
"prompt": "prompt",
|
| 245 |
+
"response": "text"
|
| 246 |
+
}
|
| 247 |
+
},
|
| 248 |
+
"stem_zh": {
|
| 249 |
+
"hf_hub_url": "hfl/stem_zh_instruction"
|
| 250 |
+
},
|
| 251 |
+
"ruozhiba_gpt4": {
|
| 252 |
+
"hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
|
| 253 |
+
},
|
| 254 |
+
"neo_sft": {
|
| 255 |
+
"hf_hub_url": "m-a-p/neo_sft_phase2",
|
| 256 |
+
"formatting": "sharegpt"
|
| 257 |
+
},
|
| 258 |
+
"magpie_pro_300k": {
|
| 259 |
+
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
|
| 260 |
+
"formatting": "sharegpt"
|
| 261 |
+
},
|
| 262 |
+
"web_instruct": {
|
| 263 |
+
"hf_hub_url": "TIGER-Lab/WebInstructSub",
|
| 264 |
+
"columns": {
|
| 265 |
+
"prompt": "question",
|
| 266 |
+
"response": "answer"
|
| 267 |
+
}
|
| 268 |
+
},
|
| 269 |
+
"llava_1k_en": {
|
| 270 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
| 271 |
+
"subset": "en",
|
| 272 |
+
"formatting": "sharegpt",
|
| 273 |
+
"columns": {
|
| 274 |
+
"messages": "messages",
|
| 275 |
+
"images": "images"
|
| 276 |
+
},
|
| 277 |
+
"tags": {
|
| 278 |
+
"role_tag": "role",
|
| 279 |
+
"content_tag": "content",
|
| 280 |
+
"user_tag": "user",
|
| 281 |
+
"assistant_tag": "assistant"
|
| 282 |
+
}
|
| 283 |
+
},
|
| 284 |
+
"llava_1k_zh": {
|
| 285 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
| 286 |
+
"subset": "zh",
|
| 287 |
+
"formatting": "sharegpt",
|
| 288 |
+
"columns": {
|
| 289 |
+
"messages": "messages",
|
| 290 |
+
"images": "images"
|
| 291 |
+
},
|
| 292 |
+
"tags": {
|
| 293 |
+
"role_tag": "role",
|
| 294 |
+
"content_tag": "content",
|
| 295 |
+
"user_tag": "user",
|
| 296 |
+
"assistant_tag": "assistant"
|
| 297 |
+
}
|
| 298 |
+
},
|
| 299 |
+
"llava_150k_en": {
|
| 300 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
| 301 |
+
"subset": "en",
|
| 302 |
+
"formatting": "sharegpt",
|
| 303 |
+
"columns": {
|
| 304 |
+
"messages": "messages",
|
| 305 |
+
"images": "images"
|
| 306 |
+
},
|
| 307 |
+
"tags": {
|
| 308 |
+
"role_tag": "role",
|
| 309 |
+
"content_tag": "content",
|
| 310 |
+
"user_tag": "user",
|
| 311 |
+
"assistant_tag": "assistant"
|
| 312 |
+
}
|
| 313 |
+
},
|
| 314 |
+
"llava_150k_zh": {
|
| 315 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
| 316 |
+
"subset": "zh",
|
| 317 |
+
"formatting": "sharegpt",
|
| 318 |
+
"columns": {
|
| 319 |
+
"messages": "messages",
|
| 320 |
+
"images": "images"
|
| 321 |
+
},
|
| 322 |
+
"tags": {
|
| 323 |
+
"role_tag": "role",
|
| 324 |
+
"content_tag": "content",
|
| 325 |
+
"user_tag": "user",
|
| 326 |
+
"assistant_tag": "assistant"
|
| 327 |
+
}
|
| 328 |
+
},
|
| 329 |
+
"mllm_pt_demo": {
|
| 330 |
+
"hf_hub_url": "BUAADreamer/mllm_pt_demo",
|
| 331 |
+
"formatting": "sharegpt",
|
| 332 |
+
"columns": {
|
| 333 |
+
"messages": "messages",
|
| 334 |
+
"images": "images"
|
| 335 |
+
},
|
| 336 |
+
"tags": {
|
| 337 |
+
"role_tag": "role",
|
| 338 |
+
"content_tag": "content",
|
| 339 |
+
"user_tag": "user",
|
| 340 |
+
"assistant_tag": "assistant"
|
| 341 |
+
}
|
| 342 |
+
},
|
| 343 |
+
"oasst_de": {
|
| 344 |
+
"hf_hub_url": "mayflowergmbh/oasst_de"
|
| 345 |
+
},
|
| 346 |
+
"dolly_15k_de": {
|
| 347 |
+
"hf_hub_url": "mayflowergmbh/dolly-15k_de"
|
| 348 |
+
},
|
| 349 |
+
"alpaca-gpt4_de": {
|
| 350 |
+
"hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
|
| 351 |
+
},
|
| 352 |
+
"openschnabeltier_de": {
|
| 353 |
+
"hf_hub_url": "mayflowergmbh/openschnabeltier_de"
|
| 354 |
+
},
|
| 355 |
+
"evol_instruct_de": {
|
| 356 |
+
"hf_hub_url": "mayflowergmbh/evol-instruct_de"
|
| 357 |
+
},
|
| 358 |
+
"dolphin_de": {
|
| 359 |
+
"hf_hub_url": "mayflowergmbh/dolphin_de"
|
| 360 |
+
},
|
| 361 |
+
"booksum_de": {
|
| 362 |
+
"hf_hub_url": "mayflowergmbh/booksum_de"
|
| 363 |
+
},
|
| 364 |
+
"airoboros_de": {
|
| 365 |
+
"hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
|
| 366 |
+
},
|
| 367 |
+
"ultrachat_de": {
|
| 368 |
+
"hf_hub_url": "mayflowergmbh/ultra-chat_de"
|
| 369 |
+
},
|
| 370 |
+
"dpo_en_demo": {
|
| 371 |
+
"file_name": "dpo_en_demo.json",
|
| 372 |
+
"ranking": true,
|
| 373 |
+
"formatting": "sharegpt",
|
| 374 |
+
"columns": {
|
| 375 |
+
"messages": "conversations",
|
| 376 |
+
"chosen": "chosen",
|
| 377 |
+
"rejected": "rejected"
|
| 378 |
+
}
|
| 379 |
+
},
|
| 380 |
+
"dpo_zh_demo": {
|
| 381 |
+
"file_name": "dpo_zh_demo.json",
|
| 382 |
+
"ranking": true,
|
| 383 |
+
"formatting": "sharegpt",
|
| 384 |
+
"columns": {
|
| 385 |
+
"messages": "conversations",
|
| 386 |
+
"chosen": "chosen",
|
| 387 |
+
"rejected": "rejected"
|
| 388 |
+
}
|
| 389 |
+
},
|
| 390 |
+
"dpo_mix_en": {
|
| 391 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
| 392 |
+
"subset": "en",
|
| 393 |
+
"ranking": true,
|
| 394 |
+
"formatting": "sharegpt",
|
| 395 |
+
"columns": {
|
| 396 |
+
"messages": "conversations",
|
| 397 |
+
"chosen": "chosen",
|
| 398 |
+
"rejected": "rejected"
|
| 399 |
+
}
|
| 400 |
+
},
|
| 401 |
+
"dpo_mix_zh": {
|
| 402 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
| 403 |
+
"subset": "zh",
|
| 404 |
+
"ranking": true,
|
| 405 |
+
"formatting": "sharegpt",
|
| 406 |
+
"columns": {
|
| 407 |
+
"messages": "conversations",
|
| 408 |
+
"chosen": "chosen",
|
| 409 |
+
"rejected": "rejected"
|
| 410 |
+
}
|
| 411 |
+
},
|
| 412 |
+
"ultrafeedback": {
|
| 413 |
+
"hf_hub_url": "llamafactory/ultrafeedback_binarized",
|
| 414 |
+
"ms_hub_url": "llamafactory/ultrafeedback_binarized",
|
| 415 |
+
"ranking": true,
|
| 416 |
+
"columns": {
|
| 417 |
+
"prompt": "instruction",
|
| 418 |
+
"chosen": "chosen",
|
| 419 |
+
"rejected": "rejected"
|
| 420 |
+
}
|
| 421 |
+
},
|
| 422 |
+
"orca_pairs": {
|
| 423 |
+
"hf_hub_url": "Intel/orca_dpo_pairs",
|
| 424 |
+
"ranking": true,
|
| 425 |
+
"columns": {
|
| 426 |
+
"prompt": "question",
|
| 427 |
+
"chosen": "chosen",
|
| 428 |
+
"rejected": "rejected",
|
| 429 |
+
"system": "system"
|
| 430 |
+
}
|
| 431 |
+
},
|
| 432 |
+
"hh_rlhf_en": {
|
| 433 |
+
"script_url": "hh_rlhf_en",
|
| 434 |
+
"ranking": true,
|
| 435 |
+
"columns": {
|
| 436 |
+
"prompt": "instruction",
|
| 437 |
+
"chosen": "chosen",
|
| 438 |
+
"rejected": "rejected",
|
| 439 |
+
"history": "history"
|
| 440 |
+
}
|
| 441 |
+
},
|
| 442 |
+
"nectar_rm": {
|
| 443 |
+
"hf_hub_url": "AstraMindAI/RLAIF-Nectar",
|
| 444 |
+
"ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
|
| 445 |
+
"ranking": true
|
| 446 |
+
},
|
| 447 |
+
"orca_dpo_de": {
|
| 448 |
+
"hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
|
| 449 |
+
"ranking": true
|
| 450 |
+
},
|
| 451 |
+
"kto_en_demo": {
|
| 452 |
+
"file_name": "kto_en_demo.json",
|
| 453 |
+
"formatting": "sharegpt",
|
| 454 |
+
"columns": {
|
| 455 |
+
"messages": "messages",
|
| 456 |
+
"kto_tag": "label"
|
| 457 |
+
},
|
| 458 |
+
"tags": {
|
| 459 |
+
"role_tag": "role",
|
| 460 |
+
"content_tag": "content",
|
| 461 |
+
"user_tag": "user",
|
| 462 |
+
"assistant_tag": "assistant"
|
| 463 |
+
}
|
| 464 |
+
},
|
| 465 |
+
"kto_mix_en": {
|
| 466 |
+
"hf_hub_url": "argilla/kto-mix-15k",
|
| 467 |
+
"formatting": "sharegpt",
|
| 468 |
+
"columns": {
|
| 469 |
+
"messages": "completion",
|
| 470 |
+
"kto_tag": "label"
|
| 471 |
+
},
|
| 472 |
+
"tags": {
|
| 473 |
+
"role_tag": "role",
|
| 474 |
+
"content_tag": "content",
|
| 475 |
+
"user_tag": "user",
|
| 476 |
+
"assistant_tag": "assistant"
|
| 477 |
+
}
|
| 478 |
+
},
|
| 479 |
+
"ultrafeedback_kto": {
|
| 480 |
+
"hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
|
| 481 |
+
"ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
|
| 482 |
+
"columns": {
|
| 483 |
+
"prompt": "prompt",
|
| 484 |
+
"response": "completion",
|
| 485 |
+
"kto_tag": "label"
|
| 486 |
+
}
|
| 487 |
+
},
|
| 488 |
+
"wiki_demo": {
|
| 489 |
+
"file_name": "wiki_demo.txt",
|
| 490 |
+
"columns": {
|
| 491 |
+
"prompt": "text"
|
| 492 |
+
}
|
| 493 |
+
},
|
| 494 |
+
"c4_demo": {
|
| 495 |
+
"file_name": "c4_demo.json",
|
| 496 |
+
"columns": {
|
| 497 |
+
"prompt": "text"
|
| 498 |
+
}
|
| 499 |
+
},
|
| 500 |
+
"refinedweb": {
|
| 501 |
+
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
| 502 |
+
"columns": {
|
| 503 |
+
"prompt": "content"
|
| 504 |
+
}
|
| 505 |
+
},
|
| 506 |
+
"redpajama_v2": {
|
| 507 |
+
"hf_hub_url": "togethercomputer/RedPajama-Data-V2",
|
| 508 |
+
"columns": {
|
| 509 |
+
"prompt": "raw_content"
|
| 510 |
+
},
|
| 511 |
+
"subset": "default"
|
| 512 |
+
},
|
| 513 |
+
"wikipedia_en": {
|
| 514 |
+
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
| 515 |
+
"ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
|
| 516 |
+
"columns": {
|
| 517 |
+
"prompt": "text"
|
| 518 |
+
}
|
| 519 |
+
},
|
| 520 |
+
"wikipedia_zh": {
|
| 521 |
+
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
| 522 |
+
"ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
|
| 523 |
+
"columns": {
|
| 524 |
+
"prompt": "completion"
|
| 525 |
+
}
|
| 526 |
+
},
|
| 527 |
+
"pile": {
|
| 528 |
+
"hf_hub_url": "monology/pile-uncopyrighted",
|
| 529 |
+
"ms_hub_url": "AI-ModelScope/pile",
|
| 530 |
+
"columns": {
|
| 531 |
+
"prompt": "text"
|
| 532 |
+
}
|
| 533 |
+
},
|
| 534 |
+
"skypile": {
|
| 535 |
+
"hf_hub_url": "Skywork/SkyPile-150B",
|
| 536 |
+
"ms_hub_url": "AI-ModelScope/SkyPile-150B",
|
| 537 |
+
"columns": {
|
| 538 |
+
"prompt": "text"
|
| 539 |
+
}
|
| 540 |
+
},
|
| 541 |
+
"fineweb": {
|
| 542 |
+
"hf_hub_url": "HuggingFaceFW/fineweb",
|
| 543 |
+
"columns": {
|
| 544 |
+
"prompt": "text"
|
| 545 |
+
}
|
| 546 |
+
},
|
| 547 |
+
"fineweb_edu": {
|
| 548 |
+
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
| 549 |
+
"columns": {
|
| 550 |
+
"prompt": "text"
|
| 551 |
+
}
|
| 552 |
+
},
|
| 553 |
+
"the_stack": {
|
| 554 |
+
"hf_hub_url": "bigcode/the-stack",
|
| 555 |
+
"ms_hub_url": "AI-ModelScope/the-stack",
|
| 556 |
+
"columns": {
|
| 557 |
+
"prompt": "content"
|
| 558 |
+
}
|
| 559 |
+
},
|
| 560 |
+
"starcoder_python": {
|
| 561 |
+
"hf_hub_url": "bigcode/starcoderdata",
|
| 562 |
+
"ms_hub_url": "AI-ModelScope/starcoderdata",
|
| 563 |
+
"columns": {
|
| 564 |
+
"prompt": "content"
|
| 565 |
+
},
|
| 566 |
+
"folder": "python"
|
| 567 |
+
}
|
| 568 |
+
}
|
datasets/mgtv/dev.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:449f236786e2105cd1dd0ba5f4a037c3608a03d73a24597e880cc5009e8c53b6
|
| 3 |
+
size 2741482
|
datasets/mgtv/test_a.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7c29598e27c726bef8a9f2672b83dfc4f7edb6fb6a7ff19bf63cadbdc6e9a62
|
| 3 |
+
size 1816769
|
datasets/mgtv/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06570ba22afc612ea7033d2fda6acf67774f662e5c60f57e4ce8e28ca2dd9b22
|
| 3 |
+
size 20747995
|
llama-factory/config/qwen2_0.5b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-0.5B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-0.5b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
llama-factory/config/qwen2_1.5b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-1.5b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
llama-factory/config/qwen2_7b_lora_sft.yaml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: Qwen/Qwen2-7B-Instruct
|
| 3 |
+
|
| 4 |
+
### method
|
| 5 |
+
stage: sft
|
| 6 |
+
do_train: true
|
| 7 |
+
finetuning_type: lora
|
| 8 |
+
lora_target: all
|
| 9 |
+
|
| 10 |
+
### dataset
|
| 11 |
+
dataset: alpaca_mac
|
| 12 |
+
template: chatml
|
| 13 |
+
cutoff_len: 1024
|
| 14 |
+
max_samples: 4528
|
| 15 |
+
overwrite_cache: true
|
| 16 |
+
preprocessing_num_workers: 16
|
| 17 |
+
|
| 18 |
+
### output
|
| 19 |
+
output_dir: saves/qwen2-7b/lora/sft
|
| 20 |
+
logging_steps: 10
|
| 21 |
+
save_steps: 560
|
| 22 |
+
plot_loss: true
|
| 23 |
+
overwrite_output_dir: true
|
| 24 |
+
|
| 25 |
+
### train
|
| 26 |
+
per_device_train_batch_size: 1
|
| 27 |
+
gradient_accumulation_steps: 8
|
| 28 |
+
learning_rate: 1.0e-4
|
| 29 |
+
num_train_epochs: 10.0
|
| 30 |
+
lr_scheduler_type: cosine
|
| 31 |
+
warmup_ratio: 0.1
|
| 32 |
+
bf16: true
|
| 33 |
+
ddp_timeout: 180000000
|
| 34 |
+
|
| 35 |
+
### eval
|
| 36 |
+
val_size: 0.01
|
| 37 |
+
per_device_eval_batch_size: 1
|
| 38 |
+
eval_strategy: steps
|
| 39 |
+
eval_steps: 560
|
llama-factory/data/alpaca_mac.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
llama-factory/data/dataset_info.json
ADDED
|
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpaca_mac": {
|
| 3 |
+
"file_name": "alpaca_mac.json"
|
| 4 |
+
},
|
| 5 |
+
"identity": {
|
| 6 |
+
"file_name": "identity.json"
|
| 7 |
+
},
|
| 8 |
+
"alpaca_en_demo": {
|
| 9 |
+
"file_name": "alpaca_en_demo.json"
|
| 10 |
+
},
|
| 11 |
+
"alpaca_zh_demo": {
|
| 12 |
+
"file_name": "alpaca_zh_demo.json"
|
| 13 |
+
},
|
| 14 |
+
"glaive_toolcall_en_demo": {
|
| 15 |
+
"file_name": "glaive_toolcall_en_demo.json",
|
| 16 |
+
"formatting": "sharegpt",
|
| 17 |
+
"columns": {
|
| 18 |
+
"messages": "conversations",
|
| 19 |
+
"tools": "tools"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"glaive_toolcall_zh_demo": {
|
| 23 |
+
"file_name": "glaive_toolcall_zh_demo.json",
|
| 24 |
+
"formatting": "sharegpt",
|
| 25 |
+
"columns": {
|
| 26 |
+
"messages": "conversations",
|
| 27 |
+
"tools": "tools"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"mllm_demo": {
|
| 31 |
+
"file_name": "mllm_demo.json",
|
| 32 |
+
"formatting": "sharegpt",
|
| 33 |
+
"columns": {
|
| 34 |
+
"messages": "messages",
|
| 35 |
+
"images": "images"
|
| 36 |
+
},
|
| 37 |
+
"tags": {
|
| 38 |
+
"role_tag": "role",
|
| 39 |
+
"content_tag": "content",
|
| 40 |
+
"user_tag": "user",
|
| 41 |
+
"assistant_tag": "assistant"
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"alpaca_en": {
|
| 45 |
+
"hf_hub_url": "llamafactory/alpaca_en",
|
| 46 |
+
"ms_hub_url": "llamafactory/alpaca_en"
|
| 47 |
+
},
|
| 48 |
+
"alpaca_zh": {
|
| 49 |
+
"hf_hub_url": "llamafactory/alpaca_zh",
|
| 50 |
+
"ms_hub_url": "llamafactory/alpaca_zh"
|
| 51 |
+
},
|
| 52 |
+
"alpaca_gpt4_en": {
|
| 53 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_en",
|
| 54 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_en"
|
| 55 |
+
},
|
| 56 |
+
"alpaca_gpt4_zh": {
|
| 57 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_zh",
|
| 58 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_zh"
|
| 59 |
+
},
|
| 60 |
+
"glaive_toolcall_en": {
|
| 61 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_en",
|
| 62 |
+
"formatting": "sharegpt",
|
| 63 |
+
"columns": {
|
| 64 |
+
"messages": "conversations",
|
| 65 |
+
"tools": "tools"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"glaive_toolcall_zh": {
|
| 69 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_zh",
|
| 70 |
+
"formatting": "sharegpt",
|
| 71 |
+
"columns": {
|
| 72 |
+
"messages": "conversations",
|
| 73 |
+
"tools": "tools"
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
"lima": {
|
| 77 |
+
"hf_hub_url": "llamafactory/lima",
|
| 78 |
+
"formatting": "sharegpt"
|
| 79 |
+
},
|
| 80 |
+
"guanaco": {
|
| 81 |
+
"hf_hub_url": "JosephusCheung/GuanacoDataset",
|
| 82 |
+
"ms_hub_url": "AI-ModelScope/GuanacoDataset"
|
| 83 |
+
},
|
| 84 |
+
"belle_2m": {
|
| 85 |
+
"hf_hub_url": "BelleGroup/train_2M_CN",
|
| 86 |
+
"ms_hub_url": "AI-ModelScope/train_2M_CN"
|
| 87 |
+
},
|
| 88 |
+
"belle_1m": {
|
| 89 |
+
"hf_hub_url": "BelleGroup/train_1M_CN",
|
| 90 |
+
"ms_hub_url": "AI-ModelScope/train_1M_CN"
|
| 91 |
+
},
|
| 92 |
+
"belle_0.5m": {
|
| 93 |
+
"hf_hub_url": "BelleGroup/train_0.5M_CN",
|
| 94 |
+
"ms_hub_url": "AI-ModelScope/train_0.5M_CN"
|
| 95 |
+
},
|
| 96 |
+
"belle_dialog": {
|
| 97 |
+
"hf_hub_url": "BelleGroup/generated_chat_0.4M",
|
| 98 |
+
"ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
|
| 99 |
+
},
|
| 100 |
+
"belle_math": {
|
| 101 |
+
"hf_hub_url": "BelleGroup/school_math_0.25M",
|
| 102 |
+
"ms_hub_url": "AI-ModelScope/school_math_0.25M"
|
| 103 |
+
},
|
| 104 |
+
"belle_multiturn": {
|
| 105 |
+
"script_url": "belle_multiturn",
|
| 106 |
+
"formatting": "sharegpt"
|
| 107 |
+
},
|
| 108 |
+
"ultra_chat": {
|
| 109 |
+
"script_url": "ultra_chat",
|
| 110 |
+
"formatting": "sharegpt"
|
| 111 |
+
},
|
| 112 |
+
"open_platypus": {
|
| 113 |
+
"hf_hub_url": "garage-bAInd/Open-Platypus",
|
| 114 |
+
"ms_hub_url": "AI-ModelScope/Open-Platypus"
|
| 115 |
+
},
|
| 116 |
+
"codealpaca": {
|
| 117 |
+
"hf_hub_url": "sahil2801/CodeAlpaca-20k",
|
| 118 |
+
"ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
|
| 119 |
+
},
|
| 120 |
+
"alpaca_cot": {
|
| 121 |
+
"hf_hub_url": "QingyiSi/Alpaca-CoT",
|
| 122 |
+
"ms_hub_url": "AI-ModelScope/Alpaca-CoT"
|
| 123 |
+
},
|
| 124 |
+
"openorca": {
|
| 125 |
+
"hf_hub_url": "Open-Orca/OpenOrca",
|
| 126 |
+
"ms_hub_url": "AI-ModelScope/OpenOrca",
|
| 127 |
+
"columns": {
|
| 128 |
+
"prompt": "question",
|
| 129 |
+
"response": "response",
|
| 130 |
+
"system": "system_prompt"
|
| 131 |
+
}
|
| 132 |
+
},
|
| 133 |
+
"slimorca": {
|
| 134 |
+
"hf_hub_url": "Open-Orca/SlimOrca",
|
| 135 |
+
"formatting": "sharegpt"
|
| 136 |
+
},
|
| 137 |
+
"mathinstruct": {
|
| 138 |
+
"hf_hub_url": "TIGER-Lab/MathInstruct",
|
| 139 |
+
"ms_hub_url": "AI-ModelScope/MathInstruct",
|
| 140 |
+
"columns": {
|
| 141 |
+
"prompt": "instruction",
|
| 142 |
+
"response": "output"
|
| 143 |
+
}
|
| 144 |
+
},
|
| 145 |
+
"firefly": {
|
| 146 |
+
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
| 147 |
+
"columns": {
|
| 148 |
+
"prompt": "input",
|
| 149 |
+
"response": "target"
|
| 150 |
+
}
|
| 151 |
+
},
|
| 152 |
+
"wikiqa": {
|
| 153 |
+
"hf_hub_url": "wiki_qa",
|
| 154 |
+
"columns": {
|
| 155 |
+
"prompt": "question",
|
| 156 |
+
"response": "answer"
|
| 157 |
+
}
|
| 158 |
+
},
|
| 159 |
+
"webqa": {
|
| 160 |
+
"hf_hub_url": "suolyer/webqa",
|
| 161 |
+
"ms_hub_url": "AI-ModelScope/webqa",
|
| 162 |
+
"columns": {
|
| 163 |
+
"prompt": "input",
|
| 164 |
+
"response": "output"
|
| 165 |
+
}
|
| 166 |
+
},
|
| 167 |
+
"webnovel": {
|
| 168 |
+
"hf_hub_url": "zxbsmk/webnovel_cn",
|
| 169 |
+
"ms_hub_url": "AI-ModelScope/webnovel_cn"
|
| 170 |
+
},
|
| 171 |
+
"nectar_sft": {
|
| 172 |
+
"hf_hub_url": "AstraMindAI/SFT-Nectar",
|
| 173 |
+
"ms_hub_url": "AI-ModelScope/SFT-Nectar"
|
| 174 |
+
},
|
| 175 |
+
"deepctrl": {
|
| 176 |
+
"ms_hub_url": "deepctrl/deepctrl-sft-data"
|
| 177 |
+
},
|
| 178 |
+
"adgen": {
|
| 179 |
+
"hf_hub_url": "HasturOfficial/adgen",
|
| 180 |
+
"ms_hub_url": "AI-ModelScope/adgen",
|
| 181 |
+
"columns": {
|
| 182 |
+
"prompt": "content",
|
| 183 |
+
"response": "summary"
|
| 184 |
+
}
|
| 185 |
+
},
|
| 186 |
+
"sharegpt_hyper": {
|
| 187 |
+
"hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
|
| 188 |
+
"formatting": "sharegpt"
|
| 189 |
+
},
|
| 190 |
+
"sharegpt4": {
|
| 191 |
+
"hf_hub_url": "shibing624/sharegpt_gpt4",
|
| 192 |
+
"ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
|
| 193 |
+
"formatting": "sharegpt"
|
| 194 |
+
},
|
| 195 |
+
"ultrachat_200k": {
|
| 196 |
+
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
|
| 197 |
+
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
|
| 198 |
+
"formatting": "sharegpt",
|
| 199 |
+
"columns": {
|
| 200 |
+
"messages": "messages"
|
| 201 |
+
},
|
| 202 |
+
"tags": {
|
| 203 |
+
"role_tag": "role",
|
| 204 |
+
"content_tag": "content",
|
| 205 |
+
"user_tag": "user",
|
| 206 |
+
"assistant_tag": "assistant"
|
| 207 |
+
}
|
| 208 |
+
},
|
| 209 |
+
"agent_instruct": {
|
| 210 |
+
"hf_hub_url": "THUDM/AgentInstruct",
|
| 211 |
+
"ms_hub_url": "ZhipuAI/AgentInstruct",
|
| 212 |
+
"formatting": "sharegpt"
|
| 213 |
+
},
|
| 214 |
+
"lmsys_chat": {
|
| 215 |
+
"hf_hub_url": "lmsys/lmsys-chat-1m",
|
| 216 |
+
"ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
|
| 217 |
+
"formatting": "sharegpt",
|
| 218 |
+
"columns": {
|
| 219 |
+
"messages": "conversation"
|
| 220 |
+
},
|
| 221 |
+
"tags": {
|
| 222 |
+
"role_tag": "role",
|
| 223 |
+
"content_tag": "content",
|
| 224 |
+
"user_tag": "human",
|
| 225 |
+
"assistant_tag": "assistant"
|
| 226 |
+
}
|
| 227 |
+
},
|
| 228 |
+
"evol_instruct": {
|
| 229 |
+
"hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
|
| 230 |
+
"ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
|
| 231 |
+
"formatting": "sharegpt"
|
| 232 |
+
},
|
| 233 |
+
"glaive_toolcall_100k": {
|
| 234 |
+
"hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
|
| 235 |
+
"formatting": "sharegpt",
|
| 236 |
+
"columns": {
|
| 237 |
+
"messages": "conversations",
|
| 238 |
+
"tools": "tools"
|
| 239 |
+
}
|
| 240 |
+
},
|
| 241 |
+
"cosmopedia": {
|
| 242 |
+
"hf_hub_url": "HuggingFaceTB/cosmopedia",
|
| 243 |
+
"columns": {
|
| 244 |
+
"prompt": "prompt",
|
| 245 |
+
"response": "text"
|
| 246 |
+
}
|
| 247 |
+
},
|
| 248 |
+
"stem_zh": {
|
| 249 |
+
"hf_hub_url": "hfl/stem_zh_instruction"
|
| 250 |
+
},
|
| 251 |
+
"ruozhiba_gpt4": {
|
| 252 |
+
"hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
|
| 253 |
+
},
|
| 254 |
+
"neo_sft": {
|
| 255 |
+
"hf_hub_url": "m-a-p/neo_sft_phase2",
|
| 256 |
+
"formatting": "sharegpt"
|
| 257 |
+
},
|
| 258 |
+
"magpie_pro_300k": {
|
| 259 |
+
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
|
| 260 |
+
"formatting": "sharegpt"
|
| 261 |
+
},
|
| 262 |
+
"web_instruct": {
|
| 263 |
+
"hf_hub_url": "TIGER-Lab/WebInstructSub",
|
| 264 |
+
"columns": {
|
| 265 |
+
"prompt": "question",
|
| 266 |
+
"response": "answer"
|
| 267 |
+
}
|
| 268 |
+
},
|
| 269 |
+
"llava_1k_en": {
|
| 270 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
| 271 |
+
"subset": "en",
|
| 272 |
+
"formatting": "sharegpt",
|
| 273 |
+
"columns": {
|
| 274 |
+
"messages": "messages",
|
| 275 |
+
"images": "images"
|
| 276 |
+
},
|
| 277 |
+
"tags": {
|
| 278 |
+
"role_tag": "role",
|
| 279 |
+
"content_tag": "content",
|
| 280 |
+
"user_tag": "user",
|
| 281 |
+
"assistant_tag": "assistant"
|
| 282 |
+
}
|
| 283 |
+
},
|
| 284 |
+
"llava_1k_zh": {
|
| 285 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
| 286 |
+
"subset": "zh",
|
| 287 |
+
"formatting": "sharegpt",
|
| 288 |
+
"columns": {
|
| 289 |
+
"messages": "messages",
|
| 290 |
+
"images": "images"
|
| 291 |
+
},
|
| 292 |
+
"tags": {
|
| 293 |
+
"role_tag": "role",
|
| 294 |
+
"content_tag": "content",
|
| 295 |
+
"user_tag": "user",
|
| 296 |
+
"assistant_tag": "assistant"
|
| 297 |
+
}
|
| 298 |
+
},
|
| 299 |
+
"llava_150k_en": {
|
| 300 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
| 301 |
+
"subset": "en",
|
| 302 |
+
"formatting": "sharegpt",
|
| 303 |
+
"columns": {
|
| 304 |
+
"messages": "messages",
|
| 305 |
+
"images": "images"
|
| 306 |
+
},
|
| 307 |
+
"tags": {
|
| 308 |
+
"role_tag": "role",
|
| 309 |
+
"content_tag": "content",
|
| 310 |
+
"user_tag": "user",
|
| 311 |
+
"assistant_tag": "assistant"
|
| 312 |
+
}
|
| 313 |
+
},
|
| 314 |
+
"llava_150k_zh": {
|
| 315 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
| 316 |
+
"subset": "zh",
|
| 317 |
+
"formatting": "sharegpt",
|
| 318 |
+
"columns": {
|
| 319 |
+
"messages": "messages",
|
| 320 |
+
"images": "images"
|
| 321 |
+
},
|
| 322 |
+
"tags": {
|
| 323 |
+
"role_tag": "role",
|
| 324 |
+
"content_tag": "content",
|
| 325 |
+
"user_tag": "user",
|
| 326 |
+
"assistant_tag": "assistant"
|
| 327 |
+
}
|
| 328 |
+
},
|
| 329 |
+
"mllm_pt_demo": {
|
| 330 |
+
"hf_hub_url": "BUAADreamer/mllm_pt_demo",
|
| 331 |
+
"formatting": "sharegpt",
|
| 332 |
+
"columns": {
|
| 333 |
+
"messages": "messages",
|
| 334 |
+
"images": "images"
|
| 335 |
+
},
|
| 336 |
+
"tags": {
|
| 337 |
+
"role_tag": "role",
|
| 338 |
+
"content_tag": "content",
|
| 339 |
+
"user_tag": "user",
|
| 340 |
+
"assistant_tag": "assistant"
|
| 341 |
+
}
|
| 342 |
+
},
|
| 343 |
+
"oasst_de": {
|
| 344 |
+
"hf_hub_url": "mayflowergmbh/oasst_de"
|
| 345 |
+
},
|
| 346 |
+
"dolly_15k_de": {
|
| 347 |
+
"hf_hub_url": "mayflowergmbh/dolly-15k_de"
|
| 348 |
+
},
|
| 349 |
+
"alpaca-gpt4_de": {
|
| 350 |
+
"hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
|
| 351 |
+
},
|
| 352 |
+
"openschnabeltier_de": {
|
| 353 |
+
"hf_hub_url": "mayflowergmbh/openschnabeltier_de"
|
| 354 |
+
},
|
| 355 |
+
"evol_instruct_de": {
|
| 356 |
+
"hf_hub_url": "mayflowergmbh/evol-instruct_de"
|
| 357 |
+
},
|
| 358 |
+
"dolphin_de": {
|
| 359 |
+
"hf_hub_url": "mayflowergmbh/dolphin_de"
|
| 360 |
+
},
|
| 361 |
+
"booksum_de": {
|
| 362 |
+
"hf_hub_url": "mayflowergmbh/booksum_de"
|
| 363 |
+
},
|
| 364 |
+
"airoboros_de": {
|
| 365 |
+
"hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
|
| 366 |
+
},
|
| 367 |
+
"ultrachat_de": {
|
| 368 |
+
"hf_hub_url": "mayflowergmbh/ultra-chat_de"
|
| 369 |
+
},
|
| 370 |
+
"dpo_en_demo": {
|
| 371 |
+
"file_name": "dpo_en_demo.json",
|
| 372 |
+
"ranking": true,
|
| 373 |
+
"formatting": "sharegpt",
|
| 374 |
+
"columns": {
|
| 375 |
+
"messages": "conversations",
|
| 376 |
+
"chosen": "chosen",
|
| 377 |
+
"rejected": "rejected"
|
| 378 |
+
}
|
| 379 |
+
},
|
| 380 |
+
"dpo_zh_demo": {
|
| 381 |
+
"file_name": "dpo_zh_demo.json",
|
| 382 |
+
"ranking": true,
|
| 383 |
+
"formatting": "sharegpt",
|
| 384 |
+
"columns": {
|
| 385 |
+
"messages": "conversations",
|
| 386 |
+
"chosen": "chosen",
|
| 387 |
+
"rejected": "rejected"
|
| 388 |
+
}
|
| 389 |
+
},
|
| 390 |
+
"dpo_mix_en": {
|
| 391 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
| 392 |
+
"subset": "en",
|
| 393 |
+
"ranking": true,
|
| 394 |
+
"formatting": "sharegpt",
|
| 395 |
+
"columns": {
|
| 396 |
+
"messages": "conversations",
|
| 397 |
+
"chosen": "chosen",
|
| 398 |
+
"rejected": "rejected"
|
| 399 |
+
}
|
| 400 |
+
},
|
| 401 |
+
"dpo_mix_zh": {
|
| 402 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
| 403 |
+
"subset": "zh",
|
| 404 |
+
"ranking": true,
|
| 405 |
+
"formatting": "sharegpt",
|
| 406 |
+
"columns": {
|
| 407 |
+
"messages": "conversations",
|
| 408 |
+
"chosen": "chosen",
|
| 409 |
+
"rejected": "rejected"
|
| 410 |
+
}
|
| 411 |
+
},
|
| 412 |
+
"ultrafeedback": {
|
| 413 |
+
"hf_hub_url": "llamafactory/ultrafeedback_binarized",
|
| 414 |
+
"ms_hub_url": "llamafactory/ultrafeedback_binarized",
|
| 415 |
+
"ranking": true,
|
| 416 |
+
"columns": {
|
| 417 |
+
"prompt": "instruction",
|
| 418 |
+
"chosen": "chosen",
|
| 419 |
+
"rejected": "rejected"
|
| 420 |
+
}
|
| 421 |
+
},
|
| 422 |
+
"orca_pairs": {
|
| 423 |
+
"hf_hub_url": "Intel/orca_dpo_pairs",
|
| 424 |
+
"ranking": true,
|
| 425 |
+
"columns": {
|
| 426 |
+
"prompt": "question",
|
| 427 |
+
"chosen": "chosen",
|
| 428 |
+
"rejected": "rejected",
|
| 429 |
+
"system": "system"
|
| 430 |
+
}
|
| 431 |
+
},
|
| 432 |
+
"hh_rlhf_en": {
|
| 433 |
+
"script_url": "hh_rlhf_en",
|
| 434 |
+
"ranking": true,
|
| 435 |
+
"columns": {
|
| 436 |
+
"prompt": "instruction",
|
| 437 |
+
"chosen": "chosen",
|
| 438 |
+
"rejected": "rejected",
|
| 439 |
+
"history": "history"
|
| 440 |
+
}
|
| 441 |
+
},
|
| 442 |
+
"nectar_rm": {
|
| 443 |
+
"hf_hub_url": "AstraMindAI/RLAIF-Nectar",
|
| 444 |
+
"ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
|
| 445 |
+
"ranking": true
|
| 446 |
+
},
|
| 447 |
+
"orca_dpo_de": {
|
| 448 |
+
"hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
|
| 449 |
+
"ranking": true
|
| 450 |
+
},
|
| 451 |
+
"kto_en_demo": {
|
| 452 |
+
"file_name": "kto_en_demo.json",
|
| 453 |
+
"formatting": "sharegpt",
|
| 454 |
+
"columns": {
|
| 455 |
+
"messages": "messages",
|
| 456 |
+
"kto_tag": "label"
|
| 457 |
+
},
|
| 458 |
+
"tags": {
|
| 459 |
+
"role_tag": "role",
|
| 460 |
+
"content_tag": "content",
|
| 461 |
+
"user_tag": "user",
|
| 462 |
+
"assistant_tag": "assistant"
|
| 463 |
+
}
|
| 464 |
+
},
|
| 465 |
+
"kto_mix_en": {
|
| 466 |
+
"hf_hub_url": "argilla/kto-mix-15k",
|
| 467 |
+
"formatting": "sharegpt",
|
| 468 |
+
"columns": {
|
| 469 |
+
"messages": "completion",
|
| 470 |
+
"kto_tag": "label"
|
| 471 |
+
},
|
| 472 |
+
"tags": {
|
| 473 |
+
"role_tag": "role",
|
| 474 |
+
"content_tag": "content",
|
| 475 |
+
"user_tag": "user",
|
| 476 |
+
"assistant_tag": "assistant"
|
| 477 |
+
}
|
| 478 |
+
},
|
| 479 |
+
"ultrafeedback_kto": {
|
| 480 |
+
"hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
|
| 481 |
+
"ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
|
| 482 |
+
"columns": {
|
| 483 |
+
"prompt": "prompt",
|
| 484 |
+
"response": "completion",
|
| 485 |
+
"kto_tag": "label"
|
| 486 |
+
}
|
| 487 |
+
},
|
| 488 |
+
"wiki_demo": {
|
| 489 |
+
"file_name": "wiki_demo.txt",
|
| 490 |
+
"columns": {
|
| 491 |
+
"prompt": "text"
|
| 492 |
+
}
|
| 493 |
+
},
|
| 494 |
+
"c4_demo": {
|
| 495 |
+
"file_name": "c4_demo.json",
|
| 496 |
+
"columns": {
|
| 497 |
+
"prompt": "text"
|
| 498 |
+
}
|
| 499 |
+
},
|
| 500 |
+
"refinedweb": {
|
| 501 |
+
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
| 502 |
+
"columns": {
|
| 503 |
+
"prompt": "content"
|
| 504 |
+
}
|
| 505 |
+
},
|
| 506 |
+
"redpajama_v2": {
|
| 507 |
+
"hf_hub_url": "togethercomputer/RedPajama-Data-V2",
|
| 508 |
+
"columns": {
|
| 509 |
+
"prompt": "raw_content"
|
| 510 |
+
},
|
| 511 |
+
"subset": "default"
|
| 512 |
+
},
|
| 513 |
+
"wikipedia_en": {
|
| 514 |
+
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
| 515 |
+
"ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
|
| 516 |
+
"columns": {
|
| 517 |
+
"prompt": "text"
|
| 518 |
+
}
|
| 519 |
+
},
|
| 520 |
+
"wikipedia_zh": {
|
| 521 |
+
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
| 522 |
+
"ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
|
| 523 |
+
"columns": {
|
| 524 |
+
"prompt": "completion"
|
| 525 |
+
}
|
| 526 |
+
},
|
| 527 |
+
"pile": {
|
| 528 |
+
"hf_hub_url": "monology/pile-uncopyrighted",
|
| 529 |
+
"ms_hub_url": "AI-ModelScope/pile",
|
| 530 |
+
"columns": {
|
| 531 |
+
"prompt": "text"
|
| 532 |
+
}
|
| 533 |
+
},
|
| 534 |
+
"skypile": {
|
| 535 |
+
"hf_hub_url": "Skywork/SkyPile-150B",
|
| 536 |
+
"ms_hub_url": "AI-ModelScope/SkyPile-150B",
|
| 537 |
+
"columns": {
|
| 538 |
+
"prompt": "text"
|
| 539 |
+
}
|
| 540 |
+
},
|
| 541 |
+
"fineweb": {
|
| 542 |
+
"hf_hub_url": "HuggingFaceFW/fineweb",
|
| 543 |
+
"columns": {
|
| 544 |
+
"prompt": "text"
|
| 545 |
+
}
|
| 546 |
+
},
|
| 547 |
+
"fineweb_edu": {
|
| 548 |
+
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
| 549 |
+
"columns": {
|
| 550 |
+
"prompt": "text"
|
| 551 |
+
}
|
| 552 |
+
},
|
| 553 |
+
"the_stack": {
|
| 554 |
+
"hf_hub_url": "bigcode/the-stack",
|
| 555 |
+
"ms_hub_url": "AI-ModelScope/the-stack",
|
| 556 |
+
"columns": {
|
| 557 |
+
"prompt": "content"
|
| 558 |
+
}
|
| 559 |
+
},
|
| 560 |
+
"starcoder_python": {
|
| 561 |
+
"hf_hub_url": "bigcode/starcoderdata",
|
| 562 |
+
"ms_hub_url": "AI-ModelScope/starcoderdata",
|
| 563 |
+
"columns": {
|
| 564 |
+
"prompt": "content"
|
| 565 |
+
},
|
| 566 |
+
"folder": "python"
|
| 567 |
+
}
|
| 568 |
+
}
|
notebooks/01_Finetune-Llama3-with-LLaMA-Factory.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[{"file_id":"1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9","timestamp":1719737717483}],"gpuType":"T4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# Finetune Llama-3 with LLaMA Factory\n","\n","Please use a **free** Tesla T4 Colab GPU to run this!\n","\n","Project homepage: https://github.com/hiyouga/LLaMA-Factory"],"metadata":{"id":"1oHFCsV0z-Jw"}},{"cell_type":"markdown","source":["## Install Dependencies"],"metadata":{"id":"lr7rB3szzhtx"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"giM74oK1rRIH"},"outputs":[],"source":["%cd /content/\n","%rm -rf LLaMA-Factory\n","!git clone https://github.com/hiyouga/LLaMA-Factory.git\n","%cd LLaMA-Factory\n","%ls\n","!pip install -e .[torch,bitsandbytes]"]},{"cell_type":"markdown","source":["### Check GPU environment"],"metadata":{"id":"H9RXn_YQnn9f"}},{"cell_type":"code","source":["import torch\n","try:\n"," assert torch.cuda.is_available() is True\n","except AssertionError:\n"," print(\"Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6\")"],"metadata":{"id":"ZkN-ktlsnrdU"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Update Identity Dataset"],"metadata":{"id":"TeYs5Lz-QJYk"}},{"cell_type":"code","source":["import json\n","\n","%cd /content/LLaMA-Factory/\n","\n","NAME = \"Llama-3\"\n","AUTHOR = \"LLaMA Factory\"\n","\n","with open(\"data/identity.json\", \"r\", encoding=\"utf-8\") as f:\n"," dataset = json.load(f)\n","\n","for sample in dataset:\n"," sample[\"output\"] = sample[\"output\"].replace(\"{{\"+ \"name\" + \"}}\", NAME).replace(\"{{\"+ \"author\" + \"}}\", AUTHOR)\n","\n","with open(\"data/identity.json\", \"w\", encoding=\"utf-8\") as f:\n"," json.dump(dataset, f, indent=2, 
ensure_ascii=False)"],"metadata":{"id":"ap_fvMBsQHJc"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Fine-tune model via LLaMA Board"],"metadata":{"id":"2QiXcvdzzW3Y"}},{"cell_type":"code","source":["%cd /content/LLaMA-Factory/\n","!GRADIO_SHARE=1 llamafactory-cli webui"],"metadata":{"id":"YLsdS6V5yUMy"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Fine-tune model via Command Line\n","\n","It takes ~30min for training."],"metadata":{"id":"rgR3UFhB0Ifq"}},{"cell_type":"code","source":["import json\n","\n","args = dict(\n"," stage=\"sft\", # do supervised fine-tuning\n"," do_train=True,\n"," model_name_or_path=\"unsloth/llama-3-8b-Instruct-bnb-4bit\", # use bnb-4bit-quantized Llama-3-8B-Instruct model\n"," dataset=\"identity,alpaca_en_demo\", # use alpaca and identity datasets\n"," template=\"llama3\", # use llama3 prompt template\n"," finetuning_type=\"lora\", # use LoRA adapters to save memory\n"," lora_target=\"all\", # attach LoRA adapters to all linear layers\n"," output_dir=\"llama3_lora\", # the path to save LoRA adapters\n"," per_device_train_batch_size=2, # the batch size\n"," gradient_accumulation_steps=4, # the gradient accumulation steps\n"," lr_scheduler_type=\"cosine\", # use cosine learning rate scheduler\n"," logging_steps=10, # log every 10 steps\n"," warmup_ratio=0.1, # use warmup scheduler\n"," save_steps=1000, # save checkpoint every 1000 steps\n"," learning_rate=5e-5, # the learning rate\n"," num_train_epochs=3.0, # the epochs of training\n"," max_samples=500, # use 500 examples in each dataset\n"," max_grad_norm=1.0, # clip gradient norm to 1.0\n"," quantization_bit=4, # use 4-bit QLoRA\n"," loraplus_lr_ratio=16.0, # use LoRA+ algorithm with lambda=16.0\n"," fp16=True, # use float16 mixed precision training\n",")\n","\n","json.dump(args, open(\"train_llama3.json\", \"w\", encoding=\"utf-8\"), indent=2)\n","\n","%cd /content/LLaMA-Factory/\n","\n","!llamafactory-cli train 
train_llama3.json"],"metadata":{"id":"CS0Qk5OR0i4Q"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Infer the fine-tuned model"],"metadata":{"id":"PVNaC-xS5N40"}},{"cell_type":"code","source":["from llamafactory.chat import ChatModel\n","from llamafactory.extras.misc import torch_gc\n","\n","%cd /content/LLaMA-Factory/\n","\n","args = dict(\n"," model_name_or_path=\"unsloth/llama-3-8b-Instruct-bnb-4bit\", # use bnb-4bit-quantized Llama-3-8B-Instruct model\n"," adapter_name_or_path=\"llama3_lora\", # load the saved LoRA adapters\n"," template=\"llama3\", # same to the one in training\n"," finetuning_type=\"lora\", # same to the one in training\n"," quantization_bit=4, # load 4-bit quantized model\n",")\n","chat_model = ChatModel(args)\n","\n","messages = []\n","print(\"Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.\")\n","while True:\n"," query = input(\"\\nUser: \")\n"," if query.strip() == \"exit\":\n"," break\n"," if query.strip() == \"clear\":\n"," messages = []\n"," torch_gc()\n"," print(\"History has been removed.\")\n"," continue\n","\n"," messages.append({\"role\": \"user\", \"content\": query})\n"," print(\"Assistant: \", end=\"\", flush=True)\n","\n"," response = \"\"\n"," for new_text in chat_model.stream_chat(messages):\n"," print(new_text, end=\"\", flush=True)\n"," response += new_text\n"," print()\n"," messages.append({\"role\": \"assistant\", \"content\": response})\n","\n","torch_gc()"],"metadata":{"id":"oh8H9A_25SF9"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Merge the LoRA adapter and optionally upload model\n","\n","NOTE: the Colab free version has merely 12GB RAM, where merging LoRA of a 8B model needs at least 18GB RAM, thus you **cannot** perform it in the free version."],"metadata":{"id":"kTESHaFvbNTr"}},{"cell_type":"code","source":["!huggingface-cli 
login"],"metadata":{"id":"mcNcHcA4bf4Z"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import json\n","\n","args = dict(\n"," model_name_or_path=\"meta-llama/Meta-Llama-3-8B-Instruct\", # use official non-quantized Llama-3-8B-Instruct model\n"," adapter_name_or_path=\"llama3_lora\", # load the saved LoRA adapters\n"," template=\"llama3\", # same to the one in training\n"," finetuning_type=\"lora\", # same to the one in training\n"," export_dir=\"llama3_lora_merged\", # the path to save the merged model\n"," export_size=2, # the file shard size (in GB) of the merged model\n"," export_device=\"cpu\", # the device used in export, can be chosen from `cpu` and `cuda`\n"," #export_hub_model_id=\"your_id/your_model\", # the Hugging Face hub ID to upload model\n",")\n","\n","json.dump(args, open(\"merge_llama3.json\", \"w\", encoding=\"utf-8\"), indent=2)\n","\n","%cd /content/LLaMA-Factory/\n","\n","!llamafactory-cli export merge_llama3.json"],"metadata":{"id":"IMojogHbaOZF"},"execution_count":null,"outputs":[]}]}
|
novel-translation/00_Data_Analysis.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/01_Qwen2-0.5B_Unsloth.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/02_Qwen2-1.5B_Unsloth.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/03_Qwen2-0.5B_1.5B-4bit.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/04_tune-small-no-flash-attn.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/05_tune-small-with-flash-attn.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/06_tune-small-py3.11.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/07_tune-lf-py3.11.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
novel-translation/08_eval-lf-py3.11.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
CHANGED
|
@@ -10,5 +10,5 @@ scikit-learn==1.5.0
|
|
| 10 |
jupyter
|
| 11 |
ipywidgets
|
| 12 |
packaging
|
| 13 |
-
triton
|
| 14 |
-
xformers
|
|
|
|
| 10 |
jupyter
|
| 11 |
ipywidgets
|
| 12 |
packaging
|
| 13 |
+
# triton
|
| 14 |
+
# xformers
|
results/mac-results-colab.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/mac-results-colab.gsheet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fd488430f65a2b959d746b81e485a0b596f8e32537979904416dfc021b1181d
|
| 3 |
+
size 179
|
results/mac-results_lf.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5acc087808de5df6839cbf7b170094c6e63445aab4bea15e4be9564b905eb51
|
| 3 |
+
size 3236072
|
scripts/tune-lf.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
|
| 3 |
+
BASEDIR=$(dirname "$0")
|
| 4 |
+
# Run from the llama-factory directory next to scripts/. Quote the path so a
# checkout location containing spaces still works, and abort if the cd fails
# so training is never launched from the wrong working directory.
cd "$BASEDIR/../llama-factory" || exit 1
|
| 5 |
+
echo Current Directory:
|
| 6 |
+
pwd
|
| 7 |
+
|
| 8 |
+
# $1 is the training config file handed to `llamafactory-cli train`.
# ${1:+"$1"} passes it as exactly one argument (no word splitting or globbing)
# and still expands to nothing when no argument was given, as before.
llamafactory-cli train ${1:+"$1"}
|