Upload 7 files
- added_tokens.json +28 -0
- all_results.json +12 -0
- chat_template.jinja +4 -0
- config.json +68 -0
- eval_results.json +7 -0
- generation_config.json +13 -0
- merges.txt +0 -0
    	
added_tokens.json ADDED
@@ -0,0 +1,28 @@
+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
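These entries register the special control tokens (chat turn markers, tool calls, thinking spans, FIM, and vision placeholders) with their fixed ids in the tokenizer. A minimal sketch of checking the mapping with transformers, where "your-org/your-model" is a hypothetical repo id standing in for this repository:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/your-model")  # hypothetical repo id

# Ids taken directly from added_tokens.json above.
assert tok.convert_tokens_to_ids("<think>") == 151667
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645

# Added special tokens are matched before BPE, so they survive tokenization intact.
print(tok.tokenize("<think>hello</think>"))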
    	
all_results.json ADDED
@@ -0,0 +1,12 @@
+{
+    "epoch": 3.0,
+    "eval_loss": 0.04384339973330498,
+    "eval_runtime": 63.6922,
+    "eval_samples_per_second": 15.701,
+    "eval_steps_per_second": 3.925,
+    "total_flos": 5776319864832.0,
+    "train_loss": 0.16538718646019698,
+    "train_runtime": 730.2133,
+    "train_samples_per_second": 4.108,
+    "train_steps_per_second": 0.514
+}
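If eval_loss here is the usual mean per-token cross-entropy (an assumption; the Hugging Face Trainer reports it this way for causal LM fine-tunes), the corresponding perplexity is exp(eval_loss):

import math

eval_loss = 0.04384339973330498  # from all_results.json
print(math.exp(eval_loss))       # ~1.045, i.e. near-deterministic predictions on the eval set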
    	
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
+{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ 'System: ' + system_message + '<|im_end|>' + '
+' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '<|im_end|>' + '
+Assistant:' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '
+' }}{% endif %}{% endfor %}
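The template renders a "System: / Human: / Assistant:" transcript with each turn terminated by <|im_end|>. A sketch of how it renders through transformers (assuming a version recent enough to read chat_template.jinja, and the same hypothetical repo id):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/your-model")  # hypothetical repo id
messages = [
    {"role": "system", "content": "You are helpful."},
    {"role": "user", "content": "Hi!"},
]
print(tok.apply_chat_template(messages, tokenize=False))
# Expected, given the template above:
# System: You are helpful.<|im_end|>
# Human: Hi!<|im_end|>
# Assistant:

Note that the template itself appends the trailing "Assistant:" after a user turn, so no separate generation prompt is needed.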
    	
config.json ADDED
@@ -0,0 +1,68 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
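The config describes a 36-layer Qwen3 causal LM with hidden size 2560, 32 query heads over 8 KV heads (grouped-query attention), full attention in every layer, a 262144-token context, and tied input/output embeddings, stored in bfloat16. A minimal loading sketch, again with a hypothetical repo id:

import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo = "your-org/your-model"  # hypothetical repo id
cfg = AutoConfig.from_pretrained(repo)
assert cfg.num_hidden_layers == 36 and cfg.hidden_size == 2560

model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16)
# config.json ships with "use_cache": false (common for training checkpoints);
# re-enable the KV cache for faster autoregressive decoding.
model.config.use_cache = True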
    	
eval_results.json ADDED
@@ -0,0 +1,7 @@
+{
+    "epoch": 3.0,
+    "eval_loss": 0.04384339973330498,
+    "eval_runtime": 63.6922,
+    "eval_samples_per_second": 15.701,
+    "eval_steps_per_second": 3.925
+}
    	
generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.55.0"
+}
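generate() picks these sampling defaults up automatically from the checkpoint; spelling them out explicitly for clarity, as a sketch with the same hypothetical repo id:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "your-org/your-model"  # hypothetical repo id
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

prompt = tok.apply_chat_template([{"role": "user", "content": "Hi!"}], tokenize=False)
inputs = tok(prompt, return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=64,
    # Mirrors generation_config.json: sample with temperature 0.7, top_k 20, top_p 0.8.
    do_sample=True,
    temperature=0.7,
    top_k=20,
    top_p=0.8,
)
print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))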
    	
merges.txt ADDED
The diff for this file is too large to render. See raw diff.

