Upload folder using huggingface_hub
- .gitattributes +1 -0
- added_tokens.json +1011 -0
- chat_template.jinja +24 -0
- config.json +29 -0
- configuration_ernie4_5.py +127 -0
- generation_config.json +11 -0
- model.safetensors +3 -0
- model.safetensors.index.json +172 -0
- modeling_ernie4_5.py +1068 -0
- special_tokens_map.json +1062 -0
- tokenization_ernie4_5.py +373 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
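The commit message above is the default produced by the Hub client's folder-upload helper. A minimal sketch of how such a commit is created, assuming a hypothetical repo id and local path (neither is taken from this commit):

```python
from huggingface_hub import upload_folder

# Uploads every file in the directory as a single commit.
# Files matched by the repo's .gitattributes LFS patterns
# (see the diff below) are stored via Git LFS.
# "your-org/ernie4_5-model" and "./ernie4_5" are placeholders.
upload_folder(
    repo_id="your-org/ernie4_5-model",
    folder_path="./ernie4_5",
    commit_message="Upload folder using huggingface_hub",
)
```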
    	
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
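The one-line change above routes tokenizer.json through Git LFS, which is why it shows up in the file list as a 3-line pointer diff (+3 -0) rather than full content, just like tokenizer.model and model.safetensors. The added_tokens.json entries diffed next are consumed by the tokenizer at load time; a minimal sketch, assuming a hypothetical local checkout of this repo (the path is a placeholder, and trust_remote_code is needed because the repo ships its own tokenization_ernie4_5.py):

```python
from transformers import AutoTokenizer

# "./ernie4_5" is a placeholder for a local checkout of this repository.
tok = AutoTokenizer.from_pretrained("./ernie4_5", trust_remote_code=True)

# Entries from added_tokens.json resolve as ordinary vocabulary lookups:
print(tok.convert_tokens_to_ids("<|IMAGE_PLACEHOLDER|>"))  # 100295 per the diff
print(tok.convert_tokens_to_ids("<|LOC_0|>"))              # 100297 per the diff
```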
    	
added_tokens.json ADDED
@@ -0,0 +1,1011 @@
+{
+  "<|AUDIO_PLACEHOLDER|>": 100296,
+  "<|CROP_COL_SEP|>": 101301,
+  "<|CROP_ROW_SEP|>": 101302,
+  "<|IMAGE_PLACEHOLDER|>": 100295,
+  "<|IMAGE_SEP|>": 101303,
+  "<|LOC_0|>": 100297,
+  "<|LOC_1000|>": 101297,
+  "<|LOC_100|>": 100397,
+  "<|LOC_101|>": 100398,
+  "<|LOC_102|>": 100399,
+  "<|LOC_103|>": 100400,
+  "<|LOC_104|>": 100401,
+  "<|LOC_105|>": 100402,
+  "<|LOC_106|>": 100403,
+  "<|LOC_107|>": 100404,
+  "<|LOC_108|>": 100405,
+  "<|LOC_109|>": 100406,
+  "<|LOC_10|>": 100307,
+  "<|LOC_110|>": 100407,
+  "<|LOC_111|>": 100408,
+  "<|LOC_112|>": 100409,
+  "<|LOC_113|>": 100410,
+  "<|LOC_114|>": 100411,
+  "<|LOC_115|>": 100412,
+  "<|LOC_116|>": 100413,
+  "<|LOC_117|>": 100414,
+  "<|LOC_118|>": 100415,
+  "<|LOC_119|>": 100416,
+  "<|LOC_11|>": 100308,
+  "<|LOC_120|>": 100417,
+  "<|LOC_121|>": 100418,
+  "<|LOC_122|>": 100419,
+  "<|LOC_123|>": 100420,
+  "<|LOC_124|>": 100421,
+  "<|LOC_125|>": 100422,
+  "<|LOC_126|>": 100423,
+  "<|LOC_127|>": 100424,
+  "<|LOC_128|>": 100425,
+  "<|LOC_129|>": 100426,
+  "<|LOC_12|>": 100309,
+  "<|LOC_130|>": 100427,
+  "<|LOC_131|>": 100428,
+  "<|LOC_132|>": 100429,
+  "<|LOC_133|>": 100430,
+  "<|LOC_134|>": 100431,
+  "<|LOC_135|>": 100432,
+  "<|LOC_136|>": 100433,
+  "<|LOC_137|>": 100434,
+  "<|LOC_138|>": 100435,
+  "<|LOC_139|>": 100436,
+  "<|LOC_13|>": 100310,
+  "<|LOC_140|>": 100437,
+  "<|LOC_141|>": 100438,
+  "<|LOC_142|>": 100439,
+  "<|LOC_143|>": 100440,
+  "<|LOC_144|>": 100441,
+  "<|LOC_145|>": 100442,
+  "<|LOC_146|>": 100443,
+  "<|LOC_147|>": 100444,
+  "<|LOC_148|>": 100445,
+  "<|LOC_149|>": 100446,
+  "<|LOC_14|>": 100311,
+  "<|LOC_150|>": 100447,
+  "<|LOC_151|>": 100448,
+  "<|LOC_152|>": 100449,
+  "<|LOC_153|>": 100450,
+  "<|LOC_154|>": 100451,
+  "<|LOC_155|>": 100452,
+  "<|LOC_156|>": 100453,
+  "<|LOC_157|>": 100454,
+  "<|LOC_158|>": 100455,
+  "<|LOC_159|>": 100456,
+  "<|LOC_15|>": 100312,
+  "<|LOC_160|>": 100457,
+  "<|LOC_161|>": 100458,
+  "<|LOC_162|>": 100459,
+  "<|LOC_163|>": 100460,
+  "<|LOC_164|>": 100461,
+  "<|LOC_165|>": 100462,
+  "<|LOC_166|>": 100463,
+  "<|LOC_167|>": 100464,
+  "<|LOC_168|>": 100465,
+  "<|LOC_169|>": 100466,
+  "<|LOC_16|>": 100313,
+  "<|LOC_170|>": 100467,
+  "<|LOC_171|>": 100468,
+  "<|LOC_172|>": 100469,
+  "<|LOC_173|>": 100470,
+  "<|LOC_174|>": 100471,
+  "<|LOC_175|>": 100472,
+  "<|LOC_176|>": 100473,
+  "<|LOC_177|>": 100474,
+  "<|LOC_178|>": 100475,
+  "<|LOC_179|>": 100476,
+  "<|LOC_17|>": 100314,
+  "<|LOC_180|>": 100477,
+  "<|LOC_181|>": 100478,
+  "<|LOC_182|>": 100479,
+  "<|LOC_183|>": 100480,
+  "<|LOC_184|>": 100481,
+  "<|LOC_185|>": 100482,
+  "<|LOC_186|>": 100483,
+  "<|LOC_187|>": 100484,
+  "<|LOC_188|>": 100485,
+  "<|LOC_189|>": 100486,
+  "<|LOC_18|>": 100315,
+  "<|LOC_190|>": 100487,
+  "<|LOC_191|>": 100488,
+  "<|LOC_192|>": 100489,
+  "<|LOC_193|>": 100490,
+  "<|LOC_194|>": 100491,
+  "<|LOC_195|>": 100492,
+  "<|LOC_196|>": 100493,
+  "<|LOC_197|>": 100494,
+  "<|LOC_198|>": 100495,
+  "<|LOC_199|>": 100496,
+  "<|LOC_19|>": 100316,
+  "<|LOC_1|>": 100298,
+  "<|LOC_200|>": 100497,
+  "<|LOC_201|>": 100498,
+  "<|LOC_202|>": 100499,
+  "<|LOC_203|>": 100500,
+  "<|LOC_204|>": 100501,
+  "<|LOC_205|>": 100502,
+  "<|LOC_206|>": 100503,
+  "<|LOC_207|>": 100504,
+  "<|LOC_208|>": 100505,
+  "<|LOC_209|>": 100506,
+  "<|LOC_20|>": 100317,
+  "<|LOC_210|>": 100507,
+  "<|LOC_211|>": 100508,
+  "<|LOC_212|>": 100509,
+  "<|LOC_213|>": 100510,
+  "<|LOC_214|>": 100511,
+  "<|LOC_215|>": 100512,
+  "<|LOC_216|>": 100513,
+  "<|LOC_217|>": 100514,
+  "<|LOC_218|>": 100515,
+  "<|LOC_219|>": 100516,
+  "<|LOC_21|>": 100318,
+  "<|LOC_220|>": 100517,
+  "<|LOC_221|>": 100518,
+  "<|LOC_222|>": 100519,
+  "<|LOC_223|>": 100520,
+  "<|LOC_224|>": 100521,
+  "<|LOC_225|>": 100522,
+  "<|LOC_226|>": 100523,
+  "<|LOC_227|>": 100524,
+  "<|LOC_228|>": 100525,
+  "<|LOC_229|>": 100526,
+  "<|LOC_22|>": 100319,
+  "<|LOC_230|>": 100527,
+  "<|LOC_231|>": 100528,
+  "<|LOC_232|>": 100529,
+  "<|LOC_233|>": 100530,
+  "<|LOC_234|>": 100531,
+  "<|LOC_235|>": 100532,
+  "<|LOC_236|>": 100533,
+  "<|LOC_237|>": 100534,
+  "<|LOC_238|>": 100535,
+  "<|LOC_239|>": 100536,
+  "<|LOC_23|>": 100320,
+  "<|LOC_240|>": 100537,
+  "<|LOC_241|>": 100538,
+  "<|LOC_242|>": 100539,
+  "<|LOC_243|>": 100540,
+  "<|LOC_244|>": 100541,
+  "<|LOC_245|>": 100542,
+  "<|LOC_246|>": 100543,
+  "<|LOC_247|>": 100544,
+  "<|LOC_248|>": 100545,
+  "<|LOC_249|>": 100546,
+  "<|LOC_24|>": 100321,
+  "<|LOC_250|>": 100547,
+  "<|LOC_251|>": 100548,
+  "<|LOC_252|>": 100549,
+  "<|LOC_253|>": 100550,
+  "<|LOC_254|>": 100551,
+  "<|LOC_255|>": 100552,
+  "<|LOC_256|>": 100553,
+  "<|LOC_257|>": 100554,
+  "<|LOC_258|>": 100555,
+  "<|LOC_259|>": 100556,
+  "<|LOC_25|>": 100322,
+  "<|LOC_260|>": 100557,
+  "<|LOC_261|>": 100558,
+  "<|LOC_262|>": 100559,
+  "<|LOC_263|>": 100560,
+  "<|LOC_264|>": 100561,
+  "<|LOC_265|>": 100562,
+  "<|LOC_266|>": 100563,
+  "<|LOC_267|>": 100564,
+  "<|LOC_268|>": 100565,
+  "<|LOC_269|>": 100566,
+  "<|LOC_26|>": 100323,
+  "<|LOC_270|>": 100567,
+  "<|LOC_271|>": 100568,
+  "<|LOC_272|>": 100569,
+  "<|LOC_273|>": 100570,
+  "<|LOC_274|>": 100571,
+  "<|LOC_275|>": 100572,
+  "<|LOC_276|>": 100573,
+  "<|LOC_277|>": 100574,
+  "<|LOC_278|>": 100575,
+  "<|LOC_279|>": 100576,
+  "<|LOC_27|>": 100324,
+  "<|LOC_280|>": 100577,
+  "<|LOC_281|>": 100578,
+  "<|LOC_282|>": 100579,
+  "<|LOC_283|>": 100580,
+  "<|LOC_284|>": 100581,
+  "<|LOC_285|>": 100582,
+  "<|LOC_286|>": 100583,
+  "<|LOC_287|>": 100584,
+  "<|LOC_288|>": 100585,
+  "<|LOC_289|>": 100586,
+  "<|LOC_28|>": 100325,
+  "<|LOC_290|>": 100587,
+  "<|LOC_291|>": 100588,
+  "<|LOC_292|>": 100589,
+  "<|LOC_293|>": 100590,
+  "<|LOC_294|>": 100591,
+  "<|LOC_295|>": 100592,
+  "<|LOC_296|>": 100593,
+  "<|LOC_297|>": 100594,
+  "<|LOC_298|>": 100595,
+  "<|LOC_299|>": 100596,
+  "<|LOC_29|>": 100326,
+  "<|LOC_2|>": 100299,
+  "<|LOC_300|>": 100597,
+  "<|LOC_301|>": 100598,
+  "<|LOC_302|>": 100599,
+  "<|LOC_303|>": 100600,
+  "<|LOC_304|>": 100601,
+  "<|LOC_305|>": 100602,
+  "<|LOC_306|>": 100603,
+  "<|LOC_307|>": 100604,
+  "<|LOC_308|>": 100605,
+  "<|LOC_309|>": 100606,
+  "<|LOC_30|>": 100327,
+  "<|LOC_310|>": 100607,
+  "<|LOC_311|>": 100608,
+  "<|LOC_312|>": 100609,
+  "<|LOC_313|>": 100610,
+  "<|LOC_314|>": 100611,
+  "<|LOC_315|>": 100612,
+  "<|LOC_316|>": 100613,
+  "<|LOC_317|>": 100614,
+  "<|LOC_318|>": 100615,
+  "<|LOC_319|>": 100616,
+  "<|LOC_31|>": 100328,
+  "<|LOC_320|>": 100617,
+  "<|LOC_321|>": 100618,
+  "<|LOC_322|>": 100619,
+  "<|LOC_323|>": 100620,
+  "<|LOC_324|>": 100621,
+  "<|LOC_325|>": 100622,
+  "<|LOC_326|>": 100623,
+  "<|LOC_327|>": 100624,
+  "<|LOC_328|>": 100625,
+  "<|LOC_329|>": 100626,
+  "<|LOC_32|>": 100329,
+  "<|LOC_330|>": 100627,
+  "<|LOC_331|>": 100628,
+  "<|LOC_332|>": 100629,
+  "<|LOC_333|>": 100630,
+  "<|LOC_334|>": 100631,
+  "<|LOC_335|>": 100632,
+  "<|LOC_336|>": 100633,
+  "<|LOC_337|>": 100634,
+  "<|LOC_338|>": 100635,
+  "<|LOC_339|>": 100636,
+  "<|LOC_33|>": 100330,
+  "<|LOC_340|>": 100637,
+  "<|LOC_341|>": 100638,
+  "<|LOC_342|>": 100639,
+  "<|LOC_343|>": 100640,
+  "<|LOC_344|>": 100641,
+  "<|LOC_345|>": 100642,
+  "<|LOC_346|>": 100643,
+  "<|LOC_347|>": 100644,
+  "<|LOC_348|>": 100645,
+  "<|LOC_349|>": 100646,
+  "<|LOC_34|>": 100331,
+  "<|LOC_350|>": 100647,
+  "<|LOC_351|>": 100648,
+  "<|LOC_352|>": 100649,
+  "<|LOC_353|>": 100650,
+  "<|LOC_354|>": 100651,
+  "<|LOC_355|>": 100652,
+  "<|LOC_356|>": 100653,
+  "<|LOC_357|>": 100654,
+  "<|LOC_358|>": 100655,
+  "<|LOC_359|>": 100656,
+  "<|LOC_35|>": 100332,
+  "<|LOC_360|>": 100657,
+  "<|LOC_361|>": 100658,
+  "<|LOC_362|>": 100659,
+  "<|LOC_363|>": 100660,
+  "<|LOC_364|>": 100661,
+  "<|LOC_365|>": 100662,
+  "<|LOC_366|>": 100663,
+  "<|LOC_367|>": 100664,
+  "<|LOC_368|>": 100665,
+  "<|LOC_369|>": 100666,
+  "<|LOC_36|>": 100333,
+  "<|LOC_370|>": 100667,
+  "<|LOC_371|>": 100668,
+  "<|LOC_372|>": 100669,
+  "<|LOC_373|>": 100670,
+  "<|LOC_374|>": 100671,
+  "<|LOC_375|>": 100672,
+  "<|LOC_376|>": 100673,
+  "<|LOC_377|>": 100674,
+  "<|LOC_378|>": 100675,
+  "<|LOC_379|>": 100676,
+  "<|LOC_37|>": 100334,
+  "<|LOC_380|>": 100677,
+  "<|LOC_381|>": 100678,
+  "<|LOC_382|>": 100679,
+  "<|LOC_383|>": 100680,
+  "<|LOC_384|>": 100681,
+  "<|LOC_385|>": 100682,
+  "<|LOC_386|>": 100683,
+  "<|LOC_387|>": 100684,
+  "<|LOC_388|>": 100685,
+  "<|LOC_389|>": 100686,
+  "<|LOC_38|>": 100335,
+  "<|LOC_390|>": 100687,
+  "<|LOC_391|>": 100688,
+  "<|LOC_392|>": 100689,
+  "<|LOC_393|>": 100690,
+  "<|LOC_394|>": 100691,
+  "<|LOC_395|>": 100692,
+  "<|LOC_396|>": 100693,
+  "<|LOC_397|>": 100694,
+  "<|LOC_398|>": 100695,
+  "<|LOC_399|>": 100696,
+  "<|LOC_39|>": 100336,
+  "<|LOC_3|>": 100300,
+  "<|LOC_400|>": 100697,
+  "<|LOC_401|>": 100698,
+  "<|LOC_402|>": 100699,
+  "<|LOC_403|>": 100700,
+  "<|LOC_404|>": 100701,
+  "<|LOC_405|>": 100702,
+  "<|LOC_406|>": 100703,
+  "<|LOC_407|>": 100704,
+  "<|LOC_408|>": 100705,
+  "<|LOC_409|>": 100706,
+  "<|LOC_40|>": 100337,
+  "<|LOC_410|>": 100707,
+  "<|LOC_411|>": 100708,
+  "<|LOC_412|>": 100709,
+  "<|LOC_413|>": 100710,
+  "<|LOC_414|>": 100711,
+  "<|LOC_415|>": 100712,
+  "<|LOC_416|>": 100713,
+  "<|LOC_417|>": 100714,
+  "<|LOC_418|>": 100715,
+  "<|LOC_419|>": 100716,
+  "<|LOC_41|>": 100338,
+  "<|LOC_420|>": 100717,
+  "<|LOC_421|>": 100718,
+  "<|LOC_422|>": 100719,
+  "<|LOC_423|>": 100720,
+  "<|LOC_424|>": 100721,
+  "<|LOC_425|>": 100722,
+  "<|LOC_426|>": 100723,
+  "<|LOC_427|>": 100724,
+  "<|LOC_428|>": 100725,
+  "<|LOC_429|>": 100726,
+  "<|LOC_42|>": 100339,
+  "<|LOC_430|>": 100727,
+  "<|LOC_431|>": 100728,
+  "<|LOC_432|>": 100729,
+  "<|LOC_433|>": 100730,
+  "<|LOC_434|>": 100731,
+  "<|LOC_435|>": 100732,
+  "<|LOC_436|>": 100733,
+  "<|LOC_437|>": 100734,
+  "<|LOC_438|>": 100735,
+  "<|LOC_439|>": 100736,
+  "<|LOC_43|>": 100340,
+  "<|LOC_440|>": 100737,
+  "<|LOC_441|>": 100738,
+  "<|LOC_442|>": 100739,
+  "<|LOC_443|>": 100740,
+  "<|LOC_444|>": 100741,
+  "<|LOC_445|>": 100742,
+  "<|LOC_446|>": 100743,
+  "<|LOC_447|>": 100744,
+  "<|LOC_448|>": 100745,
+  "<|LOC_449|>": 100746,
+  "<|LOC_44|>": 100341,
+  "<|LOC_450|>": 100747,
+  "<|LOC_451|>": 100748,
+  "<|LOC_452|>": 100749,
+  "<|LOC_453|>": 100750,
+  "<|LOC_454|>": 100751,
+  "<|LOC_455|>": 100752,
+  "<|LOC_456|>": 100753,
+  "<|LOC_457|>": 100754,
+  "<|LOC_458|>": 100755,
+  "<|LOC_459|>": 100756,
+  "<|LOC_45|>": 100342,
+  "<|LOC_460|>": 100757,
+  "<|LOC_461|>": 100758,
+  "<|LOC_462|>": 100759,
+  "<|LOC_463|>": 100760,
+  "<|LOC_464|>": 100761,
+  "<|LOC_465|>": 100762,
+  "<|LOC_466|>": 100763,
+  "<|LOC_467|>": 100764,
+  "<|LOC_468|>": 100765,
+  "<|LOC_469|>": 100766,
+  "<|LOC_46|>": 100343,
+  "<|LOC_470|>": 100767,
+  "<|LOC_471|>": 100768,
+  "<|LOC_472|>": 100769,
+  "<|LOC_473|>": 100770,
+  "<|LOC_474|>": 100771,
+  "<|LOC_475|>": 100772,
+  "<|LOC_476|>": 100773,
+  "<|LOC_477|>": 100774,
+  "<|LOC_478|>": 100775,
+  "<|LOC_479|>": 100776,
+  "<|LOC_47|>": 100344,
+  "<|LOC_480|>": 100777,
+  "<|LOC_481|>": 100778,
+  "<|LOC_482|>": 100779,
+  "<|LOC_483|>": 100780,
+  "<|LOC_484|>": 100781,
+  "<|LOC_485|>": 100782,
+  "<|LOC_486|>": 100783,
+  "<|LOC_487|>": 100784,
+  "<|LOC_488|>": 100785,
+  "<|LOC_489|>": 100786,
+  "<|LOC_48|>": 100345,
+  "<|LOC_490|>": 100787,
+  "<|LOC_491|>": 100788,
+  "<|LOC_492|>": 100789,
+  "<|LOC_493|>": 100790,
+  "<|LOC_494|>": 100791,
+  "<|LOC_495|>": 100792,
+  "<|LOC_496|>": 100793,
+  "<|LOC_497|>": 100794,
+  "<|LOC_498|>": 100795,
+  "<|LOC_499|>": 100796,
+  "<|LOC_49|>": 100346,
+  "<|LOC_4|>": 100301,
+  "<|LOC_500|>": 100797,
+  "<|LOC_501|>": 100798,
+  "<|LOC_502|>": 100799,
+  "<|LOC_503|>": 100800,
+  "<|LOC_504|>": 100801,
+  "<|LOC_505|>": 100802,
+  "<|LOC_506|>": 100803,
+  "<|LOC_507|>": 100804,
+  "<|LOC_508|>": 100805,
+  "<|LOC_509|>": 100806,
+  "<|LOC_50|>": 100347,
+  "<|LOC_510|>": 100807,
+  "<|LOC_511|>": 100808,
+  "<|LOC_512|>": 100809,
+  "<|LOC_513|>": 100810,
+  "<|LOC_514|>": 100811,
+  "<|LOC_515|>": 100812,
+  "<|LOC_516|>": 100813,
+  "<|LOC_517|>": 100814,
+  "<|LOC_518|>": 100815,
+  "<|LOC_519|>": 100816,
+  "<|LOC_51|>": 100348,
+  "<|LOC_520|>": 100817,
+  "<|LOC_521|>": 100818,
+  "<|LOC_522|>": 100819,
+  "<|LOC_523|>": 100820,
+  "<|LOC_524|>": 100821,
+  "<|LOC_525|>": 100822,
+  "<|LOC_526|>": 100823,
+  "<|LOC_527|>": 100824,
+  "<|LOC_528|>": 100825,
+  "<|LOC_529|>": 100826,
+  "<|LOC_52|>": 100349,
+  "<|LOC_530|>": 100827,
+  "<|LOC_531|>": 100828,
+  "<|LOC_532|>": 100829,
+  "<|LOC_533|>": 100830,
+  "<|LOC_534|>": 100831,
+  "<|LOC_535|>": 100832,
+  "<|LOC_536|>": 100833,
+  "<|LOC_537|>": 100834,
+  "<|LOC_538|>": 100835,
+  "<|LOC_539|>": 100836,
+  "<|LOC_53|>": 100350,
+  "<|LOC_540|>": 100837,
+  "<|LOC_541|>": 100838,
+  "<|LOC_542|>": 100839,
+  "<|LOC_543|>": 100840,
+  "<|LOC_544|>": 100841,
+  "<|LOC_545|>": 100842,
+  "<|LOC_546|>": 100843,
+  "<|LOC_547|>": 100844,
+  "<|LOC_548|>": 100845,
+  "<|LOC_549|>": 100846,
+  "<|LOC_54|>": 100351,
+  "<|LOC_550|>": 100847,
+  "<|LOC_551|>": 100848,
+  "<|LOC_552|>": 100849,
+  "<|LOC_553|>": 100850,
+  "<|LOC_554|>": 100851,
+  "<|LOC_555|>": 100852,
+  "<|LOC_556|>": 100853,
+  "<|LOC_557|>": 100854,
+  "<|LOC_558|>": 100855,
+  "<|LOC_559|>": 100856,
+  "<|LOC_55|>": 100352,
+  "<|LOC_560|>": 100857,
+  "<|LOC_561|>": 100858,
+  "<|LOC_562|>": 100859,
+  "<|LOC_563|>": 100860,
+  "<|LOC_564|>": 100861,
+  "<|LOC_565|>": 100862,
+  "<|LOC_566|>": 100863,
+  "<|LOC_567|>": 100864,
+  "<|LOC_568|>": 100865,
+  "<|LOC_569|>": 100866,
+  "<|LOC_56|>": 100353,
+  "<|LOC_570|>": 100867,
+  "<|LOC_571|>": 100868,
+  "<|LOC_572|>": 100869,
+  "<|LOC_573|>": 100870,
+  "<|LOC_574|>": 100871,
+  "<|LOC_575|>": 100872,
+  "<|LOC_576|>": 100873,
+  "<|LOC_577|>": 100874,
+  "<|LOC_578|>": 100875,
+  "<|LOC_579|>": 100876,
+  "<|LOC_57|>": 100354,
+  "<|LOC_580|>": 100877,
+  "<|LOC_581|>": 100878,
+  "<|LOC_582|>": 100879,
+  "<|LOC_583|>": 100880,
+  "<|LOC_584|>": 100881,
+  "<|LOC_585|>": 100882,
+  "<|LOC_586|>": 100883,
+  "<|LOC_587|>": 100884,
+  "<|LOC_588|>": 100885,
+  "<|LOC_589|>": 100886,
+  "<|LOC_58|>": 100355,
+  "<|LOC_590|>": 100887,
+  "<|LOC_591|>": 100888,
+  "<|LOC_592|>": 100889,
+  "<|LOC_593|>": 100890,
+  "<|LOC_594|>": 100891,
+  "<|LOC_595|>": 100892,
+  "<|LOC_596|>": 100893,
+  "<|LOC_597|>": 100894,
+  "<|LOC_598|>": 100895,
+  "<|LOC_599|>": 100896,
+  "<|LOC_59|>": 100356,
+  "<|LOC_5|>": 100302,
+  "<|LOC_600|>": 100897,
+  "<|LOC_601|>": 100898,
+  "<|LOC_602|>": 100899,
+  "<|LOC_603|>": 100900,
+  "<|LOC_604|>": 100901,
+  "<|LOC_605|>": 100902,
+  "<|LOC_606|>": 100903,
+  "<|LOC_607|>": 100904,
+  "<|LOC_608|>": 100905,
+  "<|LOC_609|>": 100906,
+  "<|LOC_60|>": 100357,
+  "<|LOC_610|>": 100907,
+  "<|LOC_611|>": 100908,
+  "<|LOC_612|>": 100909,
+  "<|LOC_613|>": 100910,
+  "<|LOC_614|>": 100911,
+  "<|LOC_615|>": 100912,
+  "<|LOC_616|>": 100913,
+  "<|LOC_617|>": 100914,
+  "<|LOC_618|>": 100915,
+  "<|LOC_619|>": 100916,
+  "<|LOC_61|>": 100358,
+  "<|LOC_620|>": 100917,
+  "<|LOC_621|>": 100918,
+  "<|LOC_622|>": 100919,
+  "<|LOC_623|>": 100920,
+  "<|LOC_624|>": 100921,
+  "<|LOC_625|>": 100922,
+  "<|LOC_626|>": 100923,
+  "<|LOC_627|>": 100924,
+  "<|LOC_628|>": 100925,
+  "<|LOC_629|>": 100926,
+  "<|LOC_62|>": 100359,
+  "<|LOC_630|>": 100927,
+  "<|LOC_631|>": 100928,
+  "<|LOC_632|>": 100929,
+  "<|LOC_633|>": 100930,
+  "<|LOC_634|>": 100931,
+  "<|LOC_635|>": 100932,
+  "<|LOC_636|>": 100933,
+  "<|LOC_637|>": 100934,
+  "<|LOC_638|>": 100935,
+  "<|LOC_639|>": 100936,
+  "<|LOC_63|>": 100360,
+  "<|LOC_640|>": 100937,
+  "<|LOC_641|>": 100938,
+  "<|LOC_642|>": 100939,
+  "<|LOC_643|>": 100940,
+  "<|LOC_644|>": 100941,
+  "<|LOC_645|>": 100942,
+  "<|LOC_646|>": 100943,
+  "<|LOC_647|>": 100944,
+  "<|LOC_648|>": 100945,
+  "<|LOC_649|>": 100946,
+  "<|LOC_64|>": 100361,
+  "<|LOC_650|>": 100947,
+  "<|LOC_651|>": 100948,
+  "<|LOC_652|>": 100949,
+  "<|LOC_653|>": 100950,
+  "<|LOC_654|>": 100951,
+  "<|LOC_655|>": 100952,
+  "<|LOC_656|>": 100953,
+  "<|LOC_657|>": 100954,
+  "<|LOC_658|>": 100955,
+  "<|LOC_659|>": 100956,
+  "<|LOC_65|>": 100362,
+  "<|LOC_660|>": 100957,
+  "<|LOC_661|>": 100958,
+  "<|LOC_662|>": 100959,
+  "<|LOC_663|>": 100960,
+  "<|LOC_664|>": 100961,
+  "<|LOC_665|>": 100962,
+  "<|LOC_666|>": 100963,
+  "<|LOC_667|>": 100964,
+  "<|LOC_668|>": 100965,
+  "<|LOC_669|>": 100966,
+  "<|LOC_66|>": 100363,
+  "<|LOC_670|>": 100967,
+  "<|LOC_671|>": 100968,
+  "<|LOC_672|>": 100969,
+  "<|LOC_673|>": 100970,
+  "<|LOC_674|>": 100971,
+  "<|LOC_675|>": 100972,
+  "<|LOC_676|>": 100973,
+  "<|LOC_677|>": 100974,
+  "<|LOC_678|>": 100975,
+  "<|LOC_679|>": 100976,
+  "<|LOC_67|>": 100364,
+  "<|LOC_680|>": 100977,
+  "<|LOC_681|>": 100978,
+  "<|LOC_682|>": 100979,
+  "<|LOC_683|>": 100980,
+  "<|LOC_684|>": 100981,
+  "<|LOC_685|>": 100982,
+  "<|LOC_686|>": 100983,
+  "<|LOC_687|>": 100984,
+  "<|LOC_688|>": 100985,
+  "<|LOC_689|>": 100986,
+  "<|LOC_68|>": 100365,
+  "<|LOC_690|>": 100987,
+  "<|LOC_691|>": 100988,
+  "<|LOC_692|>": 100989,
+  "<|LOC_693|>": 100990,
+  "<|LOC_694|>": 100991,
+  "<|LOC_695|>": 100992,
+  "<|LOC_696|>": 100993,
+  "<|LOC_697|>": 100994,
+  "<|LOC_698|>": 100995,
+  "<|LOC_699|>": 100996,
+  "<|LOC_69|>": 100366,
+  "<|LOC_6|>": 100303,
+  "<|LOC_700|>": 100997,
+  "<|LOC_701|>": 100998,
+  "<|LOC_702|>": 100999,
+  "<|LOC_703|>": 101000,
+  "<|LOC_704|>": 101001,
+  "<|LOC_705|>": 101002,
+  "<|LOC_706|>": 101003,
+  "<|LOC_707|>": 101004,
+  "<|LOC_708|>": 101005,
+  "<|LOC_709|>": 101006,
+  "<|LOC_70|>": 100367,
+  "<|LOC_710|>": 101007,
+  "<|LOC_711|>": 101008,
+  "<|LOC_712|>": 101009,
+  "<|LOC_713|>": 101010,
+  "<|LOC_714|>": 101011,
+  "<|LOC_715|>": 101012,
+  "<|LOC_716|>": 101013,
+  "<|LOC_717|>": 101014,
+  "<|LOC_718|>": 101015,
+  "<|LOC_719|>": 101016,
+  "<|LOC_71|>": 100368,
+  "<|LOC_720|>": 101017,
+  "<|LOC_721|>": 101018,
+  "<|LOC_722|>": 101019,
+  "<|LOC_723|>": 101020,
+  "<|LOC_724|>": 101021,
+  "<|LOC_725|>": 101022,
+  "<|LOC_726|>": 101023,
+  "<|LOC_727|>": 101024,
+  "<|LOC_728|>": 101025,
+  "<|LOC_729|>": 101026,
+  "<|LOC_72|>": 100369,
+  "<|LOC_730|>": 101027,
+  "<|LOC_731|>": 101028,
+  "<|LOC_732|>": 101029,
+  "<|LOC_733|>": 101030,
+  "<|LOC_734|>": 101031,
+  "<|LOC_735|>": 101032,
+  "<|LOC_736|>": 101033,
+  "<|LOC_737|>": 101034,
+  "<|LOC_738|>": 101035,
+  "<|LOC_739|>": 101036,
+  "<|LOC_73|>": 100370,
+  "<|LOC_740|>": 101037,
+  "<|LOC_741|>": 101038,
+  "<|LOC_742|>": 101039,
+  "<|LOC_743|>": 101040,
+  "<|LOC_744|>": 101041,
+  "<|LOC_745|>": 101042,
+  "<|LOC_746|>": 101043,
+  "<|LOC_747|>": 101044,
         | 
| 727 | 
            +
              "<|LOC_748|>": 101045,
         | 
| 728 | 
            +
              "<|LOC_749|>": 101046,
         | 
| 729 | 
            +
              "<|LOC_74|>": 100371,
         | 
| 730 | 
            +
              "<|LOC_750|>": 101047,
         | 
| 731 | 
            +
              "<|LOC_751|>": 101048,
         | 
| 732 | 
            +
              "<|LOC_752|>": 101049,
         | 
| 733 | 
            +
              "<|LOC_753|>": 101050,
         | 
| 734 | 
            +
              "<|LOC_754|>": 101051,
         | 
| 735 | 
            +
              "<|LOC_755|>": 101052,
         | 
| 736 | 
            +
              "<|LOC_756|>": 101053,
         | 
| 737 | 
            +
              "<|LOC_757|>": 101054,
         | 
| 738 | 
            +
              "<|LOC_758|>": 101055,
         | 
| 739 | 
            +
              "<|LOC_759|>": 101056,
         | 
| 740 | 
            +
              "<|LOC_75|>": 100372,
         | 
| 741 | 
            +
              "<|LOC_760|>": 101057,
         | 
| 742 | 
            +
              "<|LOC_761|>": 101058,
         | 
| 743 | 
            +
              "<|LOC_762|>": 101059,
         | 
| 744 | 
            +
              "<|LOC_763|>": 101060,
         | 
| 745 | 
            +
              "<|LOC_764|>": 101061,
         | 
| 746 | 
            +
              "<|LOC_765|>": 101062,
         | 
| 747 | 
            +
              "<|LOC_766|>": 101063,
         | 
| 748 | 
            +
              "<|LOC_767|>": 101064,
         | 
| 749 | 
            +
              "<|LOC_768|>": 101065,
         | 
| 750 | 
            +
              "<|LOC_769|>": 101066,
         | 
| 751 | 
            +
              "<|LOC_76|>": 100373,
         | 
| 752 | 
            +
              "<|LOC_770|>": 101067,
         | 
| 753 | 
            +
              "<|LOC_771|>": 101068,
         | 
| 754 | 
            +
              "<|LOC_772|>": 101069,
         | 
| 755 | 
            +
              "<|LOC_773|>": 101070,
         | 
| 756 | 
            +
              "<|LOC_774|>": 101071,
         | 
| 757 | 
            +
              "<|LOC_775|>": 101072,
         | 
| 758 | 
            +
              "<|LOC_776|>": 101073,
         | 
| 759 | 
            +
              "<|LOC_777|>": 101074,
         | 
| 760 | 
            +
              "<|LOC_778|>": 101075,
         | 
| 761 | 
            +
              "<|LOC_779|>": 101076,
         | 
| 762 | 
            +
              "<|LOC_77|>": 100374,
         | 
| 763 | 
            +
              "<|LOC_780|>": 101077,
         | 
| 764 | 
            +
              "<|LOC_781|>": 101078,
         | 
| 765 | 
            +
              "<|LOC_782|>": 101079,
         | 
| 766 | 
            +
              "<|LOC_783|>": 101080,
         | 
| 767 | 
            +
              "<|LOC_784|>": 101081,
         | 
| 768 | 
            +
              "<|LOC_785|>": 101082,
         | 
| 769 | 
            +
              "<|LOC_786|>": 101083,
         | 
| 770 | 
            +
              "<|LOC_787|>": 101084,
         | 
| 771 | 
            +
              "<|LOC_788|>": 101085,
         | 
| 772 | 
            +
              "<|LOC_789|>": 101086,
         | 
| 773 | 
            +
              "<|LOC_78|>": 100375,
         | 
| 774 | 
            +
              "<|LOC_790|>": 101087,
         | 
| 775 | 
            +
              "<|LOC_791|>": 101088,
         | 
| 776 | 
            +
              "<|LOC_792|>": 101089,
         | 
| 777 | 
            +
              "<|LOC_793|>": 101090,
         | 
| 778 | 
            +
              "<|LOC_794|>": 101091,
         | 
| 779 | 
            +
              "<|LOC_795|>": 101092,
         | 
| 780 | 
            +
              "<|LOC_796|>": 101093,
         | 
| 781 | 
            +
              "<|LOC_797|>": 101094,
         | 
| 782 | 
            +
              "<|LOC_798|>": 101095,
         | 
| 783 | 
            +
              "<|LOC_799|>": 101096,
         | 
| 784 | 
            +
              "<|LOC_79|>": 100376,
         | 
| 785 | 
            +
              "<|LOC_7|>": 100304,
         | 
| 786 | 
            +
              "<|LOC_800|>": 101097,
         | 
| 787 | 
            +
              "<|LOC_801|>": 101098,
         | 
| 788 | 
            +
              "<|LOC_802|>": 101099,
         | 
| 789 | 
            +
              "<|LOC_803|>": 101100,
         | 
| 790 | 
            +
              "<|LOC_804|>": 101101,
         | 
| 791 | 
            +
              "<|LOC_805|>": 101102,
         | 
| 792 | 
            +
              "<|LOC_806|>": 101103,
         | 
| 793 | 
            +
              "<|LOC_807|>": 101104,
         | 
| 794 | 
            +
              "<|LOC_808|>": 101105,
         | 
| 795 | 
            +
              "<|LOC_809|>": 101106,
         | 
| 796 | 
            +
              "<|LOC_80|>": 100377,
         | 
| 797 | 
            +
              "<|LOC_810|>": 101107,
         | 
| 798 | 
            +
              "<|LOC_811|>": 101108,
         | 
| 799 | 
            +
              "<|LOC_812|>": 101109,
         | 
| 800 | 
            +
              "<|LOC_813|>": 101110,
         | 
| 801 | 
            +
              "<|LOC_814|>": 101111,
         | 
| 802 | 
            +
              "<|LOC_815|>": 101112,
         | 
| 803 | 
            +
              "<|LOC_816|>": 101113,
         | 
| 804 | 
            +
              "<|LOC_817|>": 101114,
         | 
| 805 | 
            +
              "<|LOC_818|>": 101115,
         | 
| 806 | 
            +
              "<|LOC_819|>": 101116,
         | 
| 807 | 
            +
              "<|LOC_81|>": 100378,
         | 
| 808 | 
            +
              "<|LOC_820|>": 101117,
         | 
| 809 | 
            +
              "<|LOC_821|>": 101118,
         | 
| 810 | 
            +
              "<|LOC_822|>": 101119,
         | 
| 811 | 
            +
              "<|LOC_823|>": 101120,
         | 
| 812 | 
            +
              "<|LOC_824|>": 101121,
         | 
| 813 | 
            +
              "<|LOC_825|>": 101122,
         | 
| 814 | 
            +
              "<|LOC_826|>": 101123,
         | 
| 815 | 
            +
              "<|LOC_827|>": 101124,
         | 
| 816 | 
            +
              "<|LOC_828|>": 101125,
         | 
| 817 | 
            +
              "<|LOC_829|>": 101126,
         | 
| 818 | 
            +
              "<|LOC_82|>": 100379,
         | 
| 819 | 
            +
              "<|LOC_830|>": 101127,
         | 
| 820 | 
            +
              "<|LOC_831|>": 101128,
         | 
| 821 | 
            +
              "<|LOC_832|>": 101129,
         | 
| 822 | 
            +
              "<|LOC_833|>": 101130,
         | 
| 823 | 
            +
              "<|LOC_834|>": 101131,
         | 
| 824 | 
            +
              "<|LOC_835|>": 101132,
         | 
| 825 | 
            +
              "<|LOC_836|>": 101133,
         | 
| 826 | 
            +
              "<|LOC_837|>": 101134,
         | 
| 827 | 
            +
              "<|LOC_838|>": 101135,
         | 
| 828 | 
            +
              "<|LOC_839|>": 101136,
         | 
| 829 | 
            +
              "<|LOC_83|>": 100380,
         | 
| 830 | 
            +
              "<|LOC_840|>": 101137,
         | 
| 831 | 
            +
              "<|LOC_841|>": 101138,
         | 
| 832 | 
            +
              "<|LOC_842|>": 101139,
         | 
| 833 | 
            +
              "<|LOC_843|>": 101140,
         | 
| 834 | 
            +
              "<|LOC_844|>": 101141,
         | 
| 835 | 
            +
              "<|LOC_845|>": 101142,
         | 
| 836 | 
            +
              "<|LOC_846|>": 101143,
         | 
| 837 | 
            +
              "<|LOC_847|>": 101144,
         | 
| 838 | 
            +
              "<|LOC_848|>": 101145,
         | 
| 839 | 
            +
              "<|LOC_849|>": 101146,
         | 
| 840 | 
            +
              "<|LOC_84|>": 100381,
         | 
| 841 | 
            +
              "<|LOC_850|>": 101147,
         | 
| 842 | 
            +
              "<|LOC_851|>": 101148,
         | 
| 843 | 
            +
              "<|LOC_852|>": 101149,
         | 
| 844 | 
            +
              "<|LOC_853|>": 101150,
         | 
| 845 | 
            +
              "<|LOC_854|>": 101151,
         | 
| 846 | 
            +
              "<|LOC_855|>": 101152,
         | 
| 847 | 
            +
              "<|LOC_856|>": 101153,
         | 
| 848 | 
            +
              "<|LOC_857|>": 101154,
         | 
| 849 | 
            +
              "<|LOC_858|>": 101155,
         | 
| 850 | 
            +
              "<|LOC_859|>": 101156,
         | 
| 851 | 
            +
              "<|LOC_85|>": 100382,
         | 
| 852 | 
            +
              "<|LOC_860|>": 101157,
         | 
| 853 | 
            +
              "<|LOC_861|>": 101158,
         | 
| 854 | 
            +
              "<|LOC_862|>": 101159,
         | 
| 855 | 
            +
              "<|LOC_863|>": 101160,
         | 
| 856 | 
            +
              "<|LOC_864|>": 101161,
         | 
| 857 | 
            +
              "<|LOC_865|>": 101162,
         | 
| 858 | 
            +
              "<|LOC_866|>": 101163,
         | 
| 859 | 
            +
              "<|LOC_867|>": 101164,
         | 
| 860 | 
            +
              "<|LOC_868|>": 101165,
         | 
| 861 | 
            +
              "<|LOC_869|>": 101166,
         | 
| 862 | 
            +
              "<|LOC_86|>": 100383,
         | 
| 863 | 
            +
              "<|LOC_870|>": 101167,
         | 
| 864 | 
            +
              "<|LOC_871|>": 101168,
         | 
| 865 | 
            +
              "<|LOC_872|>": 101169,
         | 
| 866 | 
            +
              "<|LOC_873|>": 101170,
         | 
| 867 | 
            +
              "<|LOC_874|>": 101171,
         | 
| 868 | 
            +
              "<|LOC_875|>": 101172,
         | 
| 869 | 
            +
              "<|LOC_876|>": 101173,
         | 
| 870 | 
            +
              "<|LOC_877|>": 101174,
         | 
| 871 | 
            +
              "<|LOC_878|>": 101175,
         | 
| 872 | 
            +
              "<|LOC_879|>": 101176,
         | 
| 873 | 
            +
              "<|LOC_87|>": 100384,
         | 
| 874 | 
            +
              "<|LOC_880|>": 101177,
         | 
| 875 | 
            +
              "<|LOC_881|>": 101178,
         | 
| 876 | 
            +
              "<|LOC_882|>": 101179,
         | 
| 877 | 
            +
              "<|LOC_883|>": 101180,
         | 
| 878 | 
            +
              "<|LOC_884|>": 101181,
         | 
| 879 | 
            +
              "<|LOC_885|>": 101182,
         | 
| 880 | 
            +
              "<|LOC_886|>": 101183,
         | 
| 881 | 
            +
              "<|LOC_887|>": 101184,
         | 
| 882 | 
            +
              "<|LOC_888|>": 101185,
         | 
| 883 | 
            +
              "<|LOC_889|>": 101186,
         | 
| 884 | 
            +
              "<|LOC_88|>": 100385,
         | 
| 885 | 
            +
              "<|LOC_890|>": 101187,
         | 
| 886 | 
            +
              "<|LOC_891|>": 101188,
         | 
| 887 | 
            +
              "<|LOC_892|>": 101189,
         | 
| 888 | 
            +
              "<|LOC_893|>": 101190,
         | 
| 889 | 
            +
              "<|LOC_894|>": 101191,
         | 
| 890 | 
            +
              "<|LOC_895|>": 101192,
         | 
| 891 | 
            +
              "<|LOC_896|>": 101193,
         | 
| 892 | 
            +
              "<|LOC_897|>": 101194,
         | 
| 893 | 
            +
              "<|LOC_898|>": 101195,
         | 
| 894 | 
            +
              "<|LOC_899|>": 101196,
         | 
| 895 | 
            +
              "<|LOC_89|>": 100386,
         | 
| 896 | 
            +
              "<|LOC_8|>": 100305,
         | 
| 897 | 
            +
              "<|LOC_900|>": 101197,
         | 
| 898 | 
            +
              "<|LOC_901|>": 101198,
         | 
| 899 | 
            +
              "<|LOC_902|>": 101199,
         | 
| 900 | 
            +
              "<|LOC_903|>": 101200,
         | 
| 901 | 
            +
              "<|LOC_904|>": 101201,
         | 
| 902 | 
            +
              "<|LOC_905|>": 101202,
         | 
| 903 | 
            +
              "<|LOC_906|>": 101203,
         | 
| 904 | 
            +
              "<|LOC_907|>": 101204,
         | 
| 905 | 
            +
              "<|LOC_908|>": 101205,
         | 
| 906 | 
            +
              "<|LOC_909|>": 101206,
         | 
| 907 | 
            +
              "<|LOC_90|>": 100387,
         | 
| 908 | 
            +
              "<|LOC_910|>": 101207,
         | 
| 909 | 
            +
              "<|LOC_911|>": 101208,
         | 
| 910 | 
            +
              "<|LOC_912|>": 101209,
         | 
| 911 | 
            +
              "<|LOC_913|>": 101210,
         | 
| 912 | 
            +
              "<|LOC_914|>": 101211,
         | 
| 913 | 
            +
              "<|LOC_915|>": 101212,
         | 
| 914 | 
            +
              "<|LOC_916|>": 101213,
         | 
| 915 | 
            +
              "<|LOC_917|>": 101214,
         | 
| 916 | 
            +
              "<|LOC_918|>": 101215,
         | 
| 917 | 
            +
              "<|LOC_919|>": 101216,
         | 
| 918 | 
            +
              "<|LOC_91|>": 100388,
         | 
| 919 | 
            +
              "<|LOC_920|>": 101217,
         | 
| 920 | 
            +
              "<|LOC_921|>": 101218,
         | 
| 921 | 
            +
              "<|LOC_922|>": 101219,
         | 
| 922 | 
            +
              "<|LOC_923|>": 101220,
         | 
| 923 | 
            +
              "<|LOC_924|>": 101221,
         | 
| 924 | 
            +
              "<|LOC_925|>": 101222,
         | 
| 925 | 
            +
              "<|LOC_926|>": 101223,
         | 
| 926 | 
            +
              "<|LOC_927|>": 101224,
         | 
| 927 | 
            +
              "<|LOC_928|>": 101225,
         | 
| 928 | 
            +
              "<|LOC_929|>": 101226,
         | 
| 929 | 
            +
              "<|LOC_92|>": 100389,
         | 
| 930 | 
            +
              "<|LOC_930|>": 101227,
         | 
| 931 | 
            +
              "<|LOC_931|>": 101228,
         | 
| 932 | 
            +
              "<|LOC_932|>": 101229,
         | 
| 933 | 
            +
              "<|LOC_933|>": 101230,
         | 
| 934 | 
            +
              "<|LOC_934|>": 101231,
         | 
| 935 | 
            +
              "<|LOC_935|>": 101232,
         | 
| 936 | 
            +
              "<|LOC_936|>": 101233,
         | 
| 937 | 
            +
              "<|LOC_937|>": 101234,
         | 
| 938 | 
            +
              "<|LOC_938|>": 101235,
         | 
| 939 | 
            +
              "<|LOC_939|>": 101236,
         | 
| 940 | 
            +
              "<|LOC_93|>": 100390,
         | 
| 941 | 
            +
              "<|LOC_940|>": 101237,
         | 
| 942 | 
            +
              "<|LOC_941|>": 101238,
         | 
| 943 | 
            +
              "<|LOC_942|>": 101239,
         | 
| 944 | 
            +
              "<|LOC_943|>": 101240,
         | 
| 945 | 
            +
              "<|LOC_944|>": 101241,
         | 
| 946 | 
            +
              "<|LOC_945|>": 101242,
         | 
| 947 | 
            +
              "<|LOC_946|>": 101243,
         | 
| 948 | 
            +
              "<|LOC_947|>": 101244,
         | 
| 949 | 
            +
              "<|LOC_948|>": 101245,
         | 
| 950 | 
            +
              "<|LOC_949|>": 101246,
         | 
| 951 | 
            +
              "<|LOC_94|>": 100391,
         | 
| 952 | 
            +
              "<|LOC_950|>": 101247,
         | 
| 953 | 
            +
              "<|LOC_951|>": 101248,
         | 
| 954 | 
            +
              "<|LOC_952|>": 101249,
         | 
| 955 | 
            +
              "<|LOC_953|>": 101250,
         | 
| 956 | 
            +
              "<|LOC_954|>": 101251,
         | 
| 957 | 
            +
              "<|LOC_955|>": 101252,
         | 
| 958 | 
            +
              "<|LOC_956|>": 101253,
         | 
| 959 | 
            +
              "<|LOC_957|>": 101254,
         | 
| 960 | 
            +
              "<|LOC_958|>": 101255,
         | 
| 961 | 
            +
              "<|LOC_959|>": 101256,
         | 
| 962 | 
            +
              "<|LOC_95|>": 100392,
         | 
| 963 | 
            +
              "<|LOC_960|>": 101257,
         | 
| 964 | 
            +
              "<|LOC_961|>": 101258,
         | 
| 965 | 
            +
              "<|LOC_962|>": 101259,
         | 
| 966 | 
            +
              "<|LOC_963|>": 101260,
         | 
| 967 | 
            +
              "<|LOC_964|>": 101261,
         | 
| 968 | 
            +
              "<|LOC_965|>": 101262,
         | 
| 969 | 
            +
              "<|LOC_966|>": 101263,
         | 
| 970 | 
            +
              "<|LOC_967|>": 101264,
         | 
| 971 | 
            +
              "<|LOC_968|>": 101265,
         | 
| 972 | 
            +
              "<|LOC_969|>": 101266,
         | 
| 973 | 
            +
              "<|LOC_96|>": 100393,
         | 
| 974 | 
            +
              "<|LOC_970|>": 101267,
         | 
| 975 | 
            +
              "<|LOC_971|>": 101268,
         | 
| 976 | 
            +
              "<|LOC_972|>": 101269,
         | 
| 977 | 
            +
              "<|LOC_973|>": 101270,
         | 
| 978 | 
            +
              "<|LOC_974|>": 101271,
         | 
| 979 | 
            +
              "<|LOC_975|>": 101272,
         | 
| 980 | 
            +
              "<|LOC_976|>": 101273,
         | 
| 981 | 
            +
              "<|LOC_977|>": 101274,
         | 
| 982 | 
            +
              "<|LOC_978|>": 101275,
         | 
| 983 | 
            +
              "<|LOC_979|>": 101276,
         | 
| 984 | 
            +
              "<|LOC_97|>": 100394,
         | 
| 985 | 
            +
              "<|LOC_980|>": 101277,
         | 
| 986 | 
            +
              "<|LOC_981|>": 101278,
         | 
| 987 | 
            +
              "<|LOC_982|>": 101279,
         | 
| 988 | 
            +
              "<|LOC_983|>": 101280,
         | 
| 989 | 
            +
              "<|LOC_984|>": 101281,
         | 
| 990 | 
            +
              "<|LOC_985|>": 101282,
         | 
| 991 | 
            +
              "<|LOC_986|>": 101283,
         | 
| 992 | 
            +
              "<|LOC_987|>": 101284,
         | 
| 993 | 
            +
              "<|LOC_988|>": 101285,
         | 
| 994 | 
            +
              "<|LOC_989|>": 101286,
         | 
| 995 | 
            +
              "<|LOC_98|>": 100395,
         | 
| 996 | 
            +
              "<|LOC_990|>": 101287,
         | 
| 997 | 
            +
              "<|LOC_991|>": 101288,
         | 
| 998 | 
            +
              "<|LOC_992|>": 101289,
         | 
| 999 | 
            +
              "<|LOC_993|>": 101290,
         | 
| 1000 | 
            +
              "<|LOC_994|>": 101291,
         | 
| 1001 | 
            +
              "<|LOC_995|>": 101292,
         | 
| 1002 | 
            +
              "<|LOC_996|>": 101293,
         | 
| 1003 | 
            +
              "<|LOC_997|>": 101294,
         | 
| 1004 | 
            +
              "<|LOC_998|>": 101295,
         | 
| 1005 | 
            +
              "<|LOC_999|>": 101296,
         | 
| 1006 | 
            +
              "<|LOC_99|>": 100396,
         | 
| 1007 | 
            +
              "<|LOC_9|>": 100306,
         | 
| 1008 | 
            +
              "<|LOC_BEGIN|>": 101298,
         | 
| 1009 | 
            +
              "<|LOC_END|>": 101299,
         | 
| 1010 | 
            +
              "<|LOC_SEP|>": 101300
         | 
| 1011 | 
            +
            }
         | 
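The block above closes added_tokens.json: several hundred `<|LOC_*|>` grounding tokens plus the `<|LOC_BEGIN|>`/`<|LOC_END|>`/`<|LOC_SEP|>` delimiters, ending at id 101300, all inside the vocab_size of 103424 declared in config.json below. A minimal sanity check, assuming a local checkout of this repo at "." (the tokenizer class ships in tokenization_ernie4_5.py, hence trust_remote_code=True):

    from transformers import AutoTokenizer

    # "." is an assumed local path to this repo; the custom tokenizer code
    # it ships requires trust_remote_code=True.
    tok = AutoTokenizer.from_pretrained(".", trust_remote_code=True)

    # Ids come straight from added_tokens.json.
    assert tok.convert_tokens_to_ids("<|LOC_BEGIN|>") == 101298
    assert tok.convert_tokens_to_ids("<|LOC_SEP|>") == 101300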
    	
        chat_template.jinja
    ADDED
    
@@ -0,0 +1,24 @@
{%- if not add_generation_prompt is defined -%}
    {%- set add_generation_prompt = true -%}
{%- endif -%}
{%- if not cls_token is defined -%}
    {%- set cls_token = "<|begin_of_sentence|>" -%}
{%- endif -%}
{%- if not sep_token is defined -%}
    {%- set sep_token = "<|end_of_sentence|>" -%}
{%- endif -%}
{{- cls_token -}}
{%- for message in messages -%}
    {%- if message["role"] == "user" -%}
        {{- "User: " + message["content"] + "
" -}}
    {%- elif message["role"] == "assistant" -%}
        {{- "Assistant: " + message["content"] + sep_token -}}
    {%- elif message["role"] == "system" -%}
        {{- message["content"] + "
" -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{- "Assistant: " -}}
{%- endif -%}
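The template prepends `<|begin_of_sentence|>`, renders system turns bare, user turns as "User: ...", assistant turns terminated by `<|end_of_sentence|>`, and appends a bare "Assistant: " when add_generation_prompt is set (the default). A sketch of the rendered prompt, reusing the `tok` loaded in the earlier snippet:

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    print(prompt)
    # <|begin_of_sentence|>You are a helpful assistant.
    # User: Hello!
    # Assistant: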
    	
        config.json
    ADDED
    
@@ -0,0 +1,29 @@
{
    "architectures": [
        "Ernie4_5_ForCausalLM"
    ],
    "auto_map": {
        "AutoConfig": "configuration_ernie4_5.Ernie4_5_Config",
        "AutoModel": "modeling_ernie4_5.Ernie4_5_Model",
        "AutoModelForCausalLM": "modeling_ernie4_5.Ernie4_5_ForCausalLM"
    },
    "bos_token_id": 1,
    "eos_token_id": 2,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 1024,
    "intermediate_size": 3072,
    "max_position_embeddings": 131072,
    "model_type": "ernie4_5",
    "num_attention_heads": 16,
    "num_hidden_layers": 18,
    "num_key_value_heads": 2,
    "pad_token_id": 0,
    "rms_norm_eps": 1e-05,
    "rope_theta": 500000,
    "tie_word_embeddings": true,
    "torch_dtype": "bfloat16",
    "use_bias": false,
    "use_cache": false,
    "vocab_size": 103424
}
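Because auto_map resolves to modules shipped in this repo rather than classes built into transformers, loading through the Auto classes also needs trust_remote_code=True, and "torch_dtype": "bfloat16" indicates the intended load precision. A minimal load sketch under the same local-path assumption as above:

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        ".",                     # assumed local checkout of this repo
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,  # resolves auto_map to modeling_ernie4_5.Ernie4_5_ForCausalLM
    )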
    	
        configuration_ernie4_5.py
    ADDED
    
@@ -0,0 +1,127 @@
# Copyright (c) 2025 Baidu, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import PretrainedConfig


class Ernie4_5_Config(PretrainedConfig):
    """
    Configuration class for the ERNIE 4.5 model.

    This class stores the configuration of an Ernie model, defining the model architecture.
    It inherits from PretrainedConfig and can be used to control model outputs.
    """

    model_type = "ernie4_5"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Default tensor parallel plan for the base model
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=768,
        intermediate_size=11008,
        max_position_embeddings=32768,
        num_hidden_layers=2,
        num_attention_heads=2,
        rms_norm_eps=1e-6,
        use_cache=False,
        use_flash_attention=False,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        use_bias=False,
        rope_theta=10000,
        weight_share_add_bias=True,
        ignored_index=-100,
        attention_probs_dropout_prob=0.0,
        hidden_dropout_prob=0.0,
        compression_ratio: float = 1.0,
        num_key_value_heads=None,
        max_sequence_length=None,
        **kwargs,
    ):
        """
        Initialize configuration with default or specified parameters.

        Args:
            vocab_size (int): Size of the vocabulary (number of unique tokens)
            hidden_size (int): Dimensionality of the encoder layers and the pooler layer
            intermediate_size (int): Dimensionality of the "intermediate" (feed-forward) layer
            max_position_embeddings (int): Maximum sequence length the model can handle
            num_hidden_layers (int): Number of hidden layers in the Transformer encoder
            num_attention_heads (int): Number of attention heads for each attention layer
            rms_norm_eps (float): The epsilon used by the RMS normalization layers
            use_cache (bool): Whether to use caching for faster generation (decoding)
            use_flash_attention (bool): Whether to use FlashAttention for optimized attention computation
            pad_token_id (int): Token ID used for padding sequences
            bos_token_id (int): Token ID used for beginning-of-sequence
            eos_token_id (int): Token ID used for end-of-sequence
            use_bias (bool): Whether to use bias terms in linear layers
            rope_theta (float): The base period of the RoPE embeddings
            weight_share_add_bias (bool): Whether to share bias weights in certain layers
            ignored_index (int): Target value that is ignored during loss computation
            attention_probs_dropout_prob (float): Dropout probability for attention weights
            hidden_dropout_prob (float): Dropout probability for hidden layers
            compression_ratio (float): Ratio for KV cache compression (1.0 = no compression)
            num_key_value_heads (int): Number of key/value heads (for Grouped Query Attention)
            max_sequence_length (int): Maximum sequence length for positional embeddings
            **kwargs: Additional keyword arguments passed to parent class
        """

        # Set default for tied embeddings if not specified.
        if "tie_word_embeddings" not in kwargs:
            kwargs["tie_word_embeddings"] = False
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.max_position_embeddings = max_position_embeddings
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.use_flash_attention = use_flash_attention
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.use_bias = use_bias
        self.weight_share_add_bias = weight_share_add_bias
        self.rope_theta = rope_theta
        self.ignored_index = ignored_index
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.hidden_dropout_prob = hidden_dropout_prob
        self.compression_ratio = compression_ratio
        self.num_key_value_heads = num_key_value_heads
        self.max_sequence_length = max_sequence_length
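The defaults in this class describe a deliberately tiny model (2 layers, 2 attention heads); the shipped config.json overrides them at load time. Note that head_dim and hidden_act are not explicit __init__ parameters here and travel through **kwargs into PretrainedConfig, which stores unknown keys as plain attributes. A usage sketch with the values from config.json:

    from configuration_ernie4_5 import Ernie4_5_Config

    cfg = Ernie4_5_Config(
        vocab_size=103424,
        hidden_size=1024,
        intermediate_size=3072,
        num_hidden_layers=18,
        num_attention_heads=16,
        num_key_value_heads=2,
        max_position_embeddings=131072,
        rope_theta=500000,
        head_dim=128,              # absorbed via **kwargs
        tie_word_embeddings=True,  # the code defaults this to False when unspecified
    )
    assert cfg.model_type == "ernie4_5"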
    	
        generation_config.json
    ADDED
    
@@ -0,0 +1,11 @@
{
    "do_sample": true,
    "top_p": 0.8,
    "temperature": 0.8,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "pad_token_id": 0,
    "repetition_penalty": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0
}
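model.generate picks these sampling defaults up from generation_config.json automatically; they can also be passed explicitly, as in this end-to-end sketch (local path and recent transformers assumed):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        ".", torch_dtype=torch.bfloat16, trust_remote_code=True
    )

    messages = [{"role": "user", "content": "Hello!"}]
    input_ids = tok.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    out = model.generate(
        input_ids,
        max_new_tokens=64,
        do_sample=True,   # matches the shipped defaults
        top_p=0.8,
        temperature=0.8,
    )
    print(tok.decode(out[0], skip_special_tokens=True))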
    	
        model.safetensors
    ADDED
    
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9dc88c2b55a10b32b0e1ba396537d411e5284ca2bcd60c7edd7e955ea6409ca8
size 721514626
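The LFS pointer records 721,514,626 bytes on disk, while the index that follows counts 721,496,064 bytes of tensor data across 360,748,032 parameters; the roughly 18 KB gap is consistent with the safetensors header block. Both totals follow directly from config.json with bfloat16 storage (2 bytes per parameter, no biases, tied embeddings), as this worked check shows:

    V, H, I, L = 103424, 1024, 3072, 18           # vocab, hidden, intermediate, layers
    n_heads, n_kv, d_head = 16, 2, 128

    embed = V * H                                  # tied with lm_head, counted once
    attn = (H * n_heads * d_head                   # q_proj
            + 2 * H * n_kv * d_head                # k_proj + v_proj
            + n_heads * d_head * H)                # o_proj
    mlp = 3 * H * I                                # gate_proj, up_proj, down_proj
    norms = 2 * H                                  # two RMSNorm weights per layer
    total = embed + L * (attn + mlp + norms) + H   # plus the final model.norm

    assert total == 360748032                      # total_parameters in the index
    assert total * 2 == 721496064                  # total_size at 2 bytes/param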
    	
        model.safetensors.index.json
    ADDED
    
@@ -0,0 +1,172 @@
{
    "metadata": {
        "total_size": 721496064,
        "total_parameters": 360748032
    },
    "weight_map": {
        "model.embed_tokens.weight": "model.safetensors",
        "model.layers.0.input_layernorm.weight": "model.safetensors",
        "model.layers.0.mlp.down_proj.weight": "model.safetensors",
        "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.0.mlp.up_proj.weight": "model.safetensors",
        "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.1.input_layernorm.weight": "model.safetensors",
        "model.layers.1.mlp.down_proj.weight": "model.safetensors",
        "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.1.mlp.up_proj.weight": "model.safetensors",
        "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.10.input_layernorm.weight": "model.safetensors",
        "model.layers.10.mlp.down_proj.weight": "model.safetensors",
        "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.10.mlp.up_proj.weight": "model.safetensors",
        "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.11.input_layernorm.weight": "model.safetensors",
        "model.layers.11.mlp.down_proj.weight": "model.safetensors",
        "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.11.mlp.up_proj.weight": "model.safetensors",
        "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.12.input_layernorm.weight": "model.safetensors",
        "model.layers.12.mlp.down_proj.weight": "model.safetensors",
        "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.12.mlp.up_proj.weight": "model.safetensors",
        "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.13.input_layernorm.weight": "model.safetensors",
        "model.layers.13.mlp.down_proj.weight": "model.safetensors",
        "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.13.mlp.up_proj.weight": "model.safetensors",
        "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.14.input_layernorm.weight": "model.safetensors",
        "model.layers.14.mlp.down_proj.weight": "model.safetensors",
        "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.14.mlp.up_proj.weight": "model.safetensors",
        "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.15.input_layernorm.weight": "model.safetensors",
        "model.layers.15.mlp.down_proj.weight": "model.safetensors",
        "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.15.mlp.up_proj.weight": "model.safetensors",
        "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.16.input_layernorm.weight": "model.safetensors",
        "model.layers.16.mlp.down_proj.weight": "model.safetensors",
        "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.16.mlp.up_proj.weight": "model.safetensors",
        "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.17.input_layernorm.weight": "model.safetensors",
        "model.layers.17.mlp.down_proj.weight": "model.safetensors",
        "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.17.mlp.up_proj.weight": "model.safetensors",
        "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.2.input_layernorm.weight": "model.safetensors",
        "model.layers.2.mlp.down_proj.weight": "model.safetensors",
        "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.2.mlp.up_proj.weight": "model.safetensors",
        "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.3.input_layernorm.weight": "model.safetensors",
        "model.layers.3.mlp.down_proj.weight": "model.safetensors",
        "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.3.mlp.up_proj.weight": "model.safetensors",
        "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.4.input_layernorm.weight": "model.safetensors",
        "model.layers.4.mlp.down_proj.weight": "model.safetensors",
        "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.4.mlp.up_proj.weight": "model.safetensors",
        "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.5.input_layernorm.weight": "model.safetensors",
        "model.layers.5.mlp.down_proj.weight": "model.safetensors",
        "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.5.mlp.up_proj.weight": "model.safetensors",
        "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.6.input_layernorm.weight": "model.safetensors",
        "model.layers.6.mlp.down_proj.weight": "model.safetensors",
        "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.6.mlp.up_proj.weight": "model.safetensors",
        "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
        "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
        "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
        "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
        "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
        "model.layers.7.input_layernorm.weight": "model.safetensors",
        "model.layers.7.mlp.down_proj.weight": "model.safetensors",
        "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
        "model.layers.7.mlp.up_proj.weight": "model.safetensors",
        "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
                    "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
         | 
| 149 | 
            +
                    "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
         | 
| 150 | 
            +
                    "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
         | 
| 151 | 
            +
                    "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
         | 
| 152 | 
            +
                    "model.layers.8.input_layernorm.weight": "model.safetensors",
         | 
| 153 | 
            +
                    "model.layers.8.mlp.down_proj.weight": "model.safetensors",
         | 
| 154 | 
            +
                    "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
         | 
| 155 | 
            +
                    "model.layers.8.mlp.up_proj.weight": "model.safetensors",
         | 
| 156 | 
            +
                    "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
         | 
| 157 | 
            +
                    "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
         | 
| 158 | 
            +
                    "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
         | 
| 159 | 
            +
                    "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
         | 
| 160 | 
            +
                    "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
         | 
| 161 | 
            +
                    "model.layers.9.input_layernorm.weight": "model.safetensors",
         | 
| 162 | 
            +
                    "model.layers.9.mlp.down_proj.weight": "model.safetensors",
         | 
| 163 | 
            +
                    "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
         | 
| 164 | 
            +
                    "model.layers.9.mlp.up_proj.weight": "model.safetensors",
         | 
| 165 | 
            +
                    "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
         | 
| 166 | 
            +
                    "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
         | 
| 167 | 
            +
                    "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
         | 
| 168 | 
            +
                    "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
         | 
| 169 | 
            +
                    "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
         | 
| 170 | 
            +
                    "model.norm.weight": "model.safetensors"
         | 
| 171 | 
            +
                }
         | 
| 172 | 
            +
            }
         | 
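Every entry in this weight_map resolves to the same single shard, model.safetensors; the index mainly serves tooling that looks up tensors by name. A minimal sketch of reading it (file name as uploaded above):

import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)
print(index["weight_map"]["model.norm.weight"])  # -> "model.safetensors"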
    	
modeling_ernie4_5.py
ADDED

@@ -0,0 +1,1068 @@
+# Copyright (c) 2025 Baidu, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.attention import SDPBackend, sdpa_kernel
+
+from transformers.activations import ACT2FN
+from transformers.modeling_utils import PreTrainedModel
+from transformers.generation import GenerationMixin
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPast,
+    CausalLMOutputWithPast,
+)
+from transformers.utils import logging
+
+from .configuration_ernie4_5 import Ernie4_5_Config
+
+
+logger = logging.get_logger(__name__)
+
+
+class Ernie4_5_RMSNorm(nn.Module):
+    """
+    Root Mean Square Layer Normalization (Ernie4_5_RMSNorm) implementation.
+
+    Ernie4_5_RMSNorm is a simplified version of LayerNorm that focuses on the root mean square of inputs,
+    omitting the mean-centering operation. This provides computational efficiency while maintaining
+    good performance.
+    """
+
+    def __init__(self, config):
+        """
+        Initialize the Ernie4_5_RMSNorm layer.
+
+        Args:
+            config: Model configuration.
+        """
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.weight = nn.Parameter(
+            torch.ones(self.hidden_size, dtype=torch.get_default_dtype())
+        )
+        self.variance_epsilon = config.rms_norm_eps
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input hidden states.
+
+        Args:
+            hidden_states (Tensor): Input tensor of shape [batch_size, seq_len, hidden_size]
+
+        Returns:
+            Tensor: Normalized output tensor of the same shape as the input
+
+        Note:
+            - Computes Ernie4_5_RMSNorm manually:
+                1. Compute the variance of the features
+                2. Apply reciprocal square root normalization
+                3. Scale by the learned weight parameter
+            - Computes in float32 for numerical stability, then casts back to the weight dtype
+        """
+        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
+        hidden_states = torch.rsqrt(variance + self.variance_epsilon) * hidden_states
+        return hidden_states.to(self.weight.dtype) * self.weight
+
+
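A quick way to sanity-check the class above against the plain RMSNorm formula; a minimal sketch, with a hypothetical config stub standing in for the real Ernie4_5_Config:

import torch
from types import SimpleNamespace

cfg = SimpleNamespace(hidden_size=8, rms_norm_eps=1e-6)  # stub, not the real config class
norm = Ernie4_5_RMSNorm(cfg)
x = torch.randn(2, 4, cfg.hidden_size)
ref = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + cfg.rms_norm_eps)  # weight is all-ones at init
assert torch.allclose(norm(x), ref, atol=1e-5)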
+class Ernie4_5_RopeEmbedding(nn.Module):
+    """
+    Rotary Position Embedding (RoPE) implementation for transformer models.
+
+    RoPE encodes absolute positional information with rotation matrices and
+    naturally incorporates relative position information in self-attention.
+
+    Args:
+        head_dim (int): Dimension size of each attention head
+        compression_ratio (float, optional): Sequence length compression ratio. Defaults to 1.0.
+        base (int, optional): Base value for frequency calculation. Defaults to 10000.
+
+    Attributes:
+        head_dim (int): Dimension size of each attention head
+        compression_ratio (float): Sequence length compression factor
+        base (int): Base value for frequency calculation
+    """
+
+    def __init__(self, head_dim, compression_ratio=1.0, base=10000):
+        """
+        Initialize the RoPE embedding layer.
+
+        Args:
+            head_dim: Dimension of each attention head
+            compression_ratio: Scaling factor for position indices
+            base: Base value for frequency calculation
+        """
+        super().__init__()
+        self.head_dim = head_dim
+        self.compression_ratio = compression_ratio
+        self.base = base
+
+    def forward(self, seq_length, position_ids=None):
+        """
+        Compute rotary position embeddings for a given sequence length.
+
+        Args:
+            seq_length (int): Maximum sequence length
+            position_ids (Tensor, optional): Custom position indices. Defaults to None.
+
+        Returns:
+            Tensor: Rotary position embeddings of shape [1, 1, seq_length, head_dim]
+        """
+        indices = torch.arange(0, self.head_dim, 2, dtype=torch.float32)
+        indices = 1 / self.base ** (indices / self.head_dim)
+        if position_ids is None:
+            position_ids = torch.arange(
+                0, seq_length, 1, dtype=torch.float32
+            ).unsqueeze(1)
+            position_ids = position_ids / self.compression_ratio
+            sinusoid_inp = position_ids * indices.unsqueeze(0)
+        else:
+            position_ids = position_ids / self.compression_ratio
+            seq_length = position_ids.shape[-1]
+            sinusoid_inp = position_ids.unsqueeze(-1).to(
+                torch.float32
+            ) * indices.unsqueeze(0)
+        pos_emb = torch.cat([torch.sin(sinusoid_inp), torch.cos(sinusoid_inp)], dim=-1)
+        pos_emb = pos_emb.view(-1, 1, seq_length, self.head_dim)
+        pos_emb = pos_emb.detach()
+        return pos_emb
+
+    def apply_rotary(self, rp, q, k):
+        """
+        Apply rotary position embeddings to queries and keys.
+
+        Args:
+            rp (Tensor): Rotary position embeddings
+            q (Tensor): Query tensor [batch, heads, seq_len, dim]
+            k (Tensor): Key tensor [batch, heads, seq_len, dim]
+
+        Returns:
+            Tuple[Tensor, Tensor]: Rotated queries and keys
+        """
+        sin, cos = torch.chunk(rp.to(q.device), 2, dim=-1)
+        # sin [θ0,θ1,θ2,...,θd/2-1] -> sin_pos [θ0,θ0,θ1,θ1,θ2,θ2,...,θd/2-1,θd/2-1]
+        sin_pos = torch.stack([sin, sin], dim=-1).reshape(rp.shape)
+        # cos [θ0,θ1,θ2,...,θd/2-1] -> cos_pos [θ0,θ0,θ1,θ1,θ2,θ2,...,θd/2-1,θd/2-1]
+        cos_pos = torch.stack([cos, cos], dim=-1).reshape(rp.shape)
+        # rotate_half_query_layer [-q1,q0,-q3,q2,...,-qd-1,qd-2]
+        rotate_half_q = torch.stack(
+            [-q[:, :, :, 1::2], q[:, :, :, 0::2]], dim=-1
+        ).reshape(q.shape)
+        query = (q.to(torch.float32) * cos_pos) + (
+            rotate_half_q.to(torch.float32) * sin_pos
+        )
+        # rotate_half_key_layer [-k1,k0,-k3,k2,...,-kd-1,kd-2]
+        rotate_half_k = torch.stack(
+            [-k[:, :, :, 1::2], k[:, :, :, 0::2]], dim=-1
+        ).reshape(k.shape)
+        key = (k.to(torch.float32) * cos_pos) + (
+            rotate_half_k.to(torch.float32) * sin_pos
+        )
+        return query, key
+
+
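Note that apply_rotary uses the interleaved rotate-half convention (pairs (x0, x1), (x2, x3), ...) rather than the split-half layout some ports use, so the two layouts are not interchangeable without permuting dimensions. A shape-level sketch of the intended call pattern, using the docstring shapes:

import torch

rope = Ernie4_5_RopeEmbedding(head_dim=64)
q = torch.randn(1, 2, 10, 64)  # [batch, heads, seq_len, head_dim]
k = torch.randn(1, 2, 10, 64)
rp = rope(seq_length=10)       # [1, 1, 10, 64], detached float32
q_rot, k_rot = rope.apply_rotary(rp, q, k)  # same shapes as q and k, returned in float32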
+class Ernie4_5_FusedDropoutImpl(nn.Module):
+    """
+    Fused dropout implementation with residual connection support.
+
+    This layer combines dropout and residual addition in a single operation for better performance,
+    particularly on GPU devices. The dropout is conditionally applied based on the probability.
+
+    Args:
+        prob (float): Dropout probability (between 0 and 1)
+
+    Attributes:
+        prob (float): Stores the dropout probability
+        dropout (nn.Dropout): The actual dropout layer instance
+    """
+
+    def __init__(self, prob):
+        """
+        Initialize the fused dropout layer.
+
+        Args:
+            prob (float): Dropout probability (0 means no dropout)
+        """
+        super().__init__()
+        self.prob = prob
+        self.dropout = nn.Dropout(p=prob)
+
+    def forward(self, x, y):
+        """
+        Forward pass of the fused dropout layer.
+
+        Args:
+            x (Tensor): Input tensor to potentially apply dropout
+            y (Tensor): Residual tensor to add to the (possibly dropped out) x
+
+        Returns:
+            Tensor: Result of x (with optional dropout) + y
+        """
+        if self.prob > 0:
+            x = self.dropout(x)
+        output = x + y
+
+        return output
+
+
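The "fusion" here is organizational rather than kernel-level: dropout (skipped entirely when prob == 0) and the residual add live in one module. The intended call pattern, sketched:

import torch

drop_add = Ernie4_5_FusedDropoutImpl(prob=0.1)
h = torch.randn(2, 4, 8)         # sublayer output
residual = torch.randn(2, 4, 8)  # skip connection
out = drop_add(h, residual)      # dropout(h) + residual in train mode; h + residual in eval mode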
+class Ernie4_5_MLP(nn.Module):
+    """
+    Ernie4_5_MLP - Gated Multi-Layer Perceptron module used in the Ernie model.
+    """
+
+    def __init__(self, config, layer_idx=0):
+        """
+        Initialize the MLP module with configuration options.
+
+        Args:
+            config: Model configurations.
+            layer_idx (int): Index of the current layer (default: 0)
+        """
+        super().__init__()
+        self.config = config
+        self.layer_idx = layer_idx
+        self.hidden_size = config.hidden_size
+        self.intermediate_size = config.intermediate_size
+
+        self.gate_proj = nn.Linear(
+            self.hidden_size, self.intermediate_size, bias=config.use_bias
+        )
+        self.up_proj = nn.Linear(
+            self.hidden_size, self.intermediate_size, bias=config.use_bias
+        )
+        self.down_proj = nn.Linear(
+            self.intermediate_size, self.hidden_size, bias=config.use_bias
+        )
+        self.act_fn = ACT2FN[config.hidden_act]
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): shape [batch_size, seq_len, hidden_size]
+
+        Returns:
+            Tensor: shape [batch_size, seq_len, hidden_size]
+        """
+        down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
+        return down_proj
+
+
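This is the standard gated (SwiGLU-style) feed-forward: the activated gate projection multiplies the up projection before the down projection. A minimal equivalence check, again with a hypothetical config stub and assuming hidden_act resolves to SiLU:

import torch
import torch.nn.functional as F
from types import SimpleNamespace

cfg = SimpleNamespace(hidden_size=8, intermediate_size=16, use_bias=False, hidden_act="silu")  # stub
mlp = Ernie4_5_MLP(cfg)
x = torch.randn(2, 4, cfg.hidden_size)
manual = mlp.down_proj(F.silu(mlp.gate_proj(x)) * mlp.up_proj(x))
assert torch.equal(mlp(x), manual)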
+class Ernie4_5_Attention(nn.Module):
+    """Multi-headed attention from the 'Attention Is All You Need' paper"""
+
+    def __init__(self, config, layer_idx=0):
+        """Initialize the attention layer.
+
+        Args:
+            config: Model configuration.
+            layer_idx (int, optional): Index in the transformer stack. Defaults to 0.
+        """
+        super().__init__()
+        self.layer_idx = layer_idx
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.num_key_value_heads = config.num_key_value_heads
+
+        if config.head_dim is None:
+            self.head_dim = self.hidden_size // self.num_heads
+        else:
+            self.head_dim = config.head_dim
+
+        self.is_gqa = (
+            self.num_key_value_heads is not None
+            and self.num_key_value_heads != self.num_heads
+        )
+
+        if self.is_gqa:
+            logger.info(
+                f"use GQA - num_heads: {self.num_heads} - num_key_value_heads: {self.num_key_value_heads}"
+            )
+            assert (
+                self.num_heads % self.num_key_value_heads == 0
+            ), f"num_heads: {self.num_heads}, num_key_value_heads: {self.num_key_value_heads}"
+            kv_hidden_size = self.head_dim * self.num_key_value_heads
+            q_hidden_size = self.head_dim * self.num_heads
+        else:
+            q_hidden_size = kv_hidden_size = self.head_dim * self.num_heads
+
+        self.q_proj = nn.Linear(self.hidden_size, q_hidden_size, bias=config.use_bias)
+        self.k_proj = nn.Linear(self.hidden_size, kv_hidden_size, bias=config.use_bias)
+        self.v_proj = nn.Linear(self.hidden_size, kv_hidden_size, bias=config.use_bias)
+        self.o_proj = nn.Linear(q_hidden_size, self.hidden_size, bias=config.use_bias)
+
+        self.rotary_emb = Ernie4_5_RopeEmbedding(
+            self.head_dim,
+            compression_ratio=config.compression_ratio,
+            base=config.rope_theta,
+        )
+        self.config = config
+
+        self.set_attn_func()
+
+    def set_attn_func(self):
+        """Configure the attention function based on settings.
+
+        Selects between flash and core attention.
+        """
+        config = self.config
+        if config.use_flash_attention:
+            self.attn_func = self._flash_attention_wrapper
+        else:
+            self.attn_func = self.core_attn
+
+    def forward(
+        self,
+        hidden_states,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        attn_mask_start_row_indices: Optional[torch.Tensor] = None,
+        position_ids: Optional[Tuple[torch.Tensor]] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        token_type_ids: Optional[Tuple[torch.Tensor]] = None,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        """Compute attention outputs.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor [bsz, seq_len, hidden_size]
+            past_key_value (Optional[Tuple[torch.Tensor, torch.Tensor]]): Cached key/value states
+            attention_mask (Optional[torch.Tensor]): Attention mask tensor
+            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length attention indices
+            position_ids (Optional[torch.Tensor]): Position indices for RoPE
+            output_attentions (bool): Return attention weights if True
+            use_cache (bool): Cache key/value states if True
+
+        Returns:
+            Tuple containing:
+                - attention_output: [bsz, seq_len, hidden_size]
+                - attention_weights: Optional attention probabilities
+                - updated_key_value_cache: Optional updated cache
+        """
+        if token_type_ids is not None:
+            token_type_ids = token_type_ids[:, :-1]
+
+        bsz, q_len, _ = hidden_states.shape
+
+        query_states = self.q_proj(hidden_states).reshape(
+            [bsz, q_len, -1, self.head_dim]
+        )
+        key_states = self.k_proj(hidden_states).reshape([bsz, q_len, -1, self.head_dim])
+        value_states = self.v_proj(hidden_states).reshape(
+            [bsz, q_len, -1, self.head_dim]
+        )
+
+        attn_output, attn_weights, past_key_value = self.rope_attn(
+            query_states=query_states,
+            key_states=key_states,
+            value_states=value_states,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            output_attentions=output_attentions,
+            past_key_value=past_key_value,
+            use_cache=use_cache,
+            attn_mask_start_row_indices=attn_mask_start_row_indices,
+        )
+
+        attn_output = self.o_proj(attn_output)
+
+        if not output_attentions:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
+
+    def repeat_kv(self, hidden_states, n_rep):
+        """
+        This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
+        num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
+        """
+        batch, num_key_value_heads, slen, head_dim = hidden_states.shape
+        if n_rep == 1:
+            return hidden_states
+        hidden_states = hidden_states[:, :, None, :, :].expand(
+            batch, num_key_value_heads, n_rep, slen, head_dim
+        )
+        return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
+
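repeat_kv materializes each key/value head n_rep times so the grouped-query case can reuse the dense matmul path below. A shape check, assuming 4 query heads per KV head (attn stands for any Ernie4_5_Attention instance, hypothetical here):

import torch

kv = torch.randn(1, 2, 10, 64)          # [batch, num_key_value_heads, seq_len, head_dim]
expanded = attn.repeat_kv(kv, n_rep=4)  # -> [1, 8, 10, 64], each KV head repeated consecutively
assert torch.equal(expanded, torch.repeat_interleave(kv, repeats=4, dim=1))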
+    def _flash_attention_wrapper(
+        self,
+        q,
+        k,
+        v,
+        attention_mask=None,
+        attn_mask_start_row_indices=None,
+        seq_length=None,
+    ):
+        """Wrapper for the flash attention implementation.
+
+        Args:
+            q (torch.Tensor): Query tensor
+            k (torch.Tensor): Key tensor
+            v (torch.Tensor): Value tensor
+            attention_mask (Optional[torch.Tensor]): Attention mask
+            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length indices
+            seq_length (Optional[int]): Sequence length
+
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor]: Attention output and weights
+        """
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+
+        with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
+            out = F.scaled_dot_product_attention(
+                q,
+                k,
+                v,
+                attn_mask=attention_mask,
+                dropout_p=self.config.attention_probs_dropout_prob,
+                # q.shape[2] is the sequence dim after the transpose above;
+                # single-token decode steps must not apply a causal mask
+                is_causal=attention_mask is None and q.shape[2] != 1,
+                scale=1
+                / (getattr(self.config, "scale_qk_coeff", 1.0) * self.head_dim**0.5),
+                enable_gqa=self.is_gqa,
+            )
+        out = out.transpose(1, 2)
+        out = out.contiguous().view(out.size(0), out.size(1), -1)
+
+        return out, None
+
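The flash path defers everything to PyTorch SDPA with the flash backend pinned; enable_gqa (available in newer PyTorch releases) lets k and v stay at num_key_value_heads without repeat_kv. Stripped of configuration, the call reduces to roughly the following (illustrative shapes; flash kernels need a CUDA device and half precision):

import torch
import torch.nn.functional as F
from torch.nn.attention import SDPBackend, sdpa_kernel

b, h, s, d = 1, 8, 128, 64
q = k = v = torch.randn(b, h, s, d, device="cuda", dtype=torch.float16)
with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
    out = F.scaled_dot_product_attention(q, k, v, is_causal=True, scale=1 / d ** 0.5)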
+    def core_attn(
+        self,
+        q,
+        k,
+        v,
+        attention_mask=None,
+        attn_mask_start_row_indices=None,
+        seq_length=None,
+    ):
+        """Standard self-attention implementation.
+
+        Args:
+            q (torch.Tensor): Query tensor
+            k (torch.Tensor): Key tensor
+            v (torch.Tensor): Value tensor
+            attention_mask (Optional[torch.Tensor]): Attention mask
+            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length indices
+            seq_length (Optional[int]): Sequence length
+
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor]: Attention output and weights
+        """
+        origin_dtype = q.dtype
+
+        q = q.permute(0, 2, 1, 3)
+        k = k.permute(0, 2, 1, 3)
+        v = v.permute(0, 2, 1, 3)
+
+        scale_qk_coeff = (
+            getattr(self.config, "scale_qk_coeff", 1.0) * self.head_dim**0.5
+        )
+
+        q = q / scale_qk_coeff
+
+        # Handle GQA case - repeat k and v heads to match q heads
+        if self.is_gqa:
+            # [batch, num_key_value_heads, seq_len, head_dim] -> [batch, num_heads, seq_len, head_dim]
+            repeat_factor = self.num_heads // self.num_key_value_heads
+            k = self.repeat_kv(k, repeat_factor)
+            v = self.repeat_kv(v, repeat_factor)
+
+        attn_scores = torch.matmul(q, k.transpose(-2, -1))
+
+        if getattr(self.config, "scale_qk_coeff", 1.0) != 1.0:
+            attn_scores = attn_scores * getattr(self.config, "scale_qk_coeff", 1.0)
+
+        # Causal mask
+        seq_len = attn_scores.size(-1)
+        mask = torch.triu(
+            torch.ones((seq_len, seq_len), dtype=torch.bool, device=attn_scores.device),
+            diagonal=1,
+        )
+        attn_scores = attn_scores.masked_fill(mask, float("-inf"))
+        attn_weights = F.softmax(attn_scores, dim=-1)
+
+        attn_weights = attn_weights.to(origin_dtype)
+
+        # attention_probs_dropout_prob defaults to 0.0
+        if getattr(self.config, "attention_probs_dropout_prob", 0.0) > 0:
+            attn_weights = F.dropout(
+                attn_weights,
+                p=self.config.attention_probs_dropout_prob,
+                training=self.training,
+            )
+
+        # [batch, num_heads, q_len, k_len] @ [batch, num_heads, k_len, head_dim] -> [batch, num_heads, q_len, head_dim]
+        out = torch.matmul(attn_weights, v)
+
+        # [batch, num_heads, seq_len, head_dim] -> [batch, seq_len, num_heads, head_dim]
+        out = out.permute(0, 2, 1, 3)
+        # [batch, seq_len, hidden_size]
+        out = out.contiguous().view(out.size(0), out.size(1), -1)
+
+        return out, attn_weights
+
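Two details in the dense path are worth calling out. First, q is pre-divided by scale_qk_coeff * sqrt(head_dim) and the scores are multiplied back by scale_qk_coeff, which nets out to the usual 1/sqrt(head_dim) scaling while keeping the intermediate q·k products smaller in magnitude (presumably for low-precision headroom). A sketch of the identity:

import torch

q, k = torch.randn(4, 8), torch.randn(4, 8)
c, d = 2.0, 8  # c plays the role of scale_qk_coeff, d of head_dim
lhs = ((q / (c * d ** 0.5)) @ k.T) * c
rhs = (q @ k.T) / d ** 0.5
assert torch.allclose(lhs, rhs, atol=1e-5)

Second, the triu causal mask is built square over the key length, so this path appears to assume q_len == kv_len; cached single-token decoding would not broadcast against it.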
+    def rope_attn(
+        self,
+        query_states,
+        key_states,
+        value_states,
+        attention_mask,
+        position_ids,
+        output_attentions=False,
+        past_key_value=None,
+        use_cache=False,
+        attn_mask_start_row_indices=None,
+    ):
+        """Attention computation with rotary embeddings.
+
+        Args:
+            query_states (torch.Tensor): Query states
+            key_states (torch.Tensor): Key states
+            value_states (torch.Tensor): Value states
+            attention_mask (Optional[torch.Tensor]): Attention mask
+            position_ids (Optional[torch.Tensor]): Position indices
+            output_attentions (bool): Return attention weights
+            past_key_value (Optional[Tuple[torch.Tensor, torch.Tensor]]): Cached states
+            use_cache (bool): Cache new states
+            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable length indices
+
+        Returns:
+            Tuple containing:
+                - attention_output: Result tensor
+                - attention_weights: Optional weights
+                - updated_key_value_cache: Optional cache
+        """
+
+        query_states_dtype = query_states.dtype
+
+        kv_seq_len = key_states.shape[-3]
+        offset = 0
+        if past_key_value is not None:
+            offset = past_key_value[0].shape[-3]
+            kv_seq_len += offset
+
+        cos_sin = self.rotary_emb(kv_seq_len).permute(
+            [0, 2, 1, 3]
+        )  # [b,h,s,d] -> [b,s,h,d]
+        if offset > 0:
+            cos_sin = cos_sin[:, offset:]
+        query_states, key_states = self.rotary_emb.apply_rotary(
+            cos_sin, query_states, key_states
+        )
+
+        query_states = query_states.to(query_states_dtype)
+        key_states = key_states.to(query_states_dtype)
+        if past_key_value is not None:
+            # reuse k, v, self_attention
+            key_states = torch.cat([past_key_value[0], key_states], dim=1)
         | 
| 572 | 
            +
                        value_states = torch.cat([past_key_value[1], value_states], dim=1)
         | 
| 573 | 
            +
             | 
| 574 | 
            +
                    # shape: [2, b, s, kvh, d]
         | 
| 575 | 
            +
                    past_key_value = [key_states, value_states] if use_cache else None
         | 
| 576 | 
            +
                    seq_length = query_states.shape[1]
         | 
| 577 | 
            +
                    attn_output, attn_weights = self.attn_func(
         | 
| 578 | 
            +
                        query_states,
         | 
| 579 | 
            +
                        key_states,
         | 
| 580 | 
            +
                        value_states,
         | 
| 581 | 
            +
                        attention_mask,
         | 
| 582 | 
            +
                        attn_mask_start_row_indices,
         | 
| 583 | 
            +
                        seq_length,
         | 
| 584 | 
            +
                    )
         | 
| 585 | 
            +
                    return attn_output, attn_weights, past_key_value
         | 
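    # --- Illustrative sketch (not part of the model): how the cache offset in
    # rope_attn selects rotary positions for newly fed tokens only. Shapes are
    # made up for the demo; only torch is assumed.
    def _demo_cache_offset(self):
        import torch

        past_len, new_len = 5, 1          # decoding one token with 5 cached
        kv_seq_len = past_len + new_len   # total positions covered by the table
        # Stand-in for the rotary table, one row per absolute position: [1, s, 1, d]
        cos_sin = torch.arange(kv_seq_len, dtype=torch.float32).view(1, kv_seq_len, 1, 1)
        # Keep only rows for the new tokens, exactly as `cos_sin[:, offset:]` does.
        cos_sin_new = cos_sin[:, past_len:]
        assert cos_sin_new.shape[1] == new_len
        assert cos_sin_new[0, 0, 0, 0].item() == past_len  # position index 5, not 0
        return cos_sin_new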


class Ernie4_5_DecoderLayer(nn.Module):
    """A single transformer decoder layer in the ERNIE model."""

    def __init__(self, config, layer_idx):
        """Initialize the decoder layer.

        Args:
            config: Model configuration.
            layer_idx (int): Index of this layer in the transformer stack
        """
        super().__init__()
        self.hidden_size = config.hidden_size
        self.layer_idx = layer_idx
        self.config = config

        self.self_attn = Ernie4_5_Attention(config, layer_idx)
        self.mlp = Ernie4_5_MLP(config)

        self.input_layernorm = Ernie4_5_RMSNorm(config)
        self.post_attention_layernorm = Ernie4_5_RMSNorm(config)

        self.residual_add1 = Ernie4_5_FusedDropoutImpl(config.hidden_dropout_prob)
        self.residual_add2 = Ernie4_5_FusedDropoutImpl(config.hidden_dropout_prob)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        attn_mask_start_row_indices: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = False,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        use_cache: Optional[bool] = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        """Forward pass through the decoder layer.

        Args:
            hidden_states (torch.Tensor): Input tensor [batch_size, seq_len, hidden_size]
            attention_mask (Optional[torch.Tensor]): Attention mask tensor
            attn_mask_start_row_indices (Optional[torch.Tensor]): Indices for variable-length attention
            position_ids (Optional[torch.Tensor]): Position indices for rotary embeddings
            token_type_ids (Optional[torch.Tensor]): Token type IDs
            output_attentions (Optional[bool]): Whether to return attention weights
            past_key_value (Optional[Tuple[torch.Tensor]]): Cached key/value states
            use_cache (Optional[bool]): Whether to cache key/value states

        Returns:
            Union: Various output combinations depending on arguments:
                - Base case: Hidden states tensor
                - With attention: Tuple of (hidden_states, attention_weights)
                - With cache: Tuple of (hidden_states, cached_key_value)
        """
        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self attention
        (hidden_states, self_attn_weights, present_key_value) = self.self_attn(
            hidden_states=hidden_states,
            past_key_value=past_key_value,
            attention_mask=attention_mask,
            attn_mask_start_row_indices=attn_mask_start_row_indices,
            position_ids=position_ids,
            output_attentions=output_attentions,
            use_cache=use_cache,
            token_type_ids=token_type_ids,
        )
        hidden_states = self.residual_add1(hidden_states, residual)

        # Fully connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)

        hidden_states = self.residual_add2(hidden_states, residual)
        outputs = (hidden_states,)

        if output_attentions:
            outputs += (self_attn_weights,)

        if use_cache:
            outputs += (present_key_value,)

        if len(outputs) == 1:
            outputs = outputs[0]

        return outputs
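    # --- Illustrative sketch (not part of the model): the pre-norm residual
    # pattern used by the layer above, reduced to plain tensors. `sublayer`
    # is a stand-in for either self-attention or the MLP.
    def _demo_pre_norm_block(self):
        import torch
        import torch.nn as nn

        hidden = torch.randn(2, 3, 8)
        norm = nn.LayerNorm(8)        # stand-in for Ernie4_5_RMSNorm
        sublayer = nn.Linear(8, 8)    # stand-in for attention / MLP

        residual = hidden
        out = sublayer(norm(hidden))  # normalize *before* the sublayer
        out = out + residual          # then add the skip connection
        assert out.shape == hidden.shape
        return out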


class Ernie4_5_PretrainedModel(PreTrainedModel):
    """Base class for ERNIE pretrained models."""

    config_class = Ernie4_5_Config
    base_model_prefix = "ernie"


class Ernie4_5_Model(Ernie4_5_PretrainedModel):
    """The bare ERNIE 4.5 transformer outputting raw hidden states."""

    def __init__(self, config):
        """Initialize the ERNIE model architecture.

        Args:
            config: Model configuration.
        """
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size
        self.hidden_size = config.hidden_size
        self.config = config

        self.embed_tokens = nn.Embedding(
            self.vocab_size,
            self.hidden_size,
        )

        self.layers = nn.ModuleList(
            [Ernie4_5_DecoderLayer(config, i) for i in range(config.num_hidden_layers)]
        )

        self.norm = Ernie4_5_RMSNorm(config)

        self.gradient_checkpointing = False

    def get_input_embeddings(self):
        """Get the input embedding layer.

        Returns:
            nn.Embedding: The embedding layer for input tokens
        """
        return self.embed_tokens

    def set_input_embeddings(self, value):
        """Set new input embeddings.

        Args:
            value (nn.Embedding): New embedding layer to use
        """
        self.embed_tokens = value

    def forward(
        self,
        input_ids=None,
        position_ids=None,
        token_type_ids=None,
        attention_mask=None,
        attn_mask_start_row_indices=None,
        inputs_embeds=None,
        use_cache=None,
        past_key_values=None,
        output_attentions=False,
        output_hidden_states=None,
        return_dict=False,
    ):
        """Forward pass through the ERNIE model.

        Args:
            input_ids (Optional[torch.Tensor]): Input token IDs
            position_ids (Optional[torch.Tensor]): Position indices
            token_type_ids (Optional[torch.Tensor]): Token type IDs
            attention_mask (Optional[torch.Tensor]): Attention mask
            attn_mask_start_row_indices (Optional[torch.Tensor]): Variable-length attention indices
            inputs_embeds (Optional[torch.Tensor]): Precomputed embeddings
            use_cache (Optional[bool]): Whether to cache key/value states
            past_key_values (Optional[Tuple[Tuple[torch.Tensor]]]): Cached key/value states
            output_attentions (Optional[bool]): Whether to output attention weights
            output_hidden_states (Optional[bool]): Whether to output all hidden states
            return_dict (Optional[bool]): Whether to return a dict or a tuple

        Returns:
            Union[Tuple, BaseModelOutputWithPast]:
                Various outputs depending on configuration, including:
                - last_hidden_state: Final layer hidden states
                - past_key_values: Cached key/value states if use_cache=True
                - hidden_states: All hidden states if output_hidden_states=True
                - attentions: Attention weights if output_attentions=True
        """
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            _, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            _, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if past_key_values is None:
            past_key_values = tuple([None] * len(self.layers))

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)
        inputs_embeds = inputs_embeds.to(self.embed_tokens.weight.dtype)

        hidden_states = inputs_embeds

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):

            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = past_key_values[idx]

            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask,
                attn_mask_start_row_indices,
                position_ids,
                token_type_ids,
                output_attentions,
                past_key_value,
                use_cache,
            )

            if isinstance(layer_outputs, (tuple, list)):
                hidden_states = layer_outputs[0]
            else:
                hidden_states = layer_outputs

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

            # apply kv cache
            if past_key_value is not None:
                hidden_states = hidden_states[:, -1:, :]

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_cache,
                    all_hidden_states,
                    all_self_attns,
                ]
                if v is not None
            )

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
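    # --- Illustrative sketch (not part of the model): the tuple filtering
    # used for `return_dict=False` above. `None` entries (disabled outputs)
    # are simply dropped, so the tuple's length depends on which flags were set.
    def _demo_output_tuple(self):
        hidden_states, next_cache = "h", "c"
        all_hidden_states, all_self_attns = None, None
        out = tuple(
            v
            for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
            if v is not None
        )
        assert out == ("h", "c")  # only the populated outputs survive
        return out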


class Ernie4_5_LMHead(nn.Module):
    """Language model head for ERNIE."""

    def __init__(self, config):
        """Initialize the language model head.

        Args:
            config: Model configuration containing:
                - vocab_size: Size of vocabulary
                - hidden_size: Dimension of hidden states
                - tie_word_embeddings: Whether to tie input/output embeddings
                - weight_share_add_bias: Whether to add bias when weight sharing
                - use_bias: Whether to use a bias term
        """
        super(Ernie4_5_LMHead, self).__init__()
        self.config = config
        vocab_size = config.vocab_size

        if config.tie_word_embeddings:
            # Weight of shape [vocab_size, hidden_size]
            self.weight = nn.Parameter(
                torch.empty(
                    vocab_size, config.hidden_size, dtype=torch.get_default_dtype()
                )
            )
        else:
            # Weight of shape [hidden_size, vocab_size]
            self.weight = nn.Parameter(
                torch.empty(
                    config.hidden_size, vocab_size, dtype=torch.get_default_dtype()
                )
            )
        nn.init.xavier_uniform_(self.weight)

        logger.info(
            f"output-weight: {self.weight.shape}, tie_word_embeddings: {config.tie_word_embeddings}"
        )

        if config.weight_share_add_bias and config.use_bias:
            self.bias = nn.Parameter(
                torch.zeros(vocab_size, dtype=torch.get_default_dtype())
            )
        else:
            self.bias = None

    def forward(self, hidden_states):
        """Project hidden states to vocabulary logits.

        Args:
            hidden_states (torch.Tensor): Input tensor of shape [batch_size, seq_len, hidden_size]

        Returns:
            Logits tensor of shape [batch_size, seq_len, vocab_size]
        """
        return self.calc_lm_head_logits(
            self.config, hidden_states, self.weight, self.bias
        )

    def calc_lm_head_logits(self, config, hidden_states, weight, bias):
        """Calculate language model head logits.

        This is the core function that computes the final output logits for a language model.

        Args:
            config: Model configuration.
            hidden_states (Tensor): Hidden states from the transformer layers
            weight (Tensor): Weight matrix for the language model head
            bias (Tensor): Bias vector for the language model head

        Returns:
            Tensor: The computed logits for language modeling.
        """
        if config.tie_word_embeddings:
            logits = torch.matmul(hidden_states, weight.T)
        else:
            logits = torch.matmul(hidden_states, weight)

        if bias is not None:
            logits = logits + bias

        return logits
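    # --- Illustrative sketch (not part of the model): why the head transposes
    # its weight only in the tied case. Tied weights are stored [vocab, hidden]
    # to match the input embedding; untied weights are stored [hidden, vocab].
    def _demo_head_shapes(self):
        import torch

        hidden, vocab = 8, 32
        x = torch.randn(2, 5, hidden)
        tied_w = torch.randn(vocab, hidden)    # embedding-shaped, needs .T
        untied_w = torch.randn(hidden, vocab)  # projection-shaped, used as-is
        assert torch.matmul(x, tied_w.T).shape == (2, 5, vocab)
        assert torch.matmul(x, untied_w).shape == (2, 5, vocab)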


class Ernie4_5_ForCausalLM(Ernie4_5_PretrainedModel, GenerationMixin):
    """ERNIE model for causal language modeling."""

    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = {"lm_head": "colwise_rep"}
    _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}

    def __init__(self, config):
        """Initialize the ERNIE model for causal language modeling.

        Args:
            config: Model configuration.
        """
        super().__init__(config)

        self.config = config
        self.model = Ernie4_5_Model(config)
        self.lm_head = Ernie4_5_LMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    @torch.no_grad()
    def set_state_dict(self, state_dict, *args, **kwargs):
        """Load the model state dictionary."""
        ret = super().set_state_dict(state_dict)
        return ret

    def get_input_embeddings(self):
        """Returns the input embeddings layer."""
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        """Sets the input embeddings layer."""
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        """Returns the output embeddings (LM head)."""
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        """Sets the output embeddings layer."""
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        """Sets the ERNIE decoder model."""
        self.model = decoder

    def get_decoder(self):
        """Gets the ERNIE decoder model."""
        return self.model

    def forward(
        self,
        input_ids,
        position_ids=None,
        attention_mask=None,
        attn_mask_start_row_indices=None,
        token_type_ids=None,
        inputs_embeds=None,
        labels=None,
        use_cache=False,
        past_key_values=None,
        output_attentions=None,
        output_hidden_states=None,
        **kwargs,
    ):
        """Forward pass for causal language modeling.

        Args:
            input_ids (torch.Tensor): Input token IDs.
            position_ids (torch.Tensor): Position IDs.
            attention_mask (torch.Tensor): Attention mask.
            attn_mask_start_row_indices (torch.Tensor): Attention mask start indices.
            token_type_ids (torch.Tensor): Token type IDs.
            inputs_embeds (torch.Tensor): Optional embedded inputs.
            labels (torch.Tensor): Target labels.
            use_cache (bool): Whether to use cached hidden states.
            past_key_values (dict): Pre-computed hidden states.
            output_attentions (bool): Whether to output attentions.
            output_hidden_states (bool): Whether to output hidden states.

        Returns:
            CausalLMOutputWithPast: Model outputs.
        """
        if past_key_values is not None:
            input_ids = input_ids[:, -1:]

        outputs = self.model(
            input_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            attn_mask_start_row_indices=attn_mask_start_row_indices,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            past_key_values=past_key_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )

        hidden_states = outputs.last_hidden_state
        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            loss = self.loss_function(
                logits=logits,
                labels=labels,
                vocab_size=self.config.vocab_size,
                **kwargs,
            )

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
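
# --- Illustrative usage (not part of the model): loading a checkpoint that
# ships this module the standard `transformers` way. The repo id below is a
# hypothetical placeholder; substitute the actual model path.
# `trust_remote_code=True` is what pulls in this modeling file.
def _demo_generate():
    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo = "your-org/your-ernie4_5-checkpoint"  # hypothetical repo id
    tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)

    inputs = tokenizer("Hello", return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=16)
    return tokenizer.decode(out[0], skip_special_tokens=True)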
    	
        special_tokens_map.json
    ADDED
    
@@ -0,0 +1,1062 @@
{
  "additional_special_tokens": [
    "<|IMAGE_PLACEHOLDER|>",
    "<|AUDIO_PLACEHOLDER|>",
    "<|LOC_0|>",
    "<|LOC_1|>",
    "<|LOC_2|>",
    "<|LOC_3|>",
    "<|LOC_4|>",
    "<|LOC_5|>",
    "<|LOC_6|>",
    "<|LOC_7|>",
    "<|LOC_8|>",
    "<|LOC_9|>",
    "<|LOC_10|>",
    "<|LOC_11|>",
    "<|LOC_12|>",
    "<|LOC_13|>",
    "<|LOC_14|>",
    "<|LOC_15|>",
    "<|LOC_16|>",
    "<|LOC_17|>",
    "<|LOC_18|>",
    "<|LOC_19|>",
    "<|LOC_20|>",
    "<|LOC_21|>",
    "<|LOC_22|>",
    "<|LOC_23|>",
    "<|LOC_24|>",
    "<|LOC_25|>",
    "<|LOC_26|>",
    "<|LOC_27|>",
    "<|LOC_28|>",
    "<|LOC_29|>",
    "<|LOC_30|>",
    "<|LOC_31|>",
    "<|LOC_32|>",
    "<|LOC_33|>",
    "<|LOC_34|>",
    "<|LOC_35|>",
    "<|LOC_36|>",
    "<|LOC_37|>",
    "<|LOC_38|>",
    "<|LOC_39|>",
    "<|LOC_40|>",
    "<|LOC_41|>",
    "<|LOC_42|>",
    "<|LOC_43|>",
    "<|LOC_44|>",
    "<|LOC_45|>",
    "<|LOC_46|>",
    "<|LOC_47|>",
    "<|LOC_48|>",
    "<|LOC_49|>",
    "<|LOC_50|>",
    "<|LOC_51|>",
    "<|LOC_52|>",
    "<|LOC_53|>",
    "<|LOC_54|>",
    "<|LOC_55|>",
    "<|LOC_56|>",
    "<|LOC_57|>",
    "<|LOC_58|>",
    "<|LOC_59|>",
    "<|LOC_60|>",
    "<|LOC_61|>",
    "<|LOC_62|>",
    "<|LOC_63|>",
    "<|LOC_64|>",
    "<|LOC_65|>",
    "<|LOC_66|>",
    "<|LOC_67|>",
    "<|LOC_68|>",
    "<|LOC_69|>",
    "<|LOC_70|>",
    "<|LOC_71|>",
    "<|LOC_72|>",
    "<|LOC_73|>",
    "<|LOC_74|>",
    "<|LOC_75|>",
    "<|LOC_76|>",
    "<|LOC_77|>",
    "<|LOC_78|>",
    "<|LOC_79|>",
    "<|LOC_80|>",
    "<|LOC_81|>",
    "<|LOC_82|>",
    "<|LOC_83|>",
    "<|LOC_84|>",
    "<|LOC_85|>",
    "<|LOC_86|>",
    "<|LOC_87|>",
    "<|LOC_88|>",
    "<|LOC_89|>",
    "<|LOC_90|>",
    "<|LOC_91|>",
    "<|LOC_92|>",
    "<|LOC_93|>",
    "<|LOC_94|>",
    "<|LOC_95|>",
    "<|LOC_96|>",
    "<|LOC_97|>",
    "<|LOC_98|>",
    "<|LOC_99|>",
    "<|LOC_100|>",
    "<|LOC_101|>",
    "<|LOC_102|>",
    "<|LOC_103|>",
    "<|LOC_104|>",
    "<|LOC_105|>",
    "<|LOC_106|>",
    "<|LOC_107|>",
    "<|LOC_108|>",
    "<|LOC_109|>",
    "<|LOC_110|>",
    "<|LOC_111|>",
    "<|LOC_112|>",
    "<|LOC_113|>",
    "<|LOC_114|>",
    "<|LOC_115|>",
    "<|LOC_116|>",
    "<|LOC_117|>",
    "<|LOC_118|>",
    "<|LOC_119|>",
    "<|LOC_120|>",
    "<|LOC_121|>",
    "<|LOC_122|>",
    "<|LOC_123|>",
    "<|LOC_124|>",
    "<|LOC_125|>",
    "<|LOC_126|>",
    "<|LOC_127|>",
    "<|LOC_128|>",
    "<|LOC_129|>",
    "<|LOC_130|>",
    "<|LOC_131|>",
    "<|LOC_132|>",
    "<|LOC_133|>",
    "<|LOC_134|>",
    "<|LOC_135|>",
    "<|LOC_136|>",
    "<|LOC_137|>",
    "<|LOC_138|>",
    "<|LOC_139|>",
    "<|LOC_140|>",
    "<|LOC_141|>",
    "<|LOC_142|>",
    "<|LOC_143|>",
    "<|LOC_144|>",
    "<|LOC_145|>",
    "<|LOC_146|>",
         | 
| 152 | 
            +
                "<|LOC_147|>",
         | 
| 153 | 
            +
                "<|LOC_148|>",
         | 
| 154 | 
            +
                "<|LOC_149|>",
         | 
| 155 | 
            +
                "<|LOC_150|>",
         | 
| 156 | 
            +
                "<|LOC_151|>",
         | 
| 157 | 
            +
                "<|LOC_152|>",
         | 
| 158 | 
            +
                "<|LOC_153|>",
         | 
| 159 | 
            +
                "<|LOC_154|>",
         | 
| 160 | 
            +
                "<|LOC_155|>",
         | 
| 161 | 
            +
                "<|LOC_156|>",
         | 
| 162 | 
            +
                "<|LOC_157|>",
         | 
| 163 | 
            +
                "<|LOC_158|>",
         | 
| 164 | 
            +
                "<|LOC_159|>",
         | 
| 165 | 
            +
                "<|LOC_160|>",
         | 
| 166 | 
            +
                "<|LOC_161|>",
         | 
| 167 | 
            +
                "<|LOC_162|>",
         | 
| 168 | 
            +
                "<|LOC_163|>",
         | 
| 169 | 
            +
                "<|LOC_164|>",
         | 
| 170 | 
            +
                "<|LOC_165|>",
         | 
| 171 | 
            +
                "<|LOC_166|>",
         | 
| 172 | 
            +
                "<|LOC_167|>",
         | 
| 173 | 
            +
                "<|LOC_168|>",
         | 
| 174 | 
            +
                "<|LOC_169|>",
         | 
| 175 | 
            +
                "<|LOC_170|>",
         | 
| 176 | 
            +
                "<|LOC_171|>",
         | 
| 177 | 
            +
                "<|LOC_172|>",
         | 
| 178 | 
            +
                "<|LOC_173|>",
         | 
| 179 | 
            +
                "<|LOC_174|>",
         | 
| 180 | 
            +
                "<|LOC_175|>",
         | 
| 181 | 
            +
                "<|LOC_176|>",
         | 
| 182 | 
            +
                "<|LOC_177|>",
         | 
| 183 | 
            +
                "<|LOC_178|>",
         | 
| 184 | 
            +
                "<|LOC_179|>",
         | 
| 185 | 
            +
                "<|LOC_180|>",
         | 
| 186 | 
            +
                "<|LOC_181|>",
         | 
| 187 | 
            +
                "<|LOC_182|>",
         | 
| 188 | 
            +
                "<|LOC_183|>",
         | 
| 189 | 
            +
                "<|LOC_184|>",
         | 
| 190 | 
            +
                "<|LOC_185|>",
         | 
| 191 | 
            +
                "<|LOC_186|>",
         | 
| 192 | 
            +
                "<|LOC_187|>",
         | 
| 193 | 
            +
                "<|LOC_188|>",
         | 
| 194 | 
            +
                "<|LOC_189|>",
         | 
| 195 | 
            +
                "<|LOC_190|>",
         | 
| 196 | 
            +
                "<|LOC_191|>",
         | 
| 197 | 
            +
                "<|LOC_192|>",
         | 
| 198 | 
            +
                "<|LOC_193|>",
         | 
| 199 | 
            +
                "<|LOC_194|>",
         | 
| 200 | 
            +
                "<|LOC_195|>",
         | 
| 201 | 
            +
                "<|LOC_196|>",
         | 
| 202 | 
            +
                "<|LOC_197|>",
         | 
| 203 | 
            +
                "<|LOC_198|>",
         | 
| 204 | 
            +
                "<|LOC_199|>",
         | 
| 205 | 
            +
                "<|LOC_200|>",
         | 
| 206 | 
            +
                "<|LOC_201|>",
         | 
| 207 | 
            +
                "<|LOC_202|>",
         | 
| 208 | 
            +
                "<|LOC_203|>",
         | 
| 209 | 
            +
                "<|LOC_204|>",
         | 
| 210 | 
            +
                "<|LOC_205|>",
         | 
| 211 | 
            +
                "<|LOC_206|>",
         | 
| 212 | 
            +
                "<|LOC_207|>",
         | 
| 213 | 
            +
                "<|LOC_208|>",
         | 
| 214 | 
            +
                "<|LOC_209|>",
         | 
| 215 | 
            +
                "<|LOC_210|>",
         | 
| 216 | 
            +
                "<|LOC_211|>",
         | 
| 217 | 
            +
                "<|LOC_212|>",
         | 
| 218 | 
            +
                "<|LOC_213|>",
         | 
| 219 | 
            +
                "<|LOC_214|>",
         | 
| 220 | 
            +
                "<|LOC_215|>",
         | 
| 221 | 
            +
                "<|LOC_216|>",
         | 
| 222 | 
            +
                "<|LOC_217|>",
         | 
| 223 | 
            +
                "<|LOC_218|>",
         | 
| 224 | 
            +
                "<|LOC_219|>",
         | 
| 225 | 
            +
                "<|LOC_220|>",
         | 
| 226 | 
            +
                "<|LOC_221|>",
         | 
| 227 | 
            +
                "<|LOC_222|>",
         | 
| 228 | 
            +
                "<|LOC_223|>",
         | 
| 229 | 
            +
                "<|LOC_224|>",
         | 
| 230 | 
            +
                "<|LOC_225|>",
         | 
| 231 | 
            +
                "<|LOC_226|>",
         | 
| 232 | 
            +
                "<|LOC_227|>",
         | 
| 233 | 
            +
                "<|LOC_228|>",
         | 
| 234 | 
            +
                "<|LOC_229|>",
         | 
| 235 | 
            +
                "<|LOC_230|>",
         | 
| 236 | 
            +
                "<|LOC_231|>",
         | 
| 237 | 
            +
                "<|LOC_232|>",
         | 
| 238 | 
            +
                "<|LOC_233|>",
         | 
| 239 | 
            +
                "<|LOC_234|>",
         | 
| 240 | 
            +
                "<|LOC_235|>",
         | 
| 241 | 
            +
                "<|LOC_236|>",
         | 
| 242 | 
            +
                "<|LOC_237|>",
         | 
| 243 | 
            +
                "<|LOC_238|>",
         | 
| 244 | 
            +
                "<|LOC_239|>",
         | 
| 245 | 
            +
                "<|LOC_240|>",
         | 
| 246 | 
            +
                "<|LOC_241|>",
         | 
| 247 | 
            +
                "<|LOC_242|>",
         | 
| 248 | 
            +
                "<|LOC_243|>",
         | 
| 249 | 
            +
                "<|LOC_244|>",
         | 
| 250 | 
            +
                "<|LOC_245|>",
         | 
| 251 | 
            +
                "<|LOC_246|>",
         | 
| 252 | 
            +
                "<|LOC_247|>",
         | 
| 253 | 
            +
                "<|LOC_248|>",
         | 
| 254 | 
            +
                "<|LOC_249|>",
         | 
| 255 | 
            +
                "<|LOC_250|>",
         | 
| 256 | 
            +
                "<|LOC_251|>",
         | 
| 257 | 
            +
                "<|LOC_252|>",
         | 
| 258 | 
            +
                "<|LOC_253|>",
         | 
| 259 | 
            +
                "<|LOC_254|>",
         | 
| 260 | 
            +
                "<|LOC_255|>",
         | 
| 261 | 
            +
                "<|LOC_256|>",
         | 
| 262 | 
            +
                "<|LOC_257|>",
         | 
| 263 | 
            +
                "<|LOC_258|>",
         | 
| 264 | 
            +
                "<|LOC_259|>",
         | 
| 265 | 
            +
                "<|LOC_260|>",
         | 
| 266 | 
            +
                "<|LOC_261|>",
         | 
| 267 | 
            +
                "<|LOC_262|>",
         | 
| 268 | 
            +
                "<|LOC_263|>",
         | 
| 269 | 
            +
                "<|LOC_264|>",
         | 
| 270 | 
            +
                "<|LOC_265|>",
         | 
| 271 | 
            +
                "<|LOC_266|>",
         | 
| 272 | 
            +
                "<|LOC_267|>",
         | 
| 273 | 
            +
                "<|LOC_268|>",
         | 
| 274 | 
            +
                "<|LOC_269|>",
         | 
| 275 | 
            +
                "<|LOC_270|>",
         | 
| 276 | 
            +
                "<|LOC_271|>",
         | 
| 277 | 
            +
                "<|LOC_272|>",
         | 
| 278 | 
            +
                "<|LOC_273|>",
         | 
| 279 | 
            +
                "<|LOC_274|>",
         | 
| 280 | 
            +
                "<|LOC_275|>",
         | 
| 281 | 
            +
                "<|LOC_276|>",
         | 
| 282 | 
            +
                "<|LOC_277|>",
         | 
| 283 | 
            +
                "<|LOC_278|>",
         | 
| 284 | 
            +
                "<|LOC_279|>",
         | 
| 285 | 
            +
                "<|LOC_280|>",
         | 
| 286 | 
            +
                "<|LOC_281|>",
         | 
| 287 | 
            +
                "<|LOC_282|>",
         | 
| 288 | 
            +
                "<|LOC_283|>",
         | 
| 289 | 
            +
                "<|LOC_284|>",
         | 
| 290 | 
            +
                "<|LOC_285|>",
         | 
| 291 | 
            +
                "<|LOC_286|>",
         | 
| 292 | 
            +
                "<|LOC_287|>",
         | 
| 293 | 
            +
                "<|LOC_288|>",
         | 
| 294 | 
            +
                "<|LOC_289|>",
         | 
| 295 | 
            +
                "<|LOC_290|>",
         | 
| 296 | 
            +
                "<|LOC_291|>",
         | 
| 297 | 
            +
                "<|LOC_292|>",
         | 
| 298 | 
            +
                "<|LOC_293|>",
         | 
| 299 | 
            +
                "<|LOC_294|>",
         | 
| 300 | 
            +
                "<|LOC_295|>",
         | 
| 301 | 
            +
                "<|LOC_296|>",
         | 
| 302 | 
            +
                "<|LOC_297|>",
         | 
| 303 | 
            +
                "<|LOC_298|>",
         | 
| 304 | 
            +
                "<|LOC_299|>",
         | 
| 305 | 
            +
                "<|LOC_300|>",
         | 
| 306 | 
            +
                "<|LOC_301|>",
         | 
| 307 | 
            +
                "<|LOC_302|>",
         | 
| 308 | 
            +
                "<|LOC_303|>",
         | 
| 309 | 
            +
                "<|LOC_304|>",
         | 
| 310 | 
            +
                "<|LOC_305|>",
         | 
| 311 | 
            +
                "<|LOC_306|>",
         | 
| 312 | 
            +
                "<|LOC_307|>",
         | 
| 313 | 
            +
                "<|LOC_308|>",
         | 
| 314 | 
            +
                "<|LOC_309|>",
         | 
| 315 | 
            +
                "<|LOC_310|>",
         | 
| 316 | 
            +
                "<|LOC_311|>",
         | 
| 317 | 
            +
                "<|LOC_312|>",
         | 
| 318 | 
            +
                "<|LOC_313|>",
         | 
| 319 | 
            +
                "<|LOC_314|>",
         | 
| 320 | 
            +
                "<|LOC_315|>",
         | 
| 321 | 
            +
                "<|LOC_316|>",
         | 
| 322 | 
            +
                "<|LOC_317|>",
         | 
| 323 | 
            +
                "<|LOC_318|>",
         | 
| 324 | 
            +
                "<|LOC_319|>",
         | 
| 325 | 
            +
                "<|LOC_320|>",
         | 
| 326 | 
            +
                "<|LOC_321|>",
         | 
| 327 | 
            +
                "<|LOC_322|>",
         | 
| 328 | 
            +
                "<|LOC_323|>",
         | 
| 329 | 
            +
                "<|LOC_324|>",
         | 
| 330 | 
            +
                "<|LOC_325|>",
         | 
| 331 | 
            +
                "<|LOC_326|>",
         | 
| 332 | 
            +
                "<|LOC_327|>",
         | 
| 333 | 
            +
                "<|LOC_328|>",
         | 
| 334 | 
            +
                "<|LOC_329|>",
         | 
| 335 | 
            +
                "<|LOC_330|>",
         | 
| 336 | 
            +
                "<|LOC_331|>",
         | 
| 337 | 
            +
                "<|LOC_332|>",
         | 
| 338 | 
            +
                "<|LOC_333|>",
         | 
| 339 | 
            +
                "<|LOC_334|>",
         | 
| 340 | 
            +
                "<|LOC_335|>",
         | 
| 341 | 
            +
                "<|LOC_336|>",
         | 
| 342 | 
            +
                "<|LOC_337|>",
         | 
| 343 | 
            +
                "<|LOC_338|>",
         | 
| 344 | 
            +
                "<|LOC_339|>",
         | 
| 345 | 
            +
                "<|LOC_340|>",
         | 
| 346 | 
            +
                "<|LOC_341|>",
         | 
| 347 | 
            +
                "<|LOC_342|>",
         | 
| 348 | 
            +
                "<|LOC_343|>",
         | 
| 349 | 
            +
                "<|LOC_344|>",
         | 
| 350 | 
            +
                "<|LOC_345|>",
         | 
| 351 | 
            +
                "<|LOC_346|>",
         | 
| 352 | 
            +
                "<|LOC_347|>",
         | 
| 353 | 
            +
                "<|LOC_348|>",
         | 
| 354 | 
            +
                "<|LOC_349|>",
         | 
| 355 | 
            +
                "<|LOC_350|>",
         | 
| 356 | 
            +
                "<|LOC_351|>",
         | 
| 357 | 
            +
                "<|LOC_352|>",
         | 
| 358 | 
            +
                "<|LOC_353|>",
         | 
| 359 | 
            +
                "<|LOC_354|>",
         | 
| 360 | 
            +
                "<|LOC_355|>",
         | 
| 361 | 
            +
                "<|LOC_356|>",
         | 
| 362 | 
            +
                "<|LOC_357|>",
         | 
| 363 | 
            +
                "<|LOC_358|>",
         | 
| 364 | 
            +
                "<|LOC_359|>",
         | 
| 365 | 
            +
                "<|LOC_360|>",
         | 
| 366 | 
            +
                "<|LOC_361|>",
         | 
| 367 | 
            +
                "<|LOC_362|>",
         | 
| 368 | 
            +
                "<|LOC_363|>",
         | 
| 369 | 
            +
                "<|LOC_364|>",
         | 
| 370 | 
            +
                "<|LOC_365|>",
         | 
| 371 | 
            +
                "<|LOC_366|>",
         | 
| 372 | 
            +
                "<|LOC_367|>",
         | 
| 373 | 
            +
                "<|LOC_368|>",
         | 
| 374 | 
            +
                "<|LOC_369|>",
         | 
| 375 | 
            +
                "<|LOC_370|>",
         | 
| 376 | 
            +
                "<|LOC_371|>",
         | 
| 377 | 
            +
                "<|LOC_372|>",
         | 
| 378 | 
            +
                "<|LOC_373|>",
         | 
| 379 | 
            +
                "<|LOC_374|>",
         | 
| 380 | 
            +
                "<|LOC_375|>",
         | 
| 381 | 
            +
                "<|LOC_376|>",
         | 
| 382 | 
            +
                "<|LOC_377|>",
         | 
| 383 | 
            +
                "<|LOC_378|>",
         | 
| 384 | 
            +
                "<|LOC_379|>",
         | 
| 385 | 
            +
                "<|LOC_380|>",
         | 
| 386 | 
            +
                "<|LOC_381|>",
         | 
| 387 | 
            +
                "<|LOC_382|>",
         | 
| 388 | 
            +
                "<|LOC_383|>",
         | 
| 389 | 
            +
                "<|LOC_384|>",
         | 
| 390 | 
            +
                "<|LOC_385|>",
         | 
| 391 | 
            +
                "<|LOC_386|>",
         | 
| 392 | 
            +
                "<|LOC_387|>",
         | 
| 393 | 
            +
                "<|LOC_388|>",
         | 
| 394 | 
            +
                "<|LOC_389|>",
         | 
| 395 | 
            +
                "<|LOC_390|>",
         | 
| 396 | 
            +
                "<|LOC_391|>",
         | 
| 397 | 
            +
                "<|LOC_392|>",
         | 
| 398 | 
            +
                "<|LOC_393|>",
         | 
| 399 | 
            +
                "<|LOC_394|>",
         | 
| 400 | 
            +
                "<|LOC_395|>",
         | 
| 401 | 
            +
                "<|LOC_396|>",
         | 
| 402 | 
            +
                "<|LOC_397|>",
         | 
| 403 | 
            +
                "<|LOC_398|>",
         | 
| 404 | 
            +
                "<|LOC_399|>",
         | 
| 405 | 
            +
                "<|LOC_400|>",
         | 
| 406 | 
            +
                "<|LOC_401|>",
         | 
| 407 | 
            +
                "<|LOC_402|>",
         | 
| 408 | 
            +
                "<|LOC_403|>",
         | 
| 409 | 
            +
                "<|LOC_404|>",
         | 
| 410 | 
            +
                "<|LOC_405|>",
         | 
| 411 | 
            +
                "<|LOC_406|>",
         | 
| 412 | 
            +
                "<|LOC_407|>",
         | 
| 413 | 
            +
                "<|LOC_408|>",
         | 
| 414 | 
            +
                "<|LOC_409|>",
         | 
| 415 | 
            +
                "<|LOC_410|>",
         | 
| 416 | 
            +
                "<|LOC_411|>",
         | 
| 417 | 
            +
                "<|LOC_412|>",
         | 
| 418 | 
            +
                "<|LOC_413|>",
         | 
| 419 | 
            +
                "<|LOC_414|>",
         | 
| 420 | 
            +
                "<|LOC_415|>",
         | 
| 421 | 
            +
                "<|LOC_416|>",
         | 
| 422 | 
            +
                "<|LOC_417|>",
         | 
| 423 | 
            +
                "<|LOC_418|>",
         | 
| 424 | 
            +
                "<|LOC_419|>",
         | 
| 425 | 
            +
                "<|LOC_420|>",
         | 
| 426 | 
            +
                "<|LOC_421|>",
         | 
| 427 | 
            +
                "<|LOC_422|>",
         | 
| 428 | 
            +
                "<|LOC_423|>",
         | 
| 429 | 
            +
                "<|LOC_424|>",
         | 
| 430 | 
            +
                "<|LOC_425|>",
         | 
| 431 | 
            +
                "<|LOC_426|>",
         | 
| 432 | 
            +
                "<|LOC_427|>",
         | 
| 433 | 
            +
                "<|LOC_428|>",
         | 
| 434 | 
            +
                "<|LOC_429|>",
         | 
| 435 | 
            +
                "<|LOC_430|>",
         | 
| 436 | 
            +
                "<|LOC_431|>",
         | 
| 437 | 
            +
                "<|LOC_432|>",
         | 
| 438 | 
            +
                "<|LOC_433|>",
         | 
| 439 | 
            +
                "<|LOC_434|>",
         | 
| 440 | 
            +
                "<|LOC_435|>",
         | 
| 441 | 
            +
                "<|LOC_436|>",
         | 
| 442 | 
            +
                "<|LOC_437|>",
         | 
| 443 | 
            +
                "<|LOC_438|>",
         | 
| 444 | 
            +
                "<|LOC_439|>",
         | 
| 445 | 
            +
                "<|LOC_440|>",
         | 
| 446 | 
            +
                "<|LOC_441|>",
         | 
| 447 | 
            +
                "<|LOC_442|>",
         | 
| 448 | 
            +
                "<|LOC_443|>",
         | 
| 449 | 
            +
                "<|LOC_444|>",
         | 
| 450 | 
            +
                "<|LOC_445|>",
         | 
| 451 | 
            +
                "<|LOC_446|>",
         | 
| 452 | 
            +
                "<|LOC_447|>",
         | 
| 453 | 
            +
                "<|LOC_448|>",
         | 
| 454 | 
            +
                "<|LOC_449|>",
         | 
| 455 | 
            +
                "<|LOC_450|>",
         | 
| 456 | 
            +
                "<|LOC_451|>",
         | 
| 457 | 
            +
                "<|LOC_452|>",
         | 
| 458 | 
            +
                "<|LOC_453|>",
         | 
| 459 | 
            +
                "<|LOC_454|>",
         | 
| 460 | 
            +
                "<|LOC_455|>",
         | 
| 461 | 
            +
                "<|LOC_456|>",
         | 
| 462 | 
            +
                "<|LOC_457|>",
         | 
| 463 | 
            +
                "<|LOC_458|>",
         | 
| 464 | 
            +
                "<|LOC_459|>",
         | 
| 465 | 
            +
                "<|LOC_460|>",
         | 
| 466 | 
            +
                "<|LOC_461|>",
         | 
| 467 | 
            +
                "<|LOC_462|>",
         | 
| 468 | 
            +
                "<|LOC_463|>",
         | 
| 469 | 
            +
                "<|LOC_464|>",
         | 
| 470 | 
            +
                "<|LOC_465|>",
         | 
| 471 | 
            +
                "<|LOC_466|>",
         | 
| 472 | 
            +
                "<|LOC_467|>",
         | 
| 473 | 
            +
                "<|LOC_468|>",
         | 
| 474 | 
            +
                "<|LOC_469|>",
         | 
| 475 | 
            +
                "<|LOC_470|>",
         | 
| 476 | 
            +
                "<|LOC_471|>",
         | 
| 477 | 
            +
                "<|LOC_472|>",
         | 
| 478 | 
            +
                "<|LOC_473|>",
         | 
| 479 | 
            +
                "<|LOC_474|>",
         | 
| 480 | 
            +
                "<|LOC_475|>",
         | 
| 481 | 
            +
                "<|LOC_476|>",
         | 
| 482 | 
            +
                "<|LOC_477|>",
         | 
| 483 | 
            +
                "<|LOC_478|>",
         | 
| 484 | 
            +
                "<|LOC_479|>",
         | 
| 485 | 
            +
                "<|LOC_480|>",
         | 
| 486 | 
            +
                "<|LOC_481|>",
         | 
| 487 | 
            +
                "<|LOC_482|>",
         | 
| 488 | 
            +
                "<|LOC_483|>",
         | 
| 489 | 
            +
                "<|LOC_484|>",
         | 
| 490 | 
            +
                "<|LOC_485|>",
         | 
| 491 | 
            +
                "<|LOC_486|>",
         | 
| 492 | 
            +
                "<|LOC_487|>",
         | 
| 493 | 
            +
                "<|LOC_488|>",
         | 
| 494 | 
            +
                "<|LOC_489|>",
         | 
| 495 | 
            +
                "<|LOC_490|>",
         | 
| 496 | 
            +
                "<|LOC_491|>",
         | 
| 497 | 
            +
                "<|LOC_492|>",
         | 
| 498 | 
            +
                "<|LOC_493|>",
         | 
| 499 | 
            +
                "<|LOC_494|>",
         | 
| 500 | 
            +
                "<|LOC_495|>",
         | 
| 501 | 
            +
                "<|LOC_496|>",
         | 
| 502 | 
            +
                "<|LOC_497|>",
         | 
| 503 | 
            +
                "<|LOC_498|>",
         | 
| 504 | 
            +
                "<|LOC_499|>",
         | 
| 505 | 
            +
                "<|LOC_500|>",
         | 
| 506 | 
            +
                "<|LOC_501|>",
         | 
| 507 | 
            +
                "<|LOC_502|>",
         | 
| 508 | 
            +
                "<|LOC_503|>",
         | 
| 509 | 
            +
                "<|LOC_504|>",
         | 
| 510 | 
            +
                "<|LOC_505|>",
         | 
| 511 | 
            +
                "<|LOC_506|>",
         | 
| 512 | 
            +
                "<|LOC_507|>",
         | 
| 513 | 
            +
                "<|LOC_508|>",
         | 
| 514 | 
            +
                "<|LOC_509|>",
         | 
| 515 | 
            +
                "<|LOC_510|>",
         | 
| 516 | 
            +
                "<|LOC_511|>",
         | 
| 517 | 
            +
                "<|LOC_512|>",
         | 
| 518 | 
            +
                "<|LOC_513|>",
         | 
| 519 | 
            +
                "<|LOC_514|>",
         | 
| 520 | 
            +
                "<|LOC_515|>",
         | 
| 521 | 
            +
                "<|LOC_516|>",
         | 
| 522 | 
            +
                "<|LOC_517|>",
         | 
| 523 | 
            +
                "<|LOC_518|>",
         | 
| 524 | 
            +
                "<|LOC_519|>",
         | 
| 525 | 
            +
                "<|LOC_520|>",
         | 
| 526 | 
            +
                "<|LOC_521|>",
         | 
| 527 | 
            +
                "<|LOC_522|>",
         | 
| 528 | 
            +
                "<|LOC_523|>",
         | 
| 529 | 
            +
                "<|LOC_524|>",
         | 
| 530 | 
            +
                "<|LOC_525|>",
         | 
| 531 | 
            +
                "<|LOC_526|>",
         | 
| 532 | 
            +
                "<|LOC_527|>",
         | 
| 533 | 
            +
                "<|LOC_528|>",
         | 
| 534 | 
            +
                "<|LOC_529|>",
         | 
| 535 | 
            +
                "<|LOC_530|>",
         | 
| 536 | 
            +
                "<|LOC_531|>",
         | 
| 537 | 
            +
                "<|LOC_532|>",
         | 
| 538 | 
            +
                "<|LOC_533|>",
         | 
| 539 | 
            +
                "<|LOC_534|>",
         | 
| 540 | 
            +
                "<|LOC_535|>",
         | 
| 541 | 
            +
                "<|LOC_536|>",
         | 
| 542 | 
            +
                "<|LOC_537|>",
         | 
| 543 | 
            +
                "<|LOC_538|>",
         | 
| 544 | 
            +
                "<|LOC_539|>",
         | 
| 545 | 
            +
                "<|LOC_540|>",
         | 
| 546 | 
            +
                "<|LOC_541|>",
         | 
| 547 | 
            +
                "<|LOC_542|>",
         | 
| 548 | 
            +
                "<|LOC_543|>",
         | 
| 549 | 
            +
                "<|LOC_544|>",
         | 
| 550 | 
            +
                "<|LOC_545|>",
         | 
| 551 | 
            +
                "<|LOC_546|>",
         | 
| 552 | 
            +
                "<|LOC_547|>",
         | 
| 553 | 
            +
                "<|LOC_548|>",
         | 
| 554 | 
            +
                "<|LOC_549|>",
         | 
| 555 | 
            +
                "<|LOC_550|>",
         | 
| 556 | 
            +
                "<|LOC_551|>",
         | 
| 557 | 
            +
                "<|LOC_552|>",
         | 
| 558 | 
            +
                "<|LOC_553|>",
         | 
| 559 | 
            +
                "<|LOC_554|>",
         | 
| 560 | 
            +
                "<|LOC_555|>",
         | 
| 561 | 
            +
                "<|LOC_556|>",
         | 
| 562 | 
            +
                "<|LOC_557|>",
         | 
| 563 | 
            +
                "<|LOC_558|>",
         | 
| 564 | 
            +
                "<|LOC_559|>",
         | 
| 565 | 
            +
                "<|LOC_560|>",
         | 
| 566 | 
            +
                "<|LOC_561|>",
         | 
| 567 | 
            +
                "<|LOC_562|>",
         | 
| 568 | 
            +
                "<|LOC_563|>",
         | 
| 569 | 
            +
                "<|LOC_564|>",
         | 
| 570 | 
            +
                "<|LOC_565|>",
         | 
| 571 | 
            +
                "<|LOC_566|>",
         | 
| 572 | 
            +
                "<|LOC_567|>",
         | 
| 573 | 
            +
                "<|LOC_568|>",
         | 
| 574 | 
            +
                "<|LOC_569|>",
         | 
| 575 | 
            +
                "<|LOC_570|>",
         | 
| 576 | 
            +
                "<|LOC_571|>",
         | 
| 577 | 
            +
                "<|LOC_572|>",
         | 
| 578 | 
            +
                "<|LOC_573|>",
         | 
| 579 | 
            +
                "<|LOC_574|>",
         | 
| 580 | 
            +
                "<|LOC_575|>",
         | 
| 581 | 
            +
                "<|LOC_576|>",
         | 
| 582 | 
            +
                "<|LOC_577|>",
         | 
| 583 | 
            +
                "<|LOC_578|>",
         | 
| 584 | 
            +
                "<|LOC_579|>",
         | 
| 585 | 
            +
                "<|LOC_580|>",
         | 
| 586 | 
            +
                "<|LOC_581|>",
         | 
| 587 | 
            +
                "<|LOC_582|>",
         | 
| 588 | 
            +
                "<|LOC_583|>",
         | 
| 589 | 
            +
                "<|LOC_584|>",
         | 
| 590 | 
            +
                "<|LOC_585|>",
         | 
| 591 | 
            +
                "<|LOC_586|>",
         | 
| 592 | 
            +
                "<|LOC_587|>",
         | 
| 593 | 
            +
                "<|LOC_588|>",
         | 
| 594 | 
            +
                "<|LOC_589|>",
         | 
| 595 | 
            +
                "<|LOC_590|>",
         | 
| 596 | 
            +
                "<|LOC_591|>",
         | 
| 597 | 
            +
                "<|LOC_592|>",
         | 
| 598 | 
            +
                "<|LOC_593|>",
         | 
| 599 | 
            +
                "<|LOC_594|>",
         | 
| 600 | 
            +
                "<|LOC_595|>",
         | 
| 601 | 
            +
                "<|LOC_596|>",
         | 
| 602 | 
            +
                "<|LOC_597|>",
         | 
| 603 | 
            +
                "<|LOC_598|>",
         | 
| 604 | 
            +
                "<|LOC_599|>",
         | 
| 605 | 
            +
                "<|LOC_600|>",
         | 
| 606 | 
            +
                "<|LOC_601|>",
         | 
| 607 | 
            +
                "<|LOC_602|>",
         | 
| 608 | 
            +
                "<|LOC_603|>",
         | 
| 609 | 
            +
                "<|LOC_604|>",
         | 
| 610 | 
            +
                "<|LOC_605|>",
         | 
| 611 | 
            +
                "<|LOC_606|>",
         | 
| 612 | 
            +
                "<|LOC_607|>",
         | 
| 613 | 
            +
                "<|LOC_608|>",
         | 
| 614 | 
            +
                "<|LOC_609|>",
         | 
| 615 | 
            +
                "<|LOC_610|>",
         | 
| 616 | 
            +
                "<|LOC_611|>",
         | 
| 617 | 
            +
                "<|LOC_612|>",
         | 
| 618 | 
            +
                "<|LOC_613|>",
         | 
| 619 | 
            +
                "<|LOC_614|>",
         | 
| 620 | 
            +
                "<|LOC_615|>",
         | 
| 621 | 
            +
                "<|LOC_616|>",
         | 
| 622 | 
            +
                "<|LOC_617|>",
         | 
| 623 | 
            +
                "<|LOC_618|>",
         | 
| 624 | 
            +
                "<|LOC_619|>",
         | 
| 625 | 
            +
                "<|LOC_620|>",
         | 
| 626 | 
            +
                "<|LOC_621|>",
         | 
| 627 | 
            +
                "<|LOC_622|>",
         | 
| 628 | 
            +
                "<|LOC_623|>",
         | 
| 629 | 
            +
                "<|LOC_624|>",
         | 
| 630 | 
            +
                "<|LOC_625|>",
         | 
| 631 | 
            +
                "<|LOC_626|>",
         | 
| 632 | 
            +
                "<|LOC_627|>",
         | 
| 633 | 
            +
                "<|LOC_628|>",
         | 
| 634 | 
            +
                "<|LOC_629|>",
         | 
| 635 | 
            +
                "<|LOC_630|>",
         | 
| 636 | 
            +
                "<|LOC_631|>",
         | 
| 637 | 
            +
                "<|LOC_632|>",
         | 
| 638 | 
            +
                "<|LOC_633|>",
         | 
| 639 | 
            +
                "<|LOC_634|>",
         | 
| 640 | 
            +
                "<|LOC_635|>",
         | 
| 641 | 
            +
                "<|LOC_636|>",
         | 
| 642 | 
            +
                "<|LOC_637|>",
         | 
| 643 | 
            +
                "<|LOC_638|>",
         | 
| 644 | 
            +
                "<|LOC_639|>",
         | 
| 645 | 
            +
                "<|LOC_640|>",
         | 
| 646 | 
            +
                "<|LOC_641|>",
         | 
| 647 | 
            +
                "<|LOC_642|>",
         | 
| 648 | 
            +
                "<|LOC_643|>",
         | 
| 649 | 
            +
                "<|LOC_644|>",
         | 
| 650 | 
            +
                "<|LOC_645|>",
         | 
| 651 | 
            +
                "<|LOC_646|>",
         | 
| 652 | 
            +
                "<|LOC_647|>",
         | 
| 653 | 
            +
                "<|LOC_648|>",
         | 
| 654 | 
            +
                "<|LOC_649|>",
         | 
| 655 | 
            +
                "<|LOC_650|>",
         | 
| 656 | 
            +
                "<|LOC_651|>",
         | 
| 657 | 
            +
                "<|LOC_652|>",
         | 
| 658 | 
            +
                "<|LOC_653|>",
         | 
| 659 | 
            +
                "<|LOC_654|>",
         | 
| 660 | 
            +
                "<|LOC_655|>",
         | 
| 661 | 
            +
                "<|LOC_656|>",
         | 
| 662 | 
            +
                "<|LOC_657|>",
         | 
| 663 | 
            +
                "<|LOC_658|>",
         | 
| 664 | 
            +
                "<|LOC_659|>",
         | 
| 665 | 
            +
                "<|LOC_660|>",
         | 
| 666 | 
            +
                "<|LOC_661|>",
         | 
| 667 | 
            +
                "<|LOC_662|>",
         | 
| 668 | 
            +
                "<|LOC_663|>",
         | 
| 669 | 
            +
                "<|LOC_664|>",
         | 
| 670 | 
            +
                "<|LOC_665|>",
         | 
| 671 | 
            +
                "<|LOC_666|>",
         | 
| 672 | 
            +
                "<|LOC_667|>",
         | 
| 673 | 
            +
                "<|LOC_668|>",
         | 
| 674 | 
            +
                "<|LOC_669|>",
         | 
| 675 | 
            +
                "<|LOC_670|>",
         | 
| 676 | 
            +
                "<|LOC_671|>",
         | 
| 677 | 
            +
                "<|LOC_672|>",
         | 
| 678 | 
            +
                "<|LOC_673|>",
         | 
| 679 | 
            +
                "<|LOC_674|>",
         | 
| 680 | 
            +
                "<|LOC_675|>",
         | 
| 681 | 
            +
                "<|LOC_676|>",
         | 
| 682 | 
            +
                "<|LOC_677|>",
         | 
| 683 | 
            +
                "<|LOC_678|>",
         | 
| 684 | 
            +
                "<|LOC_679|>",
         | 
| 685 | 
            +
                "<|LOC_680|>",
         | 
| 686 | 
            +
                "<|LOC_681|>",
         | 
| 687 | 
            +
                "<|LOC_682|>",
         | 
| 688 | 
            +
                "<|LOC_683|>",
         | 
| 689 | 
            +
                "<|LOC_684|>",
         | 
| 690 | 
            +
                "<|LOC_685|>",
         | 
| 691 | 
            +
                "<|LOC_686|>",
         | 
| 692 | 
            +
                "<|LOC_687|>",
         | 
| 693 | 
            +
                "<|LOC_688|>",
         | 
| 694 | 
            +
                "<|LOC_689|>",
         | 
| 695 | 
            +
                "<|LOC_690|>",
         | 
| 696 | 
            +
                "<|LOC_691|>",
         | 
| 697 | 
            +
                "<|LOC_692|>",
         | 
| 698 | 
            +
                "<|LOC_693|>",
         | 
| 699 | 
            +
                "<|LOC_694|>",
         | 
| 700 | 
            +
                "<|LOC_695|>",
         | 
| 701 | 
            +
                "<|LOC_696|>",
         | 
| 702 | 
            +
                "<|LOC_697|>",
         | 
| 703 | 
            +
                "<|LOC_698|>",
         | 
| 704 | 
            +
                "<|LOC_699|>",
         | 
| 705 | 
            +
                "<|LOC_700|>",
         | 
| 706 | 
            +
                "<|LOC_701|>",
         | 
| 707 | 
            +
                "<|LOC_702|>",
         | 
| 708 | 
            +
                "<|LOC_703|>",
         | 
| 709 | 
            +
                "<|LOC_704|>",
         | 
| 710 | 
            +
                "<|LOC_705|>",
         | 
| 711 | 
            +
                "<|LOC_706|>",
         | 
| 712 | 
            +
                "<|LOC_707|>",
         | 
| 713 | 
            +
                "<|LOC_708|>",
         | 
| 714 | 
            +
                "<|LOC_709|>",
         | 
| 715 | 
            +
                "<|LOC_710|>",
         | 
| 716 | 
            +
                "<|LOC_711|>",
         | 
| 717 | 
            +
                "<|LOC_712|>",
         | 
| 718 | 
            +
                "<|LOC_713|>",
         | 
| 719 | 
            +
                "<|LOC_714|>",
         | 
| 720 | 
            +
                "<|LOC_715|>",
         | 
| 721 | 
            +
                "<|LOC_716|>",
         | 
| 722 | 
            +
                "<|LOC_717|>",
         | 
| 723 | 
            +
                "<|LOC_718|>",
         | 
| 724 | 
            +
                "<|LOC_719|>",
         | 
| 725 | 
            +
                "<|LOC_720|>",
         | 
| 726 | 
            +
                "<|LOC_721|>",
         | 
| 727 | 
            +
                "<|LOC_722|>",
         | 
| 728 | 
            +
                "<|LOC_723|>",
         | 
| 729 | 
            +
                "<|LOC_724|>",
         | 
| 730 | 
            +
                "<|LOC_725|>",
         | 
| 731 | 
            +
                "<|LOC_726|>",
         | 
| 732 | 
            +
                "<|LOC_727|>",
         | 
| 733 | 
            +
                "<|LOC_728|>",
         | 
| 734 | 
            +
                "<|LOC_729|>",
         | 
| 735 | 
            +
                "<|LOC_730|>",
         | 
| 736 | 
            +
                "<|LOC_731|>",
         | 
| 737 | 
            +
                "<|LOC_732|>",
         | 
| 738 | 
            +
                "<|LOC_733|>",
         | 
| 739 | 
            +
                "<|LOC_734|>",
         | 
| 740 | 
            +
                "<|LOC_735|>",
         | 
| 741 | 
            +
                "<|LOC_736|>",
         | 
| 742 | 
            +
                "<|LOC_737|>",
         | 
| 743 | 
            +
                "<|LOC_738|>",
         | 
| 744 | 
            +
                "<|LOC_739|>",
         | 
| 745 | 
            +
                "<|LOC_740|>",
         | 
| 746 | 
            +
                "<|LOC_741|>",
         | 
| 747 | 
            +
                "<|LOC_742|>",
         | 
| 748 | 
            +
                "<|LOC_743|>",
         | 
| 749 | 
            +
                "<|LOC_744|>",
         | 
| 750 | 
            +
                "<|LOC_745|>",
         | 
| 751 | 
            +
                "<|LOC_746|>",
         | 
| 752 | 
            +
                "<|LOC_747|>",
         | 
| 753 | 
            +
                "<|LOC_748|>",
         | 
| 754 | 
            +
                "<|LOC_749|>",
         | 
| 755 | 
            +
                "<|LOC_750|>",
         | 
| 756 | 
            +
                "<|LOC_751|>",
         | 
| 757 | 
            +
                "<|LOC_752|>",
         | 
| 758 | 
            +
                "<|LOC_753|>",
         | 
| 759 | 
            +
                "<|LOC_754|>",
         | 
| 760 | 
            +
                "<|LOC_755|>",
         | 
| 761 | 
            +
                "<|LOC_756|>",
         | 
| 762 | 
            +
                "<|LOC_757|>",
         | 
| 763 | 
            +
                "<|LOC_758|>",
         | 
| 764 | 
            +
                "<|LOC_759|>",
         | 
| 765 | 
            +
                "<|LOC_760|>",
         | 
| 766 | 
            +
                "<|LOC_761|>",
         | 
| 767 | 
            +
                "<|LOC_762|>",
         | 
| 768 | 
            +
                "<|LOC_763|>",
         | 
| 769 | 
            +
                "<|LOC_764|>",
         | 
| 770 | 
            +
                "<|LOC_765|>",
         | 
| 771 | 
            +
                "<|LOC_766|>",
         | 
| 772 | 
            +
                "<|LOC_767|>",
         | 
| 773 | 
            +
                "<|LOC_768|>",
         | 
| 774 | 
            +
                "<|LOC_769|>",
         | 
| 775 | 
            +
                "<|LOC_770|>",
         | 
| 776 | 
            +
                "<|LOC_771|>",
         | 
| 777 | 
            +
                "<|LOC_772|>",
         | 
| 778 | 
            +
                "<|LOC_773|>",
         | 
| 779 | 
            +
                "<|LOC_774|>",
         | 
| 780 | 
            +
                "<|LOC_775|>",
         | 
| 781 | 
            +
                "<|LOC_776|>",
         | 
| 782 | 
            +
                "<|LOC_777|>",
         | 
| 783 | 
            +
                "<|LOC_778|>",
         | 
| 784 | 
            +
                "<|LOC_779|>",
         | 
| 785 | 
            +
                "<|LOC_780|>",
         | 
| 786 | 
            +
                "<|LOC_781|>",
         | 
| 787 | 
            +
                "<|LOC_782|>",
         | 
| 788 | 
            +
                "<|LOC_783|>",
         | 
| 789 | 
            +
                "<|LOC_784|>",
         | 
| 790 | 
            +
                "<|LOC_785|>",
         | 
| 791 | 
            +
                "<|LOC_786|>",
         | 
| 792 | 
            +
                "<|LOC_787|>",
         | 
| 793 | 
            +
                "<|LOC_788|>",
         | 
| 794 | 
            +
                "<|LOC_789|>",
         | 
| 795 | 
            +
                "<|LOC_790|>",
         | 
| 796 | 
            +
                "<|LOC_791|>",
         | 
| 797 | 
            +
                "<|LOC_792|>",
         | 
| 798 | 
            +
                "<|LOC_793|>",
         | 
| 799 | 
            +
                "<|LOC_794|>",
         | 
| 800 | 
            +
                "<|LOC_795|>",
         | 
| 801 | 
            +
                "<|LOC_796|>",
         | 
| 802 | 
            +
                "<|LOC_797|>",
         | 
| 803 | 
            +
                "<|LOC_798|>",
         | 
| 804 | 
            +
                "<|LOC_799|>",
         | 
| 805 | 
            +
                "<|LOC_800|>",
         | 
| 806 | 
            +
                "<|LOC_801|>",
         | 
| 807 | 
            +
                "<|LOC_802|>",
         | 
| 808 | 
            +
                "<|LOC_803|>",
         | 
| 809 | 
            +
                "<|LOC_804|>",
         | 
| 810 | 
            +
                "<|LOC_805|>",
         | 
| 811 | 
            +
                "<|LOC_806|>",
         | 
| 812 | 
            +
                "<|LOC_807|>",
         | 
| 813 | 
            +
                "<|LOC_808|>",
         | 
| 814 | 
            +
                "<|LOC_809|>",
         | 
| 815 | 
            +
                "<|LOC_810|>",
         | 
| 816 | 
            +
                "<|LOC_811|>",
         | 
| 817 | 
            +
                "<|LOC_812|>",
         | 
| 818 | 
            +
                "<|LOC_813|>",
         | 
| 819 | 
            +
                "<|LOC_814|>",
         | 
| 820 | 
            +
                "<|LOC_815|>",
         | 
| 821 | 
            +
                "<|LOC_816|>",
         | 
| 822 | 
            +
                "<|LOC_817|>",
         | 
| 823 | 
            +
                "<|LOC_818|>",
         | 
| 824 | 
            +
                "<|LOC_819|>",
         | 
| 825 | 
            +
                "<|LOC_820|>",
         | 
| 826 | 
            +
                "<|LOC_821|>",
         | 
| 827 | 
            +
                "<|LOC_822|>",
         | 
| 828 | 
            +
                "<|LOC_823|>",
         | 
| 829 | 
            +
                "<|LOC_824|>",
         | 
| 830 | 
            +
                "<|LOC_825|>",
         | 
| 831 | 
            +
                "<|LOC_826|>",
         | 
| 832 | 
            +
                "<|LOC_827|>",
         | 
| 833 | 
            +
                "<|LOC_828|>",
         | 
| 834 | 
            +
                "<|LOC_829|>",
         | 
| 835 | 
            +
                "<|LOC_830|>",
         | 
| 836 | 
            +
                "<|LOC_831|>",
         | 
| 837 | 
            +
                "<|LOC_832|>",
         | 
| 838 | 
            +
                "<|LOC_833|>",
         | 
| 839 | 
            +
                "<|LOC_834|>",
         | 
| 840 | 
            +
                "<|LOC_835|>",
         | 
| 841 | 
            +
                "<|LOC_836|>",
         | 
| 842 | 
            +
                "<|LOC_837|>",
         | 
| 843 | 
            +
                "<|LOC_838|>",
         | 
| 844 | 
            +
                "<|LOC_839|>",
         | 
| 845 | 
            +
                "<|LOC_840|>",
         | 
| 846 | 
            +
                "<|LOC_841|>",
         | 
| 847 | 
            +
                "<|LOC_842|>",
         | 
| 848 | 
            +
                "<|LOC_843|>",
         | 
| 849 | 
            +
                "<|LOC_844|>",
         | 
| 850 | 
            +
                "<|LOC_845|>",
         | 
| 851 | 
            +
                "<|LOC_846|>",
         | 
| 852 | 
            +
                "<|LOC_847|>",
         | 
| 853 | 
            +
                "<|LOC_848|>",
         | 
| 854 | 
            +
                "<|LOC_849|>",
         | 
| 855 | 
            +
                "<|LOC_850|>",
         | 
| 856 | 
            +
                "<|LOC_851|>",
         | 
| 857 | 
            +
                "<|LOC_852|>",
         | 
| 858 | 
            +
                "<|LOC_853|>",
         | 
| 859 | 
            +
                "<|LOC_854|>",
         | 
| 860 | 
            +
                "<|LOC_855|>",
         | 
| 861 | 
            +
                "<|LOC_856|>",
         | 
| 862 | 
            +
                "<|LOC_857|>",
         | 
| 863 | 
            +
                "<|LOC_858|>",
         | 
| 864 | 
            +
                "<|LOC_859|>",
         | 
| 865 | 
            +
                "<|LOC_860|>",
         | 
| 866 | 
            +
                "<|LOC_861|>",
         | 
| 867 | 
            +
                "<|LOC_862|>",
         | 
| 868 | 
            +
                "<|LOC_863|>",
         | 
| 869 | 
            +
                "<|LOC_864|>",
         | 
| 870 | 
            +
                "<|LOC_865|>",
         | 
| 871 | 
            +
                "<|LOC_866|>",
         | 
| 872 | 
            +
                "<|LOC_867|>",
         | 
| 873 | 
            +
                "<|LOC_868|>",
         | 
| 874 | 
            +
                "<|LOC_869|>",
         | 
| 875 | 
            +
                "<|LOC_870|>",
         | 
| 876 | 
            +
                "<|LOC_871|>",
         | 
| 877 | 
            +
                "<|LOC_872|>",
         | 
| 878 | 
            +
                "<|LOC_873|>",
         | 
| 879 | 
            +
                "<|LOC_874|>",
         | 
| 880 | 
            +
                "<|LOC_875|>",
         | 
| 881 | 
            +
                "<|LOC_876|>",
         | 
| 882 | 
            +
                "<|LOC_877|>",
         | 
| 883 | 
            +
                "<|LOC_878|>",
         | 
| 884 | 
            +
                "<|LOC_879|>",
         | 
| 885 | 
            +
                "<|LOC_880|>",
         | 
| 886 | 
            +
                "<|LOC_881|>",
         | 
| 887 | 
            +
                "<|LOC_882|>",
         | 
| 888 | 
            +
                "<|LOC_883|>",
         | 
| 889 | 
            +
                "<|LOC_884|>",
         | 
| 890 | 
            +
                "<|LOC_885|>",
         | 
| 891 | 
            +
                "<|LOC_886|>",
         | 
| 892 | 
            +
                "<|LOC_887|>",
         | 
| 893 | 
            +
                "<|LOC_888|>",
         | 
| 894 | 
            +
                "<|LOC_889|>",
         | 
| 895 | 
            +
                "<|LOC_890|>",
         | 
| 896 | 
            +
                "<|LOC_891|>",
         | 
| 897 | 
            +
                "<|LOC_892|>",
         | 
| 898 | 
            +
                "<|LOC_893|>",
         | 
| 899 | 
            +
                "<|LOC_894|>",
         | 
| 900 | 
            +
                "<|LOC_895|>",
         | 
| 901 | 
            +
                "<|LOC_896|>",
         | 
| 902 | 
            +
                "<|LOC_897|>",
         | 
| 903 | 
            +
                "<|LOC_898|>",
         | 
| 904 | 
            +
                "<|LOC_899|>",
         | 
| 905 | 
            +
                "<|LOC_900|>",
         | 
| 906 | 
+    "<|LOC_901|>",
+    "<|LOC_902|>",
+    "<|LOC_903|>",
+    "<|LOC_904|>",
+    "<|LOC_905|>",
+    "<|LOC_906|>",
+    "<|LOC_907|>",
+    "<|LOC_908|>",
+    "<|LOC_909|>",
+    "<|LOC_910|>",
+    "<|LOC_911|>",
+    "<|LOC_912|>",
+    "<|LOC_913|>",
+    "<|LOC_914|>",
+    "<|LOC_915|>",
+    "<|LOC_916|>",
+    "<|LOC_917|>",
+    "<|LOC_918|>",
+    "<|LOC_919|>",
+    "<|LOC_920|>",
+    "<|LOC_921|>",
+    "<|LOC_922|>",
+    "<|LOC_923|>",
+    "<|LOC_924|>",
+    "<|LOC_925|>",
+    "<|LOC_926|>",
+    "<|LOC_927|>",
+    "<|LOC_928|>",
+    "<|LOC_929|>",
+    "<|LOC_930|>",
+    "<|LOC_931|>",
+    "<|LOC_932|>",
+    "<|LOC_933|>",
+    "<|LOC_934|>",
+    "<|LOC_935|>",
+    "<|LOC_936|>",
+    "<|LOC_937|>",
+    "<|LOC_938|>",
+    "<|LOC_939|>",
+    "<|LOC_940|>",
+    "<|LOC_941|>",
+    "<|LOC_942|>",
+    "<|LOC_943|>",
+    "<|LOC_944|>",
+    "<|LOC_945|>",
+    "<|LOC_946|>",
+    "<|LOC_947|>",
+    "<|LOC_948|>",
+    "<|LOC_949|>",
+    "<|LOC_950|>",
+    "<|LOC_951|>",
+    "<|LOC_952|>",
+    "<|LOC_953|>",
+    "<|LOC_954|>",
+    "<|LOC_955|>",
+    "<|LOC_956|>",
+    "<|LOC_957|>",
+    "<|LOC_958|>",
+    "<|LOC_959|>",
+    "<|LOC_960|>",
+    "<|LOC_961|>",
+    "<|LOC_962|>",
+    "<|LOC_963|>",
+    "<|LOC_964|>",
+    "<|LOC_965|>",
+    "<|LOC_966|>",
+    "<|LOC_967|>",
+    "<|LOC_968|>",
+    "<|LOC_969|>",
+    "<|LOC_970|>",
+    "<|LOC_971|>",
+    "<|LOC_972|>",
+    "<|LOC_973|>",
+    "<|LOC_974|>",
+    "<|LOC_975|>",
+    "<|LOC_976|>",
+    "<|LOC_977|>",
+    "<|LOC_978|>",
+    "<|LOC_979|>",
+    "<|LOC_980|>",
+    "<|LOC_981|>",
+    "<|LOC_982|>",
+    "<|LOC_983|>",
+    "<|LOC_984|>",
+    "<|LOC_985|>",
+    "<|LOC_986|>",
+    "<|LOC_987|>",
+    "<|LOC_988|>",
+    "<|LOC_989|>",
+    "<|LOC_990|>",
+    "<|LOC_991|>",
+    "<|LOC_992|>",
+    "<|LOC_993|>",
+    "<|LOC_994|>",
+    "<|LOC_995|>",
+    "<|LOC_996|>",
+    "<|LOC_997|>",
+    "<|LOC_998|>",
+    "<|LOC_999|>",
+    "<|LOC_1000|>",
+    "<|LOC_BEGIN|>",
+    "<|LOC_END|>",
+    "<|LOC_SEP|>",
+    "<|CROP_COL_SEP|>",
+    "<|CROP_ROW_SEP|>",
+    "<|IMAGE_SEP|>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<|begin_of_sentence|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask:1>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "<|end_of_sentence|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
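The `<|LOC_n|>` entries (whose tail, up to `<|LOC_1000|>`, appears above) look like a discretized coordinate vocabulary for visual grounding, with `<|LOC_BEGIN|>`, `<|LOC_END|>` and `<|LOC_SEP|>` as delimiters and the `<|CROP_*|>`/`<|IMAGE_SEP|>` tokens separating image tiles. A minimal sketch of how a bounding box could be serialized under that reading; the 1000-bin quantization and the delimiter layout are assumptions, not documented in this repo:

# Hypothetical sketch: serialize a normalized bounding box with the LOC_*
# special tokens. The 1000-bin quantization and delimiter layout are assumed.
def box_to_loc_tokens(x0, y0, x1, y1, num_bins=1000):
    def bin_token(v):
        # Clamp to [0, 1] and map to a <|LOC_n|> bin token.
        n = max(0, min(num_bins, round(v * num_bins)))
        return f"<|LOC_{n}|>"

    coords = "<|LOC_SEP|>".join(bin_token(v) for v in (x0, y0, x1, y1))
    return f"<|LOC_BEGIN|>{coords}<|LOC_END|>"

print(box_to_loc_tokens(0.12, 0.3, 0.95, 0.88))
# <|LOC_BEGIN|><|LOC_120|><|LOC_SEP|><|LOC_300|><|LOC_SEP|><|LOC_950|><|LOC_SEP|><|LOC_880|><|LOC_END|>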
    	
tokenization_ernie4_5.py ADDED
@@ -0,0 +1,373 @@
+# Copyright (c) 2025 Baidu, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from shutil import copyfile
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+import sentencepiece as spm
+
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.tokenization_utils_base import PaddingStrategy
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+
+class Ernie4_5_Tokenizer(PreTrainedTokenizer):
+
+    vocab_files_names = {
+        "vocab_file": "tokenizer.model",
+    }
+    # Model input names expected by the tokenizer
+    model_input_names = ["input_ids", "position_ids", "attention_mask", "labels"]
+    # Padding side (where to add padding tokens)
+    padding_side = "right"
+
+    def __init__(
+        self,
+        vocab_file,
+        bos_token="<s>",
+        cls_token="<cls>",
+        eos_token="</s>",
+        mask_token="<mask:0>",
+        pad_token="<pad>",
+        sep_token="<sep>",
+        unk_token="<unk>",
+        additional_special_tokens=None,
+        split_special_tokens=False,
+        tokenizer_alpha=None,
+        **kwargs,
+    ):
+        """
+        Initialize the ERNIE tokenizer.
+
+        Args:
+            vocab_file (str): Path to the SentencePiece model file.
+            bos_token (str, optional): Beginning of sentence token. Defaults to "<s>".
+            cls_token (str, optional): Classification token. Defaults to "<cls>".
+            eos_token (str, optional): End of sentence token. Defaults to "</s>".
+            mask_token (str, optional): Mask token. Defaults to "<mask:0>".
+            pad_token (str, optional): Padding token. Defaults to "<pad>".
+            sep_token (str, optional): Separator token. Defaults to "<sep>".
+            unk_token (str, optional): Unknown token. Defaults to "<unk>".
+            additional_special_tokens (List[str], optional): Additional special tokens.
+                Defaults to ["<mask:1>", "<mask:7>"].
+            split_special_tokens (bool, optional): Whether to split special tokens. Defaults to False.
+            tokenizer_alpha (float, optional): Alpha parameter for SentencePiece sampling.
+            **kwargs: Additional keyword arguments passed to the parent class.
+        """
+
+        self.vocab_file = vocab_file
+        self.sp_model = spm.SentencePieceProcessor()
+        self.sp_model.Load(vocab_file)
+        self.tokenizer_alpha = tokenizer_alpha
+
+        if additional_special_tokens is None:
+            additional_special_tokens = ["<mask:1>", "<mask:7>"]
+        super().__init__(
+            bos_token=bos_token,
+            cls_token=cls_token,
+            eos_token=eos_token,
+            mask_token=mask_token,
+            pad_token=pad_token,
+            sep_token=sep_token,
+            unk_token=unk_token,
+            additional_special_tokens=additional_special_tokens,
+            split_special_tokens=split_special_tokens,
+            **kwargs,
+        )
+
+    @property
+    def vocab_size(self):
+        """Returns the size of the vocabulary.
+
+        Returns:
+            int: The number of tokens in the vocabulary.
+        """
+        return self.sp_model.vocab_size()
+
+    def get_vocab(self):
+        """Get the vocabulary as a dictionary mapping tokens to their IDs.
+
+        Returns:
+            dict: A dictionary mapping tokens to their corresponding IDs.
+        """
+        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+
+    def _tokenize(self, text):
+        """Tokenize text using SentencePiece.
+
+        Args:
+            text (str): The text to tokenize.
+
+        Returns:
+            list: A list of tokens.
+        """
+        if self.tokenizer_alpha is not None:
+            return self.sp_model.encode_as_pieces(
+                text,
+                enable_sampling=True,
+                nbest_size=-1,
+                alpha=self.tokenizer_alpha,
+            )
+        else:
+            return self.sp_model.encode_as_pieces(text)
+
+    def _convert_token_to_id(self, token):
+        """Convert a token (str) to an ID using the vocabulary.
+
+        Args:
+            token (str): The token to convert.
+
+        Returns:
+            int: The corresponding token ID.
+        """
+        return self.sp_model.piece_to_id(token)
+
+    def _convert_id_to_token(self, id):
+        """Convert an ID to a token (str) using the vocabulary.
+
+        Args:
+            id (int): The token ID to convert.
+
+        Returns:
+            str: The corresponding token.
+        """
+        if id >= self.vocab_size:
+            return self.unk_token
+        else:
+            return self.sp_model.id_to_piece(id)
+
+    def convert_tokens_to_string(self, tokens):
+        """Convert a sequence of tokens back to a single string.
+
+        Args:
+            tokens (List[str]): A list of tokens to convert.
+
+        Returns:
+            str: The reconstructed string.
+        """
+        current_sub_tokens = []
+        out_string = ""
+        prev_is_special = False
+        for token in tokens:
+            # make sure that special tokens are not decoded using sentencepiece model
+            if token in self.all_special_tokens:
+                if not prev_is_special:
+                    out_string += " "
+                out_string += self.sp_model.decode(current_sub_tokens) + token
+                prev_is_special = True
+                current_sub_tokens = []
+            else:
+                current_sub_tokens.append(token)
+                prev_is_special = False
+        out_string += self.sp_model.decode(current_sub_tokens)
+        return out_string
+
+    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
+        """Build model inputs by adding special tokens to sequences.
+
+        Args:
+            token_ids_0 (List[int]): List of token IDs for the first sequence.
+            token_ids_1 (List[int], optional): List of token IDs for the second sequence.
+
+        Returns:
+            List[int]: List of token IDs with special tokens added.
+        """
+        output = token_ids_0
+        last_cls_index = -1
+        last_sep_index = -1
+        if self.cls_token_id in output:
+            last_cls_index = len(output) - output[::-1].index(self.cls_token_id) - 1
+        if self.sep_token_id in output:
+            last_sep_index = len(output) - output[::-1].index(self.sep_token_id) - 1
+
+        if last_cls_index > last_sep_index:
+            next_token_id = self.sep_token_id
+        elif last_sep_index > last_cls_index:
+            next_token_id = self.cls_token_id
+        else:
+            output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
+            next_token_id = self.cls_token_id
+
+        output = [self.bos_token_id] + output
+        # Assume no markup in text if token_ids_1 is given.
+        if token_ids_1 is not None:
+            output = output + token_ids_1 + [next_token_id]
+        return output
+
+    def get_special_tokens_mask(
+        self, token_ids_0, token_ids_1=None, already_has_special_tokens=False
+    ):
+        """Get a mask showing which tokens are special tokens.
+
+        Args:
+            token_ids_0 (List[int]): List of token IDs for the first sequence.
+            token_ids_1 (List[int], optional): List of token IDs for the second sequence.
+            already_has_special_tokens (bool): Whether the tokens already include special tokens.
+
+        Returns:
+            List[int]: A mask where 1 indicates special tokens and 0 indicates regular tokens.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0, token_ids_1, already_has_special_tokens=True
+            )
+
+        # [bos_token, cls_token, tokens_0, sep_token]
+        if token_ids_1 is None:
+            return [1, 1] + ([0] * len(token_ids_0)) + [1]
+        # [bos_token, cls_token, tokens_0, sep_token, tokens_1, cls_token]
+        return [1, 1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
+
+    def save_vocabulary(
+        self, save_directory, filename_prefix: Optional[str] = None
+    ) -> Tuple[str]:
+        """
+        Save the vocabulary and special tokens file to a directory.
+
+        Args:
+            save_directory (str): The directory in which to save the vocabulary.
+            filename_prefix (Optional[str]): Optional prefix for the saved filename.
+
+        Returns:
+            Tuple[str]: Paths to the files saved.
+
+        Raises:
+            ValueError: If the save_directory is not a valid directory.
+        """
+        if not os.path.isdir(save_directory):
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
+            return
+        out_vocab_file = os.path.join(
+            save_directory,
+            (filename_prefix + "-" if filename_prefix else "")
+            + self.vocab_files_names["vocab_file"],
+        )
+
+        if os.path.abspath(self.vocab_file) != os.path.abspath(
+            out_vocab_file
+        ) and os.path.isfile(self.vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+        elif not os.path.isfile(self.vocab_file):
+            with open(out_vocab_file, "wb") as fi:
+                content_spiece_model = self.sp_model.serialized_model_proto()
+                fi.write(content_spiece_model)
+
+        return (out_vocab_file,)
+
+    def _pad(
+        self,
+        encoded_inputs: Dict,
+        max_length: Optional[int] = None,
+        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
+        pad_to_multiple_of: Optional[int] = None,
+        padding_side: Optional[str] = None,
+        return_attention_mask: Optional[bool] = None,
+    ) -> dict:
+        """
+        Pad encoded inputs according to the specified strategy.
+
+        Args:
+            encoded_inputs (Dict): Dictionary of encoded inputs.
+            max_length (Optional[int]): Maximum length to pad to.
+            padding_strategy (PaddingStrategy): Strategy for padding.
+            pad_to_multiple_of (Optional[int]): Pad to a multiple of this value.
+            padding_side (Optional[str]): Accepted for API compatibility; this
+                implementation uses self.padding_side.
+            return_attention_mask (Optional[bool]): Whether to return attention mask.
+
+        Returns:
+            dict: Dictionary with padded inputs and optional attention mask.
+
+        Raises:
+            ValueError: If attention_mask has an unexpected type or the padding side is invalid.
+        """
+        if return_attention_mask is None:
+            return_attention_mask = "attention_mask" in self.model_input_names
+        if return_attention_mask:
+            required_input = encoded_inputs[self.model_input_names[0]]
+            if padding_strategy == PaddingStrategy.LONGEST:
+                max_length = len(required_input)
+            if (
+                max_length is not None
+                and pad_to_multiple_of is not None
+                and (max_length % pad_to_multiple_of != 0)
+            ):
+                max_length = (
+                    (max_length // pad_to_multiple_of) + 1
+                ) * pad_to_multiple_of
+            needs_to_be_padded = (
+                padding_strategy != PaddingStrategy.DO_NOT_PAD
+                and len(required_input) != max_length
+            )
+
+            if (
+                "attention_mask" in encoded_inputs
+                and encoded_inputs["attention_mask"] is not None
+            ):
+                attention_mask = encoded_inputs.pop("attention_mask")
+                if isinstance(attention_mask, torch.Tensor):
+                    # .cpu() so this also works for masks living on an accelerator
+                    attention_mask = attention_mask.cpu().numpy()
+                elif isinstance(attention_mask, list):
+                    attention_mask = np.array(attention_mask)
+                elif not isinstance(attention_mask, np.ndarray):
+                    raise ValueError(
+                        f"Unexpected type {type(attention_mask)} of attention_mask"
+                    )
+            else:
+                # Create a default causal (lower-triangular) attention mask if none provided
+                attention_mask = np.tril(
+                    np.ones((len(required_input), len(required_input)), dtype=np.int64)
+                )
+                attention_mask = np.expand_dims(attention_mask, axis=0)
+
+            if needs_to_be_padded:
+                difference = max_length - len(required_input)
+                if self.padding_side == "right":
+                    if attention_mask.ndim == 1:
+                        pad_width = [(0, difference)]
+                    else:
+                        pad_width = [(0, 0), (0, difference), (0, difference)]
+                elif self.padding_side == "left":
+                    if attention_mask.ndim == 1:
+                        pad_width = [(difference, 0)]
+                    else:
+                        pad_width = [(0, 0), (difference, 0), (difference, 0)]
+                else:
+                    raise ValueError("Invalid padding side: " + str(self.padding_side))
+                attention_mask = np.pad(
+                    attention_mask,
+                    pad_width=pad_width,
+                    mode="constant",
+                    constant_values=0,
+                )
+
+        encoded_inputs = super()._pad(
+            encoded_inputs,
+            max_length,
+            padding_strategy=padding_strategy,
+            pad_to_multiple_of=pad_to_multiple_of,
+            return_attention_mask=False,
+        )
+        if return_attention_mask:
+            encoded_inputs["attention_mask"] = attention_mask.tolist()
+        return encoded_inputs
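Since the tokenizer ships as custom code, a minimal usage sketch may help; it assumes the files in this commit are checked out locally under ./ernie4_5 (the path is an assumption) and that tokenizer_config.json maps AutoTokenizer to Ernie4_5_Tokenizer, so trust_remote_code=True loads the class from tokenization_ernie4_5.py:

# Minimal usage sketch under the assumptions stated above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./ernie4_5", trust_remote_code=True)

ids = tok("Hello, ERNIE!")["input_ids"]
print(ids)             # bos/cls first, sep last, per build_inputs_with_special_tokens
print(tok.decode(ids))

# 1s mark the bos, cls and sep positions inserted above.
mask = tok.get_special_tokens_mask(ids, already_has_special_tokens=True)
print(mask)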
    	
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c2d29fa0b8c4b43f7c83ef8dc204a84e5bb61b1f98081b4c56d119e91d6ad1
+size 11185537
    	
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34ef7db83df785924fb83d7b887b6e822a031c56e15cff40aaf9b982988180df
+size 1614363
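Both tokenizer.json and tokenizer.model are committed as Git LFS pointers (the three-line stubs above), not the binary payloads themselves. A small sketch for checking whether a local checkout still holds pointers rather than the real files:

# Sketch: a file is still an LFS pointer if it starts with the spec header
# shown in the diffs above (i.e. `git lfs pull` has not run yet).
def is_lfs_pointer(path: str) -> bool:
    with open(path, "rb") as f:
        return f.read(64).startswith(b"version https://git-lfs.github.com/spec/v1")

for name in ("tokenizer.json", "tokenizer.model"):
    print(name, is_lfs_pointer(name))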
    	
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff.