zR
committed on
Add files using upload-large-folder tool
Browse files
- tokenizer.json +2 -2
- tokenizer_config.json +9 -5
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9340665016419c825c4bdabbcc9acc43b7ca2c68ce142724afa829abb1be5efd
|
| 3 |
+
size 19970699
|
tokenizer_config.json
CHANGED
|
@@ -150,7 +150,7 @@
|
|
| 150 |
"normalized": false,
|
| 151 |
"rstrip": false,
|
| 152 |
"single_word": false,
|
| 153 |
-
"special":
|
| 154 |
},
|
| 155 |
"151348": {
|
| 156 |
"content": "<|code_middle|>",
|
|
@@ -158,7 +158,7 @@
|
|
| 158 |
"normalized": false,
|
| 159 |
"rstrip": false,
|
| 160 |
"single_word": false,
|
| 161 |
-
"special":
|
| 162 |
},
|
| 163 |
"151349": {
|
| 164 |
"content": "<|code_suffix|>",
|
|
@@ -166,7 +166,7 @@
|
|
| 166 |
"normalized": false,
|
| 167 |
"rstrip": false,
|
| 168 |
"single_word": false,
|
| 169 |
-
"special":
|
| 170 |
},
|
| 171 |
"151350": {
|
| 172 |
"content": "<think>",
|
|
@@ -254,7 +254,7 @@
|
|
| 254 |
"normalized": false,
|
| 255 |
"rstrip": false,
|
| 256 |
"single_word": false,
|
| 257 |
-
"special":
|
| 258 |
},
|
| 259 |
"151361": {
|
| 260 |
"content": "<|begin_of_box|>",
|
|
@@ -307,7 +307,11 @@
|
|
| 307 |
"<|begin_of_audio|>",
|
| 308 |
"<|end_of_audio|>",
|
| 309 |
"<|begin_of_transcription|>",
|
| 310 |
-
"<|end_of_transcription|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
],
|
| 312 |
"clean_up_tokenization_spaces": false,
|
| 313 |
"do_lower_case": false,
|
|
|
|
| 150 |
"normalized": false,
|
| 151 |
"rstrip": false,
|
| 152 |
"single_word": false,
|
| 153 |
+
"special": true
|
| 154 |
},
|
| 155 |
"151348": {
|
| 156 |
"content": "<|code_middle|>",
|
|
|
|
| 158 |
"normalized": false,
|
| 159 |
"rstrip": false,
|
| 160 |
"single_word": false,
|
| 161 |
+
"special": true
|
| 162 |
},
|
| 163 |
"151349": {
|
| 164 |
"content": "<|code_suffix|>",
|
|
|
|
| 166 |
"normalized": false,
|
| 167 |
"rstrip": false,
|
| 168 |
"single_word": false,
|
| 169 |
+
"special": true
|
| 170 |
},
|
| 171 |
"151350": {
|
| 172 |
"content": "<think>",
|
|
|
|
| 254 |
"normalized": false,
|
| 255 |
"rstrip": false,
|
| 256 |
"single_word": false,
|
| 257 |
+
"special": true
|
| 258 |
},
|
| 259 |
"151361": {
|
| 260 |
"content": "<|begin_of_box|>",
|
|
|
|
| 307 |
"<|begin_of_audio|>",
|
| 308 |
"<|end_of_audio|>",
|
| 309 |
"<|begin_of_transcription|>",
|
| 310 |
+
"<|end_of_transcription|>",
|
| 311 |
+
"<|code_prefix|>",
|
| 312 |
+
"<|code_middle|>",
|
| 313 |
+
"<|code_suffix|>",
|
| 314 |
+
"/nothink"
|
| 315 |
],
|
| 316 |
"clean_up_tokenization_spaces": false,
|
| 317 |
"do_lower_case": false,
|