TiberiuCristianLeon committed on
Commit
73d7da5
·
verified ·
1 Parent(s): 32717a9

Upload 2 files

Browse files
Files changed (2) hide show
  1. isolanguages.parquet +3 -0
  2. languagecodes.py +745 -0
isolanguages.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93401c6c3822e82ce2d8c79f5365a61c907d76fec54115360d5380360409bba4
3
+ size 6595
languagecodes.py ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nllb_language_codes: dict[str, str] = {
2
+ "Acehnese (Arabic script)": "ace_Arab",
3
+ "Acehnese (Latin script)": "ace_Latn",
4
+ "Mesopotamian Arabic": "acm_Arab",
5
+ "Ta’izzi-Adeni Arabic": "acq_Arab",
6
+ "Tunisian Arabic": "aeb_Arab",
7
+ "Afrikaans": "afr_Latn",
8
+ "South Levantine Arabic": "ajp_Arab",
9
+ "Akan": "aka_Latn",
10
+ "Amharic": "amh_Ethi",
11
+ "North Levantine Arabic": "apc_Arab",
12
+ "Modern Standard Arabic": "arb_Arab",
13
+ "Modern Standard Arabic (Romanized)": "arb_Latn",
14
+ "Najdi Arabic": "ars_Arab",
15
+ "Moroccan Arabic": "ary_Arab",
16
+ "Egyptian Arabic": "arz_Arab",
17
+ "Assamese": "asm_Beng",
18
+ "Asturian": "ast_Latn",
19
+ "Awadhi": "awa_Deva",
20
+ "Central Aymara": "ayr_Latn",
21
+ "South Azerbaijani": "azb_Arab",
22
+ "North Azerbaijani": "azj_Latn",
23
+ "Bashkir": "bak_Cyrl",
24
+ "Bambara": "bam_Latn",
25
+ "Balinese": "ban_Latn",
26
+ "Belarusian": "bel_Cyrl",
27
+ "Bemba": "bem_Latn",
28
+ "Bengali": "ben_Beng",
29
+ "Bhojpuri": "bho_Deva",
30
+ "Banjar (Arabic script)": "bjn_Arab",
31
+ "Banjar (Latin script)": "bjn_Latn",
32
+ "Standard Tibetan": "bod_Tibt",
33
+ "Bosnian": "bos_Latn",
34
+ "Buginese": "bug_Latn",
35
+ "Bulgarian": "bul_Cyrl",
36
+ "Catalan": "cat_Latn",
37
+ "Cebuano": "ceb_Latn",
38
+ "Czech": "ces_Latn",
39
+ "Chokwe": "cjk_Latn",
40
+ "Central Kurdish": "ckb_Arab",
41
+ "Crimean Tatar": "crh_Latn",
42
+ "Welsh": "cym_Latn",
43
+ "Danish": "dan_Latn",
44
+ "German": "deu_Latn",
45
+ "Southwestern Dinka": "dik_Latn",
46
+ "Dyula": "dyu_Latn",
47
+ "Dzongkha": "dzo_Tibt",
48
+ "Greek": "ell_Grek",
49
+ "English": "eng_Latn",
50
+ "Esperanto": "epo_Latn",
51
+ "Estonian": "est_Latn",
52
+ "Basque": "eus_Latn",
53
+ "Ewe": "ewe_Latn",
54
+ "Faroese": "fao_Latn",
55
+ "Fijian": "fij_Latn",
56
+ "Finnish": "fin_Latn",
57
+ "Fon": "fon_Latn",
58
+ "French": "fra_Latn",
59
+ "Friulian": "fur_Latn",
60
+ "Nigerian Fulfulde": "fuv_Latn",
61
+ "Scottish Gaelic": "gla_Latn",
62
+ "Irish": "gle_Latn",
63
+ "Galician": "glg_Latn",
64
+ "Guarani": "grn_Latn",
65
+ "Gujarati": "guj_Gujr",
66
+ "Haitian Creole": "hat_Latn",
67
+ "Hausa": "hau_Latn",
68
+ "Hebrew": "heb_Hebr",
69
+ "Hindi": "hin_Deva",
70
+ "Chhattisgarhi": "hne_Deva",
71
+ "Croatian": "hrv_Latn",
72
+ "Hungarian": "hun_Latn",
73
+ "Armenian": "hye_Armn",
74
+ "Igbo": "ibo_Latn",
75
+ "Ilocano": "ilo_Latn",
76
+ "Indonesian": "ind_Latn",
77
+ "Icelandic": "isl_Latn",
78
+ "Italian": "ita_Latn",
79
+ "Javanese": "jav_Latn",
80
+ "Japanese": "jpn_Jpan",
81
+ "Kabyle": "kab_Latn",
82
+ "Jingpho": "kac_Latn",
83
+ "Kamba": "kam_Latn",
84
+ "Kannada": "kan_Knda",
85
+ "Kashmiri (Arabic script)": "kas_Arab",
86
+ "Kashmiri (Devanagari script)": "kas_Deva",
87
+ "Georgian": "kat_Geor",
88
+ "Central Kanuri (Arabic script)": "knc_Arab",
89
+ "Central Kanuri (Latin script)": "knc_Latn",
90
+ "Kazakh": "kaz_Cyrl",
91
+ "Kabiyè": "kbp_Latn",
92
+ "Kabuverdianu": "kea_Latn",
93
+ "Khmer": "khm_Khmr",
94
+ "Kikuyu": "kik_Latn",
95
+ "Kinyarwanda": "kin_Latn",
96
+ "Kyrgyz": "kir_Cyrl",
97
+ "Kimbundu": "kmb_Latn",
98
+ "Northern Kurdish": "kmr_Latn",
99
+ "Kikongo": "kon_Latn",
100
+ "Korean": "kor_Hang",
101
+ "Lao": "lao_Laoo",
102
+ "Ligurian": "lij_Latn",
103
+ "Limburgish": "lim_Latn",
104
+ "Lingala": "lin_Latn",
105
+ "Lithuanian": "lit_Latn",
106
+ "Lombard": "lmo_Latn",
107
+ "Latgalian": "ltg_Latn",
108
+ "Luxembourgish": "ltz_Latn",
109
+ "Luba-Kasai": "lua_Latn",
110
+ "Ganda": "lug_Latn",
111
+ "Luo": "luo_Latn",
112
+ "Mizo": "lus_Latn",
113
+ "Standard Latvian": "lvs_Latn",
114
+ "Magahi": "mag_Deva",
115
+ "Maithili": "mai_Deva",
116
+ "Malayalam": "mal_Mlym",
117
+ "Marathi": "mar_Deva",
118
+ "Minangkabau (Arabic script)": "min_Arab",
119
+ "Minangkabau (Latin script)": "min_Latn",
120
+ "Macedonian": "mkd_Cyrl",
121
+ "Plateau Malagasy": "plt_Latn",
122
+ "Maltese": "mlt_Latn",
123
+ "Meitei (Bengali script)": "mni_Beng",
124
+ "Halh Mongolian": "khk_Cyrl",
125
+ "Mossi": "mos_Latn",
126
+ "Maori": "mri_Latn",
127
+ "Burmese": "mya_Mymr",
128
+ "Dutch": "nld_Latn",
129
+ "Norwegian Nynorsk": "nno_Latn",
130
+ "Norwegian Bokmål": "nob_Latn",
131
+ "Nepali": "npi_Deva",
132
+ "Northern Sotho": "nso_Latn",
133
+ "Nuer": "nus_Latn",
134
+ "Nyanja": "nya_Latn",
135
+ "Occitan": "oci_Latn",
136
+ "West Central Oromo": "gaz_Latn",
137
+ "Odia": "ory_Orya",
138
+ "Pangasinan": "pag_Latn",
139
+ "Eastern Panjabi": "pan_Guru",
140
+ "Papiamento": "pap_Latn",
141
+ "Western Persian": "pes_Arab",
142
+ "Polish": "pol_Latn",
143
+ "Portuguese": "por_Latn",
144
+ "Dari": "prs_Arab",
145
+ "Southern Pashto": "pbt_Arab",
146
+ "Ayacucho Quechua": "quy_Latn",
147
+ "Romanian": "ron_Latn",
148
+ "Rundi": "run_Latn",
149
+ "Russian": "rus_Cyrl",
150
+ "Sango": "sag_Latn",
151
+ "Sanskrit": "san_Deva",
152
+ "Santali": "sat_Olck",
153
+ "Sicilian": "scn_Latn",
154
+ "Shan": "shn_Mymr",
155
+ "Sinhala": "sin_Sinh",
156
+ "Slovak": "slk_Latn",
157
+ "Slovenian": "slv_Latn",
158
+ "Samoan": "smo_Latn",
159
+ "Shona": "sna_Latn",
160
+ "Sindhi": "snd_Arab",
161
+ "Somali": "som_Latn",
162
+ "Southern Sotho": "sot_Latn",
163
+ "Spanish": "spa_Latn",
164
+ "Tosk Albanian": "als_Latn",
165
+ "Sardinian": "srd_Latn",
166
+ "Serbian": "srp_Cyrl",
167
+ "Swati": "ssw_Latn",
168
+ "Sundanese": "sun_Latn",
169
+ "Swedish": "swe_Latn",
170
+ "Swahili": "swh_Latn",
171
+ "Silesian": "szl_Latn",
172
+ "Tamil": "tam_Taml",
173
+ "Tatar": "tat_Cyrl",
174
+ "Telugu": "tel_Telu",
175
+ "Tajik": "tgk_Cyrl",
176
+ "Tagalog": "tgl_Latn",
177
+ "Thai": "tha_Thai",
178
+ "Tigrinya": "tir_Ethi",
179
+ "Tamasheq (Latin script)": "taq_Latn",
180
+ "Tamasheq (Tifinagh script)": "taq_Tfng",
181
+ "Tok Pisin": "tpi_Latn",
182
+ "Tswana": "tsn_Latn",
183
+ "Tsonga": "tso_Latn",
184
+ "Turkmen": "tuk_Latn",
185
+ "Tumbuka": "tum_Latn",
186
+ "Turkish": "tur_Latn",
187
+ "Twi": "twi_Latn",
188
+ "Central Atlas Tamazight": "tzm_Tfng",
189
+ "Uyghur": "uig_Arab",
190
+ "Ukrainian": "ukr_Cyrl",
191
+ "Umbundu": "umb_Latn",
192
+ "Urdu": "urd_Arab",
193
+ "Northern Uzbek": "uzn_Latn",
194
+ "Venetian": "vec_Latn",
195
+ "Vietnamese": "vie_Latn",
196
+ "Waray": "war_Latn",
197
+ "Wolof": "wol_Latn",
198
+ "Xhosa": "xho_Latn",
199
+ "Eastern Yiddish": "ydd_Hebr",
200
+ "Yoruba": "yor_Latn",
201
+ "Yue Chinese": "yue_Hant",
202
+ "Chinese (Simplified)": "zho_Hans",
203
+ "Chinese (Traditional)": "zho_Hant",
204
+ "Standard Malay": "zsm_Latn",
205
+ "Zulu": "zul_Latn",
206
+ }
207
+ mbart_large_languages: dict[str, str] = {
208
+ 'Arabic': 'ar_AR',
209
+ 'Czech': 'cs_CZ',
210
+ 'German': 'de_DE',
211
+ 'English': 'en_XX',
212
+ 'Spanish': 'es_XX',
213
+ 'Estonian': 'et_EE',
214
+ 'Finnish': 'fi_FI',
215
+ 'French': 'fr_XX',
216
+ 'Gujarati': 'gu_IN',
217
+ 'Hindi': 'hi_IN',
218
+ 'Italian': 'it_IT',
219
+ 'Japanese': 'ja_XX',
220
+ 'Kazakh': 'kk_KZ',
221
+ 'Korean': 'ko_KR',
222
+ 'Lithuanian': 'lt_LT',
223
+ 'Latvian': 'lv_LV',
224
+ 'Burmese': 'my_MM',
225
+ 'Nepali': 'ne_NP',
226
+ 'Dutch': 'nl_XX',
227
+ 'Romanian': 'ro_RO',
228
+ 'Russian': 'ru_RU',
229
+ 'Sinhala': 'si_LK',
230
+ 'Turkish': 'tr_TR',
231
+ 'Vietnamese': 'vi_VN',
232
+ 'Chinese': 'zh_CN',
233
+ 'Afrikaans': 'af_ZA',
234
+ 'Azerbaijani': 'az_AZ',
235
+ 'Bengali': 'bn_IN',
236
+ 'Persian': 'fa_IR',
237
+ 'Hebrew': 'he_IL',
238
+ 'Croatian': 'hr_HR',
239
+ 'Indonesian': 'id_ID',
240
+ 'Georgian': 'ka_GE',
241
+ 'Khmer': 'km_KH',
242
+ 'Macedonian': 'mk_MK',
243
+ 'Malayalam': 'ml_IN',
244
+ 'Mongolian': 'mn_MN',
245
+ 'Marathi': 'mr_IN',
246
+ 'Polish': 'pl_PL',
247
+ 'Pashto': 'ps_AF',
248
+ 'Portuguese': 'pt_XX',
249
+ 'Swedish': 'sv_SE',
250
+ 'Swahili': 'sw_KE',
251
+ 'Tamil': 'ta_IN',
252
+ 'Telugu': 'te_IN',
253
+ 'Thai': 'th_TH',
254
+ 'Tagalog': 'tl_XX',
255
+ 'Ukrainian': 'uk_UA',
256
+ 'Urdu': 'ur_PK',
257
+ 'Xhosa': 'xh_ZA',
258
+ 'Galician': 'gl_ES',
259
+ 'Slovene': 'sl_SI'
260
+ }
261
+ # language name -> code; mostly ISO 639-1 two-letter codes, but not exclusively: "ceb" and "haw" are three-letter codes and "zh-CN"/"zh-TW" are region-tagged variants of "zh"
262
+ iso_languages: dict[str, str] = {
263
+ "Afrikaans": "af",
264
+ "Albanian": "sq",
265
+ "Amharic": "am",
266
+ "Arabic": "ar",
267
+ "Armenian": "hy",
268
+ "Azerbaijani": "az",
269
+ "Basque": "eu",
270
+ "Belarusian": "be",
271
+ "Bengali": "bn",
272
+ "Bosnian": "bs",
273
+ "Bulgarian": "bg",
274
+ "Catalan": "ca",
275
+ "Cebuano": "ceb",
276
+ "Chinese (Simplified)": "zh-CN",
277
+ "Chinese (Traditional)": "zh-TW",
278
+ "Chinese": "zh",
279
+ "Corsican": "co",
280
+ "Croatian": "hr",
281
+ "Czech": "cs",
282
+ "Danish": "da",
283
+ "Dutch": "nl",
284
+ "English": "en",
285
+ "Esperanto": "eo",
286
+ "Estonian": "et",
287
+ "Finnish": "fi",
288
+ "French": "fr",
289
+ "Galician": "gl",
290
+ "Georgian": "ka",
291
+ "German": "de",
292
+ "Greek": "el",
293
+ "Gujarati": "gu",
294
+ "Haitian Creole": "ht",
295
+ "Hausa": "ha",
296
+ "Hawaiian": "haw",
297
+ "Hebrew": "he",
298
+ "Hindi": "hi",
299
+ "Hungarian": "hu",
300
+ "Icelandic": "is",
301
+ "Igbo": "ig",
302
+ "Indonesian": "id",
303
+ "Irish": "ga",
304
+ "Italian": "it",
305
+ "Japanese": "ja",
306
+ "Javanese": "jv",
307
+ "Kannada": "kn",
308
+ "Kazakh": "kk",
309
+ "Khmer": "km",
310
+ "Kinyarwanda": "rw",
311
+ "Korean": "ko",
312
+ "Kurdish (Kurmanji)": "ku",
313
+ "Kyrgyz": "ky",
314
+ "Lao": "lo",
315
+ "Latin": "la",
316
+ "Latvian": "lv",
317
+ "Lithuanian": "lt",
318
+ "Luxembourgish": "lb",
319
+ "Macedonian": "mk",
320
+ "Malagasy": "mg",
321
+ "Malay": "ms",
322
+ "Malayalam": "ml",
323
+ "Maltese": "mt",
324
+ "Maori": "mi",
325
+ "Marathi": "mr",
326
+ "Mongolian": "mn",
327
+ "Myanmar (Burmese)": "my",
328
+ "Nepali": "ne",
329
+ "Norwegian": "no",
330
+ "Nyanja (Chichewa)": "ny",
331
+ "Odia (Oriya)": "or",
332
+ "Pashto": "ps",
333
+ "Persian": "fa",
334
+ "Polish": "pl",
335
+ "Portuguese": "pt",
336
+ "Punjabi": "pa",
337
+ "Romanian": "ro",
338
+ "Russian": "ru",
339
+ "Samoan": "sm",
340
+ "Scots Gaelic": "gd",
341
+ "Serbian": "sr",
342
+ "Sesotho": "st",
343
+ "Shona": "sn",
344
+ "Sindhi": "sd",
345
+ "Sinhala": "si",
346
+ "Slovak": "sk",
347
+ "Slovenian": "sl",
348
+ "Somali": "so",
349
+ "Spanish": "es",
350
+ "Sundanese": "su",
351
+ "Swahili": "sw",
352
+ "Swedish": "sv",
353
+ "Tagalog (Filipino)": "tl",
354
+ "Tajik": "tg",
355
+ "Tamil": "ta",
356
+ "Tatar": "tt",
357
+ "Telugu": "te",
358
+ "Thai": "th",
359
+ "Turkish": "tr",
360
+ "Turkmen": "tk",
361
+ "Ukrainian": "uk",
362
+ "Urdu": "ur",
363
+ "Uyghur": "ug",
364
+ "Uzbek": "uz",
365
+ "Vietnamese": "vi",
366
+ "Welsh": "cy",
367
+ "Xhosa": "xh",
368
+ "Yiddish": "yi",
369
+ "Yoruba": "yo",
370
+ "Zulu": "zu"
371
+ }
372
+
373
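+ # language codes dict keyed by English language name, in alphabetical order: name -> (ISO 639-1 two-letter code, ISO 639-2/B bibliographic code, ISO 639-2/T terminologic code); the two three-letter codes are identical for most languages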
374
+ iso_languages_byname: dict[str, tuple[str, str, str]] = {
375
+ 'Abkhazian': ('ab', 'abk', 'abk'),
376
+ 'Afar': ('aa', 'aar', 'aar'),
377
+ 'Afrikaans': ('af', 'afr', 'afr'),
378
+ 'Akan': ('ak', 'aka', 'aka'),
379
+ 'Albanian': ('sq', 'alb', 'sqi'),
380
+ 'Amharic': ('am', 'amh', 'amh'),
381
+ 'Arabic': ('ar', 'ara', 'ara'),
382
+ 'Aragonese': ('an', 'arg', 'arg'),
383
+ 'Armenian': ('hy', 'arm', 'hye'),
384
+ 'Assamese': ('as', 'asm', 'asm'),
385
+ 'Avaric': ('av', 'ava', 'ava'),
386
+ 'Avestan': ('ae', 'ave', 'ave'),
387
+ 'Aymara': ('ay', 'aym', 'aym'),
388
+ 'Azerbaijani': ('az', 'aze', 'aze'),
389
+ 'Bambara': ('bm', 'bam', 'bam'),
390
+ 'Bashkir': ('ba', 'bak', 'bak'),
391
+ 'Basque': ('eu', 'baq', 'eus'),
392
+ 'Belarusian': ('be', 'bel', 'bel'),
393
+ 'Bengali': ('bn', 'ben', 'ben'),
394
+ 'Bislama': ('bi', 'bis', 'bis'),
395
+ 'Bosnian': ('bs', 'bos', 'bos'),
396
+ 'Breton': ('br', 'bre', 'bre'),
397
+ 'Bulgarian': ('bg', 'bul', 'bul'),
398
+ 'Burmese': ('my', 'bur', 'mya'),
399
+ 'Catalan': ('ca', 'cat', 'cat'),
400
+ 'Chamorro': ('ch', 'cha', 'cha'),
401
+ 'Chechen': ('ce', 'che', 'che'),
402
+ 'Chichewa': ('ny', 'nya', 'nya'),
403
+ 'Chinese': ('zh', 'chi', 'zho'),
404
+ 'Church Slavic': ('cu', 'chu', 'chu'),
405
+ 'Chuvash': ('cv', 'chv', 'chv'),
406
+ 'Cornish': ('kw', 'cor', 'cor'),
407
+ 'Corsican': ('co', 'cos', 'cos'),
408
+ 'Cree': ('cr', 'cre', 'cre'),
409
+ 'Croatian': ('hr', 'hrv', 'hrv'),
410
+ 'Czech': ('cs', 'cze', 'ces'),
411
+ 'Danish': ('da', 'dan', 'dan'),
412
+ 'Divehi': ('dv', 'div', 'div'),
413
+ 'Dutch': ('nl', 'dut', 'nld'),
414
+ 'Dzongkha': ('dz', 'dzo', 'dzo'),
415
+ 'English': ('en', 'eng', 'eng'),
416
+ 'Esperanto': ('eo', 'epo', 'epo'),
417
+ 'Estonian': ('et', 'est', 'est'),
418
+ 'Ewe': ('ee', 'ewe', 'ewe'),
419
+ 'Faroese': ('fo', 'fao', 'fao'),
420
+ 'Fijian': ('fj', 'fij', 'fij'),
421
+ 'Finnish': ('fi', 'fin', 'fin'),
422
+ 'French': ('fr', 'fre', 'fra'),
423
+ 'Fulah': ('ff', 'ful', 'ful'),
424
+ 'Galician': ('gl', 'glg', 'glg'),
425
+ 'Ganda': ('lg', 'lug', 'lug'),
426
+ 'Georgian': ('ka', 'geo', 'kat'),
427
+ 'German': ('de', 'ger', 'deu'),
428
+ 'Greek': ('el', 'gre', 'ell'),
429
+ 'Guarani': ('gn', 'grn', 'grn'),
430
+ 'Gujarati': ('gu', 'guj', 'guj'),
431
+ 'Haitian': ('ht', 'hat', 'hat'),
432
+ 'Hausa': ('ha', 'hau', 'hau'),
433
+ 'Hebrew': ('he', 'heb', 'heb'),
434
+ 'Herero': ('hz', 'her', 'her'),
435
+ 'Hindi': ('hi', 'hin', 'hin'),
436
+ 'Hiri Motu': ('ho', 'hmo', 'hmo'),
437
+ 'Hungarian': ('hu', 'hun', 'hun'),
438
+ 'Icelandic': ('is', 'ice', 'isl'),
439
+ 'Ido': ('io', 'ido', 'ido'),
440
+ 'Igbo': ('ig', 'ibo', 'ibo'),
441
+ 'Indonesian': ('id', 'ind', 'ind'),
442
+ 'Interlingua': ('ia', 'ina', 'ina'),
443
+ 'Interlingue': ('ie', 'ile', 'ile'),
444
+ 'Inuktitut': ('iu', 'iku', 'iku'),
445
+ 'Inupiaq': ('ik', 'ipk', 'ipk'),
446
+ 'Irish': ('ga', 'gle', 'gle'),
447
+ 'Italian': ('it', 'ita', 'ita'),
448
+ 'Japanese': ('ja', 'jpn', 'jpn'),
449
+ 'Javanese': ('jv', 'jav', 'jav'),
450
+ 'Kalaallisut': ('kl', 'kal', 'kal'),
451
+ 'Kannada': ('kn', 'kan', 'kan'),
452
+ 'Kanuri': ('kr', 'kau', 'kau'),
453
+ 'Kashmiri': ('ks', 'kas', 'kas'),
454
+ 'Kazakh': ('kk', 'kaz', 'kaz'),
455
+ 'Khmer': ('km', 'khm', 'khm'),
456
+ 'Kikuyu': ('ki', 'kik', 'kik'),
457
+ 'Kinyarwanda': ('rw', 'kin', 'kin'),
458
+ 'Kirghiz': ('ky', 'kir', 'kir'),
459
+ 'Komi': ('kv', 'kom', 'kom'),
460
+ 'Kongo': ('kg', 'kon', 'kon'),
461
+ 'Korean': ('ko', 'kor', 'kor'),
462
+ 'Kuanyama': ('kj', 'kua', 'kua'),
463
+ 'Kurdish': ('ku', 'kur', 'kur'),
464
+ 'Lao': ('lo', 'lao', 'lao'),
465
+ 'Latin': ('la', 'lat', 'lat'),
466
+ 'Latvian': ('lv', 'lav', 'lav'),
467
+ 'Limburgan': ('li', 'lim', 'lim'),
468
+ 'Lingala': ('ln', 'lin', 'lin'),
469
+ 'Lithuanian': ('lt', 'lit', 'lit'),
470
+ 'Luba-Katanga': ('lu', 'lub', 'lub'),
471
+ 'Luxembourgish': ('lb', 'ltz', 'ltz'),
472
+ 'Macedonian': ('mk', 'mac', 'mkd'),
473
+ 'Malagasy': ('mg', 'mlg', 'mlg'),
474
+ 'Malay': ('ms', 'may', 'msa'),
475
+ 'Malayalam': ('ml', 'mal', 'mal'),
476
+ 'Maltese': ('mt', 'mlt', 'mlt'),
477
+ 'Manx': ('gv', 'glv', 'glv'),
478
+ 'Maori': ('mi', 'mao', 'mri'),
479
+ 'Marathi': ('mr', 'mar', 'mar'),
480
+ 'Marshallese': ('mh', 'mah', 'mah'),
481
+ 'Mongolian': ('mn', 'mon', 'mon'),
482
+ 'Nauru': ('na', 'nau', 'nau'),
483
+ 'Navajo': ('nv', 'nav', 'nav'),
484
+ 'Ndonga': ('ng', 'ndo', 'ndo'),
485
+ 'Nepali': ('ne', 'nep', 'nep'),
486
+ 'North Ndebele': ('nd', 'nde', 'nde'),
487
+ 'Northern Sami': ('se', 'sme', 'sme'),
488
+ 'Norwegian': ('no', 'nor', 'nor'),
489
+ 'Norwegian Bokmål': ('nb', 'nob', 'nob'),
490
+ 'Norwegian Nynorsk': ('nn', 'nno', 'nno'),
491
+ 'Occitan': ('oc', 'oci', 'oci'),
492
+ 'Ojibwa': ('oj', 'oji', 'oji'),
493
+ 'Oriya': ('or', 'ori', 'ori'),
494
+ 'Oromo': ('om', 'orm', 'orm'),
495
+ 'Ossetian': ('os', 'oss', 'oss'),
496
+ 'Pali': ('pi', 'pli', 'pli'),
497
+ 'Panjabi': ('pa', 'pan', 'pan'),
498
+ 'Persian': ('fa', 'per', 'fas'),
499
+ 'Polish': ('pl', 'pol', 'pol'),
500
+ 'Portuguese': ('pt', 'por', 'por'),
501
+ 'Pushto': ('ps', 'pus', 'pus'),
502
+ 'Quechua': ('qu', 'que', 'que'),
503
+ 'Romanian': ('ro', 'rum', 'ron'),
504
+ 'Romansh': ('rm', 'roh', 'roh'),
505
+ 'Rundi': ('rn', 'run', 'run'),
506
+ 'Russian': ('ru', 'rus', 'rus'),
507
+ 'Samoan': ('sm', 'smo', 'smo'),
508
+ 'Sango': ('sg', 'sag', 'sag'),
509
+ 'Sanskrit': ('sa', 'san', 'san'),
510
+ 'Sardinian': ('sc', 'srd', 'srd'),
511
+ 'Scottish Gaelic': ('gd', 'gla', 'gla'),
512
+ 'Serbian': ('sr', 'srp', 'srp'),
513
+ 'Shona': ('sn', 'sna', 'sna'),
514
+ 'Sichuan Yi': ('ii', 'iii', 'iii'),
515
+ 'Sindhi': ('sd', 'snd', 'snd'),
516
+ 'Sinhala': ('si', 'sin', 'sin'),
517
+ 'Slovak': ('sk', 'slo', 'slk'),
518
+ 'Slovenian': ('sl', 'slv', 'slv'),
519
+ 'Somali': ('so', 'som', 'som'),
520
+ 'South Ndebele': ('nr', 'nbl', 'nbl'),
521
+ 'Southern Sotho': ('st', 'sot', 'sot'),
522
+ 'Spanish': ('es', 'spa', 'spa'),
523
+ 'Sundanese': ('su', 'sun', 'sun'),
524
+ 'Swahili': ('sw', 'swa', 'swa'),
525
+ 'Swati': ('ss', 'ssw', 'ssw'),
526
+ 'Swedish': ('sv', 'swe', 'swe'),
527
+ 'Tagalog': ('tl', 'tgl', 'tgl'),
528
+ 'Tahitian': ('ty', 'tah', 'tah'),
529
+ 'Tajik': ('tg', 'tgk', 'tgk'),
530
+ 'Tamil': ('ta', 'tam', 'tam'),
531
+ 'Tatar': ('tt', 'tat', 'tat'),
532
+ 'Telugu': ('te', 'tel', 'tel'),
533
+ 'Thai': ('th', 'tha', 'tha'),
534
+ 'Tibetan': ('bo', 'tib', 'bod'),
535
+ 'Tigrinya': ('ti', 'tir', 'tir'),
536
+ 'Tonga': ('to', 'ton', 'ton'),
537
+ 'Tsonga': ('ts', 'tso', 'tso'),
538
+ 'Tswana': ('tn', 'tsn', 'tsn'),
539
+ 'Turkish': ('tr', 'tur', 'tur'),
540
+ 'Turkmen': ('tk', 'tuk', 'tuk'),
541
+ 'Twi': ('tw', 'twi', 'twi'),
542
+ 'Uighur': ('ug', 'uig', 'uig'),
543
+ 'Ukrainian': ('uk', 'ukr', 'ukr'),
544
+ 'Urdu': ('ur', 'urd', 'urd'),
545
+ 'Uzbek': ('uz', 'uzb', 'uzb'),
546
+ 'Venda': ('ve', 'ven', 'ven'),
547
+ 'Vietnamese': ('vi', 'vie', 'vie'),
548
+ 'Volapük': ('vo', 'vol', 'vol'),
549
+ 'Walloon': ('wa', 'wln', 'wln'),
550
+ 'Welsh': ('cy', 'wel', 'cym'),
551
+ 'Western Frisian': ('fy', 'fry', 'fry'),
552
+ 'Wolof': ('wo', 'wol', 'wol'),
553
+ 'Xhosa': ('xh', 'xho', 'xho'),
554
+ 'Yiddish': ('yi', 'yid', 'yid'),
555
+ 'Yoruba': ('yo', 'yor', 'yor'),
556
+ 'Zhuang': ('za', 'zha', 'zha'),
557
+ 'Zulu': ('zu', 'zul', 'zul')
558
+ }
559
+
560
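+ # language codes dict keyed by 2-letter code (entries are listed in language-name order, not code order): code -> (language name, ISO 639-2/B bibliographic code, ISO 639-2/T terminologic code)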
561
+ iso_languages_byiso1: dict[str, tuple[str, str, str]] = {
562
+ 'ab': ('Abkhazian', 'abk', 'abk'),
563
+ 'aa': ('Afar', 'aar', 'aar'),
564
+ 'af': ('Afrikaans', 'afr', 'afr'),
565
+ 'ak': ('Akan', 'aka', 'aka'),
566
+ 'sq': ('Albanian', 'alb', 'sqi'),
567
+ 'am': ('Amharic', 'amh', 'amh'),
568
+ 'ar': ('Arabic', 'ara', 'ara'),
569
+ 'an': ('Aragonese', 'arg', 'arg'),
570
+ 'hy': ('Armenian', 'arm', 'hye'),
571
+ 'as': ('Assamese', 'asm', 'asm'),
572
+ 'av': ('Avaric', 'ava', 'ava'),
573
+ 'ae': ('Avestan', 'ave', 'ave'),
574
+ 'ay': ('Aymara', 'aym', 'aym'),
575
+ 'az': ('Azerbaijani', 'aze', 'aze'),
576
+ 'bm': ('Bambara', 'bam', 'bam'),
577
+ 'ba': ('Bashkir', 'bak', 'bak'),
578
+ 'eu': ('Basque', 'baq', 'eus'),
579
+ 'be': ('Belarusian', 'bel', 'bel'),
580
+ 'bn': ('Bengali', 'ben', 'ben'),
581
+ 'bi': ('Bislama', 'bis', 'bis'),
582
+ 'bs': ('Bosnian', 'bos', 'bos'),
583
+ 'br': ('Breton', 'bre', 'bre'),
584
+ 'bg': ('Bulgarian', 'bul', 'bul'),
585
+ 'my': ('Burmese', 'bur', 'mya'),
586
+ 'ca': ('Catalan', 'cat', 'cat'),
587
+ 'ch': ('Chamorro', 'cha', 'cha'),
588
+ 'ce': ('Chechen', 'che', 'che'),
589
+ 'ny': ('Chichewa', 'nya', 'nya'),
590
+ 'zh': ('Chinese', 'chi', 'zho'),
591
+ 'cu': ('Church Slavic', 'chu', 'chu'),
592
+ 'cv': ('Chuvash', 'chv', 'chv'),
593
+ 'kw': ('Cornish', 'cor', 'cor'),
594
+ 'co': ('Corsican', 'cos', 'cos'),
595
+ 'cr': ('Cree', 'cre', 'cre'),
596
+ 'hr': ('Croatian', 'hrv', 'hrv'),
597
+ 'cs': ('Czech', 'cze', 'ces'),
598
+ 'da': ('Danish', 'dan', 'dan'),
599
+ 'dv': ('Divehi', 'div', 'div'),
600
+ 'nl': ('Dutch', 'dut', 'nld'),
601
+ 'dz': ('Dzongkha', 'dzo', 'dzo'),
602
+ 'en': ('English', 'eng', 'eng'),
603
+ 'eo': ('Esperanto', 'epo', 'epo'),
604
+ 'et': ('Estonian', 'est', 'est'),
605
+ 'ee': ('Ewe', 'ewe', 'ewe'),
606
+ 'fo': ('Faroese', 'fao', 'fao'),
607
+ 'fj': ('Fijian', 'fij', 'fij'),
608
+ 'fi': ('Finnish', 'fin', 'fin'),
609
+ 'fr': ('French', 'fre', 'fra'),
610
+ 'ff': ('Fulah', 'ful', 'ful'),
611
+ 'gl': ('Galician', 'glg', 'glg'),
612
+ 'lg': ('Ganda', 'lug', 'lug'),
613
+ 'ka': ('Georgian', 'geo', 'kat'),
614
+ 'de': ('German', 'ger', 'deu'),
615
+ 'el': ('Greek', 'gre', 'ell'),
616
+ 'gn': ('Guarani', 'grn', 'grn'),
617
+ 'gu': ('Gujarati', 'guj', 'guj'),
618
+ 'ht': ('Haitian', 'hat', 'hat'),
619
+ 'ha': ('Hausa', 'hau', 'hau'),
620
+ 'he': ('Hebrew', 'heb', 'heb'),
621
+ 'hz': ('Herero', 'her', 'her'),
622
+ 'hi': ('Hindi', 'hin', 'hin'),
623
+ 'ho': ('Hiri Motu', 'hmo', 'hmo'),
624
+ 'hu': ('Hungarian', 'hun', 'hun'),
625
+ 'is': ('Icelandic', 'ice', 'isl'),
626
+ 'io': ('Ido', 'ido', 'ido'),
627
+ 'ig': ('Igbo', 'ibo', 'ibo'),
628
+ 'id': ('Indonesian', 'ind', 'ind'),
629
+ 'ia': ('Interlingua', 'ina', 'ina'),
630
+ 'ie': ('Interlingue', 'ile', 'ile'),
631
+ 'iu': ('Inuktitut', 'iku', 'iku'),
632
+ 'ik': ('Inupiaq', 'ipk', 'ipk'),
633
+ 'ga': ('Irish', 'gle', 'gle'),
634
+ 'it': ('Italian', 'ita', 'ita'),
635
+ 'ja': ('Japanese', 'jpn', 'jpn'),
636
+ 'jv': ('Javanese', 'jav', 'jav'),
637
+ 'kl': ('Kalaallisut', 'kal', 'kal'),
638
+ 'kn': ('Kannada', 'kan', 'kan'),
639
+ 'kr': ('Kanuri', 'kau', 'kau'),
640
+ 'ks': ('Kashmiri', 'kas', 'kas'),
641
+ 'kk': ('Kazakh', 'kaz', 'kaz'),
642
+ 'km': ('Khmer', 'khm', 'khm'),
643
+ 'ki': ('Kikuyu', 'kik', 'kik'),
644
+ 'rw': ('Kinyarwanda', 'kin', 'kin'),
645
+ 'ky': ('Kirghiz', 'kir', 'kir'),
646
+ 'kv': ('Komi', 'kom', 'kom'),
647
+ 'kg': ('Kongo', 'kon', 'kon'),
648
+ 'ko': ('Korean', 'kor', 'kor'),
649
+ 'kj': ('Kuanyama', 'kua', 'kua'),
650
+ 'ku': ('Kurdish', 'kur', 'kur'),
651
+ 'lo': ('Lao', 'lao', 'lao'),
652
+ 'la': ('Latin', 'lat', 'lat'),
653
+ 'lv': ('Latvian', 'lav', 'lav'),
654
+ 'li': ('Limburgan', 'lim', 'lim'),
655
+ 'ln': ('Lingala', 'lin', 'lin'),
656
+ 'lt': ('Lithuanian', 'lit', 'lit'),
657
+ 'lu': ('Luba-Katanga', 'lub', 'lub'),
658
+ 'lb': ('Luxembourgish', 'ltz', 'ltz'),
659
+ 'mk': ('Macedonian', 'mac', 'mkd'),
660
+ 'mg': ('Malagasy', 'mlg', 'mlg'),
661
+ 'ms': ('Malay', 'may', 'msa'),
662
+ 'ml': ('Malayalam', 'mal', 'mal'),
663
+ 'mt': ('Maltese', 'mlt', 'mlt'),
664
+ 'gv': ('Manx', 'glv', 'glv'),
665
+ 'mi': ('Maori', 'mao', 'mri'),
666
+ 'mr': ('Marathi', 'mar', 'mar'),
667
+ 'mh': ('Marshallese', 'mah', 'mah'),
668
+ 'mn': ('Mongolian', 'mon', 'mon'),
669
+ 'na': ('Nauru', 'nau', 'nau'),
670
+ 'nv': ('Navajo', 'nav', 'nav'),
671
+ 'ng': ('Ndonga', 'ndo', 'ndo'),
672
+ 'ne': ('Nepali', 'nep', 'nep'),
673
+ 'nd': ('North Ndebele', 'nde', 'nde'),
674
+ 'se': ('Northern Sami', 'sme', 'sme'),
675
+ 'no': ('Norwegian', 'nor', 'nor'),
676
+ 'nb': ('Norwegian Bokmål', 'nob', 'nob'),
677
+ 'nn': ('Norwegian Nynorsk', 'nno', 'nno'),
678
+ 'oc': ('Occitan', 'oci', 'oci'),
679
+ 'oj': ('Ojibwa', 'oji', 'oji'),
680
+ 'or': ('Oriya', 'ori', 'ori'),
681
+ 'om': ('Oromo', 'orm', 'orm'),
682
+ 'os': ('Ossetian', 'oss', 'oss'),
683
+ 'pi': ('Pali', 'pli', 'pli'),
684
+ 'pa': ('Panjabi', 'pan', 'pan'),
685
+ 'fa': ('Persian', 'per', 'fas'),
686
+ 'pl': ('Polish', 'pol', 'pol'),
687
+ 'pt': ('Portuguese', 'por', 'por'),
688
+ 'ps': ('Pushto', 'pus', 'pus'),
689
+ 'qu': ('Quechua', 'que', 'que'),
690
+ 'ro': ('Romanian', 'rum', 'ron'),
691
+ 'rm': ('Romansh', 'roh', 'roh'),
692
+ 'rn': ('Rundi', 'run', 'run'),
693
+ 'ru': ('Russian', 'rus', 'rus'),
694
+ 'sm': ('Samoan', 'smo', 'smo'),
695
+ 'sg': ('Sango', 'sag', 'sag'),
696
+ 'sa': ('Sanskrit', 'san', 'san'),
697
+ 'sc': ('Sardinian', 'srd', 'srd'),
698
+ 'gd': ('Scottish Gaelic', 'gla', 'gla'),
699
+ 'sr': ('Serbian', 'srp', 'srp'),
700
+ 'sn': ('Shona', 'sna', 'sna'),
701
+ 'ii': ('Sichuan Yi', 'iii', 'iii'),
702
+ 'sd': ('Sindhi', 'snd', 'snd'),
703
+ 'si': ('Sinhala', 'sin', 'sin'),
704
+ 'sk': ('Slovak', 'slo', 'slk'),
705
+ 'sl': ('Slovenian', 'slv', 'slv'),
706
+ 'so': ('Somali', 'som', 'som'),
707
+ 'nr': ('South Ndebele', 'nbl', 'nbl'),
708
+ 'st': ('Southern Sotho', 'sot', 'sot'),
709
+ 'es': ('Spanish', 'spa', 'spa'),
710
+ 'su': ('Sundanese', 'sun', 'sun'),
711
+ 'sw': ('Swahili', 'swa', 'swa'),
712
+ 'ss': ('Swati', 'ssw', 'ssw'),
713
+ 'sv': ('Swedish', 'swe', 'swe'),
714
+ 'tl': ('Tagalog', 'tgl', 'tgl'),
715
+ 'ty': ('Tahitian', 'tah', 'tah'),
716
+ 'tg': ('Tajik', 'tgk', 'tgk'),
717
+ 'ta': ('Tamil', 'tam', 'tam'),
718
+ 'tt': ('Tatar', 'tat', 'tat'),
719
+ 'te': ('Telugu', 'tel', 'tel'),
720
+ 'th': ('Thai', 'tha', 'tha'),
721
+ 'bo': ('Tibetan', 'tib', 'bod'),
722
+ 'ti': ('Tigrinya', 'tir', 'tir'),
723
+ 'to': ('Tonga', 'ton', 'ton'),
724
+ 'ts': ('Tsonga', 'tso', 'tso'),
725
+ 'tn': ('Tswana', 'tsn', 'tsn'),
726
+ 'tr': ('Turkish', 'tur', 'tur'),
727
+ 'tk': ('Turkmen', 'tuk', 'tuk'),
728
+ 'tw': ('Twi', 'twi', 'twi'),
729
+ 'ug': ('Uighur', 'uig', 'uig'),
730
+ 'uk': ('Ukrainian', 'ukr', 'ukr'),
731
+ 'ur': ('Urdu', 'urd', 'urd'),
732
+ 'uz': ('Uzbek', 'uzb', 'uzb'),
733
+ 've': ('Venda', 'ven', 'ven'),
734
+ 'vi': ('Vietnamese', 'vie', 'vie'),
735
+ 'vo': ('Volapük', 'vol', 'vol'),
736
+ 'wa': ('Walloon', 'wln', 'wln'),
737
+ 'cy': ('Welsh', 'wel', 'cym'),
738
+ 'fy': ('Western Frisian', 'fry', 'fry'),
739
+ 'wo': ('Wolof', 'wol', 'wol'),
740
+ 'xh': ('Xhosa', 'xho', 'xho'),
741
+ 'yi': ('Yiddish', 'yid', 'yid'),
742
+ 'yo': ('Yoruba', 'yor', 'yor'),
743
+ 'za': ('Zhuang', 'zha', 'zha'),
744
+ 'zu': ('Zulu', 'zul', 'zul')
745
+ }