Upload 2 files
Browse files
- isolanguages.parquet +3 -0
- languagecodes.py +745 -0
isolanguages.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93401c6c3822e82ce2d8c79f5365a61c907d76fec54115360d5380360409bba4
|
| 3 |
+
size 6595
|
languagecodes.py
ADDED
|
@@ -0,0 +1,745 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nllb_language_codes: dict[str, str] = {
|
| 2 |
+
"Acehnese (Arabic script)": "ace_Arab",
|
| 3 |
+
"Acehnese (Latin script)": "ace_Latn",
|
| 4 |
+
"Mesopotamian Arabic": "acm_Arab",
|
| 5 |
+
"Ta’izzi-Adeni Arabic": "acq_Arab",
|
| 6 |
+
"Tunisian Arabic": "aeb_Arab",
|
| 7 |
+
"Afrikaans": "afr_Latn",
|
| 8 |
+
"South Levantine Arabic": "ajp_Arab",
|
| 9 |
+
"Akan": "aka_Latn",
|
| 10 |
+
"Amharic": "amh_Ethi",
|
| 11 |
+
"North Levantine Arabic": "apc_Arab",
|
| 12 |
+
"Modern Standard Arabic": "arb_Arab",
|
| 13 |
+
"Modern Standard Arabic (Romanized)": "arb_Latn",
|
| 14 |
+
"Najdi Arabic": "ars_Arab",
|
| 15 |
+
"Moroccan Arabic": "ary_Arab",
|
| 16 |
+
"Egyptian Arabic": "arz_Arab",
|
| 17 |
+
"Assamese": "asm_Beng",
|
| 18 |
+
"Asturian": "ast_Latn",
|
| 19 |
+
"Awadhi": "awa_Deva",
|
| 20 |
+
"Central Aymara": "ayr_Latn",
|
| 21 |
+
"South Azerbaijani": "azb_Arab",
|
| 22 |
+
"North Azerbaijani": "azj_Latn",
|
| 23 |
+
"Bashkir": "bak_Cyrl",
|
| 24 |
+
"Bambara": "bam_Latn",
|
| 25 |
+
"Balinese": "ban_Latn",
|
| 26 |
+
"Belarusian": "bel_Cyrl",
|
| 27 |
+
"Bemba": "bem_Latn",
|
| 28 |
+
"Bengali": "ben_Beng",
|
| 29 |
+
"Bhojpuri": "bho_Deva",
|
| 30 |
+
"Banjar (Arabic script)": "bjn_Arab",
|
| 31 |
+
"Banjar (Latin script)": "bjn_Latn",
|
| 32 |
+
"Standard Tibetan": "bod_Tibt",
|
| 33 |
+
"Bosnian": "bos_Latn",
|
| 34 |
+
"Buginese": "bug_Latn",
|
| 35 |
+
"Bulgarian": "bul_Cyrl",
|
| 36 |
+
"Catalan": "cat_Latn",
|
| 37 |
+
"Cebuano": "ceb_Latn",
|
| 38 |
+
"Czech": "ces_Latn",
|
| 39 |
+
"Chokwe": "cjk_Latn",
|
| 40 |
+
"Central Kurdish": "ckb_Arab",
|
| 41 |
+
"Crimean Tatar": "crh_Latn",
|
| 42 |
+
"Welsh": "cym_Latn",
|
| 43 |
+
"Danish": "dan_Latn",
|
| 44 |
+
"German": "deu_Latn",
|
| 45 |
+
"Southwestern Dinka": "dik_Latn",
|
| 46 |
+
"Dyula": "dyu_Latn",
|
| 47 |
+
"Dzongkha": "dzo_Tibt",
|
| 48 |
+
"Greek": "ell_Grek",
|
| 49 |
+
"English": "eng_Latn",
|
| 50 |
+
"Esperanto": "epo_Latn",
|
| 51 |
+
"Estonian": "est_Latn",
|
| 52 |
+
"Basque": "eus_Latn",
|
| 53 |
+
"Ewe": "ewe_Latn",
|
| 54 |
+
"Faroese": "fao_Latn",
|
| 55 |
+
"Fijian": "fij_Latn",
|
| 56 |
+
"Finnish": "fin_Latn",
|
| 57 |
+
"Fon": "fon_Latn",
|
| 58 |
+
"French": "fra_Latn",
|
| 59 |
+
"Friulian": "fur_Latn",
|
| 60 |
+
"Nigerian Fulfulde": "fuv_Latn",
|
| 61 |
+
"Scottish Gaelic": "gla_Latn",
|
| 62 |
+
"Irish": "gle_Latn",
|
| 63 |
+
"Galician": "glg_Latn",
|
| 64 |
+
"Guarani": "grn_Latn",
|
| 65 |
+
"Gujarati": "guj_Gujr",
|
| 66 |
+
"Haitian Creole": "hat_Latn",
|
| 67 |
+
"Hausa": "hau_Latn",
|
| 68 |
+
"Hebrew": "heb_Hebr",
|
| 69 |
+
"Hindi": "hin_Deva",
|
| 70 |
+
"Chhattisgarhi": "hne_Deva",
|
| 71 |
+
"Croatian": "hrv_Latn",
|
| 72 |
+
"Hungarian": "hun_Latn",
|
| 73 |
+
"Armenian": "hye_Armn",
|
| 74 |
+
"Igbo": "ibo_Latn",
|
| 75 |
+
"Ilocano": "ilo_Latn",
|
| 76 |
+
"Indonesian": "ind_Latn",
|
| 77 |
+
"Icelandic": "isl_Latn",
|
| 78 |
+
"Italian": "ita_Latn",
|
| 79 |
+
"Javanese": "jav_Latn",
|
| 80 |
+
"Japanese": "jpn_Jpan",
|
| 81 |
+
"Kabyle": "kab_Latn",
|
| 82 |
+
"Jingpho": "kac_Latn",
|
| 83 |
+
"Kamba": "kam_Latn",
|
| 84 |
+
"Kannada": "kan_Knda",
|
| 85 |
+
"Kashmiri (Arabic script)": "kas_Arab",
|
| 86 |
+
"Kashmiri (Devanagari script)": "kas_Deva",
|
| 87 |
+
"Georgian": "kat_Geor",
|
| 88 |
+
"Central Kanuri (Arabic script)": "knc_Arab",
|
| 89 |
+
"Central Kanuri (Latin script)": "knc_Latn",
|
| 90 |
+
"Kazakh": "kaz_Cyrl",
|
| 91 |
+
"Kabiyè": "kbp_Latn",
|
| 92 |
+
"Kabuverdianu": "kea_Latn",
|
| 93 |
+
"Khmer": "khm_Khmr",
|
| 94 |
+
"Kikuyu": "kik_Latn",
|
| 95 |
+
"Kinyarwanda": "kin_Latn",
|
| 96 |
+
"Kyrgyz": "kir_Cyrl",
|
| 97 |
+
"Kimbundu": "kmb_Latn",
|
| 98 |
+
"Northern Kurdish": "kmr_Latn",
|
| 99 |
+
"Kikongo": "kon_Latn",
|
| 100 |
+
"Korean": "kor_Hang",
|
| 101 |
+
"Lao": "lao_Laoo",
|
| 102 |
+
"Ligurian": "lij_Latn",
|
| 103 |
+
"Limburgish": "lim_Latn",
|
| 104 |
+
"Lingala": "lin_Latn",
|
| 105 |
+
"Lithuanian": "lit_Latn",
|
| 106 |
+
"Lombard": "lmo_Latn",
|
| 107 |
+
"Latgalian": "ltg_Latn",
|
| 108 |
+
"Luxembourgish": "ltz_Latn",
|
| 109 |
+
"Luba-Kasai": "lua_Latn",
|
| 110 |
+
"Ganda": "lug_Latn",
|
| 111 |
+
"Luo": "luo_Latn",
|
| 112 |
+
"Mizo": "lus_Latn",
|
| 113 |
+
"Standard Latvian": "lvs_Latn",
|
| 114 |
+
"Magahi": "mag_Deva",
|
| 115 |
+
"Maithili": "mai_Deva",
|
| 116 |
+
"Malayalam": "mal_Mlym",
|
| 117 |
+
"Marathi": "mar_Deva",
|
| 118 |
+
"Minangkabau (Arabic script)": "min_Arab",
|
| 119 |
+
"Minangkabau (Latin script)": "min_Latn",
|
| 120 |
+
"Macedonian": "mkd_Cyrl",
|
| 121 |
+
"Plateau Malagasy": "plt_Latn",
|
| 122 |
+
"Maltese": "mlt_Latn",
|
| 123 |
+
"Meitei (Bengali script)": "mni_Beng",
|
| 124 |
+
"Halh Mongolian": "khk_Cyrl",
|
| 125 |
+
"Mossi": "mos_Latn",
|
| 126 |
+
"Maori": "mri_Latn",
|
| 127 |
+
"Burmese": "mya_Mymr",
|
| 128 |
+
"Dutch": "nld_Latn",
|
| 129 |
+
"Norwegian Nynorsk": "nno_Latn",
|
| 130 |
+
"Norwegian Bokmål": "nob_Latn",
|
| 131 |
+
"Nepali": "npi_Deva",
|
| 132 |
+
"Northern Sotho": "nso_Latn",
|
| 133 |
+
"Nuer": "nus_Latn",
|
| 134 |
+
"Nyanja": "nya_Latn",
|
| 135 |
+
"Occitan": "oci_Latn",
|
| 136 |
+
"West Central Oromo": "gaz_Latn",
|
| 137 |
+
"Odia": "ory_Orya",
|
| 138 |
+
"Pangasinan": "pag_Latn",
|
| 139 |
+
"Eastern Panjabi": "pan_Guru",
|
| 140 |
+
"Papiamento": "pap_Latn",
|
| 141 |
+
"Western Persian": "pes_Arab",
|
| 142 |
+
"Polish": "pol_Latn",
|
| 143 |
+
"Portuguese": "por_Latn",
|
| 144 |
+
"Dari": "prs_Arab",
|
| 145 |
+
"Southern Pashto": "pbt_Arab",
|
| 146 |
+
"Ayacucho Quechua": "quy_Latn",
|
| 147 |
+
"Romanian": "ron_Latn",
|
| 148 |
+
"Rundi": "run_Latn",
|
| 149 |
+
"Russian": "rus_Cyrl",
|
| 150 |
+
"Sango": "sag_Latn",
|
| 151 |
+
"Sanskrit": "san_Deva",
|
| 152 |
+
"Santali": "sat_Olck",
|
| 153 |
+
"Sicilian": "scn_Latn",
|
| 154 |
+
"Shan": "shn_Mymr",
|
| 155 |
+
"Sinhala": "sin_Sinh",
|
| 156 |
+
"Slovak": "slk_Latn",
|
| 157 |
+
"Slovenian": "slv_Latn",
|
| 158 |
+
"Samoan": "smo_Latn",
|
| 159 |
+
"Shona": "sna_Latn",
|
| 160 |
+
"Sindhi": "snd_Arab",
|
| 161 |
+
"Somali": "som_Latn",
|
| 162 |
+
"Southern Sotho": "sot_Latn",
|
| 163 |
+
"Spanish": "spa_Latn",
|
| 164 |
+
"Tosk Albanian": "als_Latn",
|
| 165 |
+
"Sardinian": "srd_Latn",
|
| 166 |
+
"Serbian": "srp_Cyrl",
|
| 167 |
+
"Swati": "ssw_Latn",
|
| 168 |
+
"Sundanese": "sun_Latn",
|
| 169 |
+
"Swedish": "swe_Latn",
|
| 170 |
+
"Swahili": "swh_Latn",
|
| 171 |
+
"Silesian": "szl_Latn",
|
| 172 |
+
"Tamil": "tam_Taml",
|
| 173 |
+
"Tatar": "tat_Cyrl",
|
| 174 |
+
"Telugu": "tel_Telu",
|
| 175 |
+
"Tajik": "tgk_Cyrl",
|
| 176 |
+
"Tagalog": "tgl_Latn",
|
| 177 |
+
"Thai": "tha_Thai",
|
| 178 |
+
"Tigrinya": "tir_Ethi",
|
| 179 |
+
"Tamasheq (Latin script)": "taq_Latn",
|
| 180 |
+
"Tamasheq (Tifinagh script)": "taq_Tfng",
|
| 181 |
+
"Tok Pisin": "tpi_Latn",
|
| 182 |
+
"Tswana": "tsn_Latn",
|
| 183 |
+
"Tsonga": "tso_Latn",
|
| 184 |
+
"Turkmen": "tuk_Latn",
|
| 185 |
+
"Tumbuka": "tum_Latn",
|
| 186 |
+
"Turkish": "tur_Latn",
|
| 187 |
+
"Twi": "twi_Latn",
|
| 188 |
+
"Central Atlas Tamazight": "tzm_Tfng",
|
| 189 |
+
"Uyghur": "uig_Arab",
|
| 190 |
+
"Ukrainian": "ukr_Cyrl",
|
| 191 |
+
"Umbundu": "umb_Latn",
|
| 192 |
+
"Urdu": "urd_Arab",
|
| 193 |
+
"Northern Uzbek": "uzn_Latn",
|
| 194 |
+
"Venetian": "vec_Latn",
|
| 195 |
+
"Vietnamese": "vie_Latn",
|
| 196 |
+
"Waray": "war_Latn",
|
| 197 |
+
"Wolof": "wol_Latn",
|
| 198 |
+
"Xhosa": "xho_Latn",
|
| 199 |
+
"Eastern Yiddish": "ydd_Hebr",
|
| 200 |
+
"Yoruba": "yor_Latn",
|
| 201 |
+
"Yue Chinese": "yue_Hant",
|
| 202 |
+
"Chinese (Simplified)": "zho_Hans",
|
| 203 |
+
"Chinese (Traditional)": "zho_Hant",
|
| 204 |
+
"Standard Malay": "zsm_Latn",
|
| 205 |
+
"Zulu": "zul_Latn",
|
| 206 |
+
}
|
| 207 |
+
mbart_large_languages: dict[str, str] = {
|
| 208 |
+
'Arabic': 'ar_AR',
|
| 209 |
+
'Czech': 'cs_CZ',
|
| 210 |
+
'German': 'de_DE',
|
| 211 |
+
'English': 'en_XX',
|
| 212 |
+
'Spanish': 'es_XX',
|
| 213 |
+
'Estonian': 'et_EE',
|
| 214 |
+
'Finnish': 'fi_FI',
|
| 215 |
+
'French': 'fr_XX',
|
| 216 |
+
'Gujarati': 'gu_IN',
|
| 217 |
+
'Hindi': 'hi_IN',
|
| 218 |
+
'Italian': 'it_IT',
|
| 219 |
+
'Japanese': 'ja_XX',
|
| 220 |
+
'Kazakh': 'kk_KZ',
|
| 221 |
+
'Korean': 'ko_KR',
|
| 222 |
+
'Lithuanian': 'lt_LT',
|
| 223 |
+
'Latvian': 'lv_LV',
|
| 224 |
+
'Burmese': 'my_MM',
|
| 225 |
+
'Nepali': 'ne_NP',
|
| 226 |
+
'Dutch': 'nl_XX',
|
| 227 |
+
'Romanian': 'ro_RO',
|
| 228 |
+
'Russian': 'ru_RU',
|
| 229 |
+
'Sinhala': 'si_LK',
|
| 230 |
+
'Turkish': 'tr_TR',
|
| 231 |
+
'Vietnamese': 'vi_VN',
|
| 232 |
+
'Chinese': 'zh_CN',
|
| 233 |
+
'Afrikaans': 'af_ZA',
|
| 234 |
+
'Azerbaijani': 'az_AZ',
|
| 235 |
+
'Bengali': 'bn_IN',
|
| 236 |
+
'Persian': 'fa_IR',
|
| 237 |
+
'Hebrew': 'he_IL',
|
| 238 |
+
'Croatian': 'hr_HR',
|
| 239 |
+
'Indonesian': 'id_ID',
|
| 240 |
+
'Georgian': 'ka_GE',
|
| 241 |
+
'Khmer': 'km_KH',
|
| 242 |
+
'Macedonian': 'mk_MK',
|
| 243 |
+
'Malayalam': 'ml_IN',
|
| 244 |
+
'Mongolian': 'mn_MN',
|
| 245 |
+
'Marathi': 'mr_IN',
|
| 246 |
+
'Polish': 'pl_PL',
|
| 247 |
+
'Pashto': 'ps_AF',
|
| 248 |
+
'Portuguese': 'pt_XX',
|
| 249 |
+
'Swedish': 'sv_SE',
|
| 250 |
+
'Swahili': 'sw_KE',
|
| 251 |
+
'Tamil': 'ta_IN',
|
| 252 |
+
'Telugu': 'te_IN',
|
| 253 |
+
'Thai': 'th_TH',
|
| 254 |
+
'Tagalog': 'tl_XX',
|
| 255 |
+
'Ukrainian': 'uk_UA',
|
| 256 |
+
'Urdu': 'ur_PK',
|
| 257 |
+
'Xhosa': 'xh_ZA',
|
| 258 |
+
'Galician': 'gl_ES',
|
| 259 |
+
'Slovene': 'sl_SI'
|
| 260 |
+
}
|
| 261 |
+
# language code system: mostly ISO 639-1 two-letter codes, with exceptions: "ceb" and "haw" are three-letter ISO 639-2 codes, and "zh-CN"/"zh-TW" are region-tagged variants of "zh"
|
| 262 |
+
iso_languages: dict[str, str] = {
|
| 263 |
+
"Afrikaans": "af",
|
| 264 |
+
"Albanian": "sq",
|
| 265 |
+
"Amharic": "am",
|
| 266 |
+
"Arabic": "ar",
|
| 267 |
+
"Armenian": "hy",
|
| 268 |
+
"Azerbaijani": "az",
|
| 269 |
+
"Basque": "eu",
|
| 270 |
+
"Belarusian": "be",
|
| 271 |
+
"Bengali": "bn",
|
| 272 |
+
"Bosnian": "bs",
|
| 273 |
+
"Bulgarian": "bg",
|
| 274 |
+
"Catalan": "ca",
|
| 275 |
+
"Cebuano": "ceb",
|
| 276 |
+
"Chinese (Simplified)": "zh-CN",
|
| 277 |
+
"Chinese (Traditional)": "zh-TW",
|
| 278 |
+
"Chinese": "zh",
|
| 279 |
+
"Corsican": "co",
|
| 280 |
+
"Croatian": "hr",
|
| 281 |
+
"Czech": "cs",
|
| 282 |
+
"Danish": "da",
|
| 283 |
+
"Dutch": "nl",
|
| 284 |
+
"English": "en",
|
| 285 |
+
"Esperanto": "eo",
|
| 286 |
+
"Estonian": "et",
|
| 287 |
+
"Finnish": "fi",
|
| 288 |
+
"French": "fr",
|
| 289 |
+
"Galician": "gl",
|
| 290 |
+
"Georgian": "ka",
|
| 291 |
+
"German": "de",
|
| 292 |
+
"Greek": "el",
|
| 293 |
+
"Gujarati": "gu",
|
| 294 |
+
"Haitian Creole": "ht",
|
| 295 |
+
"Hausa": "ha",
|
| 296 |
+
"Hawaiian": "haw",
|
| 297 |
+
"Hebrew": "he",
|
| 298 |
+
"Hindi": "hi",
|
| 299 |
+
"Hungarian": "hu",
|
| 300 |
+
"Icelandic": "is",
|
| 301 |
+
"Igbo": "ig",
|
| 302 |
+
"Indonesian": "id",
|
| 303 |
+
"Irish": "ga",
|
| 304 |
+
"Italian": "it",
|
| 305 |
+
"Japanese": "ja",
|
| 306 |
+
"Javanese": "jv",
|
| 307 |
+
"Kannada": "kn",
|
| 308 |
+
"Kazakh": "kk",
|
| 309 |
+
"Khmer": "km",
|
| 310 |
+
"Kinyarwanda": "rw",
|
| 311 |
+
"Korean": "ko",
|
| 312 |
+
"Kurdish (Kurmanji)": "ku",
|
| 313 |
+
"Kyrgyz": "ky",
|
| 314 |
+
"Lao": "lo",
|
| 315 |
+
"Latin": "la",
|
| 316 |
+
"Latvian": "lv",
|
| 317 |
+
"Lithuanian": "lt",
|
| 318 |
+
"Luxembourgish": "lb",
|
| 319 |
+
"Macedonian": "mk",
|
| 320 |
+
"Malagasy": "mg",
|
| 321 |
+
"Malay": "ms",
|
| 322 |
+
"Malayalam": "ml",
|
| 323 |
+
"Maltese": "mt",
|
| 324 |
+
"Maori": "mi",
|
| 325 |
+
"Marathi": "mr",
|
| 326 |
+
"Mongolian": "mn",
|
| 327 |
+
"Myanmar (Burmese)": "my",
|
| 328 |
+
"Nepali": "ne",
|
| 329 |
+
"Norwegian": "no",
|
| 330 |
+
"Nyanja (Chichewa)": "ny",
|
| 331 |
+
"Odia (Oriya)": "or",
|
| 332 |
+
"Pashto": "ps",
|
| 333 |
+
"Persian": "fa",
|
| 334 |
+
"Polish": "pl",
|
| 335 |
+
"Portuguese": "pt",
|
| 336 |
+
"Punjabi": "pa",
|
| 337 |
+
"Romanian": "ro",
|
| 338 |
+
"Russian": "ru",
|
| 339 |
+
"Samoan": "sm",
|
| 340 |
+
"Scots Gaelic": "gd",
|
| 341 |
+
"Serbian": "sr",
|
| 342 |
+
"Sesotho": "st",
|
| 343 |
+
"Shona": "sn",
|
| 344 |
+
"Sindhi": "sd",
|
| 345 |
+
"Sinhala": "si",
|
| 346 |
+
"Slovak": "sk",
|
| 347 |
+
"Slovenian": "sl",
|
| 348 |
+
"Somali": "so",
|
| 349 |
+
"Spanish": "es",
|
| 350 |
+
"Sundanese": "su",
|
| 351 |
+
"Swahili": "sw",
|
| 352 |
+
"Swedish": "sv",
|
| 353 |
+
"Tagalog (Filipino)": "tl",
|
| 354 |
+
"Tajik": "tg",
|
| 355 |
+
"Tamil": "ta",
|
| 356 |
+
"Tatar": "tt",
|
| 357 |
+
"Telugu": "te",
|
| 358 |
+
"Thai": "th",
|
| 359 |
+
"Turkish": "tr",
|
| 360 |
+
"Turkmen": "tk",
|
| 361 |
+
"Ukrainian": "uk",
|
| 362 |
+
"Urdu": "ur",
|
| 363 |
+
"Uyghur": "ug",
|
| 364 |
+
"Uzbek": "uz",
|
| 365 |
+
"Vietnamese": "vi",
|
| 366 |
+
"Welsh": "cy",
|
| 367 |
+
"Xhosa": "xh",
|
| 368 |
+
"Yiddish": "yi",
|
| 369 |
+
"Yoruba": "yo",
|
| 370 |
+
"Zulu": "zu"
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
# ISO 639 language codes keyed by English language name (alphabetical order); each value is (ISO 639-1, ISO 639-2/B, ISO 639-2/T)
|
| 374 |
+
iso_languages_byname: dict[str, tuple[str, str, str]] = {
|
| 375 |
+
'Abkhazian': ('ab', 'abk', 'abk'),
|
| 376 |
+
'Afar': ('aa', 'aar', 'aar'),
|
| 377 |
+
'Afrikaans': ('af', 'afr', 'afr'),
|
| 378 |
+
'Akan': ('ak', 'aka', 'aka'),
|
| 379 |
+
'Albanian': ('sq', 'alb', 'sqi'),
|
| 380 |
+
'Amharic': ('am', 'amh', 'amh'),
|
| 381 |
+
'Arabic': ('ar', 'ara', 'ara'),
|
| 382 |
+
'Aragonese': ('an', 'arg', 'arg'),
|
| 383 |
+
'Armenian': ('hy', 'arm', 'hye'),
|
| 384 |
+
'Assamese': ('as', 'asm', 'asm'),
|
| 385 |
+
'Avaric': ('av', 'ava', 'ava'),
|
| 386 |
+
'Avestan': ('ae', 'ave', 'ave'),
|
| 387 |
+
'Aymara': ('ay', 'aym', 'aym'),
|
| 388 |
+
'Azerbaijani': ('az', 'aze', 'aze'),
|
| 389 |
+
'Bambara': ('bm', 'bam', 'bam'),
|
| 390 |
+
'Bashkir': ('ba', 'bak', 'bak'),
|
| 391 |
+
'Basque': ('eu', 'baq', 'eus'),
|
| 392 |
+
'Belarusian': ('be', 'bel', 'bel'),
|
| 393 |
+
'Bengali': ('bn', 'ben', 'ben'),
|
| 394 |
+
'Bislama': ('bi', 'bis', 'bis'),
|
| 395 |
+
'Bosnian': ('bs', 'bos', 'bos'),
|
| 396 |
+
'Breton': ('br', 'bre', 'bre'),
|
| 397 |
+
'Bulgarian': ('bg', 'bul', 'bul'),
|
| 398 |
+
'Burmese': ('my', 'bur', 'mya'),
|
| 399 |
+
'Catalan': ('ca', 'cat', 'cat'),
|
| 400 |
+
'Chamorro': ('ch', 'cha', 'cha'),
|
| 401 |
+
'Chechen': ('ce', 'che', 'che'),
|
| 402 |
+
'Chichewa': ('ny', 'nya', 'nya'),
|
| 403 |
+
'Chinese': ('zh', 'chi', 'zho'),
|
| 404 |
+
'Church Slavic': ('cu', 'chu', 'chu'),
|
| 405 |
+
'Chuvash': ('cv', 'chv', 'chv'),
|
| 406 |
+
'Cornish': ('kw', 'cor', 'cor'),
|
| 407 |
+
'Corsican': ('co', 'cos', 'cos'),
|
| 408 |
+
'Cree': ('cr', 'cre', 'cre'),
|
| 409 |
+
'Croatian': ('hr', 'hrv', 'hrv'),
|
| 410 |
+
'Czech': ('cs', 'cze', 'ces'),
|
| 411 |
+
'Danish': ('da', 'dan', 'dan'),
|
| 412 |
+
'Divehi': ('dv', 'div', 'div'),
|
| 413 |
+
'Dutch': ('nl', 'dut', 'nld'),
|
| 414 |
+
'Dzongkha': ('dz', 'dzo', 'dzo'),
|
| 415 |
+
'English': ('en', 'eng', 'eng'),
|
| 416 |
+
'Esperanto': ('eo', 'epo', 'epo'),
|
| 417 |
+
'Estonian': ('et', 'est', 'est'),
|
| 418 |
+
'Ewe': ('ee', 'ewe', 'ewe'),
|
| 419 |
+
'Faroese': ('fo', 'fao', 'fao'),
|
| 420 |
+
'Fijian': ('fj', 'fij', 'fij'),
|
| 421 |
+
'Finnish': ('fi', 'fin', 'fin'),
|
| 422 |
+
'French': ('fr', 'fre', 'fra'),
|
| 423 |
+
'Fulah': ('ff', 'ful', 'ful'),
|
| 424 |
+
'Galician': ('gl', 'glg', 'glg'),
|
| 425 |
+
'Ganda': ('lg', 'lug', 'lug'),
|
| 426 |
+
'Georgian': ('ka', 'geo', 'kat'),
|
| 427 |
+
'German': ('de', 'ger', 'deu'),
|
| 428 |
+
'Greek': ('el', 'gre', 'ell'),
|
| 429 |
+
'Guarani': ('gn', 'grn', 'grn'),
|
| 430 |
+
'Gujarati': ('gu', 'guj', 'guj'),
|
| 431 |
+
'Haitian': ('ht', 'hat', 'hat'),
|
| 432 |
+
'Hausa': ('ha', 'hau', 'hau'),
|
| 433 |
+
'Hebrew': ('he', 'heb', 'heb'),
|
| 434 |
+
'Herero': ('hz', 'her', 'her'),
|
| 435 |
+
'Hindi': ('hi', 'hin', 'hin'),
|
| 436 |
+
'Hiri Motu': ('ho', 'hmo', 'hmo'),
|
| 437 |
+
'Hungarian': ('hu', 'hun', 'hun'),
|
| 438 |
+
'Icelandic': ('is', 'ice', 'isl'),
|
| 439 |
+
'Ido': ('io', 'ido', 'ido'),
|
| 440 |
+
'Igbo': ('ig', 'ibo', 'ibo'),
|
| 441 |
+
'Indonesian': ('id', 'ind', 'ind'),
|
| 442 |
+
'Interlingua': ('ia', 'ina', 'ina'),
|
| 443 |
+
'Interlingue': ('ie', 'ile', 'ile'),
|
| 444 |
+
'Inuktitut': ('iu', 'iku', 'iku'),
|
| 445 |
+
'Inupiaq': ('ik', 'ipk', 'ipk'),
|
| 446 |
+
'Irish': ('ga', 'gle', 'gle'),
|
| 447 |
+
'Italian': ('it', 'ita', 'ita'),
|
| 448 |
+
'Japanese': ('ja', 'jpn', 'jpn'),
|
| 449 |
+
'Javanese': ('jv', 'jav', 'jav'),
|
| 450 |
+
'Kalaallisut': ('kl', 'kal', 'kal'),
|
| 451 |
+
'Kannada': ('kn', 'kan', 'kan'),
|
| 452 |
+
'Kanuri': ('kr', 'kau', 'kau'),
|
| 453 |
+
'Kashmiri': ('ks', 'kas', 'kas'),
|
| 454 |
+
'Kazakh': ('kk', 'kaz', 'kaz'),
|
| 455 |
+
'Khmer': ('km', 'khm', 'khm'),
|
| 456 |
+
'Kikuyu': ('ki', 'kik', 'kik'),
|
| 457 |
+
'Kinyarwanda': ('rw', 'kin', 'kin'),
|
| 458 |
+
'Kirghiz': ('ky', 'kir', 'kir'),
|
| 459 |
+
'Komi': ('kv', 'kom', 'kom'),
|
| 460 |
+
'Kongo': ('kg', 'kon', 'kon'),
|
| 461 |
+
'Korean': ('ko', 'kor', 'kor'),
|
| 462 |
+
'Kuanyama': ('kj', 'kua', 'kua'),
|
| 463 |
+
'Kurdish': ('ku', 'kur', 'kur'),
|
| 464 |
+
'Lao': ('lo', 'lao', 'lao'),
|
| 465 |
+
'Latin': ('la', 'lat', 'lat'),
|
| 466 |
+
'Latvian': ('lv', 'lav', 'lav'),
|
| 467 |
+
'Limburgan': ('li', 'lim', 'lim'),
|
| 468 |
+
'Lingala': ('ln', 'lin', 'lin'),
|
| 469 |
+
'Lithuanian': ('lt', 'lit', 'lit'),
|
| 470 |
+
'Luba-Katanga': ('lu', 'lub', 'lub'),
|
| 471 |
+
'Luxembourgish': ('lb', 'ltz', 'ltz'),
|
| 472 |
+
'Macedonian': ('mk', 'mac', 'mkd'),
|
| 473 |
+
'Malagasy': ('mg', 'mlg', 'mlg'),
|
| 474 |
+
'Malay': ('ms', 'may', 'msa'),
|
| 475 |
+
'Malayalam': ('ml', 'mal', 'mal'),
|
| 476 |
+
'Maltese': ('mt', 'mlt', 'mlt'),
|
| 477 |
+
'Manx': ('gv', 'glv', 'glv'),
|
| 478 |
+
'Maori': ('mi', 'mao', 'mri'),
|
| 479 |
+
'Marathi': ('mr', 'mar', 'mar'),
|
| 480 |
+
'Marshallese': ('mh', 'mah', 'mah'),
|
| 481 |
+
'Mongolian': ('mn', 'mon', 'mon'),
|
| 482 |
+
'Nauru': ('na', 'nau', 'nau'),
|
| 483 |
+
'Navajo': ('nv', 'nav', 'nav'),
|
| 484 |
+
'Ndonga': ('ng', 'ndo', 'ndo'),
|
| 485 |
+
'Nepali': ('ne', 'nep', 'nep'),
|
| 486 |
+
'North Ndebele': ('nd', 'nde', 'nde'),
|
| 487 |
+
'Northern Sami': ('se', 'sme', 'sme'),
|
| 488 |
+
'Norwegian': ('no', 'nor', 'nor'),
|
| 489 |
+
'Norwegian Bokmål': ('nb', 'nob', 'nob'),
|
| 490 |
+
'Norwegian Nynorsk': ('nn', 'nno', 'nno'),
|
| 491 |
+
'Occitan': ('oc', 'oci', 'oci'),
|
| 492 |
+
'Ojibwa': ('oj', 'oji', 'oji'),
|
| 493 |
+
'Oriya': ('or', 'ori', 'ori'),
|
| 494 |
+
'Oromo': ('om', 'orm', 'orm'),
|
| 495 |
+
'Ossetian': ('os', 'oss', 'oss'),
|
| 496 |
+
'Pali': ('pi', 'pli', 'pli'),
|
| 497 |
+
'Panjabi': ('pa', 'pan', 'pan'),
|
| 498 |
+
'Persian': ('fa', 'per', 'fas'),
|
| 499 |
+
'Polish': ('pl', 'pol', 'pol'),
|
| 500 |
+
'Portuguese': ('pt', 'por', 'por'),
|
| 501 |
+
'Pushto': ('ps', 'pus', 'pus'),
|
| 502 |
+
'Quechua': ('qu', 'que', 'que'),
|
| 503 |
+
'Romanian': ('ro', 'rum', 'ron'),
|
| 504 |
+
'Romansh': ('rm', 'roh', 'roh'),
|
| 505 |
+
'Rundi': ('rn', 'run', 'run'),
|
| 506 |
+
'Russian': ('ru', 'rus', 'rus'),
|
| 507 |
+
'Samoan': ('sm', 'smo', 'smo'),
|
| 508 |
+
'Sango': ('sg', 'sag', 'sag'),
|
| 509 |
+
'Sanskrit': ('sa', 'san', 'san'),
|
| 510 |
+
'Sardinian': ('sc', 'srd', 'srd'),
|
| 511 |
+
'Scottish Gaelic': ('gd', 'gla', 'gla'),
|
| 512 |
+
'Serbian': ('sr', 'srp', 'srp'),
|
| 513 |
+
'Shona': ('sn', 'sna', 'sna'),
|
| 514 |
+
'Sichuan Yi': ('ii', 'iii', 'iii'),
|
| 515 |
+
'Sindhi': ('sd', 'snd', 'snd'),
|
| 516 |
+
'Sinhala': ('si', 'sin', 'sin'),
|
| 517 |
+
'Slovak': ('sk', 'slo', 'slk'),
|
| 518 |
+
'Slovenian': ('sl', 'slv', 'slv'),
|
| 519 |
+
'Somali': ('so', 'som', 'som'),
|
| 520 |
+
'South Ndebele': ('nr', 'nbl', 'nbl'),
|
| 521 |
+
'Southern Sotho': ('st', 'sot', 'sot'),
|
| 522 |
+
'Spanish': ('es', 'spa', 'spa'),
|
| 523 |
+
'Sundanese': ('su', 'sun', 'sun'),
|
| 524 |
+
'Swahili': ('sw', 'swa', 'swa'),
|
| 525 |
+
'Swati': ('ss', 'ssw', 'ssw'),
|
| 526 |
+
'Swedish': ('sv', 'swe', 'swe'),
|
| 527 |
+
'Tagalog': ('tl', 'tgl', 'tgl'),
|
| 528 |
+
'Tahitian': ('ty', 'tah', 'tah'),
|
| 529 |
+
'Tajik': ('tg', 'tgk', 'tgk'),
|
| 530 |
+
'Tamil': ('ta', 'tam', 'tam'),
|
| 531 |
+
'Tatar': ('tt', 'tat', 'tat'),
|
| 532 |
+
'Telugu': ('te', 'tel', 'tel'),
|
| 533 |
+
'Thai': ('th', 'tha', 'tha'),
|
| 534 |
+
'Tibetan': ('bo', 'tib', 'bod'),
|
| 535 |
+
'Tigrinya': ('ti', 'tir', 'tir'),
|
| 536 |
+
'Tonga': ('to', 'ton', 'ton'),
|
| 537 |
+
'Tsonga': ('ts', 'tso', 'tso'),
|
| 538 |
+
'Tswana': ('tn', 'tsn', 'tsn'),
|
| 539 |
+
'Turkish': ('tr', 'tur', 'tur'),
|
| 540 |
+
'Turkmen': ('tk', 'tuk', 'tuk'),
|
| 541 |
+
'Twi': ('tw', 'twi', 'twi'),
|
| 542 |
+
'Uighur': ('ug', 'uig', 'uig'),
|
| 543 |
+
'Ukrainian': ('uk', 'ukr', 'ukr'),
|
| 544 |
+
'Urdu': ('ur', 'urd', 'urd'),
|
| 545 |
+
'Uzbek': ('uz', 'uzb', 'uzb'),
|
| 546 |
+
'Venda': ('ve', 'ven', 'ven'),
|
| 547 |
+
'Vietnamese': ('vi', 'vie', 'vie'),
|
| 548 |
+
'Volapük': ('vo', 'vol', 'vol'),
|
| 549 |
+
'Walloon': ('wa', 'wln', 'wln'),
|
| 550 |
+
'Welsh': ('cy', 'wel', 'cym'),
|
| 551 |
+
'Western Frisian': ('fy', 'fry', 'fry'),
|
| 552 |
+
'Wolof': ('wo', 'wol', 'wol'),
|
| 553 |
+
'Xhosa': ('xh', 'xho', 'xho'),
|
| 554 |
+
'Yiddish': ('yi', 'yid', 'yid'),
|
| 555 |
+
'Yoruba': ('yo', 'yor', 'yor'),
|
| 556 |
+
'Zhuang': ('za', 'zha', 'zha'),
|
| 557 |
+
'Zulu': ('zu', 'zul', 'zul')
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
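# ISO 639 language codes keyed by 2-letter (ISO 639-1) code; entries are listed in language-name order, not code order; each value is (language name, ISO 639-2/B, ISO 639-2/T)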
|
| 561 |
+
iso_languages_byiso1: dict[str, tuple[str, str, str]] = {
|
| 562 |
+
'ab': ('Abkhazian', 'abk', 'abk'),
|
| 563 |
+
'aa': ('Afar', 'aar', 'aar'),
|
| 564 |
+
'af': ('Afrikaans', 'afr', 'afr'),
|
| 565 |
+
'ak': ('Akan', 'aka', 'aka'),
|
| 566 |
+
'sq': ('Albanian', 'alb', 'sqi'),
|
| 567 |
+
'am': ('Amharic', 'amh', 'amh'),
|
| 568 |
+
'ar': ('Arabic', 'ara', 'ara'),
|
| 569 |
+
'an': ('Aragonese', 'arg', 'arg'),
|
| 570 |
+
'hy': ('Armenian', 'arm', 'hye'),
|
| 571 |
+
'as': ('Assamese', 'asm', 'asm'),
|
| 572 |
+
'av': ('Avaric', 'ava', 'ava'),
|
| 573 |
+
'ae': ('Avestan', 'ave', 'ave'),
|
| 574 |
+
'ay': ('Aymara', 'aym', 'aym'),
|
| 575 |
+
'az': ('Azerbaijani', 'aze', 'aze'),
|
| 576 |
+
'bm': ('Bambara', 'bam', 'bam'),
|
| 577 |
+
'ba': ('Bashkir', 'bak', 'bak'),
|
| 578 |
+
'eu': ('Basque', 'baq', 'eus'),
|
| 579 |
+
'be': ('Belarusian', 'bel', 'bel'),
|
| 580 |
+
'bn': ('Bengali', 'ben', 'ben'),
|
| 581 |
+
'bi': ('Bislama', 'bis', 'bis'),
|
| 582 |
+
'bs': ('Bosnian', 'bos', 'bos'),
|
| 583 |
+
'br': ('Breton', 'bre', 'bre'),
|
| 584 |
+
'bg': ('Bulgarian', 'bul', 'bul'),
|
| 585 |
+
'my': ('Burmese', 'bur', 'mya'),
|
| 586 |
+
'ca': ('Catalan', 'cat', 'cat'),
|
| 587 |
+
'ch': ('Chamorro', 'cha', 'cha'),
|
| 588 |
+
'ce': ('Chechen', 'che', 'che'),
|
| 589 |
+
'ny': ('Chichewa', 'nya', 'nya'),
|
| 590 |
+
'zh': ('Chinese', 'chi', 'zho'),
|
| 591 |
+
'cu': ('Church Slavic', 'chu', 'chu'),
|
| 592 |
+
'cv': ('Chuvash', 'chv', 'chv'),
|
| 593 |
+
'kw': ('Cornish', 'cor', 'cor'),
|
| 594 |
+
'co': ('Corsican', 'cos', 'cos'),
|
| 595 |
+
'cr': ('Cree', 'cre', 'cre'),
|
| 596 |
+
'hr': ('Croatian', 'hrv', 'hrv'),
|
| 597 |
+
'cs': ('Czech', 'cze', 'ces'),
|
| 598 |
+
'da': ('Danish', 'dan', 'dan'),
|
| 599 |
+
'dv': ('Divehi', 'div', 'div'),
|
| 600 |
+
'nl': ('Dutch', 'dut', 'nld'),
|
| 601 |
+
'dz': ('Dzongkha', 'dzo', 'dzo'),
|
| 602 |
+
'en': ('English', 'eng', 'eng'),
|
| 603 |
+
'eo': ('Esperanto', 'epo', 'epo'),
|
| 604 |
+
'et': ('Estonian', 'est', 'est'),
|
| 605 |
+
'ee': ('Ewe', 'ewe', 'ewe'),
|
| 606 |
+
'fo': ('Faroese', 'fao', 'fao'),
|
| 607 |
+
'fj': ('Fijian', 'fij', 'fij'),
|
| 608 |
+
'fi': ('Finnish', 'fin', 'fin'),
|
| 609 |
+
'fr': ('French', 'fre', 'fra'),
|
| 610 |
+
'ff': ('Fulah', 'ful', 'ful'),
|
| 611 |
+
'gl': ('Galician', 'glg', 'glg'),
|
| 612 |
+
'lg': ('Ganda', 'lug', 'lug'),
|
| 613 |
+
'ka': ('Georgian', 'geo', 'kat'),
|
| 614 |
+
'de': ('German', 'ger', 'deu'),
|
| 615 |
+
'el': ('Greek', 'gre', 'ell'),
|
| 616 |
+
'gn': ('Guarani', 'grn', 'grn'),
|
| 617 |
+
'gu': ('Gujarati', 'guj', 'guj'),
|
| 618 |
+
'ht': ('Haitian', 'hat', 'hat'),
|
| 619 |
+
'ha': ('Hausa', 'hau', 'hau'),
|
| 620 |
+
'he': ('Hebrew', 'heb', 'heb'),
|
| 621 |
+
'hz': ('Herero', 'her', 'her'),
|
| 622 |
+
'hi': ('Hindi', 'hin', 'hin'),
|
| 623 |
+
'ho': ('Hiri Motu', 'hmo', 'hmo'),
|
| 624 |
+
'hu': ('Hungarian', 'hun', 'hun'),
|
| 625 |
+
'is': ('Icelandic', 'ice', 'isl'),
|
| 626 |
+
'io': ('Ido', 'ido', 'ido'),
|
| 627 |
+
'ig': ('Igbo', 'ibo', 'ibo'),
|
| 628 |
+
'id': ('Indonesian', 'ind', 'ind'),
|
| 629 |
+
'ia': ('Interlingua', 'ina', 'ina'),
|
| 630 |
+
'ie': ('Interlingue', 'ile', 'ile'),
|
| 631 |
+
'iu': ('Inuktitut', 'iku', 'iku'),
|
| 632 |
+
'ik': ('Inupiaq', 'ipk', 'ipk'),
|
| 633 |
+
'ga': ('Irish', 'gle', 'gle'),
|
| 634 |
+
'it': ('Italian', 'ita', 'ita'),
|
| 635 |
+
'ja': ('Japanese', 'jpn', 'jpn'),
|
| 636 |
+
'jv': ('Javanese', 'jav', 'jav'),
|
| 637 |
+
'kl': ('Kalaallisut', 'kal', 'kal'),
|
| 638 |
+
'kn': ('Kannada', 'kan', 'kan'),
|
| 639 |
+
'kr': ('Kanuri', 'kau', 'kau'),
|
| 640 |
+
'ks': ('Kashmiri', 'kas', 'kas'),
|
| 641 |
+
'kk': ('Kazakh', 'kaz', 'kaz'),
|
| 642 |
+
'km': ('Khmer', 'khm', 'khm'),
|
| 643 |
+
'ki': ('Kikuyu', 'kik', 'kik'),
|
| 644 |
+
'rw': ('Kinyarwanda', 'kin', 'kin'),
|
| 645 |
+
'ky': ('Kirghiz', 'kir', 'kir'),
|
| 646 |
+
'kv': ('Komi', 'kom', 'kom'),
|
| 647 |
+
'kg': ('Kongo', 'kon', 'kon'),
|
| 648 |
+
'ko': ('Korean', 'kor', 'kor'),
|
| 649 |
+
'kj': ('Kuanyama', 'kua', 'kua'),
|
| 650 |
+
'ku': ('Kurdish', 'kur', 'kur'),
|
| 651 |
+
'lo': ('Lao', 'lao', 'lao'),
|
| 652 |
+
'la': ('Latin', 'lat', 'lat'),
|
| 653 |
+
'lv': ('Latvian', 'lav', 'lav'),
|
| 654 |
+
'li': ('Limburgan', 'lim', 'lim'),
|
| 655 |
+
'ln': ('Lingala', 'lin', 'lin'),
|
| 656 |
+
'lt': ('Lithuanian', 'lit', 'lit'),
|
| 657 |
+
'lu': ('Luba-Katanga', 'lub', 'lub'),
|
| 658 |
+
'lb': ('Luxembourgish', 'ltz', 'ltz'),
|
| 659 |
+
'mk': ('Macedonian', 'mac', 'mkd'),
|
| 660 |
+
'mg': ('Malagasy', 'mlg', 'mlg'),
|
| 661 |
+
'ms': ('Malay', 'may', 'msa'),
|
| 662 |
+
'ml': ('Malayalam', 'mal', 'mal'),
|
| 663 |
+
'mt': ('Maltese', 'mlt', 'mlt'),
|
| 664 |
+
'gv': ('Manx', 'glv', 'glv'),
|
| 665 |
+
'mi': ('Maori', 'mao', 'mri'),
|
| 666 |
+
'mr': ('Marathi', 'mar', 'mar'),
|
| 667 |
+
'mh': ('Marshallese', 'mah', 'mah'),
|
| 668 |
+
'mn': ('Mongolian', 'mon', 'mon'),
|
| 669 |
+
'na': ('Nauru', 'nau', 'nau'),
|
| 670 |
+
'nv': ('Navajo', 'nav', 'nav'),
|
| 671 |
+
'ng': ('Ndonga', 'ndo', 'ndo'),
|
| 672 |
+
'ne': ('Nepali', 'nep', 'nep'),
|
| 673 |
+
'nd': ('North Ndebele', 'nde', 'nde'),
|
| 674 |
+
'se': ('Northern Sami', 'sme', 'sme'),
|
| 675 |
+
'no': ('Norwegian', 'nor', 'nor'),
|
| 676 |
+
'nb': ('Norwegian Bokmål', 'nob', 'nob'),
|
| 677 |
+
'nn': ('Norwegian Nynorsk', 'nno', 'nno'),
|
| 678 |
+
'oc': ('Occitan', 'oci', 'oci'),
|
| 679 |
+
'oj': ('Ojibwa', 'oji', 'oji'),
|
| 680 |
+
'or': ('Oriya', 'ori', 'ori'),
|
| 681 |
+
'om': ('Oromo', 'orm', 'orm'),
|
| 682 |
+
'os': ('Ossetian', 'oss', 'oss'),
|
| 683 |
+
'pi': ('Pali', 'pli', 'pli'),
|
| 684 |
+
'pa': ('Panjabi', 'pan', 'pan'),
|
| 685 |
+
'fa': ('Persian', 'per', 'fas'),
|
| 686 |
+
'pl': ('Polish', 'pol', 'pol'),
|
| 687 |
+
'pt': ('Portuguese', 'por', 'por'),
|
| 688 |
+
'ps': ('Pushto', 'pus', 'pus'),
|
| 689 |
+
'qu': ('Quechua', 'que', 'que'),
|
| 690 |
+
'ro': ('Romanian', 'rum', 'ron'),
|
| 691 |
+
'rm': ('Romansh', 'roh', 'roh'),
|
| 692 |
+
'rn': ('Rundi', 'run', 'run'),
|
| 693 |
+
'ru': ('Russian', 'rus', 'rus'),
|
| 694 |
+
'sm': ('Samoan', 'smo', 'smo'),
|
| 695 |
+
'sg': ('Sango', 'sag', 'sag'),
|
| 696 |
+
'sa': ('Sanskrit', 'san', 'san'),
|
| 697 |
+
'sc': ('Sardinian', 'srd', 'srd'),
|
| 698 |
+
'gd': ('Scottish Gaelic', 'gla', 'gla'),
|
| 699 |
+
'sr': ('Serbian', 'srp', 'srp'),
|
| 700 |
+
'sn': ('Shona', 'sna', 'sna'),
|
| 701 |
+
'ii': ('Sichuan Yi', 'iii', 'iii'),
|
| 702 |
+
'sd': ('Sindhi', 'snd', 'snd'),
|
| 703 |
+
'si': ('Sinhala', 'sin', 'sin'),
|
| 704 |
+
'sk': ('Slovak', 'slo', 'slk'),
|
| 705 |
+
'sl': ('Slovenian', 'slv', 'slv'),
|
| 706 |
+
'so': ('Somali', 'som', 'som'),
|
| 707 |
+
'nr': ('South Ndebele', 'nbl', 'nbl'),
|
| 708 |
+
'st': ('Southern Sotho', 'sot', 'sot'),
|
| 709 |
+
'es': ('Spanish', 'spa', 'spa'),
|
| 710 |
+
'su': ('Sundanese', 'sun', 'sun'),
|
| 711 |
+
'sw': ('Swahili', 'swa', 'swa'),
|
| 712 |
+
'ss': ('Swati', 'ssw', 'ssw'),
|
| 713 |
+
'sv': ('Swedish', 'swe', 'swe'),
|
| 714 |
+
'tl': ('Tagalog', 'tgl', 'tgl'),
|
| 715 |
+
'ty': ('Tahitian', 'tah', 'tah'),
|
| 716 |
+
'tg': ('Tajik', 'tgk', 'tgk'),
|
| 717 |
+
'ta': ('Tamil', 'tam', 'tam'),
|
| 718 |
+
'tt': ('Tatar', 'tat', 'tat'),
|
| 719 |
+
'te': ('Telugu', 'tel', 'tel'),
|
| 720 |
+
'th': ('Thai', 'tha', 'tha'),
|
| 721 |
+
'bo': ('Tibetan', 'tib', 'bod'),
|
| 722 |
+
'ti': ('Tigrinya', 'tir', 'tir'),
|
| 723 |
+
'to': ('Tonga', 'ton', 'ton'),
|
| 724 |
+
'ts': ('Tsonga', 'tso', 'tso'),
|
| 725 |
+
'tn': ('Tswana', 'tsn', 'tsn'),
|
| 726 |
+
'tr': ('Turkish', 'tur', 'tur'),
|
| 727 |
+
'tk': ('Turkmen', 'tuk', 'tuk'),
|
| 728 |
+
'tw': ('Twi', 'twi', 'twi'),
|
| 729 |
+
'ug': ('Uighur', 'uig', 'uig'),
|
| 730 |
+
'uk': ('Ukrainian', 'ukr', 'ukr'),
|
| 731 |
+
'ur': ('Urdu', 'urd', 'urd'),
|
| 732 |
+
'uz': ('Uzbek', 'uzb', 'uzb'),
|
| 733 |
+
've': ('Venda', 'ven', 'ven'),
|
| 734 |
+
'vi': ('Vietnamese', 'vie', 'vie'),
|
| 735 |
+
'vo': ('Volapük', 'vol', 'vol'),
|
| 736 |
+
'wa': ('Walloon', 'wln', 'wln'),
|
| 737 |
+
'cy': ('Welsh', 'wel', 'cym'),
|
| 738 |
+
'fy': ('Western Frisian', 'fry', 'fry'),
|
| 739 |
+
'wo': ('Wolof', 'wol', 'wol'),
|
| 740 |
+
'xh': ('Xhosa', 'xho', 'xho'),
|
| 741 |
+
'yi': ('Yiddish', 'yid', 'yid'),
|
| 742 |
+
'yo': ('Yoruba', 'yor', 'yor'),
|
| 743 |
+
'za': ('Zhuang', 'zha', 'zha'),
|
| 744 |
+
'zu': ('Zulu', 'zul', 'zul')
|
| 745 |
+
}
|