Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import re | |
| import string | |
| # ----------------------------- indic_num.py ----------------------------- | |
# Two-letter codes of the languages handled by this module.
supported_lang = {"en", "hi", "gu", "mr", "bn", "te", "ta", "kn", "or", "pa"}
# Equivalent three-letter codes, kept for reference only:
# 'eng', 'hin', 'guj', 'mar', 'ben', 'tel', 'tam', 'kan', 'ori', 'pan'

# Native digit glyphs per language: position i holds the glyph for digit i.
all_num = {
    "en": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    "hi": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "gu": ["૦", "૧", "૨", "૩", "૪", "૫", "૬", "૭", "૮", "૯"],
    "mr": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "bn": ["০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯"],
    "te": ["౦", "౧", "౨", "౩", "౪", "౫", "౬", "౭", "౮", "౯"],
    # Index 0 is the Tamil digit zero (U+0BE6); an ASCII "0" here left Tamil
    # zero untranslated.  The trailing 11th entry is the Tamil sign for ten;
    # it is kept so existing indexing by position still works.
    "ta": ["௦", "௧", "௨", "௩", "௪", "௫", "௬", "௭", "௮", "௯", "௰"],
    "kn": ["೦", "೧", "೨", "೩", "೪", "೫", "೬", "೭", "೮", "೯"],
    "or": ["୦", "୧", "୨", "୩", "୪", "୫", "୬", "୭", "୮", "୯"],
    "pa": ["੦", "੧", "੨", "੩", "੪", "੫", "੬", "੭", "੮", "੯"],
}
def _english_number_words():
    """Build the English (Indian numbering) word table.

    Keys are decimal strings "0".."99" plus the place values "100", "1000",
    "100000", "10000000" and "1000000000"; values are the English words.
    Compound numbers are hyphenated ("twenty-one").
    """
    small = [
        "zero", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    ]
    tens = [
        "twenty", "thirty", "forty", "fifty",
        "sixty", "seventy", "eighty", "ninety",
    ]

    table = {str(value): word for value, word in enumerate(small)}
    for tens_digit, tens_word in enumerate(tens, start=2):
        table[str(tens_digit * 10)] = tens_word
        for unit in range(1, 10):
            table[str(tens_digit * 10 + unit)] = tens_word + "-" + small[unit]

    # Place-value words of the Indian numbering system.
    table["100"] = "hundred"
    table["1000"] = "thousand"
    table["100000"] = "lac"
    table["10000000"] = "crore"
    table["1000000000"] = "arab"
    return table


# Word tables for every language, keyed by language code.
num_dict = dict()
num_dict["en"] = _english_number_words()  # English-India
| num_dict["hi"] = { | |
| "0": "शून्य", | |
| "1": "एक", | |
| "2": "दो", | |
| "3": "तीन", | |
| "4": "चार", | |
| "5": "पाँच", | |
| "6": "छः", | |
| "7": "सात", | |
| "8": "आठ", | |
| "9": "नौ", | |
| "10": "दस", | |
| "11": "ग्यारह", | |
| "12": "बारह", | |
| "13": "तेरह", | |
| "14": "चौदह", | |
| "15": "पंद्रह", | |
| "16": "सोलह", | |
| "17": "सत्रह", | |
| "18": "अट्ठारह", | |
| "19": "उन्नीस", | |
| "20": "बीस", | |
| "21": "इक्कीस", | |
| "22": "बाईस", | |
| "23": "तेईस", | |
| "24": "चौबिस", | |
| "25": "पच्चीस", | |
| "26": "छब्बीस", | |
| "27": "सत्ताईस", | |
| "28": "अट्ठाईस", | |
| "29": "उनतीस", | |
| "30": "तीस", | |
| "31": "इकतीस", | |
| "32": "बत्तीस", | |
| "33": "तैंतीस", | |
| "34": "चौंतीस", | |
| "35": "पैंतीस", | |
| "36": "छत्तीस", | |
| "37": "सैंतीस", | |
| "38": "अड़तीस", | |
| "39": "उनतालीस", | |
| "40": "चालीस", | |
| "41": "इकतालीस", | |
| "42": "बयालीस", | |
| "43": "तैंतालीस", | |
| "44": "चौंतालीस", | |
| "45": "पैंतालीस", | |
| "46": "छियालीस", | |
| "47": "सैंतालीस", | |
| "48": "अड़तालीस", | |
| "49": "उनचास", | |
| "50": "पचास", | |
| "51": "इक्यावन", | |
| "52": "बावन", | |
| "53": "तिरेपन", | |
| "54": "चौवन", | |
| "55": "पचपन", | |
| "56": "छप्पन", | |
| "57": "सत्तावन", | |
| "58": "अट्ठावन", | |
| "59": "उनसठ", | |
| "60": "साठ", | |
| "61": "इकसठ", | |
| "62": "बासठ", | |
| "63": "तिरेसठ", | |
| "64": "चौंसठ", | |
| "65": "पैंसठ", | |
| "66": "छयासठ", | |
| "67": "सरसठ", | |
| "68": "अड़सठ", | |
| "69": "उनहत्तर", | |
| "70": "सत्तर", | |
| "71": "इकहत्तर", | |
| "72": "बहत्तर", | |
| "73": "तिहत्तर", | |
| "74": "चौहत्तर", | |
| "75": "पचहत्तर", | |
| "76": "छिहत्तर", | |
| "77": "सतहत्तर", | |
| "78": "अठहत्तर", | |
| "79": "उन्यासी", | |
| "80": "अस्सी", | |
| "81": "इक्यासी", | |
| "82": "बयासी", | |
| "83": "तिरासी", | |
| "84": "चौरासी", | |
| "85": "पचासी", | |
| "86": "छियासी", | |
| "87": "सत्तासी", | |
| "88": "अठासी", | |
| "89": "नवासी", | |
| "90": "नब्बे", | |
| "91": "इक्यानवे", | |
| "92": "बानवे", | |
| "93": "तिरानवे", | |
| "94": "चौरानवे", | |
| "95": "पचानवे", | |
| "96": "छियानवे", | |
| "97": "सत्तानवे", | |
| "98": "अट्ठानवे", | |
| "99": "निन्यानवे", | |
| "100": "सौ", | |
| "1000": "हज़ार", | |
| "100000": "लाख", | |
| "10000000": "करोड़", | |
| "1000000000": "अरब", | |
| } # Hindi | |
| num_dict["gu"] = { | |
| "0": "શૂન્ય", | |
| "1": "એક", | |
| "2": "બે", | |
| "3": "ત્રણ", | |
| "4": "ચાર", | |
| "5": "પાંચ", | |
| "6": "છ", | |
| "7": "સાત", | |
| "8": "આઠ", | |
| "9": "નવ", | |
| "10": "દસ", | |
| "11": "અગિયાર", | |
| "12": "બાર", | |
| "13": "તેર", | |
| "14": "ચૌદ", | |
| "15": "પંદર", | |
| "16": "સોળ", | |
| "17": "સત્તર", | |
| "18": "અઢાર", | |
| "19": "ઓગણિસ", | |
| "20": "વીસ", | |
| "21": "એકવીસ", | |
| "22": "બાવીસ", | |
| "23": "તેવીસ", | |
| "24": "ચોવીસ", | |
| "25": "પચ્ચીસ", | |
| "26": "છવીસ", | |
| "27": "સત્તાવીસ", | |
| "28": "અઠ્ઠાવીસ", | |
| "29": "ઓગણત્રીસ", | |
| "30": "ત્રીસ", | |
| "31": "એકત્રીસ", | |
| "32": "બત્રીસ", | |
| "33": "તેત્રીસ", | |
| "34": "ચોત્રીસ", | |
| "35": "પાંત્રીસ", | |
| "36": "છત્રીસ", | |
| "37": "સડત્રીસ", | |
| "38": "અડત્રીસ", | |
| "39": "ઓગણચાલીસ", | |
| "40": "ચાલીસ", | |
| "41": "એકતાલીસ", | |
| "42": "બેતાલીસ", | |
| "43": "ત્રેતાલીસ", | |
| "44": "ચુંમાલીસ", | |
| "45": "પિસ્તાલીસ", | |
| "46": "છેતાલીસ", | |
| "47": "સુડતાલીસ", | |
| "48": "અડતાલીસ", | |
| "49": "ઓગણપચાસ", | |
| "50": "પચાસ", | |
| "51": "એકાવન", | |
| "52": "બાવન", | |
| "53": "ત્રેપન", | |
| "54": "ચોપન", | |
| "55": "પંચાવન", | |
| "56": "છપ્પન", | |
| "57": "સત્તાવન", | |
| "58": "અઠ્ઠાવન", | |
| "59": "ઓગણસાઠ", | |
| "60": "સાઈઠ", | |
| "61": "એકસઠ", | |
| "62": "બાસઠ", | |
| "63": "ત્રેસઠ", | |
| "64": "ચોસઠ", | |
| "65": "પાંસઠ", | |
| "66": "છાસઠ", | |
| "67": "સડસઠ", | |
| "68": "અડસઠ", | |
| "69": "અગણોસિત્તેર", | |
| "70": "સિત્તેર", | |
| "71": "એકોતેર", | |
| "72": "બોતેર", | |
| "73": "તોતેર", | |
| "74": "ચુમોતેર", | |
| "75": "પંચોતેર", | |
| "76": "છોતેર", | |
| "77": "સિત્યોતેર", | |
| "78": "ઇઠ્યોતેર", | |
| "79": "ઓગણાએંસી", | |
| "80": "એંસી", | |
| "81": "એક્યાસી", | |
| "82": "બ્યાસી", | |
| "83": "ત્યાસી", | |
| "84": "ચોર્યાસી", | |
| "85": "પંચાસી", | |
| "86": "છ્યાસી", | |
| "87": "સિત્યાસી", | |
| "88": "ઈઠ્યાસી", | |
| "89": "નેવ્યાસી", | |
| "90": "નેવું", | |
| "91": "એકાણું", | |
| "92": "બાણું", | |
| "93": "ત્રાણું", | |
| "94": "ચોરાણું", | |
| "95": "પંચાણું", | |
| "96": "છન્નું", | |
| "97": "સત્તાણું", | |
| "98": "અઠ્ઠાણું", | |
| "99": "નવ્વાણું", | |
| "100": "સો", | |
| "1000": "હજાર", | |
| "100000": "લાખ", | |
| "1000000": "દસ લાખ", | |
| "10000000": "કરોડ઼", | |
| } # Gujarati | |
| num_dict["mr"] = { | |
| "0": "शून्य", | |
| "1": "एक", | |
| "2": "दोन", | |
| "3": "तीन", | |
| "4": "चार", | |
| "5": "पाच", | |
| "6": "सहा", | |
| "7": "सात", | |
| "8": "आठ", | |
| "9": "नऊ", | |
| "10": "दहा", | |
| "11": "अकरा", | |
| "12": "बारा", | |
| "13": "तेरा", | |
| "14": "चौदा", | |
| "15": "पंधरा", | |
| "16": "सोळा", | |
| "17": "सतरा", | |
| "18": "अठरा", | |
| "19": "एकोणीस", | |
| "20": "वीस", | |
| "21": "एकवीस", | |
| "22": "बावीस", | |
| "23": "तेवीस", | |
| "24": "चोवीस", | |
| "25": "पंचवीस", | |
| "26": "सव्वीस", | |
| "27": "सत्तावीस", | |
| "28": "अठ्ठावीस", | |
| "29": "एकोणतीस", | |
| "30": "तीस", | |
| "31": "एकतीस", | |
| "32": "बत्तीस", | |
| "33": "तेहेतीस", | |
| "34": "चौतीस", | |
| "35": "पस्तीस", | |
| "36": "छत्तीस", | |
| "37": "सदतीस", | |
| "38": "अडतीस", | |
| "39": "एकोणचाळीस", | |
| "40": "चाळीस", | |
| "41": "एक्केचाळीस", | |
| "42": "बेचाळीस", | |
| "43": "त्रेचाळीस", | |
| "44": "चव्वेचाळीस", | |
| "45": "पंचेचाळीस", | |
| "46": "सेहेचाळीस", | |
| "47": "सत्तेचाळीस", | |
| "48": "अठ्ठेचाळीस", | |
| "49": "एकोणपन्नास", | |
| "50": "पन्नास", | |
| "51": "एक्कावन्न", | |
| "52": "बावन्न", | |
| "53": "त्रेपन्न", | |
| "54": "चोपन्न", | |
| "55": "पंचावन्न", | |
| "56": "छप्पन्न", | |
| "57": "सत्तावन्न", | |
| "58": "अठ्ठावन्न", | |
| "59": "एकोणसाठ", | |
| "60": "साठ", | |
| "61": "एकसष्ठ", | |
| "62": "बासष्ठ", | |
| "63": "त्रेसष्ठ", | |
| "64": "चौसष्ठ", | |
| "65": "पासष्ठ", | |
| "66": "सहासष्ठ", | |
| "67": "सदुसष्ठ", | |
| "68": "अडुसष्ठ", | |
| "69": "एकोणसत्तर", | |
| "70": "सत्तर", | |
| "71": "एक्काहत्तर", | |
| "72": "बाहत्तर", | |
| "73": "त्र्याहत्तर", | |
| "74": "चौर्याहत्तर", | |
| "75": "पंच्याहत्तर", | |
| "76": "शहात्तर", | |
| "77": "सत्याहत्तर", | |
| "78": "अठ्ठ्याहत्तर", | |
| "79": "एकोण ऐंशी", | |
| "80": "ऐंशी", | |
| "81": "एक्क्याऐंशी", | |
| "82": "ब्याऐंशी", | |
| "83": "त्र्याऐंशी", | |
| "84": "चौऱ्याऐंशी", | |
| "85": "पंच्याऐंशी", | |
| "86": "शहाऐंशी", | |
| "87": "सत्त्याऐंशी", | |
| "88": "अठ्ठ्याऐंशी", | |
| "89": "एकोणनव्वद", | |
| "90": "नव्वद", | |
| "91": "एक्क्याण्णव", | |
| "92": "ब्याण्णव", | |
| "93": "त्र्याण्णव", | |
| "94": "चौऱ्याण्णव", | |
| "95": "पंच्याण्णव", | |
| "96": "शहाण्णव", | |
| "97": "सत्त्याण्णव", | |
| "98": "अठ्ठ्याण्णव", | |
| "99": "नव्व्याण्णव", | |
| "100": "शे", | |
| "1000": "हजार", | |
| "100000": "लाख", | |
| "10000000": "कोटी", | |
| "1000000000": "अब्ज", | |
| } # Marathi | |
| num_dict["bn"] = { | |
| "0": "শূন্য", | |
| "1": "এক", | |
| "2": "দুই", | |
| "3": "তিন", | |
| "4": "চার", | |
| "5": "পাঁচ", | |
| "6": "ছয়", | |
| "7": "সাত", | |
| "8": "আট", | |
| "9": "নয়", | |
| "10": "দশ", | |
| "11": "এগার", | |
| "12": "বার", | |
| "13": "তের", | |
| "14": "চৌদ্দ", | |
| "15": "পনের", | |
| "16": "ষোল", | |
| "17": "সতের", | |
| "18": "আঠার", | |
| "19": "ঊনিশ", | |
| "20": "বিশ", | |
| "21": "একুশ", | |
| "22": "বাইশ", | |
| "23": "তেইশ", | |
| "24": "চব্বিশ", | |
| "25": "পঁচিশ", | |
| "26": "ছাব্বিশ", | |
| "27": "সাতাশ", | |
| "28": "আঠাশ", | |
| "29": "ঊনত্রিশ", | |
| "30": "ত্রিশ", | |
| "31": "একত্রিশ", | |
| "32": "বত্রিশ", | |
| "33": "তেত্রিশ", | |
| "34": "চৌত্রিশ", | |
| "35": "পঁয়ত্রিশ", | |
| "36": "ছত্রিশ", | |
| "37": "সাঁইত্রিশ", | |
| "38": "আটত্রিশ", | |
| "39": "ঊনচল্লিশ", | |
| "40": "চল্লিশ", | |
| "41": "একচল্লিশ", | |
| "42": "বিয়াল্লিশ", | |
| "43": "তেতাল্লিশ", | |
| "44": "চুয়াল্লিশ", | |
| "45": "পঁয়তাল্লিশ", | |
| "46": "ছেচল্লিশ", | |
| "47": "সাতচল্লিশ", | |
| "48": "আটচল্লিশ", | |
| "49": "ঊনপঞ্চাশ", | |
| "50": "পঞ্চাশ", | |
| "51": "একান্ন", | |
| "52": "বায়ান্ন", | |
| "53": "তিপ্পান্ন", | |
| "54": "চুয়ান্ন", | |
| "55": "পঞ্চান্ন", | |
| "56": "ছাপ্পান্ন", | |
| "57": "সাতান্ন", | |
| "58": "আটান্ন", | |
| "59": "ঊনষাট", | |
| "60": "ষাট", | |
| "61": "একষট্টি", | |
| "62": "বাষট্টি", | |
| "63": "তেষট্টি", | |
| "64": "চৌষট্টি", | |
| "65": "পঁয়ষট্টি", | |
| "66": "ছেষট্টি", | |
| "67": "সাতষট্টি", | |
| "68": "আটষট্টি", | |
| "69": "ঊনসত্তর", | |
| "70": "সত্তর", | |
| "71": "একাত্তর", | |
| "72": "বাহাত্তর", | |
| "73": "তিয়াত্তর", | |
| "74": "চুয়াত্তর", | |
| "75": "পঁচাত্তর", | |
| "76": "ছিয়াত্তর", | |
| "77": "সাতাত্তর", | |
| "78": "আটাত্তর", | |
| "79": "ঊনআশি", | |
| "80": "আশি", | |
| "81": "একাশি", | |
| "82": "বিরাশি", | |
| "83": "তিরাশি", | |
| "84": "চুরাশি", | |
| "85": "পঁচাশি", | |
| "86": "ছিয়াশি", | |
| "87": "সাতাশি", | |
| "88": "আটাশি", | |
| "89": "ঊননব্বই", | |
| "90": "নব্বই", | |
| "91": "একানব্বই", | |
| "92": "বিরানব্বই", | |
| "93": "তিরানব্বই", | |
| "94": "চুরানব্বই", | |
| "95": "পঁচানব্বই", | |
| "96": "ছিয়ানব্বই", | |
| "97": "সাতানব্বই", | |
| "98": "আটানব্বই", | |
| "99": "নিরানব্বই", | |
| "100": "শো", | |
| "1000": "হাজার", | |
| "100000": "লাখ", | |
| "10000000": "কোটি", | |
| "1000000000": "একশ’ কোটি", | |
| } # Bengali | |
| num_dict["te"] = { | |
| "0": "సున్నా", | |
| "1": "ఒకటి", | |
| "2": "రెండు", | |
| "3": "మూడు", | |
| "4": "నాలుగు", | |
| "5": "ఐదు", | |
| "6": "ఆరు", | |
| "7": "ఏడు", | |
| "8": "ఎనిమిది", | |
| "9": "తొమ్మిది", | |
| "10": "పది", | |
| "11": "పదకొండు", | |
| "12": "పన్నెండు", | |
| "13": "పదమూడు", | |
| "14": "పద్నాలుగు", | |
| "15": "పదిహేను", | |
| "16": "పదహారు", | |
| "17": "పదిహేడు", | |
| "18": "పద్దెనిమిది", | |
| "19": "పందొమ్మిది", | |
| "20": "ఇరవై", | |
| "21": "ఇరవై ఒకటి", | |
| "22": "ఇరవై రెండు", | |
| "23": "ఇరవై మూడు", | |
| "24": "ఇరవై నాలుగు", | |
| "25": "ఇరవై ఐదు", | |
| "26": "ఇరవై ఆరు", | |
| "27": "ఇరవై ఏడు", | |
| "28": "ఇరవై ఎనిమిది", | |
| "29": "ఇరవై తొమ్మిది", | |
| "30": "ముప్పై", | |
| "31": "ముప్పై ఒకటి", | |
| "32": "ముప్పై రెండు", | |
| "33": "ముప్పై మూడు", | |
| "34": "ముప్పై నాలుగు", | |
| "35": "ముప్పై ఐదు", | |
| "36": "ముప్పై ఆరు", | |
| "37": "ముప్పై ఏడు", | |
| "38": "ముప్పై ఎనిమిది", | |
| "39": "ముప్పై తొమ్మిది", | |
| "40": "నలభై", | |
| "41": "నలభై ఒకటి", | |
| "42": "నలభై రెండు", | |
| "43": "నలభై మూడు", | |
| "44": "నలభై నాలుగు", | |
| "45": "నలభై ఐదు", | |
| "46": "నలభై ఆరు", | |
| "47": "నలభై ఏడు", | |
| "48": "నలభై ఎనిమిది", | |
| "49": "నలభై తొమ్మిది", | |
| "50": "యాభై", | |
| "51": "యాభై ఒకటి", | |
| "52": "యాభై రెండు", | |
| "53": "యాభై మూడు", | |
| "54": "యాభై నాలుగు", | |
| "55": "యాభై ఐదు", | |
| "56": "యాభై ఆరు", | |
| "57": "యాభై ఏడు", | |
| "58": "యాభై ఎనిమిది", | |
| "59": "యాభై తొమ్మిది", | |
| "60": "అరవై", | |
| "61": "అరవై ఒకటి", | |
| "62": "అరవై రెండు", | |
| "63": "అరవై మూడు", | |
| "64": "అరవై నాలుగు", | |
| "65": "అరవై ఐదు", | |
| "66": "అరవై ఆరు", | |
| "67": "అరవై ఏడు", | |
| "68": "అరవై ఎనిమిది", | |
| "69": "అరవై తొమ్మిది", | |
| "70": "డెబ్బై", | |
| "71": "డెబ్బై ఒకటి", | |
| "72": "డెబ్బై రెండు", | |
| "73": "డెబ్బై మూడు", | |
| "74": "డెబ్బై నాలుగు", | |
| "75": "డెబ్బై ఐదు", | |
| "76": "డెబ్బై ఆరు", | |
| "77": "డెబ్బై ఏడు", | |
| "78": "డెబ్బై ఎనిమిది", | |
| "79": "డెబ్బై తొమ్మిది", | |
| "80": "ఎనభై", | |
| "81": "ఎనభై ఒకటి", | |
| "82": "ఎనభై రెండు", | |
| "83": "ఎనభై మూడు", | |
| "84": "ఎనభై నాలుగు", | |
| "85": "ఎనభై ఐదు", | |
| "86": "ఎనభై ఆరు", | |
| "87": "ఎనభై ఏడు", | |
| "88": "ఎనభై ఎనిమిది", | |
| "89": "ఎనభై తొమ్మిది", | |
| "90": "తొంభై", | |
| "91": "తొంభై ఒకటి", | |
| "92": "తొంభై రెండు", | |
| "93": "తొంభై మూడు", | |
| "94": "తొంభై నాలుగు", | |
| "95": "తొంభై ఐదు", | |
| "96": "తొంభై ఆరు", | |
| "97": "తొంభై ఏడు", | |
| "98": "తొంభై ఎనిమిది", | |
| "99": "తొంభై తొమ్మిది", | |
| "100": "వందల", | |
| "1000": "వేల", | |
| "100000": "లక్షల", | |
| "10000000": "కోట్ల", | |
| "1000000000": "బిలియన్", | |
| } # Telugu | |
# Tamil word table: "0".."99" plus the place values used by num_to_word.
# Corrected entries: "21" (now uses the same combining form as 22-29),
# "47" (stray leading space removed), "73" (unit word was not "three"),
# "84" and "85" (now follow the pattern of 83 and 86).
num_dict["ta"] = {
    "0": "பூஜ்ஜியம்",
    "1": "ஒன்று",
    "2": "இரண்டு",
    "3": "மூன்று",
    "4": "நான்கு",
    "5": "ஐந்து",
    "6": "ஆறு",
    "7": "ஏழு",
    "8": "எட்டு",
    "9": "ஒன்பது",
    "10": "பத்து",
    "11": "பதினொன்று",
    "12": "பன்னிரண்டு",
    "13": "பதிமூன்று",
    "14": "பதினான்கு",
    "15": "பதினைந்து",
    "16": "பதினாறு",
    "17": "பதினேழு",
    "18": "பதினெட்டு",
    "19": "பத்தொன்பது",
    "20": "இருபது",
    "21": "இருபத்து ஒன்று",
    "22": "இருபத்து இரண்டு",
    "23": "இருபத்து மூன்று",
    "24": "இருபத்து நான்கு",
    "25": "இருபத்து ஐந்து",
    "26": "இருபத்து ஆறு",
    "27": "இருபத்து ஏழு",
    "28": "இருபத்து எட்டு",
    "29": "இருபத்து ஒன்பது",
    "30": "முப்பது",
    "31": "முப்பத்து ஒன்று",
    "32": "முப்பத்து இரண்டு",
    "33": "முப்பத்து மூன்று",
    "34": "முப்பத்து நான்கு",
    "35": "முப்பத்து ஐந்து",
    "36": "முப்பத்து ஆறு",
    "37": "முப்பத்து ஏழு",
    "38": "முப்பத்து எட்டு",
    "39": "முப்பத்து ஒன்பது",
    "40": "நாற்பது",
    "41": "நாற்பத்து ஒன்று",
    "42": "நாற்பத்து இரண்டு",
    "43": "நாற்பத்து மூன்று",
    "44": "நாற்பத்து நான்கு",
    "45": "நாற்பத்து ஐந்து",
    "46": "நாற்பத்து ஆறு",
    "47": "நாற்பத்து ஏழு",
    "48": "நாற்பத்து எட்டு",
    "49": "நாற்பத்து ஒன்பது",
    "50": "ஐம்பது",
    "51": "ஐம்பத்து ஒன்று",
    "52": "ஐம்பத்து இரண்டு",
    "53": "ஐம்பத்து மூன்று",
    "54": "ஐம்பத்து நான்கு",
    "55": "ஐம்பத்து ஐந்து",
    "56": "ஐம்பத்து ஆறு",
    "57": "ஐம்பத்து ஏழு",
    "58": "ஐம்பத்து எட்டு",
    "59": "ஐம்பத்து ஒன்பது",
    "60": "அறுபது",
    "61": "அறுபத்து ஒன்று",
    "62": "அறுபத்து இரண்டு",
    "63": "அறுபத்து மூன்று",
    "64": "அறுபத்து நான்கு",
    "65": "அறுபத்து ஐந்து",
    "66": "அறுபத்து ஆறு",
    "67": "அறுபத்து ஏழு",
    "68": "அறுபத்து எட்டு",
    "69": "அறுபத்து ஒன்பது",
    "70": "எழுபது",
    "71": "எழுபத்தி ஒன்று",
    "72": "எழுபத்தி இரண்டு",
    "73": "எழுபத்தி மூன்று",
    "74": "எழுபத்தி நான்கு",
    "75": "எழுபத்தி ஐந்து",
    "76": "எழுபத்தி ஆறு",
    "77": "எழுபத்தி ஏழு",
    "78": "எழுபத்தி எட்டு",
    "79": "எழுபத்தி ஒன்பது",
    "80": "எண்பது",
    "81": "எண்பத்தியொன்று",
    "82": "எண்பத்திரண்டு",
    "83": "எண்பத்திமூன்று",
    "84": "எண்பத்திநான்கு",
    "85": "எண்பத்திஐந்து",
    "86": "எண்பத்திஆறு",
    "87": "எண்பத்திஏழு",
    "88": "எண்பத்தியெட்டு",
    "89": "எண்பத்தியொன்பது",
    "90": "தொன்னூறு",
    "91": "தொண்ணூற்றியொன்று",
    "92": "தொண்ணூற்றிரண்டு",
    "93": "தொண்ணூற்றிமூன்று",
    "94": "தொண்ணூற்றிநான்கு",
    "95": "தொண்ணூற்றிஐந்து",
    "96": "தொண்ணூற்றியாறு",
    "97": "தொண்ணூற்றியேழு",
    "98": "தொண்ணூற்றியெட்டு",
    "99": "தொண்ணூற்றிஒன்பது",
    "100": "நூறு",
    "1000": "ஆயிரம்",
    "100000": "இலட்சம்",
    "10000000": "கோடி",
    "1000000000": "பில்லியன்",
}  # Tamil
| num_dict["kn"] = { | |
| "0": "ಸೊನ್ನೆ", | |
| "1": "ಒಂದು", | |
| "2": "ಎರಡು", | |
| "3": "ಮೂರು", | |
| "4": "ನಾಲ್ಕು", | |
| "5": "ಅಯ್ದು", | |
| "6": "ಆರು", | |
| "7": "ಏಳು", | |
| "8": "ಎಂಟು", | |
| "9": "ಒಂಬತ್ತು", | |
| "10": "ಹತ್ತು", | |
| "11": "ಹನ್ನೊಂದು", | |
| "12": "ಹನ್ನೆರಡು", | |
| "13": "ಹದಿಮೂರು", | |
| "14": "ಹದಿನಾಲ್ಕು", | |
| "15": "ಹದಿನೈದು", | |
| "16": "ಹದಿನಾರು", | |
| "17": "ಹದಿನೇಳು", | |
| "18": "ಹದಿನೆಂಟು", | |
| "19": "ಹತ್ತೊಂಬತ್ತು", | |
| "20": "ಇಪ್ಪತ್ತು", | |
| "21": "ಇಪ್ಪತ್ತ್’ಒಂದು", | |
| "22": "ಇಪ್ಪತ್ತ್’ಎರಡು", | |
| "23": "ಇಪ್ಪತ್ತ್’ಮೂರು", | |
| "24": "ಇಪ್ಪತ್ತ್’ನಾಲ್ಕು", | |
| "25": "ಇಪ್ಪತ್ತ್’ಐದು", | |
| "26": "ಇಪ್ಪತ್ತ್’ಆರು", | |
| "27": "ಇಪ್ಪತ್ತ್’ಏಳು", | |
| "28": "ಇಪ್ಪತ್ತ್’ಎಂಟು", | |
| "29": "ಇಪ್ಪತ್ತ್’ಒಂಬತ್ತು", | |
| "30": "ಮೂವತ್ತು", | |
| "31": "ಮುವತ್ತ್’ಒಂದು", | |
| "32": "ಮುವತ್ತ್’ಎರಡು", | |
| "33": "ಮುವತ್ತ್’ಮೂರು", | |
| "34": "ಮೂವತ್ತ್’ನಾಲ್ಕು", | |
| "35": "ಮೂವತ್ತ್’ಐದು", | |
| "36": "ಮೂವತ್ತ್’ಆರು", | |
| "37": "ಮೂವತ್ತ್’ಏಳು", | |
| "38": "ಮೂವತ್ತ್’ಎಂಟು", | |
| "39": "ಮೂವತ್ತ್’ಒಂಬತ್ತು", | |
| "40": "ನಲವತ್ತು", | |
| "41": "ನಲವತ್ತೊಂದು", | |
| "42": "ನಲವತ್ತ್ ಎರಡು", | |
| "43": "ನಲವತ್ತ್ ಮೂರು", | |
| "44": "ನಲವತ್ತ್ ನಾಲ್ಕು", | |
| "45": "ನಲವತ್ತೈದು", | |
| "46": "ನಲವತ್ತಾರು", | |
| "47": "ನಲವತ್ತೇಳು", | |
| "48": "ನಲವತ್ತೆಂಟು", | |
| "49": "ನಲವತ್ತೊಂಬತ್ತು", | |
| "50": "ಐವತ್ತು", | |
| "51": "ಐವತ್ತೊಂದು", | |
| "52": "ಐವತ್ತೆರಡು", | |
| "53": "ಐವತ್ತಮೂರು", | |
| "54": "ಐವತ್ತ್ನಾಲ್ಕು", | |
| "55": "ಐವತ್ತೈದು", | |
| "56": "ಐವತ್ತಾರು", | |
| "57": "ಐವತ್ತೇಳು", | |
| "58": "ಐವತ್ತೆಂಟು", | |
| "59": "ಐವತ್ತೊಂಬತ್ತು", | |
| "60": "ಅರವತ್ತು", | |
| "61": "ಅರವತ್ತೊಂದು", | |
| "62": "ಅರವತ್ತೆರಡು", | |
| "63": "ಅರವತ್ತ್ ಮೂರು", | |
| "64": "ಅರವತ್ತ್ ನಾಲ್ಕು", | |
| "65": "ಅರವತ್ತೈದು", | |
| "66": "ಅರವತ್ತಾರು", | |
| "67": "ಅರವತ್ತೇಳು", | |
| "68": "ಅರವತ್ತೆಂಟು", | |
| "69": "ಅರವತ್ತೊಂಬತ್ತು", | |
| "70": "ಎಪ್ಪತ್ತು", | |
| "71": "ಎಪ್ಪತ್ತೊಂದು", | |
| "72": "ಎಪ್ಪತ್ತೆರಡು", | |
| "73": "ಎಪ್ಪತ್ತ್ ಮೂರು", | |
| "74": "ಎಪ್ಪತ್ತ್ ನಾಲ್ಕು", | |
| "75": "ಎಪ್ಪತ್ತೈದು", | |
| "76": "ಎಪ್ಪತ್ತಾರು", | |
| "77": "ಎಪ್ಪತ್ತೇಳು", | |
| "78": "ಎಪ್ಪತ್ತೆಂಟು", | |
| "79": "ಎಪ್ಪತ್ತೊಂಬತ್ತು", | |
| "80": "ಎಂಬತ್ತು", | |
| "81": "ಎಂಬತ್ತೊಂದು", | |
| "82": "ಎಂಬತ್ತೆರಡು", | |
| "83": "ಎಂಬತ್ತ್ ಮೂರು", | |
| "84": "ಎಂಬತ್ತ್ ನಾಲ್ಕು", | |
| "85": "ಎಂಬತ್ತೈದು", | |
| "86": "ಎಂಬತ್ತಾರು", | |
| "87": "ಎಂಬತ್ತೇಳು", | |
| "88": "ಎಂಬತ್ತೆಂಟು", | |
| "89": "ಎಂಬತ್ತೊಂಬತ್ತು", | |
| "90": "ತೊಂಬತ್ತು", | |
| "91": "ತೊಂಬತ್ತೊಂದು", | |
| "92": "ತೊಂಬತ್ತೆರಡು", | |
| "93": "ತೊಂಬತ್ತ ಮೂರು", | |
| "94": "ತೊಂಬತ್ತ ನಾಲ್ಕು", | |
| "95": "ತೊಂಬತ್ತೈದು", | |
| "96": "ತೊಂಬತ್ತಾರು", | |
| "97": "ತೊಂಬತ್ತೇಳು", | |
| "98": "ತೊಂಬತ್ತೆಂಟು", | |
| "99": "ತೊಂಬತ್ತೊಂಬತ್ತು", | |
| "100": "ನೂರ", | |
| "1000": "ಸಾವಿರದ", | |
| "100000": "ಲಕ್ಷದ", | |
| "10000000": "ಕೋಟಿ", | |
| "1000000000": "ಶತಕೋಟಿ", | |
| } # Kannada | |
# Odia word table: "0".."99" plus the place values used by num_to_word.
# Corrected entry: "10" previously repeated the word for nine.
num_dict["or"] = {
    "0": "ଶୁନ୍ୟ",
    "1": "ଏକ",
    "2": "ଦୁଇ",
    "3": "ତିନି",
    "4": "ଚାରି",
    "5": "ପାଞ୍ଚ",
    "6": "ଛଅ",
    "7": "ସାତ",
    "8": "ଆଠ",
    "9": "ନଅ",
    "10": "ଦଶ",
    "11": "ଏଗାର",
    "12": "ବାର",
    "13": "ତେର",
    "14": "ଚଉଦ",
    "15": "ପନ୍ଦର",
    "16": "ଷୋହଳ",
    "17": "ସତର",
    "18": "ଅଠର",
    "19": "ଊଣାଇଶ",
    "20": "କୋଡିଏ",
    "21": "ଏକୋଇଶି",
    "22": "ବାଇଶି",
    "23": "ତେଇଶି",
    "24": "ଚବିଶି",
    "25": "ପଚିଶି",
    "26": "ଛବିଶି",
    "27": "ସତାଇଶି",
    "28": "ଅଠାଇଶି",
    "29": "ଅଣତିରିଶି",
    "30": "ତିରିଶି",
    "31": "ଏକତିରିଶି",
    "32": "ବତିଶି",
    "33": "ତେତିଶି",
    "34": "ଚଉତିରିଶି",
    "35": "ପଞ୍ଚତିରିଶି",
    "36": "ଛତିଶି",
    "37": "ସଂଇତିରିଶି",
    "38": "ଅଠତିରିଶି",
    "39": "ଅଣଚାଳିଶି",
    "40": "ଚାଳିଶି",
    "41": "ଏକଚାଳିଶି",
    "42": "ବୟାଳିଶି",
    "43": "ତେୟାଳିଶି",
    "44": "ଚଉରାଳିଶି",
    "45": "ପଞ୍ଚଚାଳିଶି",
    "46": "ଛୟାଳିଶି",
    "47": "ସତଚାଳିଶି",
    "48": "ଅଠଚାଳିଶି",
    "49": "ଅଣଚାଶ",
    "50": "ପଚାଶ",
    "51": "ଏକାବନ",
    "52": "ବାଉନ",
    "53": "ତେପନ",
    "54": "ଚଉବନ",
    "55": "ପଞ୍ଚାବନ",
    "56": "ଛପନ",
    "57": "ସତାବନ",
    "58": "ଅଠାବନ",
    "59": "ଅଣଷଠି",
    "60": "ଷାଠିଏ",
    "61": "ଏକଷଠି",
    "62": "ବାଷଠି",
    "63": "ତେଷଠି",
    "64": "ଚଉଷଠି",
    "65": "ପଞ୍ଚଷଠି",
    "66": "ଛଅଷଠି",
    "67": "ସତଷଠି",
    "68": "ଅଠଷଠି",
    "69": "ଅଣସ୍ତରୀ",
    "70": "ସତୂରୀ",
    "71": "ଏକସ୍ତରୀ",
    "72": "ବାସ୍ତରୀ",
    "73": "ତେସ୍ତରୀ",
    "74": "ଚଉସ୍ତରୀ",
    "75": "ପଞ୍ଚସ୍ତରୀ",
    "76": "ଛଅସ୍ତରୀ",
    "77": "ସତସ୍ତରୀ",
    "78": "ଅଠସ୍ତରୀ",
    "79": "ଅଣାଅଶୀ",
    "80": "ଅଶୀ",
    "81": "ଏକାଅଶୀ",
    "82": "ବୟାଅଶୀ",
    "83": "ତେୟାଅଶୀ",
    "84": "ଚଉରାଅଶୀ",
    "85": "ପଞ୍ଚାଅଶୀ",
    "86": "ଛୟାଅଶୀ",
    "87": "ସତାଅଶୀ",
    "88": "ଅଠାଅଶୀ",
    "89": "ଅଣାନବେ",
    "90": "ନବେ",
    "91": "ଏକାନବେ",
    "92": "ବୟାନବେ",
    "93": "ତେୟାନବେ",
    "94": "ଚଉରାନବେ",
    "95": "ପଞ୍ଚାନବେ",
    "96": "ଛୟାନବେ",
    "97": "ସତାନବେ",
    "98": "ଅଠାନବେ",
    "99": "ଅନେଶତ",
    "100": "ଶହେ",
    "1000": "ହଜାର",
    "100000": "ଲକ୍ଷ",
    "10000000": "କୋଟି",
    # NOTE(review): same word as "10000000" above -- confirm the intended
    # term for 10^9 with a native speaker.
    "1000000000": "କୋଟି",
}  # Oriya
# Punjabi (Gurmukhi) word table: "0".."99" plus the place values used by
# num_to_word.  Corrected entries: "0" (stray trailing space removed) and
# "10" (spelling of the numeral ten).
num_dict["pa"] = {
    "0": "ਸਿਫਰ",
    "1": "ਇੱਕ",
    "2": "ਦੋ",
    "3": "ਤਿੰਨ",
    "4": "ਚਾਰ",
    "5": "ਪੰਜ",
    "6": "ਛੇ",
    "7": "ਸੱਤ",
    "8": "ਅੱਠ",
    "9": "ਨੌਂ",
    "10": "ਦਸ",
    "11": "ਗਿਆਰਾਂ",
    "12": "ਬਾਰਾਂ",
    "13": "ਤੇਰਾਂ",
    "14": "ਚੌਦਾਂ",
    "15": "ਪੰਦਰਾਂ",
    "16": "ਸੋਲ਼ਾਂ",
    "17": "ਸਤਾਰਾਂ",
    "18": "ਅਠਾਰਾਂ",
    "19": "ਉਨੀ",
    "20": "ਵੀਹ",
    "21": "ਇੱਕੀ",
    "22": "ਬਾਈ",
    "23": "ਤੇਈ",
    "24": "ਚੌਵੀ",
    "25": "ਪੰਝੀ",
    "26": "ਛੱਬੀ",
    "27": "ਸਤਾਈ",
    "28": "ਅਠਾਈ",
    "29": "ਉਨੱਤੀ",
    "30": "ਤੀਹ",
    "31": "ਇਕੱਤੀ",
    "32": "ਬੱਤੀ",
    "33": "ਤੇਤੀ",
    "34": "ਚੌਂਤੀ",
    "35": "ਪੈਂਤੀ",
    "36": "ਛੱਤੀ",
    "37": "ਸੈਂਤੀ",
    "38": "ਅਠੱਤੀ",
    "39": "ਉਨਤਾਲੀ",
    "40": "ਚਾਲੀ",
    "41": "ਇਕਤਾਲੀ",
    "42": "ਬਤਾਲੀ",
    "43": "ਤਰਤਾਲੀ",
    "44": "ਚੌਤਾਲੀ",
    "45": "ਪੰਜਤਾਲੀ",
    "46": "ਛਿਆਲੀ",
    "47": "ਸੰਤਾਲੀ",
    "48": "ਅੱਠਤਾਲੀ",
    "49": "ਉਣਿੰਜਾ",
    "50": "ਪੰਜਾਹ",
    "51": "ਇਕਵਿੰਜਾ",
    "52": "ਬਵਿੰਜਾ",
    "53": "ਤਰਵਿੰਜਾ",
    "54": "ਚਰਿੰਜਾ",
    "55": "ਪਚਵਿੰਜਾ",
    "56": "ਛਪਿੰਜਾ",
    "57": "ਸਤਵਿੰਜਾ",
    "58": "ਅੱਠਵਿੰਜਾ",
    "59": "ਉਣਾਠ",
    "60": "ਸੱਠ",
    "61": "ਇਕਾਠ",
    "62": "ਬਾਠ੍ਹ",
    "63": "ਤਰੇਠ੍ਹ",
    "64": "ਚੌਠ੍ਹ",
    "65": "ਪੈਂਠ",
    "66": "ਛਿਆਠ",
    "67": "ਸਤਾਹਠ",
    "68": "ਅੱਠਾਠ",
    "69": "ਉਣੱਤਰ",
    "70": "ਸੱਤਰ",
    "71": "ਇਕ੍ਹੱਤਰ",
    "72": "ਬਹੱਤਰ",
    "73": "ਤਹੱਤਰ",
    "74": "ਚੌਹੱਤਰ",
    "75": "ਪੰਜੱਤਰ",
    "76": "ਛਿਹੱਤਰ",
    "77": "ਸਤੱਤਰ",
    "78": "ਅਠੱਤਰ",
    "79": "ਉਣਾਸੀ",
    "80": "ਅੱਸੀ",
    "81": "ਇਕਾਸੀ",
    "82": "ਬਿਆਸੀ",
    "83": "ਤਰਾਸੀ",
    "84": "ਚਰਾਸੀ",
    "85": "ਪੰਜਾਸੀ",
    "86": "ਛਿਆਸੀ",
    "87": "ਸਤਾਸੀ",
    "88": "ਅਠਾਸੀ",
    "89": "ਉਣਾਨਵੇਂ",
    "90": "ਨੱਬੇ",
    "91": "ਇਕਾਨਵੇਂ",
    "92": "ਬਿਆਨਵੇਂ",
    "93": "ਤਰਾਨਵੇਂ",
    "94": "ਚਰਾਨਵੇਂ",
    "95": "ਪਚਾਨਵੇਂ",
    "96": "ਛਿਆਨਵੇਂ",
    "97": "ਸਤਾਨਵੇਂ",
    "98": "ਅਠਾਨਵੇਂ",
    "99": "ਨਿੜਾਨਵੇਂ",
    "100": "ਸੌ",
    "1000": "ਹਜਾਰ",
    "100000": "ਲੱਖ",
    "10000000": "ਕਰੋੜ",
    "1000000000": "ਅਰਬ",
}  # Punjabi
| # --------------------------- num_to_word.py ------------------------------ | |
| """ | |
Convert numbers to words for Indian languages.

Use cases:
1) Speech-recognition pre-processing
2) Language-modelling data pre-processing
-------------------------
To add support for another Indian language, extend the tables in the
indic_num.py section (supported_lang, all_num and num_dict).
| """ | |
def language_specific_exception(words, lang, combiner):
    """Apply per-language wording fixes to an already spelled-out number.

    Only Marathi ("mr"), Gujarati ("gu"), Telugu ("te") and Kannada ("kn")
    are adjusted; for every other language *words* is returned unchanged.

    :param words: number in words, as assembled by ``num_to_word``
    :param lang: lower-case language code
    :param combiner: string that joins a count to its place-value word
                     (e.g. the " " in "एक शे")
    :return: the adjusted string
    """

    def occurs_at_end(piece):
        # Reads the *current* value of ``words``, which the loops below rebind.
        return words.endswith(piece)

    if lang == "mr":
        # "one hundred" has its own word in Marathi.
        words = words.replace("एक" + combiner + "शे", "शंभर")
    elif lang == "gu":
        # "two hundred" has its own word in Gujarati.
        words = words.replace("બે" + combiner + "સો", "બસ્સો")
    elif lang == "te":
        # Keys without "+" are used for exactly one unit of the place value,
        # keys with "+" when the place-value word ends the phrase.
        exception_dict = {
            "1": "ఒక",
            "100": "వంద",
            "100+": "వందలు",
            "1000": "వెయ్యి",
            "1000+": "వేలు",
            "100000": "లక్ష",
            "100000+": "లక్షలు",
            "10000000": "కోటి",
            "10000000+": "కోట్లు",
        }
        for test in ("100", "1000", "100000", "10000000"):
            test_word = num_dict["te"][test]
            match = num_dict["te"]["1"] + combiner + test_word
            # for numbers like : 100, 1000, 100000
            if words == match:
                return exception_dict[test]
            if occurs_at_end(test_word):
                # for numbers like : 200, 4000, 800000
                words = words.replace(test_word, exception_dict[test + "+"])
            else:
                # for numbers like : 105, 1076, 123993
                # (the phrase does not end with the place-value word, so it
                # cannot end with ``match`` either -- no extra check needed)
                replacement = exception_dict["1"] + combiner + exception_dict[test]
                words = words.replace(match, replacement)
        # Exception case for 101...199
        special_case = "ఒక" + combiner + "వంద"
        words = words.replace(special_case, "నూట")
    elif lang == "kn":
        # special case for 100
        if words == ("ಒಂದು" + combiner + "ನೂರ"):
            return "ನೂರು"
        # Place-value word as stored in the table -> form used when it is the
        # last word of the phrase.
        exception_dict = {
            "ನೂರ": "ನೂರು",
            "ಸಾವಿರದ": "ಸಾವಿರ",
            "ಲಕ್ಷದ": "ಲಕ್ಷ",
            "ಕೋಟಿಯ": "ಕೋಟಿ",
        }
        for expt, fixed in exception_dict.items():
            if occurs_at_end(expt):
                words = words.replace(expt, fixed)
    return words
def num_to_word(num, lang, separator=", ", combiner=" "):
    """Spell out a non-negative whole number in one of the supported languages.

    Numbers of up to nine digits are read positionally with the Indian
    grouping (hundred, thousand, lakh, crore); for example ``1234`` in "en"
    gives ``"one-thousand, two-hundred, thirty-four"``.  Longer digit strings
    are read out two digits at a time instead.

    :param num: int or str; digits may be ASCII or any Unicode decimal digits.
                "," and spaces inside the value are ignored.
    :param lang: language code from ``supported_lang`` (case-insensitive)
    :param separator: placed between groups,
                      i.e. separator = '-' --> 'two hundred-sixty'
    :param combiner: joins a count to its place value,
                     i.e. combiner = '-' --> 'two-hundred sixty'.
                     For "en" the default " " is replaced by "-".
    :return: the number in words.  Input that does not consist solely of
             decimal digits (e.g. "16th", "-5", "1.5") is returned as-is,
             minus any "," and spaces.
    :raises AssertionError: if *lang* is not supported
    """
    import unicodedata  # only needed here, so imported locally

    lang = lang.lower()
    if lang not in supported_lang:
        # Explicit raise instead of ``assert`` so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        raise AssertionError("Language not supported")
    words_for = num_dict[lang]

    # dash default combiner for english-india
    if lang == "en" and combiner == " ":
        combiner = "-"

    # Remove punctuations from numbers
    num = str(num).replace(",", "").replace(" ", "")
    # return word as it is if not number
    if not num.isdecimal():
        return num

    # isdecimal() guarantees every character has a decimal value, so this maps
    # digits of any script to ASCII.
    num = "".join(str(unicodedata.decimal(char)) for char in num).lstrip("0")

    def lookup(key):
        # Table entries may carry stray leading/trailing spaces; trim them so
        # they cannot leak into the result.
        return words_for[key].strip()

    def pair_words(pair):
        """Words for a one- or two-digit chunk, spelling out leading zeros."""
        if not pair:
            return []
        if len(pair) == 1:
            return [lookup(pair)]
        if pair[0] == "0":
            return [lookup("0"), lookup(pair[1])]
        return [lookup(pair)]

    def positional_words(digits):
        """Groups ("<count><combiner><place value>") for up to nine digits."""
        groups = []
        digits = digits.lstrip("0")
        while len(digits) > 3:
            # Above the hundreds, Indian grouping takes one or two digits.
            head_len = len(digits) % 2 + 1
            base = str(10 ** (len(digits) // 2 * 2 - 1))
            groups.append(lookup(digits[:head_len]) + combiner + lookup(base))
            digits = digits[head_len:].lstrip("0")
        if len(digits) == 3:
            groups.append(lookup(digits[0]) + combiner + lookup("100"))
            digits = digits[1:].lstrip("0")
        if digits:
            groups.append(lookup(digits))
        return groups

    if not num:
        groups = [lookup("0")]
    elif len(num) <= 9:
        groups = positional_words(num)
    else:
        # Too long for the place-value words: read the digits out in pairs.
        groups = []
        for start in range(0, len(num), 2):
            groups.extend(pair_words(num[start:start + 2]))

    # Joining only the non-empty groups avoids a dangling separator, so no
    # character-set based strip() is needed afterwards.
    output = separator.join(groups)
    return language_specific_exception(output, lang, combiner)
| # --------------------------------- num_to_word_on_a_sent --------------------------------- | |
def is_digit(word, digit_pattern):
    """Search *word* for *digit_pattern*.

    :param word: text to scan
    :param digit_pattern: regular expression describing a digit
    :return: the first ``re.Match`` found, or ``None`` when nothing matches
    """
    found = re.compile(digit_pattern).search(word)
    return found
def remove_punct(sent):
    """Blank out ASCII punctuation in *sent* and collapse whitespace.

    Every character of ``string.punctuation`` becomes a space; the text is
    then split on whitespace and re-joined with single spaces.  Punctuation
    outside ``string.punctuation`` is left untouched.

    :param sent: input sentence
    :return: cleaned sentence
    """
    to_space = str.maketrans(string.punctuation, " " * len(string.punctuation))
    pieces = sent.translate(to_space).split()
    return " ".join(pieces)
def normalize_nums(text, lang):
    """Replace the numbers in *text* with their spelled-out form.

    Hyphens are first padded with spaces, the text is split on whitespace,
    and every token containing at least one ASCII digit (0-9) is passed to
    ``num_to_word`` with ``separator=" "`` and ``combiner=" "``.  The tokens
    are then re-joined with single spaces, so runs of whitespace collapse.

    NOTE: tokens written only in native-script digits are not detected here,
    and the language check is case-sensitive.

    :param text: input sentence
    :param lang: language code, e.g. 'en' or 'hi'
    :return: the normalised sentence; *text* is returned untouched when
             *lang* is not in ``supported_lang``
    """
    if lang not in supported_lang:
        return text

    digit_pattern = "[0123456789]"
    tokens = text.replace("-", " - ").split()  # space separate hyphen
    # Single pass: decide per token instead of building an index list and
    # searching it again for every token.
    spelled = [
        num_to_word(token, lang, separator=" ", combiner=" ")
        if is_digit(token, digit_pattern)
        else token
        for token in tokens
    ]
    return " ".join(spelled)
if __name__ == "__main__":
    # Demo: spell out the number inside a Hindi sentence.
    sample_sentence = "रीटा के पास 16 बिल्लियाँ हैं।"
    print(normalize_nums(sample_sentence, "hi"))