Spaces:

giyos1212
/

Help_Me_3

Paused

App Files Files Community

Help_Me_3 / app /utils /district_matcher.py

giyos1212

Upload 72 files

98b6d67 verified 26 days ago

raw

history blame contribute delete

6.52 kB

	# app/utils/district_matcher.py
	"""
	District Matcher - Noto'g'ri yozilgan tuman nomlarini topish
	Fuzzy matching ishlatiladi
	"""

	import logging
	from typing import Optional
	from difflib import SequenceMatcher

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	# Tashkent districts: district ID -> known spellings and misspellings.
	# Entries are lowercase, matching the lowercased text they are compared with.
	# Key order matters: when two districts reach the same score, the one listed
	# first wins. Each spelling appears only once per district.
	DISTRICT_VARIANTS = {
	    "chilonzor": [
	        "chilonzor", "chilanazor", "chillonzor", "chilanzor", "chilinzor",
	        "chilanzar", "chilinzar", "chilonzar", "chilanzur",
	    ],
	    "yunusobod": [
	        "yunusobod", "yunusabad", "yunusabod", "iunusobod",
	        "yunus obod", "yunus abad", "yunusabat", "iunusabad",
	    ],
	    "mirzo_ulugbek": [
	        "mirzo ulugbek", "mirzo ulug'bek", "mirzo ulugʻbek", "mirza ulugbek",
	        "ttg", "mirzo ulug bek", "mirza ulug'bek", "ulugbek", "ulug'bek",
	    ],
	    "shayxontohur": [
	        "shayxontohur", "shayxontoxur", "shayhontohur", "shayxantoxur",
	        "sayxontohur", "sheyhontoxur", "shayxon tohur", "shayxon toxur",
	        "shayx tohur", "shayx toxur",
	    ],
	    "yakkasaroy": [
	        "yakkasaroy", "yakkasaray", "yakasaroy", "yakkosaroy", "iakkasaroy",
	        "yakka saroy", "yakka saray", "yakkasarai",
	    ],
	    "mirobod": [
	        "mirobod", "mirabod", "mirobad",
	        "mir obod", "mir abad", "mirabat",
	    ],
	    "yashnobod": [
	        "yashnobod", "yashnabad", "yeshnobod", "yashnabod", "yashnobad",
	        "yash nobod", "yash nabad", "yashnabat",
	    ],
	    "sergeli": [
	        "sergeli", "sergili", "sirgeli", "sergeley", "sirgili",
	        "sergel", "sergil",
	    ],
	    "bektemir": [
	        "bektemir", "bektemar", "bektimir", "bektamir", "bektemur",
	        "bek temir", "bek tamir", "bektamur",
	    ],
	    "uchtepa": [
	        "uchtepa", "uchtepe", "uchtipi", "uchtepo",
	        "uch tepa", "uch tepe", "uchtipa",
	    ],
	    "olmazor": [
	        "olmazor", "almazor", "olmozor",
	        "olma zor", "alma zor", "olmazar",
	    ],
	    "yangihayot": [
	        "yangihayot", "yangihayat", "yangi hayot", "yangixayot", "yangihoyot",
	        "yangi xayot", "yangi hayat",
	    ],
	}


	# Apostrophe look-alikes (U+02BB, U+02BC, U+2018, U+2019, backtick, U+00B4)
	# are all mapped to the plain ASCII apostrophe so that e.g. "ulug’bek" and
	# "ulug'bek" normalize to the same text.
	_APOSTROPHE_TABLE = str.maketrans(
	    dict.fromkeys("\u02bb\u02bc\u2018\u2019`\u00b4", "'")
	)


	def normalize_text(text: str) -> str:
	    """Normalize free-form text before it is compared with district names.

	    The text is lowercased, apostrophe look-alikes are replaced with the
	    ASCII apostrophe, leading/trailing whitespace is dropped, runs of
	    whitespace are collapsed to a single space, and the substrings
	    " tumani" / " tuman" (the word "district" preceded by a space) are
	    removed.

	    Args:
	        text: The raw text.

	    Returns:
	        The normalized text, or "" when ``text`` is empty or None.
	    """
	    if not text:
	        return ""

	    lowered = text.lower().translate(_APOSTROPHE_TABLE)

	    # split() with no arguments also discards leading/trailing whitespace,
	    # so no separate strip() is needed.
	    collapsed = " ".join(lowered.split())

	    # The longer form must be removed first; otherwise " tuman" would leave
	    # a dangling "i" behind.
	    return collapsed.replace(" tumani", "").replace(" tuman", "")


	def similarity_score(str1: str, str2: str) -> float:
	    """Measure how alike two strings are, on a scale from 0.0 to 1.0.

	    Args:
	        str1: The first string.
	        str2: The second string.

	    Returns:
	        The ``difflib.SequenceMatcher`` ratio of the two strings
	        (1.0 means the strings are identical).
	    """
	    matcher = SequenceMatcher(isjunk=None, a=str1, b=str2)
	    return matcher.ratio()


	def find_district_fuzzy(user_text: str, threshold: float = 0.5) -> Optional[str]:
	    """Resolve a possibly misspelled district name to a district ID.

	    The normalized input is scored against every spelling in
	    ``DISTRICT_VARIANTS``. Each score starts as the fuzzy similarity ratio
	    and is then raised to a floor when a cheaper signal fires:

	    * 0.85 when either string contains the other;
	    * 0.75 when either string starts with the first four characters of
	      the other.

	    The district owning the highest-scoring spelling wins. On equal scores
	    the district listed first in ``DISTRICT_VARIANTS`` is kept.

	    Args:
	        user_text: Text entered by the user, e.g. "chillonzor" or
	            "yunusabad".
	        threshold: Minimum score (0.0 - 1.0) required to accept a match.
	            Both floors above already exceed the default of 0.5.

	    Returns:
	        The district ID (e.g. "chilonzor"), or None when the input is
	        empty, shorter than three characters after normalization, nothing
	        reaches ``threshold``, or an unexpected error occurs.
	    """
	    try:
	        if not user_text:
	            return None

	        normalized_input = normalize_text(user_text)
	        logger.info(
	            "🏙️ Tuman qidirilmoqda: '%s' → '%s'", user_text, normalized_input
	        )

	        if len(normalized_input) < 3:
	            logger.warning("⚠️ Matn juda qisqa")
	            return None

	        # Loop-invariant: the input's prefix is the same for every variant.
	        input_prefix = normalized_input[:4]

	        best_match = None
	        best_score = 0.0

	        for district_id, variants in DISTRICT_VARIANTS.items():
	            for variant in variants:
	                # 1. Plain fuzzy similarity.
	                score = similarity_score(normalized_input, variant)

	                # 2. Substring containment raises the score to at least 0.85.
	                if normalized_input in variant or variant in normalized_input:
	                    score = max(score, 0.85)

	                # 3. A shared word start raises the score to at least 0.75.
	                if variant.startswith(input_prefix) or normalized_input.startswith(
	                    variant[:4]
	                ):
	                    score = max(score, 0.75)

	                # Strict ">" keeps the earliest candidate on ties.
	                if score > best_score:
	                    best_score = score
	                    best_match = district_id

	            # A perfect score cannot be beaten (replacement needs a strictly
	            # higher score), so the remaining districts need not be scanned.
	            if best_score >= 1.0:
	                break

	        # Require an actual candidate so that a non-positive threshold cannot
	        # report "found" with no district.
	        if best_match is not None and best_score >= threshold:
	            logger.info(
	                "✅ Tuman topildi: '%s' (score: %.2f)", best_match, best_score
	            )
	            return best_match

	        logger.warning(
	            "⚠️ Tuman topilmadi (best score: %.2f < %s)", best_score, threshold
	        )
	        return None

	    except Exception as e:
	        # Best-effort boundary: callers get None instead of an exception, but
	        # the traceback is kept in the log for diagnosis.
	        logger.exception("❌ District matching xatoligi: %s", e)
	        return None


	def get_district_display_name(district_id: str) -> str:
	    """Translate a district ID into its full display name.

	    Args:
	        district_id: A district ID such as "chilonzor".

	    Returns:
	        The display name (e.g. "Chilonzor tumani"). An ID that is not in
	        the table is returned unchanged.
	    """
	    display_names = dict(
	        chilonzor="Chilonzor tumani",
	        yunusobod="Yunusobod tumani",
	        mirzo_ulugbek="Mirzo Ulug'bek tumani",
	        shayxontohur="Shayxontohur tumani",
	        yakkasaroy="Yakkasaroy tumani",
	        mirobod="Mirobod tumani",
	        yashnobod="Yashnobod tumani",
	        sergeli="Sergeli tumani",
	        bektemir="Bektemir tumani",
	        uchtepa="Uchtepa tumani",
	        olmazor="Olmazor tumani",
	        yangihayot="Yangihayot tumani",
	    )

	    try:
	        return display_names[district_id]
	    except KeyError:
	        # Unknown IDs fall through as-is.
	        return district_id


	def list_all_districts_text() -> str:
	    """Build one comma-separated line naming every district (for the AI).

	    Returns:
	        "Chilonzor, Yunusobod, Mirzo Ulug'bek, ..."
	    """
	    separator = ", "
	    names = (
	        "Chilonzor",
	        "Yunusobod",
	        "Mirzo Ulug'bek",
	        "Shayxontohur",
	        "Yakkasaroy",
	        "Mirobod",
	        "Yashnobod",
	        "Sergeli",
	        "Bektemir",
	        "Uchtepa",
	        "Olmazor",
	        "Yangihayot",
	    )
	    return separator.join(names)