Help_Me_3 / app /utils /district_matcher.py
giyos1212's picture
Upload 72 files
98b6d67 verified
# app/utils/district_matcher.py
"""
District Matcher - Noto'g'ri yozilgan tuman nomlarini topish
Fuzzy matching ishlatiladi
"""
import logging
from typing import Optional
from difflib import SequenceMatcher
logger = logging.getLogger(__name__)
# Tashkent city districts, keyed by district ID.
# Each value lists the spellings accepted for that district: the canonical
# name first, then common misspellings and space-separated forms.
# Every spelling appears once per district; repeated entries would only be
# re-scored by the matcher without ever changing its result.
DISTRICT_VARIANTS = {
    "chilonzor": [
        "chilonzor", "chilanazor", "chillonzor", "chilanzor", "chilinzor",
        "chilanzar", "chilinzar", "chilonzar", "chilanzur",
    ],
    "yunusobod": [
        "yunusobod", "yunusabad", "yunusabod", "iunusobod",
        "yunus obod", "yunus abad", "yunusabat", "iunusabad",
    ],
    "mirzo_ulugbek": [
        "mirzo ulugbek", "mirzo ulug'bek", "mirzo ulugʻbek", "mirza ulugbek",
        # NOTE(review): "ttg" is not a spelling of the name; presumably a
        # local abbreviation for the area - confirm before relying on it.
        "ttg", "mirzo ulug bek", "mirza ulug'bek", "ulugbek", "ulug'bek",
    ],
    "shayxontohur": [
        "shayxontohur", "shayxontoxur", "shayhontohur", "shayxantoxur",
        "sayxontohur", "sheyhontoxur", "shayxon tohur", "shayxon toxur",
        "shayx tohur", "shayx toxur",
    ],
    "yakkasaroy": [
        "yakkasaroy", "yakkasaray", "yakasaroy", "yakkosaroy", "iakkasaroy",
        "yakka saroy", "yakka saray", "yakkasarai",
    ],
    "mirobod": [
        "mirobod", "mirabod", "mirobad",
        "mir obod", "mir abad", "mirabat",
    ],
    "yashnobod": [
        "yashnobod", "yashnabad", "yeshnobod", "yashnabod", "yashnobad",
        "yash nobod", "yash nabad", "yashnabat",
    ],
    "sergeli": [
        "sergeli", "sergili", "sirgeli", "sergeley", "sirgili",
        "sergel", "sergil",
    ],
    "bektemir": [
        "bektemir", "bektemar", "bektimir", "bektamir", "bektemur",
        "bek temir", "bek tamir", "bektamur",
    ],
    "uchtepa": [
        "uchtepa", "uchtepe", "uchtipi", "uchtepo",
        "uch tepa", "uch tepe", "uchtipa",
    ],
    "olmazor": [
        "olmazor", "almazor", "olmozor",
        "olma zor", "alma zor", "olmazar",
    ],
    "yangihayot": [
        "yangihayot", "yangihayat", "yangi hayot", "yangixayot", "yangihoyot",
        "yangi xayot", "yangi hayat",
    ],
}
def normalize_text(text: str) -> str:
    """
    Normalize raw user text before it is compared with district spellings.

    The text is lower-cased, leading/trailing whitespace is dropped, every
    run of whitespace is collapsed to a single space, and the substrings
    " tumani" and " tuman" ("district") are removed wherever they occur.

    Args:
        text: Original text; any falsy value (None, "") is accepted.

    Returns:
        The normalized text, or "" when the input is falsy.
    """
    if not text:
        return ""

    # split() without arguments both trims the ends and collapses inner runs.
    cleaned = " ".join(text.lower().split())

    # Longer marker first, so " tumani" is not left behind as a stray "i".
    for marker in (" tumani", " tuman"):
        cleaned = cleaned.replace(marker, "")

    return cleaned
def similarity_score(str1: str, str2: str) -> float:
    """
    Measure how alike two strings are using difflib's SequenceMatcher.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The matcher's ratio, from 0.0 (nothing in common) to 1.0 (identical).
    """
    matcher = SequenceMatcher(a=str1, b=str2)
    return matcher.ratio()
def _score_variant(normalized_input: str, variant: str) -> float:
    """Score one known spelling against the normalized input (0.0 - 1.0)."""
    score = similarity_score(normalized_input, variant)

    # Containment in either direction lifts the score to at least 0.85, so a
    # district name embedded in a longer phrase (or a fragment of a name)
    # still counts as a strong match.
    if normalized_input in variant or variant in normalized_input:
        score = max(score, 0.85)

    # Sharing the first (up to) four characters lifts the score to at least
    # 0.75, which tolerates misspellings near the end of the word.
    if variant.startswith(normalized_input[:4]) or normalized_input.startswith(variant[:4]):
        score = max(score, 0.75)

    return score


def find_district_fuzzy(user_text: str, threshold: float = 0.5) -> Optional[str]:
    """
    Find the district a (possibly misspelled) piece of user text refers to.

    The text is normalized with normalize_text() and scored against every
    spelling in DISTRICT_VARIANTS. The district owning the highest-scoring
    spelling wins; on equal scores the district listed first in
    DISTRICT_VARIANTS is kept, because only a strictly higher score replaces
    the current best.

    Args:
        user_text: Text entered by the user (e.g. "chillonzor" or "yunusabad").
        threshold: Minimum score (0.0 - 1.0) the best candidate must reach.

    Returns:
        The district ID (e.g. "chilonzor"), or None when the text is empty,
        shorter than 3 characters after normalization, nothing reaches the
        threshold, or an unexpected error occurs (the error is logged with
        its traceback and never propagated).
    """
    try:
        if not user_text:
            return None

        normalized_input = normalize_text(user_text)
        logger.info("🏙️ Tuman qidirilmoqda: '%s' → '%s'", user_text, normalized_input)

        # Very short inputs are rejected before any scoring is attempted.
        if len(normalized_input) < 3:
            logger.warning("⚠️ Matn juda qisqa")
            return None

        best_match = None
        best_score = 0.0
        for district_id, variants in DISTRICT_VARIANTS.items():
            for variant in variants:
                score = _score_variant(normalized_input, variant)
                if score > best_score:
                    best_score = score
                    best_match = district_id
            # 1.0 is the maximum possible score and only a strictly higher
            # score could replace it, so the rest of the scan is pointless.
            if best_score >= 1.0:
                break

        # The None guard keeps a non-positive threshold from reporting a
        # "found" district when no spelling scored above zero.
        if best_match is not None and best_score >= threshold:
            logger.info("✅ Tuman topildi: '%s' (score: %.2f)", best_match, best_score)
            return best_match

        logger.warning("⚠️ Tuman topilmadi (best score: %.2f < %s)", best_score, threshold)
        return None
    except Exception as e:
        # Best-effort lookup: callers get None instead of an exception, but
        # the traceback is kept in the log so the failure can be diagnosed.
        logger.exception("❌ District matching xatoligi: %s", e)
        return None
def get_district_display_name(district_id: str) -> str:
    """
    Translate a district ID into its full display name.

    Args:
        district_id: District ID such as "chilonzor".

    Returns:
        The display name (e.g. "Chilonzor tumani"); an ID that is not in the
        table is returned unchanged.
    """
    display_names = dict(
        chilonzor="Chilonzor tumani",
        yunusobod="Yunusobod tumani",
        mirzo_ulugbek="Mirzo Ulug'bek tumani",
        shayxontohur="Shayxontohur tumani",
        yakkasaroy="Yakkasaroy tumani",
        mirobod="Mirobod tumani",
        yashnobod="Yashnobod tumani",
        sergeli="Sergeli tumani",
        bektemir="Bektemir tumani",
        uchtepa="Uchtepa tumani",
        olmazor="Olmazor tumani",
        yangihayot="Yangihayot tumani",
    )
    if district_id in display_names:
        return display_names[district_id]
    # Unknown IDs fall through untouched rather than raising.
    return district_id
def list_all_districts_text() -> str:
    """
    Return the names of all districts as one comma-separated string.

    Returns:
        "Chilonzor, Yunusobod, Mirzo Ulug'bek, ..." - twelve names joined
        with ", ".
    """
    names = (
        "Chilonzor",
        "Yunusobod",
        "Mirzo Ulug'bek",
        "Shayxontohur",
        "Yakkasaroy",
        "Mirobod",
        "Yashnobod",
        "Sergeli",
        "Bektemir",
        "Uchtepa",
        "Olmazor",
        "Yangihayot",
    )
    separator = ", "
    return separator.join(names)