Spaces:

giyos1212
/

Help_Me_3

Paused

File size: 6,521 Bytes

98b6d67

# app/utils/district_matcher.py
"""

District Matcher - Noto'g'ri yozilgan tuman nomlarini topish

Fuzzy matching ishlatiladi

"""

import logging
from typing import Optional
from difflib import SequenceMatcher

logger = logging.getLogger(__name__)

# Tashkent districts, keyed by district ID.
# Each value lists the canonical spelling first, followed by common
# misspellings / transliterations. Entries within a list are unique:
# a repeated spelling can never change the best match, it only costs
# extra comparisons.
DISTRICT_VARIANTS = {
    "chilonzor": [
        "chilonzor", "chilanazor", "chillonzor", "chilanzor", "chilinzor",
        "chilanzar", "chilinzar", "chilonzar", "chilanzur",
    ],
    "yunusobod": [
        "yunusobod", "yunusabad", "yunusabod", "iunusobod",
        "yunus obod", "yunus abad", "yunusabat", "iunusabad",
    ],
    "mirzo_ulugbek": [
        "mirzo ulugbek", "mirzo ulug'bek", "mirzo ulugʻbek", "mirza ulugbek",
        "ttg", "mirzo ulug bek", "mirza ulug'bek", "ulugbek", "ulug'bek",
    ],
    "shayxontohur": [
        "shayxontohur", "shayxontoxur", "shayhontohur", "shayxantoxur",
        "sayxontohur", "sheyhontoxur", "shayxon tohur", "shayxon toxur",
        "shayx tohur", "shayx toxur",
    ],
    "yakkasaroy": [
        "yakkasaroy", "yakkasaray", "yakasaroy", "yakkosaroy", "iakkasaroy",
        "yakka saroy", "yakka saray", "yakkasarai",
    ],
    "mirobod": [
        "mirobod", "mirabod", "mirobad",
        "mir obod", "mir abad", "mirabat",
    ],
    "yashnobod": [
        "yashnobod", "yashnabad", "yeshnobod", "yashnabod", "yashnobad",
        "yash nobod", "yash nabad", "yashnabat",
    ],
    "sergeli": [
        "sergeli", "sergili", "sirgeli", "sergeley", "sirgili",
        "sergel", "sergil",
    ],
    "bektemir": [
        "bektemir", "bektemar", "bektimir", "bektamir", "bektemur",
        "bek temir", "bek tamir", "bektamur",
    ],
    "uchtepa": [
        "uchtepa", "uchtepe", "uchtipi", "uchtepo",
        "uch tepa", "uch tepe", "uchtipa",
    ],
    "olmazor": [
        "olmazor", "almazor", "olmozor",
        "olma zor", "alma zor", "olmazar",
    ],
    "yangihayot": [
        "yangihayot", "yangihayat", "yangi hayot", "yangixayot", "yangihoyot",
        "yangi xayot", "yangi hayat",
    ],
}


# Apostrophe look-alikes that show up in Uzbek Latin text (as in "Ulugʻbek"),
# all folded to the plain ASCII apostrophe so they compare equal.
_APOSTROPHE_TABLE = str.maketrans(
    dict.fromkeys("\u02bb\u02bc\u2018\u2019\u0060\u00b4", "'")
)


def normalize_text(text: str) -> str:
    """Normalize user text before district matching.

    Steps, in order: lowercase and trim, collapse every whitespace run to a
    single space, fold typographic apostrophes to ``'``, then remove the
    substrings ``" tumani"`` and ``" tuman"`` ("district") wherever they occur.

    Args:
        text: Raw input text. Any falsy value (``""``, ``None``) is accepted.

    Returns:
        The normalized text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    # Lowercase and drop leading/trailing whitespace.
    text = text.lower().strip()

    # Collapse runs of whitespace into single spaces.
    text = " ".join(text.split())

    # Unify apostrophe variants so "ulug‘bek" and "ulug'bek" are the same text.
    text = text.translate(_APOSTROPHE_TABLE)

    # Drop the word for "district"; the longer form must be removed first.
    text = text.replace(" tumani", "").replace(" tuman", "")

    return text


def similarity_score(str1: str, str2: str) -> float:
    """Measure how alike two strings are.

    Delegates to ``difflib.SequenceMatcher`` with no junk filter and returns
    its ``ratio()``.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        A float between 0.0 and 1.0, where 1.0 means the strings are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=str1, b=str2)
    return matcher.ratio()


def find_district_fuzzy(user_text: str, threshold: float = 0.5) -> Optional[str]:
    """Resolve a possibly misspelled district name to a district ID.

    The input is normalized with ``normalize_text`` and compared against every
    spelling in ``DISTRICT_VARIANTS``. Each variant is scored as the highest of:

    * the ``similarity_score`` of the input and the variant;
    * 0.85 if either string contains the other;
    * 0.75 if either string starts with the first four characters of the other.

    The district owning the best-scoring variant wins; on equal scores the
    district listed first in ``DISTRICT_VARIANTS`` is kept.

    Args:
        user_text: Text typed by the user (e.g. "chillonzor" or "yunusabad").
        threshold: Minimum score (0.0 - 1.0) the best match must reach.

    Returns:
        The district ID (e.g. "chilonzor"), or ``None`` when the input is
        empty, shorter than three characters after normalization, scores
        below ``threshold``, or an unexpected error occurs.
    """
    try:
        if not user_text:
            return None

        normalized_input = normalize_text(user_text)
        logger.info(
            "🏙️ Tuman qidirilmoqda: '%s' → '%s'", user_text, normalized_input
        )

        if len(normalized_input) < 3:
            logger.warning("⚠️ Matn juda qisqa")
            return None

        # Loop-invariant: the input's leading characters used by the prefix bonus.
        input_prefix = normalized_input[:4]

        best_match = None
        best_score = 0.0

        for district_id, variants in DISTRICT_VARIANTS.items():
            for variant in variants:
                # 1. Plain fuzzy similarity.
                score = similarity_score(normalized_input, variant)

                # 2. Containment in either direction lifts the score to 0.85.
                if normalized_input in variant or variant in normalized_input:
                    score = max(score, 0.85)

                # 3. A shared word start lifts the score to 0.75.
                if variant.startswith(input_prefix) or normalized_input.startswith(variant[:4]):
                    score = max(score, 0.75)

                if score > best_score:
                    best_score = score
                    best_match = district_id

            # 1.0 is the maximum possible score and a match is only replaced by
            # a strictly higher one, so the result is already final.
            if best_score >= 1.0:
                break

        if best_score >= threshold:
            logger.info("✅ Tuman topildi: '%s' (score: %.2f)", best_match, best_score)
            return best_match

        logger.warning(
            "⚠️ Tuman topilmadi (best score: %.2f < %s)", best_score, threshold
        )
        return None

    except Exception as e:
        # Best-effort boundary: never raise to the caller, but keep the
        # traceback in the log so failures can be diagnosed.
        logger.exception("❌ District matching xatoligi: %s", e)
        return None


# Human-readable district names keyed by district ID.
_DISTRICT_DISPLAY_NAMES = {
    "chilonzor": "Chilonzor tumani",
    "yunusobod": "Yunusobod tumani",
    "mirzo_ulugbek": "Mirzo Ulug'bek tumani",
    "shayxontohur": "Shayxontohur tumani",
    "yakkasaroy": "Yakkasaroy tumani",
    "mirobod": "Mirobod tumani",
    "yashnobod": "Yashnobod tumani",
    "sergeli": "Sergeli tumani",
    "bektemir": "Bektemir tumani",
    "uchtepa": "Uchtepa tumani",
    "olmazor": "Olmazor tumani",
    "yangihayot": "Yangihayot tumani",
}


def get_district_display_name(district_id: str) -> str:
    """Translate a district ID into its full display name.

    Args:
        district_id: District ID such as "chilonzor".

    Returns:
        The display name (e.g. "Chilonzor tumani"). An ID that is not in the
        table is returned unchanged.
    """
    display_name = _DISTRICT_DISPLAY_NAMES.get(district_id)
    # Unknown IDs fall back to the ID itself.
    return district_id if display_name is None else display_name


def list_all_districts_text() -> str:
    """Return every district name as one comma-separated string (for the AI).

    Returns:
        "Chilonzor, Yunusobod, Mirzo Ulug'bek, ..." - the twelve district
        names in a fixed order, separated by ", ".
    """
    names = (
        "Chilonzor",
        "Yunusobod",
        "Mirzo Ulug'bek",
        "Shayxontohur",
        "Yakkasaroy",
        "Mirobod",
        "Yashnobod",
        "Sergeli",
        "Bektemir",
        "Uchtepa",
        "Olmazor",
        "Yangihayot",
    )
    separator = ", "
    return separator.join(names)