File size: 6,521 Bytes
98b6d67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# app/utils/district_matcher.py
"""

District Matcher - Noto'g'ri yozilgan tuman nomlarini topish

Fuzzy matching ishlatiladi

"""

import logging
from typing import Optional
from difflib import SequenceMatcher

logger = logging.getLogger(__name__)

# Tashkent districts, keyed by district ID.
# Each value lists the spellings (canonical first, then common misspellings,
# transliterations and space-separated forms) that the fuzzy matcher compares
# user input against. Every spelling appears once per district: repeated
# entries would only cost extra comparisons without changing any result.
DISTRICT_VARIANTS = {
    "chilonzor": [
        "chilonzor", "chilanazor", "chillonzor", "chilanzor", "chilinzor",
        "chilanzar", "chilinzar", "chilonzar", "chilanzur"
    ],
    "yunusobod": [
        "yunusobod", "yunusabad", "yunusabod", "iunusobod",
        "yunus obod", "yunus abad", "yunusabat", "iunusabad"
    ],
    "mirzo_ulugbek": [
        # NOTE(review): "ttg" looks like a local abbreviation for this
        # district — confirm it is intentional.
        "mirzo ulugbek", "mirzo ulug'bek", "mirzo ulugʻbek", "mirza ulugbek",
        "ttg", "mirzo ulug bek", "mirza ulug'bek", "ulugbek", "ulug'bek"
    ],
    "shayxontohur": [
        "shayxontohur", "shayxontoxur", "shayhontohur", "shayxantoxur",
        "sayxontohur", "sheyhontoxur", "shayxon tohur", "shayxon toxur",
        "shayx tohur", "shayx toxur"
    ],
    "yakkasaroy": [
        "yakkasaroy", "yakkasaray", "yakasaroy", "yakkosaroy", "iakkasaroy",
        "yakka saroy", "yakka saray", "yakkasarai"
    ],
    "mirobod": [
        "mirobod", "mirabod", "mirobad",
        "mir obod", "mir abad", "mirabat"
    ],
    "yashnobod": [
        "yashnobod", "yashnabad", "yeshnobod", "yashnabod", "yashnobad",
        "yash nobod", "yash nabad", "yashnabat"
    ],
    "sergeli": [
        "sergeli", "sergili", "sirgeli", "sergeley", "sirgili",
        "sergel", "sergil"
    ],
    "bektemir": [
        "bektemir", "bektemar", "bektimir", "bektamir", "bektemur",
        "bek temir", "bek tamir", "bektamur"
    ],
    "uchtepa": [
        "uchtepa", "uchtepe", "uchtipi", "uchtepo",
        "uch tepa", "uch tepe", "uchtipa"
    ],
    "olmazor": [
        "olmazor", "almazor", "olmozor",
        "olma zor", "alma zor", "olmazar"
    ],
    "yangihayot": [
        "yangihayot", "yangihayat", "yangi hayot", "yangixayot", "yangihoyot",
        "yangi xayot", "yangi hayat"
    ]
}


def normalize_text(text: str) -> str:
    """
    Normalize free-form text before it is compared with district variants.

    The text is lower-cased, trimmed, runs of whitespace are collapsed into a
    single space, and the sequences " tumani" and " tuman" are removed.

    The removal is a plain substring replacement, so it also applies when the
    sequence is followed by further characters (e.g. "x tumanida" -> "xda").

    Args:
        text: Raw input text. Any falsy value (empty string, None) is accepted.

    Returns:
        The normalized text, or "" when the input is falsy.
    """
    if not text:
        return ""

    # Lower-case, trim, then rebuild the string with single spaces only.
    cleaned = " ".join(text.lower().strip().split())

    # Remove the longer form first so " tumani" does not leave a stray "i".
    for district_word in (" tumani", " tuman"):
        cleaned = cleaned.replace(district_word, "")

    return cleaned


def similarity_score(str1: str, str2: str) -> float:
    """
    Measure how similar two strings are.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The ``difflib.SequenceMatcher`` ratio of the two strings, a float in
        the range 0.0 - 1.0 where 1.0 means the strings are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=str1, b=str2)
    return matcher.ratio()


def find_district_fuzzy(user_text: str, threshold: float = 0.5) -> Optional[str]:
    """
    Resolve a possibly misspelled district name to a district ID.

    The input is normalized with ``normalize_text`` and compared against every
    variant in ``DISTRICT_VARIANTS``. A variant's score is the highest of:

    * the ``similarity_score`` of the input and the variant;
    * 0.85 when one string contains the other;
    * 0.75 when one string starts with the first four characters of the other.

    The district of the first variant that reaches the highest score wins.

    Args:
        user_text: Text entered by the user (e.g. "chillonzor" or "yunusabad").
        threshold: Minimum score (0.0 - 1.0) the best match must reach.

    Returns:
        The district ID (e.g. "chilonzor"), or None when the input is empty,
        shorter than three characters after normalization, no variant reaches
        the threshold, or an unexpected error occurs (the error is logged).
    """
    try:
        if not user_text:
            return None

        query = normalize_text(user_text)
        logger.info(f"🏙️ Tuman qidirilmoqda: '{user_text}' → '{query}'")

        # Very short inputs would match almost any variant as a substring.
        if len(query) < 3:
            logger.warning("⚠️ Matn juda qisqa")
            return None

        query_head = query[:4]
        top_district = None
        top_score = 0.0

        for district_id, variants in DISTRICT_VARIANTS.items():
            for variant in variants:
                # Start from the raw fuzzy ratio, then add the fixed floors
                # granted by the containment and prefix heuristics.
                candidates = [similarity_score(query, variant)]

                if query in variant or variant in query:
                    candidates.append(0.85)

                if variant.startswith(query_head) or query.startswith(variant[:4]):
                    candidates.append(0.75)

                score = max(candidates)

                # Strict comparison: on a tie the earlier variant is kept.
                if score > top_score:
                    top_score, top_district = score, district_id

        if not top_score >= threshold:
            logger.warning(f"⚠️ Tuman topilmadi (best score: {top_score:.2f} < {threshold})")
            return None

        logger.info(f"✅ Tuman topildi: '{top_district}' (score: {top_score:.2f})")
        return top_district

    except Exception as e:
        # Best-effort lookup: any failure is reported as "no match".
        logger.error(f"❌ District matching xatoligi: {e}")
        return None


def get_district_display_name(district_id: str) -> str:
    """
    Translate a district ID into its full display name.

    Args:
        district_id: District ID such as "chilonzor".

    Returns:
        The display name (e.g. "Chilonzor tumani"). An ID that is not in the
        table is returned unchanged.
    """
    display_names = dict(
        chilonzor="Chilonzor tumani",
        yunusobod="Yunusobod tumani",
        mirzo_ulugbek="Mirzo Ulug'bek tumani",
        shayxontohur="Shayxontohur tumani",
        yakkasaroy="Yakkasaroy tumani",
        mirobod="Mirobod tumani",
        yashnobod="Yashnobod tumani",
        sergeli="Sergeli tumani",
        bektemir="Bektemir tumani",
        uchtepa="Uchtepa tumani",
        olmazor="Olmazor tumani",
        yangihayot="Yangihayot tumani",
    )

    if district_id in display_names:
        return display_names[district_id]

    # Unknown IDs fall through untouched.
    return district_id


def list_all_districts_text() -> str:
    """
    Return the names of all districts as one comma-separated string.

    The module's original docstring notes that this text is intended for the
    AI.

    Returns:
        "Chilonzor, Yunusobod, Mirzo Ulug'bek, ..." covering all twelve
        districts.
    """
    names = (
        "Chilonzor",
        "Yunusobod",
        "Mirzo Ulug'bek",
        "Shayxontohur",
        "Yakkasaroy",
        "Mirobod",
        "Yashnobod",
        "Sergeli",
        "Bektemir",
        "Uchtepa",
        "Olmazor",
        "Yangihayot",
    )
    separator = ", "
    return separator.join(names)