File size: 3,213 Bytes
bf0cb78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad769eb
bf0cb78
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import re

import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the listings table from a CSV file in the current working directory.
df_clean = pd.read_csv("df_clean.csv")


# Keep only the three columns the bot reads (title, price, city).
# The explicit .copy() makes `df` an independent frame, so the price
# assignment below does not write into a slice of `df_clean` (which triggers
# SettingWithCopyWarning on pandas versions without copy-on-write).
df = df_clean[['custom_title', 'listing_price_formatted_amount', 'location_reverse_geocode_city']].copy()

# Clean price column: values that cannot be parsed as numbers become NaN.
df['listing_price_formatted_amount'] = pd.to_numeric(df['listing_price_formatted_amount'], errors='coerce')


# The tokenizer and the seq2seq model are loaded from the same checkpoint.
# NOTE(review): neither object is referenced by the functions visible in this
# file - confirm whether this (import-time) load is still needed.
_ARAT5_CHECKPOINT = "UBC-NLP/AraT5-base"
tokenizer = AutoTokenizer.from_pretrained(_ARAT5_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_ARAT5_CHECKPOINT)


# Word-level lookup table: lowercase Latin-script token -> Arabic replacement.
# Several spellings may map to the same Arabic word. The trailing comments
# are English glosses of the Arabic values.
darija_map = {
    "bghit": "أريد",  # I want
    "dar": "منزل",  # house
    "f": "في",  # in
    "drahem": "مال",  # money
    "kra": "إيجار",  # rent
    "souma": "سعر",  # price
    "ta7t": "أقل من",  # less than
    "taht": "أقل من",  # less than
    "ghalya": "غالية",  # expensive
    "rkhisa": "رخيصة",  # cheap
    "wahran": "وهران",  # Oran
    "oran": "وهران",  # Oran
    "alger": "الجزائر",  # Algiers / Algeria
    "dzayer": "الجزائر",  # Algiers / Algeria
    "maison": "منزل",  # house
    "appartement": "شقة",  # apartment
    "appartmn": "شقة",  # apartment
    "habit": "سكن",  # housing
    "kayn": "يوجد",  # there is
    "chouf": "شاهد",  # look
    "dir": "قم",  # do (imperative)
    "loué": "إيجار",  # rent
    "men": "من",  # from
    "fi": "في",  # in
    "cha9a": "شقة",  # apartment
    "bda": "يبدأ",  # starts
    "3andi": "عندي",  # I have
    "nheb": "أريد",  # I want
    "kima": "مثل",  # like
    "tsoum": "تكلفة",  # cost
    "chwiya": "قليل",  # a little
    "bzaaf": "كثير",  # a lot
}

def translate_darija_to_standard(text, mapping=None):
    """Replace known tokens of *text* with their mapped equivalents.

    The text is lowercased and split on whitespace; each token is looked up
    in the mapping and the result is re-joined with single spaces. Tokens
    with no mapping are kept (lowercased).

    Punctuation attached to a token ("wahran," or "(dar)") is set aside for
    the lookup and re-attached around the translation, so such tokens are no
    longer left untranslated.

    Args:
        text: The user's message. ``None`` or an empty string yields ``""``.
        mapping: Optional token -> replacement dict. Defaults to the
            module-level ``darija_map``.

    Returns:
        The translated, space-joined string.
    """
    if not text:
        return ""

    vocabulary = darija_map if mapping is None else mapping
    # Latin and Arabic punctuation that may be attached to a word's edges.
    edge_punct = ".,;:!?\"'()[]{}«»،؛؟"

    translated = []
    for token in text.lower().split():
        # An exact match wins, so mapping keys that contain punctuation
        # still work.
        if token in vocabulary:
            translated.append(vocabulary[token])
            continue

        core = token.strip(edge_punct)
        if core and core in vocabulary:
            lead_len = len(token) - len(token.lstrip(edge_punct))
            prefix = token[:lead_len]
            suffix = token[lead_len + len(core):]
            translated.append(prefix + vocabulary[core] + suffix)
        else:
            translated.append(token)

    return " ".join(translated)

def search_properties(query, data=None, limit=3):
    """Return the first listings that match the city/price hints in *query*.

    City filter: if the query mentions "وهران" and/or "الجزائر", only rows
    whose city column contains one of the mentioned names are kept. When
    both are mentioned, a row matching either one is kept.

    Price filter: every "أقل من <number>" phrase is parsed and rows must be
    priced strictly below the smallest such number. The full number is read,
    so "أقل من 100000" is no longer mistaken for a 10000 ceiling. Rows with a
    NaN price never satisfy a price filter.

    Args:
        query: Query text in standard Arabic (already translated).
        data: Optional DataFrame with the columns
            'location_reverse_geocode_city' and
            'listing_price_formatted_amount'. Defaults to the module-level
            ``df``.
        limit: Maximum number of listings to return (default 3).

    Returns:
        A list of at most *limit* row dicts (``DataFrame.to_dict("records")``).
    """
    listings = df if data is None else data
    query = query or ""

    # City filters
    mentioned = [city for city in ("وهران", "الجزائر") if city in query]
    if mentioned:
        city_pattern = "|".join(re.escape(city) for city in mentioned)
        in_city = listings['location_reverse_geocode_city'].str.contains(city_pattern, na=False)
        listings = listings[in_city]

    # Price filtering. The Arabic phrase is escaped separately from the
    # regex syntax so the pattern stays readable.
    ceiling_pattern = re.escape("أقل من") + r"\s*(\d+)"
    ceilings = [int(number) for number in re.findall(ceiling_pattern, query)]
    if ceilings:
        # With several ceilings in one query, the strictest one applies.
        listings = listings[listings['listing_price_formatted_amount'] < min(ceilings)]

    return listings.head(limit).to_dict("records")


def generate_response(user_input):
    """Build the chatbot reply for one user message.

    The message is translated with ``translate_darija_to_standard``, passed
    to ``search_properties``, and the returned listings are formatted as a
    numbered text list.

    Args:
        user_input: Raw text typed by the user. ``None`` is treated as an
            empty message.

    Returns:
        The reply text: a numbered list of listings, or a fallback message
        when nothing matched.
    """
    std_query = translate_darija_to_standard(user_input or "")
    results = search_properties(std_query)

    if not results:
        return "🫤 Ma l9it walou f had condition... Jرب بكلمات خرا ولا soum akther."

    parts = [f"✅ Kayn {len(results)} 3roud li ykhdmouk:\n\n"]
    for i, prop in enumerate(results, 1):
        title = prop['custom_title']
        city = prop['location_reverse_geocode_city']

        # Prices that failed numeric parsing are NaN, and int(NaN) raises
        # ValueError, so show a placeholder ("unspecified") instead.
        raw_price = prop['listing_price_formatted_amount']
        if pd.isna(raw_price):
            price_text = "غير محدد"
        else:
            price_text = f"{int(raw_price)} دج"

        # NOTE(review): each entry ends with "\n   ", so the next entry is
        # rendered indented; kept as-is to preserve the existing output.
        parts.append(f"{i}. 🏠 {title}\n   💰 {price_text}\n   📍 {city}\n   ")
    parts.append("📩 T7eb nwarik kter?")

    return "".join(parts)

# Text-in / text-out Gradio UI around generate_response; it is launched as
# soon as this module is executed.
demo = gr.Interface(
    title="🤖 Bot Dziria pour l’immobilier",
    description="سقسي على الكرا بدارجة، و خوك هنا يجاوبك 😄",
    fn=generate_response,
    inputs="text",
    outputs="text",
    theme="default",
)
demo.launch()