# dziri_chatbot / app.py
# zakariadjafri's picture
# Update app.py
# ad769eb verified
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
# Load the listings export; the path is relative to the working directory.
df_clean = pd.read_csv("df_clean.csv")

# Keep only the three columns the bot filters on and displays.
# .copy() makes `df` an independent frame, so the column assignment below
# does not write into a slice of `df_clean` (avoids SettingWithCopyWarning).
df = df_clean[
    ['custom_title', 'listing_price_formatted_amount', 'location_reverse_geocode_city']
].copy()

# Clean price column: values that cannot be parsed as numbers become NaN.
df['listing_price_formatted_amount'] = pd.to_numeric(
    df['listing_price_formatted_amount'], errors='coerce'
)

# Tokenizer and seq2seq model are loaded once at import time.
# NOTE(review): neither is referenced by the functions visible in this file —
# confirm whether they are still needed before paying the load cost.
tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/AraT5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("UBC-NLP/AraT5-base")
# Word-level lexicon: lower-cased Darija / French tokens (Latin script) mapped
# to the Standard Arabic word or phrase that the search step looks for.
darija_map = {
    "bghit": "أريد",        # I want
    "dar": "منزل",          # house
    "f": "في",              # in
    "drahem": "مال",        # money
    "kra": "إيجار",         # rent
    "souma": "سعر",         # price
    "ta7t": "أقل من",       # less than
    "taht": "أقل من",       # less than (alternate spelling)
    "ghalya": "غالية",      # expensive
    "rkhisa": "رخيصة",      # cheap
    "wahran": "وهران",      # Oran
    "oran": "وهران",        # Oran (French spelling)
    "alger": "الجزائر",     # Algiers
    "dzayer": "الجزائر",    # Algiers
    "maison": "منزل",       # house
    "appartement": "شقة",   # apartment
    "appartmn": "شقة",      # apartment (abbreviated spelling)
    "habit": "سكن",         # housing
    "kayn": "يوجد",         # there is
    "chouf": "شاهد",        # look
    "dir": "قم",            # do
    "loué": "إيجار",        # rent
    "men": "من",            # from
    "fi": "في",             # in
    "cha9a": "شقة",         # apartment
    "bda": "يبدأ",          # starts
    "3andi": "عندي",        # I have
    "nheb": "أريد",         # I want
    "kima": "مثل",          # like
    "tsoum": "تكلفة",       # cost
    "chwiya": "قليل",       # a little
    "bzaaf": "كثير",        # a lot
}
def translate_darija_to_standard(text, mapping=None):
    """Replace known Darija/French tokens in *text* with Standard Arabic.

    The text is lower-cased and split on whitespace; each token is looked up
    in the lexicon and replaced when found, otherwise kept as-is. Punctuation
    attached to a token (e.g. ``"wahran,"`` or ``"dar?"``) is set aside for
    the lookup and re-attached afterwards, so it no longer prevents a match.

    Args:
        text: The raw user message. ``None`` or an empty string yields ``""``.
        mapping: Optional token -> replacement dict. Defaults to the
            module-level ``darija_map``.

    Returns:
        The translated tokens joined by single spaces.
    """
    if not text:
        return ""
    if mapping is None:
        mapping = darija_map

    # Characters peeled off the edges of a token before the lookup
    # (Latin and Arabic punctuation).
    edge_punct = ".,;:!?…،؛؟\"'()[]{}«»"

    translated = []
    for token in text.lower().split():
        without_left = token.lstrip(edge_punct)
        core = without_left.rstrip(edge_punct)
        prefix = token[:len(token) - len(without_left)]
        suffix = without_left[len(core):]
        # Unknown cores fall back to themselves, which rebuilds the token.
        translated.append(prefix + mapping.get(core, core) + suffix)
    return " ".join(translated)
def search_properties(query, data=None, limit=3):
    """Return the first listings that match the city/price hints in *query*.

    Recognised hints (in Standard Arabic, as produced by the translation step):

    * City names "وهران" / "الجزائر": keep rows whose city contains one of
      the cities mentioned. Mentioning both keeps listings from either city.
    * "أقل من <number>": keep rows priced strictly below that number. Any
      amount is accepted; if several are given, the lowest one applies.
      Rows with a missing price never pass a price filter.

    Args:
        query: The (translated) user query. ``None`` is treated as empty.
        data: Optional DataFrame to search instead of the module-level ``df``.
            It must have the ``location_reverse_geocode_city`` and
            ``listing_price_formatted_amount`` columns.
        limit: Maximum number of listings to return (default 3).

    Returns:
        A list of row dicts (``DataFrame.to_dict("records")``), at most
        *limit* long, in the frame's original order.
    """
    import re  # local import: the file header does not import re

    city_col = 'location_reverse_geocode_city'
    price_col = 'listing_price_formatted_amount'

    query = query or ""
    matches = df if data is None else data

    # City filters: one combined pattern so several cities act as alternatives.
    mentioned = [city for city in ("وهران", "الجزائر") if city in query]
    if mentioned:
        pattern = "|".join(re.escape(city) for city in mentioned)
        matches = matches[matches[city_col].str.contains(pattern, na=False)]

    # Price filtering: parse the amount instead of probing fixed substrings,
    # so "أقل من 100000" is no longer mistaken for "أقل من 10000".
    ceilings = [int(amount) for amount in re.findall(r"أقل من\s*(\d+)", query)]
    if ceilings:
        matches = matches[matches[price_col] < min(ceilings)]

    return matches.head(limit).to_dict("records")
def generate_response(user_input):
    """Build the chatbot reply for one user message.

    The message is translated with ``translate_darija_to_standard``, matching
    listings are fetched with ``search_properties``, and the result is
    rendered as a numbered list (title, price, city) followed by a follow-up
    prompt. When nothing matches, a fixed "nothing found" message is returned.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The reply text to display.
    """
    std_query = translate_darija_to_standard(user_input)
    results = search_properties(std_query)

    if not results:
        return "🫤 Ma l9it walou f had condition... Jرب بكلمات خرا ولا soum akther."

    parts = [f"✅ Kayn {len(results)} 3roud li ykhdmouk:\n\n"]
    for i, prop in enumerate(results, 1):
        title = prop['custom_title']
        city = prop['location_reverse_geocode_city']
        raw_price = prop['listing_price_formatted_amount']
        # Prices that failed numeric parsing are NaN, and int(NaN) raises
        # ValueError; show a placeholder instead of crashing the reply.
        price = "?" if pd.isna(raw_price) else int(raw_price)
        parts.append(f"{i}. 🏠 {title}\n 💰 {price} دج\n 📍 {city}\n ")
    parts.append("📩 T7eb nwarik kter?")
    return "".join(parts)
# Build the text-in / text-out Gradio UI around generate_response, then
# start serving it.
demo = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="🤖 Bot Dziria pour l’immobilier",
    description="سقسي على الكرا بدارجة، و خوك هنا يجاوبك 😄",
    theme="default",
)
demo.launch()