Spaces:
Sleeping
Sleeping
import re
import string

import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the listings export; the path is relative to the working directory.
df_clean = pd.read_csv("df_clean.csv")

# Keep only the three columns the bot displays. The explicit .copy() makes
# `df` an independent frame, so the price coercion below does not write into
# a slice of `df_clean` (which triggers pandas' SettingWithCopyWarning and is
# not guaranteed to take effect).
df = df_clean[
    ['custom_title', 'listing_price_formatted_amount', 'location_reverse_geocode_city']
].copy()

# Clean price column: values that cannot be parsed as numbers become NaN.
df['listing_price_formatted_amount'] = pd.to_numeric(
    df['listing_price_formatted_amount'], errors='coerce'
)

# NOTE(review): tokenizer and model are loaded at import time but are not
# referenced by any function visible in this file — confirm they are needed.
tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/AraT5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("UBC-NLP/AraT5-base")
# Word-level glossary: lower-cased romanised Darija / French token -> Arabic.
darija_map = {
    "bghit": "أريد", "dar": "منزل", "f": "في", "drahem": "مال", "kra": "إيجار",
    "souma": "سعر", "ta7t": "أقل من", "taht": "أقل من", "ghalya": "غالية", "rkhisa": "رخيصة",
    "wahran": "وهران", "oran": "وهران", "alger": "الجزائر", "dzayer": "الجزائر",
    "maison": "منزل", "appartement": "شقة", "appartmn": "شقة", "habit": "سكن",
    "kayn": "يوجد", "chouf": "شاهد", "dir": "قم", "loué": "إيجار",
    "men": "من", "fi": "في", "cha9a": "شقة", "bda": "يبدأ", "3andi": "عندي",
    "nheb": "أريد", "kima": "مثل", "tsoum": "تكلفة", "chwiya": "قليل", "bzaaf": "كثير",
}

# Characters ignored at the edges of a token when looking it up in the
# glossary: ASCII punctuation plus common Arabic punctuation marks.
_EDGE_PUNCTUATION = string.punctuation + "،؛؟«»"


def translate_darija_to_standard(text):
    """Replace known Darija/French words in *text* with their Arabic glossary entry.

    The text is lower-cased and split on whitespace. Each token is looked up
    in ``darija_map`` with leading/trailing punctuation removed, so that
    "wahran?" or "dar," still match; the punctuation is re-attached around
    the translated word. Tokens without a glossary entry are kept as they
    are (lower-cased).

    Args:
        text: The raw user query.

    Returns:
        The translated tokens joined by single spaces ("" for empty input).
    """
    translated = []
    for token in text.lower().split():
        # Split the token into <leading punctuation><core word><trailing punctuation>.
        without_lead = token.lstrip(_EDGE_PUNCTUATION)
        lead = token[:len(token) - len(without_lead)]
        core = without_lead.rstrip(_EDGE_PUNCTUATION)
        trail = without_lead[len(core):]
        # A punctuation-only token has an empty core and falls through unchanged.
        translated.append(lead + darija_map.get(core, core) + trail)
    return " ".join(translated)
# Column names used by the listing filters.
_CITY_COLUMN = 'location_reverse_geocode_city'
_PRICE_COLUMN = 'listing_price_formatted_amount'

# City names recognised inside a (translated) query.
_KNOWN_CITIES = ("وهران", "الجزائر")

# "أقل من <number>" ("less than <number>"): captures the whole number so that
# e.g. 100000 is not mistaken for 10000 by a substring comparison.
_PRICE_CAP_PATTERN = re.compile(r"أقل من\s*(\d+)")


def search_properties(query, data=None, limit=3):
    """Return the first listings that match the city and price hints in *query*.

    Args:
        query: Query text, expected to already contain the Arabic city names
            and the "أقل من <number>" price phrase.
        data: Optional DataFrame to search; defaults to the module-level
            ``df``. It must have the city and price columns named above.
        limit: Maximum number of listings to return (default 3).

    Returns:
        A list of dicts, one per matching row, in the frame's original order.
    """
    matches = df if data is None else data

    # City filters: every city mentioned in the query narrows the result.
    for city in _KNOWN_CITIES:
        if city in query:
            in_city = matches[_CITY_COLUMN].str.contains(city, na=False, regex=False)
            matches = matches[in_city]

    # Price filtering: keep rows strictly below the first cap in the query.
    # Rows with a NaN price compare False and are dropped when a cap is given.
    price_cap = _PRICE_CAP_PATTERN.search(query)
    if price_cap:
        matches = matches[matches[_PRICE_COLUMN] < int(price_cap.group(1))]

    return matches.head(limit).to_dict("records")
def generate_response(user_input):
    """Build the bot's text reply for a user query.

    The query is translated with ``translate_darija_to_standard``, matching
    listings are fetched with ``search_properties``, and the listings are
    formatted as a numbered list with title, price and city.

    Args:
        user_input: The text typed by the user; ``None`` is treated as "".

    Returns:
        The formatted reply, or a "nothing found" message when no listing
        matches.
    """
    std_query = translate_darija_to_standard(user_input or "")
    results = search_properties(std_query)

    if not results:
        return "🫤 Ma l9it walou f had condition... Jرب بكلمات خرا ولا soum akther."

    parts = [f"✅ Kayn {len(results)} 3roud li ykhdmouk:\n\n"]
    for i, prop in enumerate(results, 1):
        title = prop['custom_title']
        city = prop['location_reverse_geocode_city']
        raw_price = prop['listing_price_formatted_amount']
        # Prices that failed numeric parsing are NaN; int(NaN) raises
        # ValueError, so show a placeholder ("unspecified") instead.
        if pd.isna(raw_price):
            price_text = "غير محدد"
        else:
            price_text = f"{int(raw_price)} دج"
        parts.append(f"{i}. 🏠 {title}\n 💰 {price_text}\n 📍 {city}\n ")
    parts.append("📩 T7eb nwarik kter?")
    return "".join(parts)
# Wire generate_response into a one-textbox-in, one-textbox-out Gradio UI
# and start serving it as soon as the script runs.
demo = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="🤖 Bot Dziria pour l’immobilier",
    description="سقسي على الكرا بدارجة، و خوك هنا يجاوبك 😄",
    theme="default",
)
demo.launch()