Spaces:

Lesterchia174
/

HDB_Price_Predictor

Sleeping

File size: 14,573 Bytes

import gradio as gr
import pandas as pd
import numpy as np
import joblib
import plotly.graph_objects as go
import plotly.express as px
from huggingface_hub import hf_hub_download
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Try to import xgboost, but fallback to scikit-learn
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
    print("✅ XGBoost is available")
except ImportError:
    XGB_AVAILABLE = False
    print("⚠️  XGBoost not available, using scikit-learn models")
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

def create_dummy_model(model_type):
    """Create a realistic dummy model that has all required methods"""
    class RealisticDummyModel:
        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature'
            ]
            # Add methods that might be called by joblib or other code
            self.get_params = lambda deep=True: {}
            self.set_params = lambda **params: self
        
        def predict(self, X):
            # Realistic prediction logic
            if isinstance(X, np.ndarray) and len(X.shape) == 2:
                X = X[0]  # Take first row if it's a 2D array
            
            floor_area = X[0]
            storey_level = X[1]
            flat_age = X[2]
            town_encoded = X[6]
            flat_type_encoded = X[5]
            
            base_price = floor_area * (4800 + town_encoded * 200)
            storey_bonus = storey_level * 2500
            age_discount = flat_age * 1800
            
            if self.model_type == "xgboost":
                price = base_price + storey_bonus - age_discount + 35000
                if storey_level > 20: price += 15000
                if flat_age < 10: price += 20000
            else:
                price = base_price + storey_bonus - age_discount - 25000
            
            return np.array([max(300000, price)])

    return RealisticDummyModel(model_type)()

def safe_joblib_load(filepath):
    """Safely load joblib file with error handling"""
    try:
        model = joblib.load(filepath)
        print(f"✅ Successfully loaded model from {filepath}")
        
        # Check if model has required methods
        if not hasattr(model, 'predict'):
            print("❌ Loaded object doesn't have predict method")
            return None
            
        # Add missing methods if needed
        if not hasattr(model, 'get_params'):
            model.get_params = lambda deep=True: {}
        if not hasattr(model, 'set_params'):
            model.set_params = lambda **params: model
            
        return model
        
    except Exception as e:
        print(f"❌ Error loading model from {filepath}: {e}")
        return None

def load_models():
    """Load models with robust error handling"""
    models = {}
    
    # Try to load XGBoost model
    try:
        xgboost_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="best_model_xgboost.joblib",
            repo_type="space"
        )
        models['xgboost'] = safe_joblib_load(xgboost_path)
        if models['xgboost'] is None:
            print("⚠️  Creating dummy model for XGBoost")
            models['xgboost'] = create_dummy_model("xgboost")
        else:
            print("✅ XGBoost model loaded and validated")
            
    except Exception as e:
        print(f"❌ Error downloading XGBoost model: {e}")
        print("⚠️  Creating dummy model for XGBoost")
        models['xgboost'] = create_dummy_model("xgboost")
    
    # Try to load Linear Regression model
    try:
        linear_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="linear_regression.joblib",
            repo_type="space"
        )
        models['linear_regression'] = safe_joblib_load(linear_path)
        if models['linear_regression'] is None:
            print("⚠️  Creating dummy model for Linear Regression")
            models['linear_regression'] = create_dummy_model("linear_regression")
        else:
            print("✅ Linear Regression model loaded and validated")
            
    except Exception as e:
        print(f"❌ Error downloading Linear Regression model: {e}")
        print("⚠️  Creating dummy model for Linear Regression")
        models['linear_regression'] = create_dummy_model("linear_regression")
    
    return models

def load_data():
    """Load data using Hugging Face Hub"""
    try:
        data_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv",
            repo_type="space"
        )
        df = pd.read_csv(data_path)
        print("✅ Data loaded successfully via Hugging Face Hub")
        return df
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return create_sample_data()

def create_sample_data():
    """Create sample data if real data isn't available"""
    np.random.seed(42)
    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
    flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
    flat_models = ['Improved', 'Model A', 'New Generation']
    
    data = []
    for _ in range(100):
        town = np.random.choice(towns)
        flat_type = np.random.choice(flat_types)
        flat_model = np.random.choice(flat_models)
        floor_area = np.random.randint(85, 150)
        storey = np.random.randint(1, 25)
        age = np.random.randint(0, 40)
        
        base_price = floor_area * 5000
        town_bonus = towns.index(town) * 20000
        storey_bonus = storey * 2000
        age_discount = age * 1500
        flat_type_bonus = flat_types.index(flat_type) * 30000
        
        resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
        resale_price = max(300000, resale_price + np.random.randint(-20000, 20000))
        
        data.append({
            'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
            'floor_area_sqm': floor_area, 'storey_level': storey,
            'flat_age': age, 'resale_price': resale_price
        })
    
    return pd.DataFrame(data)

def preprocess_input(user_input, model_type='xgboost'):
    """Preprocess user input for prediction with correct feature mapping"""
    # Flat type mapping
    flat_type_mapping = {'1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4,
                         '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7}

    # Town mapping
    town_mapping = {
        'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3,
        'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7,
        'CHOA CHU KANG': 8, 'ANG MO KIO': 9
    }

    # Flat model mapping
    flat_model_mapping = {
        'Model A': 0, 'Improved': 1, 'New Generation': 2,
        'Standard': 3, 'Premium': 4
    }

    # Create input array with features
    input_features = [
        user_input['floor_area_sqm'],           # Feature 1
        user_input['storey_level'],             # Feature 2
        user_input['flat_age'],                 # Feature 3
        99 - user_input['flat_age'],            # Feature 4: remaining_lease
        2025,                                   # Feature 5: transaction_year
        flat_type_mapping.get(user_input['flat_type'], 4),  # Feature 6: flat_type_ordinal
        town_mapping.get(user_input['town'], 0),           # Feature 7: town_encoded
        flat_model_mapping.get(user_input['flat_model'], 0), # Feature 8: flat_model_encoded
        1                                       # Feature 9: (placeholder)
    ]

    return np.array([input_features])

def create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr):
    """Create market insights visualization with both model predictions"""
    if data is None or len(data) == 0:
        return None

    similar_properties = data[
        (data['flat_type'] == user_input['flat_type']) &
        (data['town'] == user_input['town'])
    ]

    if len(similar_properties) < 5:
        similar_properties = data[data['flat_type'] == user_input['flat_type']]

    if len(similar_properties) > 0:
        fig = px.scatter(similar_properties, x='floor_area_sqm', y='resale_price',
                         color='flat_model',
                         title=f"Market Position: {user_input['flat_type']} in {user_input['town']}",
                         labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'})

        # Add both model predictions
        fig.add_trace(go.Scatter(x=[user_input['floor_area_sqm']], y=[predicted_price_xgb],
                                 mode='markers',
                                 marker=dict(symbol='star', size=20, color='red',
                                             line=dict(width=2, color='darkred')),
                                 name='XGBoost Prediction'))

        fig.add_trace(go.Scatter(x=[user_input['floor_area_sqm']], y=[predicted_price_lr],
                                 mode='markers',
                                 marker=dict(symbol='diamond', size=20, color='blue',
                                             line=dict(width=2, color='darkblue')),
                                 name='Linear Regression Prediction'))

        fig.update_layout(template="plotly_white", height=400, showlegend=True)
        return fig
    return None

def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice):
    """Main prediction function for Gradio with robust error handling"""
    user_input = {
        'town': town,
        'flat_type': flat_type,
        'flat_model': flat_model,
        'floor_area_sqm': floor_area_sqm,
        'storey_level': storey_level,
        'flat_age': flat_age
    }

    try:
        processed_input = preprocess_input(user_input)
        
        # Get predictions from both models with error handling
        try:
            predicted_price_xgb = max(0, float(models['xgboost'].predict(processed_input)[0]))
        except Exception as e:
            print(f"❌ XGBoost prediction error: {e}")
            predicted_price_xgb = 400000  # Fallback value
        
        try:
            predicted_price_lr = max(0, float(models['linear_regression'].predict(processed_input)[0]))
        except Exception as e:
            print(f"❌ Linear Regression prediction error: {e}")
            predicted_price_lr = 380000  # Fallback value
        
        # Use selected model's prediction
        if model_choice == "XGBoost":
            final_price = predicted_price_xgb
            model_name = "XGBoost"
        else:
            final_price = predicted_price_lr
            model_name = "Linear Regression"

        # Create insights
        remaining_lease = 99 - flat_age
        price_per_sqm = final_price / floor_area_sqm

        insights = f"""
        **Property Summary:**
        - Location: {town}
        - Type: {flat_type}
        - Model: {flat_model}
        - Area: {floor_area_sqm} sqm
        - Floor: Level {storey_level}
        - Age: {flat_age} years
        - Remaining Lease: {remaining_lease} years
        - Price per sqm: ${price_per_sqm:,.0f}

        **Model Predictions:**
        - XGBoost: ${predicted_price_xgb:,.0f}

        **Financing Eligibility:**
        """

        if remaining_lease >= 60:
            insights += "✅ Bank loan eligible"
        elif remaining_lease >= 20:
            insights += "⚠️ HDB loan eligible only"
        else:
            insights += "❌ Limited financing options"

        # Create chart with both predictions
        chart = create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr)

        return f"${final_price:,.0f}", chart, insights

    except Exception as e:
        error_msg = f"Prediction failed. Error: {str(e)}"
        print(error_msg)
        return "Error: Prediction failed", None, error_msg

# Preload models and data
print("Loading models and data...")
models = load_models()
data = load_data()

# Define Gradio interface
towns_list = [
    'SENGKANG', 'WOODLANDS', 'TAMPINES', 'PUNGGOL', 'JURONG WEST',
    'YISHUN', 'BEDOK', 'HOUGANG', 'CHOA CHU KANG', 'ANG MO KIO'
]

flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM', '1 ROOM']
flat_models = ['Model A', 'Improved', 'New Generation', 'Standard', 'Premium']

# Create Gradio interface
with gr.Blocks(title="🏠 HDB Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏠 HDB Price Predictor")
    gr.Markdown("Predict HDB resale prices using different machine learning models")

    with gr.Row():
        with gr.Column():
            town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO")
            flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM")
            flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved")
            floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
            storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
            flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
            model_choice = gr.Radio(label="Select Model", 
                                   choices=["XGBoost"],  #,"Linear Regression" 
                                   value="XGBoost")

            predict_btn = gr.Button("🔮 Predict Price", variant="primary")

        with gr.Column():
            predicted_price = gr.Label(label="💰 Predicted Price")
            insights = gr.Markdown(label="📋 Property Summary")

    with gr.Row():
        chart_output = gr.Plot(label="📈 Market Insights")

    # Connect button to function
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice],
        outputs=[predicted_price, chart_output, insights]
    )

# To run in Colab
if __name__ == "__main__":
    demo.launch(share=True)