Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from huggingface_hub import hf_hub_download | |
| import os | |
| from pathlib import Path | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # Try to import xgboost, but fallback to scikit-learn | |
| try: | |
| import xgboost as xgb | |
| XGB_AVAILABLE = True | |
| print("โ XGBoost is available") | |
| except ImportError: | |
| XGB_AVAILABLE = False | |
| print("โ ๏ธ XGBoost not available, using scikit-learn models") | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.linear_model import LinearRegression | |
| def create_dummy_model(model_type): | |
| """Create a realistic dummy model that has all required methods""" | |
| class RealisticDummyModel: | |
| def __init__(self, model_type): | |
| self.model_type = model_type | |
| self.n_features_in_ = 9 | |
| self.feature_names_in_ = [ | |
| 'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease', | |
| 'transaction_year', 'flat_type_encoded', 'town_encoded', | |
| 'flat_model_encoded', 'dummy_feature' | |
| ] | |
| # Add methods that might be called by joblib or other code | |
| self.get_params = lambda deep=True: {} | |
| self.set_params = lambda **params: self | |
| def predict(self, X): | |
| # Realistic prediction logic | |
| if isinstance(X, np.ndarray) and len(X.shape) == 2: | |
| X = X[0] # Take first row if it's a 2D array | |
| floor_area = X[0] | |
| storey_level = X[1] | |
| flat_age = X[2] | |
| town_encoded = X[6] | |
| flat_type_encoded = X[5] | |
| base_price = floor_area * (4800 + town_encoded * 200) | |
| storey_bonus = storey_level * 2500 | |
| age_discount = flat_age * 1800 | |
| if self.model_type == "xgboost": | |
| price = base_price + storey_bonus - age_discount + 35000 | |
| if storey_level > 20: price += 15000 | |
| if flat_age < 10: price += 20000 | |
| else: | |
| price = base_price + storey_bonus - age_discount - 25000 | |
| return np.array([max(300000, price)]) | |
| return RealisticDummyModel(model_type)() | |
| def safe_joblib_load(filepath): | |
| """Safely load joblib file with error handling""" | |
| try: | |
| model = joblib.load(filepath) | |
| print(f"โ Successfully loaded model from {filepath}") | |
| # Check if model has required methods | |
| if not hasattr(model, 'predict'): | |
| print("โ Loaded object doesn't have predict method") | |
| return None | |
| # Add missing methods if needed | |
| if not hasattr(model, 'get_params'): | |
| model.get_params = lambda deep=True: {} | |
| if not hasattr(model, 'set_params'): | |
| model.set_params = lambda **params: model | |
| return model | |
| except Exception as e: | |
| print(f"โ Error loading model from {filepath}: {e}") | |
| return None | |
| def load_models(): | |
| """Load models with robust error handling""" | |
| models = {} | |
| # Try to load XGBoost model | |
| try: | |
| xgboost_path = hf_hub_download( | |
| repo_id="Lesterchia174/HDB_Price_Predictor", | |
| filename="best_model_xgboost.joblib", | |
| repo_type="space" | |
| ) | |
| models['xgboost'] = safe_joblib_load(xgboost_path) | |
| if models['xgboost'] is None: | |
| print("โ ๏ธ Creating dummy model for XGBoost") | |
| models['xgboost'] = create_dummy_model("xgboost") | |
| else: | |
| print("โ XGBoost model loaded and validated") | |
| except Exception as e: | |
| print(f"โ Error downloading XGBoost model: {e}") | |
| print("โ ๏ธ Creating dummy model for XGBoost") | |
| models['xgboost'] = create_dummy_model("xgboost") | |
| # Try to load Linear Regression model | |
| try: | |
| linear_path = hf_hub_download( | |
| repo_id="Lesterchia174/HDB_Price_Predictor", | |
| filename="linear_regression.joblib", | |
| repo_type="space" | |
| ) | |
| models['linear_regression'] = safe_joblib_load(linear_path) | |
| if models['linear_regression'] is None: | |
| print("โ ๏ธ Creating dummy model for Linear Regression") | |
| models['linear_regression'] = create_dummy_model("linear_regression") | |
| else: | |
| print("โ Linear Regression model loaded and validated") | |
| except Exception as e: | |
| print(f"โ Error downloading Linear Regression model: {e}") | |
| print("โ ๏ธ Creating dummy model for Linear Regression") | |
| models['linear_regression'] = create_dummy_model("linear_regression") | |
| return models | |
| def load_data(): | |
| """Load data using Hugging Face Hub""" | |
| try: | |
| data_path = hf_hub_download( | |
| repo_id="Lesterchia174/HDB_Price_Predictor", | |
| filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv", | |
| repo_type="space" | |
| ) | |
| df = pd.read_csv(data_path) | |
| print("โ Data loaded successfully via Hugging Face Hub") | |
| return df | |
| except Exception as e: | |
| print(f"โ Error loading data: {e}") | |
| return create_sample_data() | |
| def create_sample_data(): | |
| """Create sample data if real data isn't available""" | |
| np.random.seed(42) | |
| towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST'] | |
| flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE'] | |
| flat_models = ['Improved', 'Model A', 'New Generation'] | |
| data = [] | |
| for _ in range(100): | |
| town = np.random.choice(towns) | |
| flat_type = np.random.choice(flat_types) | |
| flat_model = np.random.choice(flat_models) | |
| floor_area = np.random.randint(85, 150) | |
| storey = np.random.randint(1, 25) | |
| age = np.random.randint(0, 40) | |
| base_price = floor_area * 5000 | |
| town_bonus = towns.index(town) * 20000 | |
| storey_bonus = storey * 2000 | |
| age_discount = age * 1500 | |
| flat_type_bonus = flat_types.index(flat_type) * 30000 | |
| resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus | |
| resale_price = max(300000, resale_price + np.random.randint(-20000, 20000)) | |
| data.append({ | |
| 'town': town, 'flat_type': flat_type, 'flat_model': flat_model, | |
| 'floor_area_sqm': floor_area, 'storey_level': storey, | |
| 'flat_age': age, 'resale_price': resale_price | |
| }) | |
| return pd.DataFrame(data) | |
| def preprocess_input(user_input, model_type='xgboost'): | |
| """Preprocess user input for prediction with correct feature mapping""" | |
| # Flat type mapping | |
| flat_type_mapping = {'1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4, | |
| '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7} | |
| # Town mapping | |
| town_mapping = { | |
| 'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3, | |
| 'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7, | |
| 'CHOA CHU KANG': 8, 'ANG MO KIO': 9 | |
| } | |
| # Flat model mapping | |
| flat_model_mapping = { | |
| 'Model A': 0, 'Improved': 1, 'New Generation': 2, | |
| 'Standard': 3, 'Premium': 4 | |
| } | |
| # Create input array with features | |
| input_features = [ | |
| user_input['floor_area_sqm'], # Feature 1 | |
| user_input['storey_level'], # Feature 2 | |
| user_input['flat_age'], # Feature 3 | |
| 99 - user_input['flat_age'], # Feature 4: remaining_lease | |
| 2025, # Feature 5: transaction_year | |
| flat_type_mapping.get(user_input['flat_type'], 4), # Feature 6: flat_type_ordinal | |
| town_mapping.get(user_input['town'], 0), # Feature 7: town_encoded | |
| flat_model_mapping.get(user_input['flat_model'], 0), # Feature 8: flat_model_encoded | |
| 1 # Feature 9: (placeholder) | |
| ] | |
| return np.array([input_features]) | |
| def create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr): | |
| """Create market insights visualization with both model predictions""" | |
| if data is None or len(data) == 0: | |
| return None | |
| similar_properties = data[ | |
| (data['flat_type'] == user_input['flat_type']) & | |
| (data['town'] == user_input['town']) | |
| ] | |
| if len(similar_properties) < 5: | |
| similar_properties = data[data['flat_type'] == user_input['flat_type']] | |
| if len(similar_properties) > 0: | |
| fig = px.scatter(similar_properties, x='floor_area_sqm', y='resale_price', | |
| color='flat_model', | |
| title=f"Market Position: {user_input['flat_type']} in {user_input['town']}", | |
| labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'}) | |
| # Add both model predictions | |
| fig.add_trace(go.Scatter(x=[user_input['floor_area_sqm']], y=[predicted_price_xgb], | |
| mode='markers', | |
| marker=dict(symbol='star', size=20, color='red', | |
| line=dict(width=2, color='darkred')), | |
| name='XGBoost Prediction')) | |
| fig.add_trace(go.Scatter(x=[user_input['floor_area_sqm']], y=[predicted_price_lr], | |
| mode='markers', | |
| marker=dict(symbol='diamond', size=20, color='blue', | |
| line=dict(width=2, color='darkblue')), | |
| name='Linear Regression Prediction')) | |
| fig.update_layout(template="plotly_white", height=400, showlegend=True) | |
| return fig | |
| return None | |
| def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice): | |
| """Main prediction function for Gradio with robust error handling""" | |
| user_input = { | |
| 'town': town, | |
| 'flat_type': flat_type, | |
| 'flat_model': flat_model, | |
| 'floor_area_sqm': floor_area_sqm, | |
| 'storey_level': storey_level, | |
| 'flat_age': flat_age | |
| } | |
| try: | |
| processed_input = preprocess_input(user_input) | |
| # Get predictions from both models with error handling | |
| try: | |
| predicted_price_xgb = max(0, float(models['xgboost'].predict(processed_input)[0])) | |
| except Exception as e: | |
| print(f"โ XGBoost prediction error: {e}") | |
| predicted_price_xgb = 400000 # Fallback value | |
| try: | |
| predicted_price_lr = max(0, float(models['linear_regression'].predict(processed_input)[0])) | |
| except Exception as e: | |
| print(f"โ Linear Regression prediction error: {e}") | |
| predicted_price_lr = 380000 # Fallback value | |
| # Use selected model's prediction | |
| if model_choice == "XGBoost": | |
| final_price = predicted_price_xgb | |
| model_name = "XGBoost" | |
| else: | |
| final_price = predicted_price_lr | |
| model_name = "Linear Regression" | |
| # Create insights | |
| remaining_lease = 99 - flat_age | |
| price_per_sqm = final_price / floor_area_sqm | |
| insights = f""" | |
| **Property Summary:** | |
| - Location: {town} | |
| - Type: {flat_type} | |
| - Model: {flat_model} | |
| - Area: {floor_area_sqm} sqm | |
| - Floor: Level {storey_level} | |
| - Age: {flat_age} years | |
| - Remaining Lease: {remaining_lease} years | |
| - Price per sqm: ${price_per_sqm:,.0f} | |
| **Model Predictions:** | |
| - XGBoost: ${predicted_price_xgb:,.0f} | |
| **Financing Eligibility:** | |
| """ | |
| if remaining_lease >= 60: | |
| insights += "โ Bank loan eligible" | |
| elif remaining_lease >= 20: | |
| insights += "โ ๏ธ HDB loan eligible only" | |
| else: | |
| insights += "โ Limited financing options" | |
| # Create chart with both predictions | |
| chart = create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr) | |
| return f"${final_price:,.0f}", chart, insights | |
| except Exception as e: | |
| error_msg = f"Prediction failed. Error: {str(e)}" | |
| print(error_msg) | |
| return "Error: Prediction failed", None, error_msg | |
| # Preload models and data | |
| print("Loading models and data...") | |
| models = load_models() | |
| data = load_data() | |
| # Define Gradio interface | |
| towns_list = [ | |
| 'SENGKANG', 'WOODLANDS', 'TAMPINES', 'PUNGGOL', 'JURONG WEST', | |
| 'YISHUN', 'BEDOK', 'HOUGANG', 'CHOA CHU KANG', 'ANG MO KIO' | |
| ] | |
| flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM', '1 ROOM'] | |
| flat_models = ['Model A', 'Improved', 'New Generation', 'Standard', 'Premium'] | |
| # Create Gradio interface | |
| with gr.Blocks(title="๐ HDB Price Predictor", theme=gr.themes.Soft()) as demo: | |
| gr.Markdown("# ๐ HDB Price Predictor") | |
| gr.Markdown("Predict HDB resale prices using different machine learning models") | |
| with gr.Row(): | |
| with gr.Column(): | |
| town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO") | |
| flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM") | |
| flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved") | |
| floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5) | |
| storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1) | |
| flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1) | |
| model_choice = gr.Radio(label="Select Model", | |
| choices=["XGBoost"], #,"Linear Regression" | |
| value="XGBoost") | |
| predict_btn = gr.Button("๐ฎ Predict Price", variant="primary") | |
| with gr.Column(): | |
| predicted_price = gr.Label(label="๐ฐ Predicted Price") | |
| insights = gr.Markdown(label="๐ Property Summary") | |
| with gr.Row(): | |
| chart_output = gr.Plot(label="๐ Market Insights") | |
| # Connect button to function | |
| predict_btn.click( | |
| fn=predict_hdb_price, | |
| inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice], | |
| outputs=[predicted_price, chart_output, insights] | |
| ) | |
| # To run in Colab | |
| if __name__ == "__main__": | |
| demo.launch(share=True) |