File size: 14,573 Bytes
3cd69dc
 
 
 
 
 
 
 
 
 
 
 
7f830f6
 
 
 
 
 
 
 
 
 
 
4ad0eff
7f830f6
4ad0eff
 
 
 
 
 
 
 
 
7f830f6
 
 
4ad0eff
 
 
7f830f6
 
 
 
 
 
 
 
4ad0eff
 
 
 
 
 
 
 
 
 
 
 
7f830f6
4ad0eff
7f830f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ad0eff
3cd69dc
7f830f6
3cd69dc
4ad0eff
7f830f6
3cd69dc
 
 
 
 
 
7f830f6
 
 
 
 
 
 
3cd69dc
7f830f6
 
 
4ad0eff
7f830f6
3b8d137
 
 
 
 
 
 
 
 
 
 
 
7f830f6
3b8d137
 
 
 
4ad0eff
3cd69dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ad0eff
3cd69dc
 
 
 
 
 
 
 
4ad0eff
3cd69dc
 
 
 
 
4ad0eff
3cd69dc
 
4ad0eff
3cd69dc
 
 
 
 
4ad0eff
3cd69dc
 
4ad0eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b8d137
 
 
 
 
4ad0eff
 
 
 
 
 
7f830f6
4ad0eff
 
 
 
 
 
 
 
 
 
 
 
7f830f6
 
 
 
 
 
 
3b8d137
 
 
 
 
4ad0eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3000545
4ad0eff
 
3cd69dc
4ad0eff
7f830f6
 
 
 
 
 
 
 
3cd69dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ad0eff
d7bd915
3cd69dc
 
 
 
 
 
 
 
 
ca45592
3cd69dc
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import plotly.graph_objects as go
import plotly.express as px
from huggingface_hub import hf_hub_download
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Optional-dependency probe: XGB_AVAILABLE records whether xgboost could be
# imported. When it cannot, two scikit-learn estimator classes are imported
# instead.
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
    print("โœ… XGBoost is available")
except ImportError:
    XGB_AVAILABLE = False
    print("โš ๏ธ  XGBoost not available, using scikit-learn models")
    # NOTE(review): neither class below (nor XGB_AVAILABLE) is referenced in the
    # visible part of this file — confirm whether they are still needed.
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression

def create_dummy_model(model_type):
    """Build a heuristic stand-in regressor for when a real model is unavailable.

    The returned object exposes ``predict``, ``get_params``, ``set_params``,
    ``n_features_in_`` and ``feature_names_in_`` so it can be used wherever
    this file expects a loaded estimator.

    Args:
        model_type: Label of the model being replaced. ``"xgboost"`` selects
            the variant with a positive offset plus high-storey / young-flat
            bonuses; any other value selects the flat negative-offset variant.

    Returns:
        A ``RealisticDummyModel`` instance (not the result of calling one).
    """
    class RealisticDummyModel:
        """Price heuristic driven by floor area, storey, flat age and town code."""

        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature'
            ]

        def get_params(self, deep=True):
            """Return an empty parameter dict (the dummy has nothing to tune)."""
            return {}

        def set_params(self, **params):
            """Ignore all parameters and return ``self``."""
            return self

        def predict(self, X):
            """Return one heuristic price per input row.

            Args:
                X: A single feature row (1-D) or a 2-D array-like of rows.
                    Columns 0, 1, 2 and 6 are read as floor area, storey
                    level, flat age and town code respectively.

            Returns:
                1-D float ``numpy.ndarray`` of prices, each floored at 300000.
            """
            rows = np.atleast_2d(np.asarray(X, dtype=float))
            floor_area = rows[:, 0]
            storey_level = rows[:, 1]
            flat_age = rows[:, 2]
            town_encoded = rows[:, 6]

            price = (
                floor_area * (4800 + town_encoded * 200)
                + storey_level * 2500
                - flat_age * 1800
            )

            if self.model_type == "xgboost":
                price = price + 35000
                price = price + np.where(storey_level > 20, 15000, 0)
                price = price + np.where(flat_age < 10, 20000, 0)
            else:
                price = price - 25000

            return np.maximum(300000, price)

    # Return the instance itself; the class defines no __call__, so invoking
    # the instance (as the previous code did) raised TypeError.
    return RealisticDummyModel(model_type)

def safe_joblib_load(filepath):
    """Load a joblib artifact, returning ``None`` instead of raising.

    Args:
        filepath: Path handed to ``joblib.load``.

    Returns:
        The loaded object when it has a ``predict`` attribute; ``get_params``
        and ``set_params`` are patched on with no-op stand-ins if absent.
        ``None`` when loading raises or the object has no ``predict``.
    """
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code — only use with artifacts from a trusted source.
    try:
        loaded = joblib.load(filepath)
        print(f"โœ… Successfully loaded model from {filepath}")

        if hasattr(loaded, 'predict'):
            # Fill in the estimator-style accessors only where they are missing.
            if not hasattr(loaded, 'get_params'):
                loaded.get_params = lambda deep=True: {}
            if not hasattr(loaded, 'set_params'):
                loaded.set_params = lambda **params: loaded
            result = loaded
        else:
            print("โŒ Loaded object doesn't have predict method")
            result = None

    except Exception as err:
        print(f"โŒ Error loading model from {filepath}: {err}")
        result = None

    return result

def load_models():
    """Download and load both models, substituting dummies on any failure.

    Returns:
        Dict with keys ``'xgboost'`` and ``'linear_regression'``. Each value is
        the object returned by ``safe_joblib_load`` or, when the download or
        load fails, the result of ``create_dummy_model`` for that key.
    """
    repo = "Lesterchia174/HDB_Price_Predictor"
    # (dict key / dummy model type, file name in the repo, label used in logs)
    specs = (
        ('xgboost', "best_model_xgboost.joblib", "XGBoost"),
        ('linear_regression', "linear_regression.joblib", "Linear Regression"),
    )

    loaded = {}
    for key, artifact, label in specs:
        try:
            local_path = hf_hub_download(
                repo_id=repo,
                filename=artifact,
                repo_type="space"
            )
            candidate = safe_joblib_load(local_path)
            if candidate is not None:
                loaded[key] = candidate
                print(f"โœ… {label} model loaded and validated")
            else:
                print(f"โš ๏ธ  Creating dummy model for {label}")
                loaded[key] = create_dummy_model(key)

        except Exception as err:
            print(f"โŒ Error downloading {label} model: {err}")
            print(f"โš ๏ธ  Creating dummy model for {label}")
            loaded[key] = create_dummy_model(key)

    return loaded

def load_data():
    """Fetch the processed resale-price CSV from the Hub as a DataFrame.

    Returns:
        The DataFrame read from the downloaded CSV, or the output of
        ``create_sample_data()`` when the download or parsing raises.
    """
    try:
        csv_path = hf_hub_download(
            repo_id="Lesterchia174/HDB_Price_Predictor",
            filename="base_hdb_resale_prices_2015Jan-2025Jun_processed.csv",
            repo_type="space"
        )
        frame = pd.read_csv(csv_path)
        print("โœ… Data loaded successfully via Hugging Face Hub")
    except Exception as err:
        print(f"โŒ Error loading data: {err}")
        return create_sample_data()
    return frame

def create_sample_data():
    """Generate 100 deterministic synthetic resale records.

    Used as a fallback when the real dataset cannot be loaded. Draws come from
    a private ``RandomState(42)``, so the output is reproducible and the
    process-wide NumPy random state is left untouched.

    Returns:
        ``pandas.DataFrame`` with columns ``town``, ``flat_type``,
        ``flat_model``, ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
        ``resale_price`` (floored at 300000).
    """
    # A local generator seeded with 42 yields the same stream as seeding the
    # global one, without the side effect of resetting it for other callers.
    rng = np.random.RandomState(42)
    towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
    flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
    flat_models = ['Improved', 'Model A', 'New Generation']

    records = []
    for _ in range(100):
        # The draw order below fixes the generated values; do not reorder.
        town = rng.choice(towns)
        flat_type = rng.choice(flat_types)
        flat_model = rng.choice(flat_models)
        floor_area = rng.randint(85, 150)
        storey = rng.randint(1, 25)
        age = rng.randint(0, 40)

        # Price heuristic: area-based base, bonuses by list position of the
        # town / flat type and by storey, minus an age discount.
        base_price = floor_area * 5000
        town_bonus = towns.index(town) * 20000
        storey_bonus = storey * 2000
        age_discount = age * 1500
        flat_type_bonus = flat_types.index(flat_type) * 30000

        resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
        # Add noise, then clamp to the 300000 floor.
        resale_price = max(300000, resale_price + rng.randint(-20000, 20000))

        records.append({
            'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
            'floor_area_sqm': floor_area, 'storey_level': storey,
            'flat_age': age, 'resale_price': resale_price
        })

    return pd.DataFrame(records)

def preprocess_input(user_input, model_type='xgboost'):
    """Turn the form values into a single-row, nine-column feature array.

    Args:
        user_input: Mapping with keys ``floor_area_sqm``, ``storey_level``,
            ``flat_age``, ``flat_type``, ``town`` and ``flat_model``.
        model_type: Accepted for interface compatibility; not used here.

    Returns:
        ``numpy.ndarray`` of shape ``(1, 9)`` ordered as: floor area, storey
        level, flat age, remaining lease (``99 - flat_age``), transaction year
        (fixed at 2025), flat-type code, town code, flat-model code and a
        constant placeholder ``1``.
    """
    # Ordinal codes: flat types count from 1, towns and flat models from 0.
    flat_type_codes = {
        label: code for code, label in enumerate(
            ['1 ROOM', '2 ROOM', '3 ROOM', '4 ROOM',
             '5 ROOM', 'EXECUTIVE', 'MULTI-GENERATION'],
            start=1,
        )
    }
    town_codes = {
        label: code for code, label in enumerate([
            'SENGKANG', 'WOODLANDS', 'TAMPINES', 'PUNGGOL',
            'JURONG WEST', 'YISHUN', 'BEDOK', 'HOUGANG',
            'CHOA CHU KANG', 'ANG MO KIO',
        ])
    }
    flat_model_codes = {
        label: code for code, label in enumerate([
            'Model A', 'Improved', 'New Generation',
            'Standard', 'Premium',
        ])
    }

    area = user_input['floor_area_sqm']
    storey = user_input['storey_level']
    age = user_input['flat_age']

    # Unknown categories fall back to '4 ROOM' (4), town code 0 and model code 0.
    flat_type_code = flat_type_codes.get(user_input['flat_type'], 4)
    town_code = town_codes.get(user_input['town'], 0)
    flat_model_code = flat_model_codes.get(user_input['flat_model'], 0)

    row = [area, storey, age, 99 - age, 2025,
           flat_type_code, town_code, flat_model_code, 1]
    return np.array([row])

def create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr):
    """Plot comparable transactions with both model predictions overlaid.

    Args:
        data: Table of transactions with ``flat_type``, ``town``,
            ``floor_area_sqm``, ``resale_price`` and ``flat_model`` columns,
            or ``None``.
        user_input: Mapping providing ``flat_type``, ``town`` and
            ``floor_area_sqm`` for the property being priced.
        predicted_price_xgb: Price drawn as the red star marker.
        predicted_price_lr: Price drawn as the blue diamond marker.

    Returns:
        A Plotly figure, or ``None`` when ``data`` is missing/empty or no
        comparable rows exist.
    """
    if data is None or len(data) == 0:
        return None

    same_type = data['flat_type'] == user_input['flat_type']
    comparables = data[same_type & (data['town'] == user_input['town'])]

    # Too few matches in the same town: widen to the flat type everywhere.
    if len(comparables) < 5:
        comparables = data[same_type]

    if len(comparables) == 0:
        return None

    fig = px.scatter(
        comparables,
        x='floor_area_sqm',
        y='resale_price',
        color='flat_model',
        title=f"Market Position: {user_input['flat_type']} in {user_input['town']}",
        labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'},
    )

    # One highlighted marker per model: (price, symbol, fill, outline, legend name).
    overlays = (
        (predicted_price_xgb, 'star', 'red', 'darkred', 'XGBoost Prediction'),
        (predicted_price_lr, 'diamond', 'blue', 'darkblue', 'Linear Regression Prediction'),
    )
    for price, symbol, fill, outline, legend_name in overlays:
        fig.add_trace(go.Scatter(
            x=[user_input['floor_area_sqm']],
            y=[price],
            mode='markers',
            marker=dict(symbol=symbol, size=20, color=fill,
                        line=dict(width=2, color=outline)),
            name=legend_name,
        ))

    fig.update_layout(template="plotly_white", height=400, showlegend=True)
    return fig

def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice):
    """Gradio callback: price a flat and build the summary and chart.

    Reads the module-level ``models`` dict and ``data`` table.

    Args:
        town, flat_type, flat_model: Selected category labels.
        floor_area_sqm, storey_level, flat_age: Numeric form values.
        model_choice: ``"XGBoost"`` uses the XGBoost prediction as the headline
            price; any other value uses the linear-regression prediction.

    Returns:
        Tuple ``(price_text, chart, insights_markdown)``. If anything outside
        the two per-model predictions raises, returns
        ``("Error: Prediction failed", None, error_message)`` instead.
    """
    user_input = dict(
        town=town,
        flat_type=flat_type,
        flat_model=flat_model,
        floor_area_sqm=floor_area_sqm,
        storey_level=storey_level,
        flat_age=flat_age,
    )

    try:
        features = preprocess_input(user_input)

        # Each model is queried independently; a failure in one is logged and
        # replaced by a fixed fallback price rather than aborting the request.
        try:
            raw_xgb = models['xgboost'].predict(features)[0]
            predicted_price_xgb = max(0, float(raw_xgb))
        except Exception as e:
            print(f"โŒ XGBoost prediction error: {e}")
            predicted_price_xgb = 400000

        try:
            raw_lr = models['linear_regression'].predict(features)[0]
            predicted_price_lr = max(0, float(raw_lr))
        except Exception as e:
            print(f"โŒ Linear Regression prediction error: {e}")
            predicted_price_lr = 380000

        # Headline price follows the selected model.
        final_price = predicted_price_xgb if model_choice == "XGBoost" else predicted_price_lr

        remaining_lease = 99 - flat_age
        price_per_sqm = final_price / floor_area_sqm

        # Financing line is chosen from the remaining-lease thresholds (60 / 20 years).
        financing_note = (
            "โœ… Bank loan eligible" if remaining_lease >= 60
            else "โš ๏ธ HDB loan eligible only" if remaining_lease >= 20
            else "โŒ Limited financing options"
        )

        insights = f"""
        **Property Summary:**
        - Location: {town}
        - Type: {flat_type}
        - Model: {flat_model}
        - Area: {floor_area_sqm} sqm
        - Floor: Level {storey_level}
        - Age: {flat_age} years
        - Remaining Lease: {remaining_lease} years
        - Price per sqm: ${price_per_sqm:,.0f}

        **Model Predictions:**
        - XGBoost: ${predicted_price_xgb:,.0f}

        **Financing Eligibility:**
        """ + financing_note

        # The chart always shows both predictions, whichever model was selected.
        chart = create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr)

        return f"${final_price:,.0f}", chart, insights

    except Exception as e:
        error_msg = f"Prediction failed. Error: {e!s}"
        print(error_msg)
        return "Error: Prediction failed", None, error_msg

# Preload models and data once at import time. `models` and `data` are the
# module-level globals that predict_hdb_price() reads on every request.
print("Loading models and data...")
models = load_models()
data = load_data()

# Choice lists for the Gradio dropdowns defined below.
# towns_list holds the same ten towns that preprocess_input() has codes for.
towns_list = [
    'SENGKANG', 'WOODLANDS', 'TAMPINES', 'PUNGGOL', 'JURONG WEST',
    'YISHUN', 'BEDOK', 'HOUGANG', 'CHOA CHU KANG', 'ANG MO KIO'
]

# NOTE(review): 'MULTI-GENERATION' has a code in preprocess_input() but is not
# offered here — confirm whether that omission is intended.
flat_types = ['3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', '2 ROOM', '1 ROOM']
flat_models = ['Model A', 'Improved', 'New Generation', 'Standard', 'Premium']

# Build the Gradio UI: an input column and a result column side by side, with
# the market chart in a row underneath. Components are created in layout order.
with gr.Blocks(title="๐Ÿ  HDB Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐Ÿ  HDB Price Predictor")
    gr.Markdown("Predict HDB resale prices using different machine learning models")

    with gr.Row():
        # Left column: property inputs and the predict button.
        with gr.Column():
            # Dropdown choices are shown in sorted order, not list order.
            town = gr.Dropdown(label="Town", choices=sorted(towns_list), value="ANG MO KIO")
            flat_type = gr.Dropdown(label="Flat Type", choices=sorted(flat_types), value="4 ROOM")
            flat_model = gr.Dropdown(label="Flat Model", choices=sorted(flat_models), value="Improved")
            floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
            storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
            flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
            model_choice = gr.Radio(label="Select Model", 
                                   choices=["XGBoost"],  # the "Linear Regression" choice is commented out
                                   value="XGBoost")

            predict_btn = gr.Button("๐Ÿ”ฎ Predict Price", variant="primary")

        # Right column: headline price and the Markdown summary.
        with gr.Column():
            predicted_price = gr.Label(label="๐Ÿ’ฐ Predicted Price")
            insights = gr.Markdown(label="๐Ÿ“‹ Property Summary")

    with gr.Row():
        chart_output = gr.Plot(label="๐Ÿ“ˆ Market Insights")

    # Connect button to function. The inputs list follows predict_hdb_price's
    # parameter order; the outputs list follows its returned
    # (price, chart, insights) tuple.
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice],
        outputs=[predicted_price, chart_output, insights]
    )

# Launch only when run as a script (original note: "To run in Colab").
# NOTE(review): share=True presumably requests a public share link — confirm
# this is wanted outside Colab.
if __name__ == "__main__":
    demo.launch(share=True)