Lesterchia174 committed on
Commit
4ad0eff
·
verified ·
1 Parent(s): 1ce841e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -52
app.py CHANGED
@@ -19,11 +19,46 @@ from pathlib import Path
19
  import warnings
20
  warnings.filterwarnings('ignore')
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Load models using Hugging Face Hub (handles Xet pointers)
23
  def load_models():
24
  """Load models using Hugging Face Hub library"""
25
  models = {}
26
-
27
  try:
28
  # Download XGBoost model (handles Xet pointer automatically)
29
  xgboost_path = hf_hub_download(
@@ -34,11 +69,12 @@ def load_models():
34
  models['xgboost'] = joblib.load(xgboost_path)
35
  print("✅ XGBoost model loaded successfully via Hugging Face Hub")
36
  print(f" File size: {os.path.getsize(xgboost_path)} bytes")
37
-
38
  except Exception as e:
39
  print(f"❌ Error loading XGBoost model: {e}")
 
40
  models['xgboost'] = None
41
-
42
  try:
43
  # Download Linear Regression model
44
  linear_path = hf_hub_download(
@@ -46,14 +82,21 @@ def load_models():
46
  filename="linear_regression.joblib",
47
  repo_type="space"
48
  )
49
- models['linear_regression'] = joblib.load(linear_path)
50
- print("✅ Linear Regression model loaded successfully via Hugging Face Hub")
51
- print(f" File size: {os.path.getsize(linear_path)} bytes")
52
-
 
 
 
 
 
 
 
53
  except Exception as e:
54
  print(f"❌ Error loading Linear Regression model: {e}")
55
  models['linear_regression'] = None
56
-
57
  return models
58
 
59
  def load_data():
@@ -68,7 +111,7 @@ def load_data():
68
  df = pd.read_csv(data_path)
69
  print("✅ Data loaded successfully via Hugging Face Hub")
70
  return df
71
-
72
  except Exception as e:
73
  print(f"❌ Error loading data: {e}")
74
  # Fallback to creating sample data
@@ -81,7 +124,7 @@ def create_sample_data():
81
  towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
82
  flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
83
  flat_models = ['Improved', 'Model A', 'New Generation']
84
-
85
  data = []
86
  for _ in range(100):
87
  town = np.random.choice(towns)
@@ -90,22 +133,22 @@ def create_sample_data():
90
  floor_area = np.random.randint(85, 150)
91
  storey = np.random.randint(1, 25)
92
  age = np.random.randint(0, 40)
93
-
94
  base_price = floor_area * 5000
95
  town_bonus = towns.index(town) * 20000
96
  storey_bonus = storey * 2000
97
  age_discount = age * 1500
98
  flat_type_bonus = flat_types.index(flat_type) * 30000
99
-
100
  resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
101
  resale_price = max(300000, resale_price + np.random.randint(-20000, 20000))
102
-
103
  data.append({
104
  'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
105
  'floor_area_sqm': floor_area, 'storey_level': storey,
106
  'flat_age': age, 'resale_price': resale_price
107
  })
108
-
109
  return pd.DataFrame(data)
110
 
111
  # Preload models and data
@@ -114,50 +157,152 @@ models = load_models()
114
  data = load_data()
115
 
116
  # If models failed to load, create dummy ones
117
- if models['xgboost'] is None:
118
  print("⚠️ Creating dummy XGBoost model for demonstration")
119
  models['xgboost'] = create_dummy_model("xgboost")
120
 
121
- if models['linear_regression'] is None:
122
  print("⚠️ Creating dummy Linear Regression model for demonstration")
123
  models['linear_regression'] = create_dummy_model("linear_regression")
124
 
125
def create_dummy_model(model_type):
    """Create a stand-in model exposing an estimator-like ``predict`` method.

    Intended as a fallback when a real model cannot be loaded, so the app can
    still produce plausible prices for demonstration.

    Args:
        model_type: ``"xgboost"`` selects the formula with the extra
            high-floor / young-flat bonuses; any other value selects the
            plain formula with a fixed 25000 deduction.

    Returns:
        An object with ``model_type``, ``n_features_in_``,
        ``feature_names_in_`` and ``predict(X)``.
    """
    # Local import keeps this definition usable regardless of where it sits
    # relative to the module's own imports.
    import numpy as np

    class RealisticDummyModel:
        """Heuristic price model using the same feature layout as the app."""

        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature',
            ]

        def predict(self, X):
            """Return one heuristic price per input row.

            Args:
                X: 2-D array-like whose columns follow ``feature_names_in_``;
                    a single 1-D row is also accepted.

            Returns:
                1-D numpy array of prices, each floored at 300000.
            """
            # Callers index the result with ``[0]``, so the return value must
            # be a sequence rather than a bare scalar.
            rows = np.atleast_2d(np.asarray(X, dtype=float))
            floor_area = rows[:, 0]
            storey_level = rows[:, 1]
            flat_age = rows[:, 2]
            town_encoded = rows[:, 6]

            base_price = floor_area * (4800 + town_encoded * 200)
            storey_bonus = storey_level * 2500
            age_discount = flat_age * 1800
            price = base_price + storey_bonus - age_discount

            if self.model_type == "xgboost":
                price = price + 35000
                price = price + np.where(storey_level > 20, 15000, 0)
                price = price + np.where(flat_age < 10, 20000, 0)
            else:
                price = price - 25000

            return np.maximum(300000, price)

    return RealisticDummyModel(model_type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- # ... [rest of your functions remain the same: preprocess_input, create_market_insights_chart, predict_hdb_price] ...
 
161
 
162
  # Define Gradio interface
163
  towns_list = [
@@ -181,8 +326,8 @@ with gr.Blocks(title="🏠 HDB Price Predictor", theme=gr.themes.Soft()) as demo
181
  floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
182
  storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
183
  flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
184
- model_choice = gr.Radio(label="Select Model",
185
- choices=["XGBoost", "Linear Regression"],
186
  value="XGBoost")
187
 
188
  predict_btn = gr.Button("🔮 Predict Price", variant="primary")
 
19
  import warnings
20
  warnings.filterwarnings('ignore')
21
 
22
def create_dummy_model(model_type):
    """Create a stand-in model exposing an estimator-like ``predict`` method.

    Intended as a fallback when a real model cannot be loaded, so the app can
    still produce plausible prices for demonstration.

    Args:
        model_type: ``"xgboost"`` selects the formula with the extra
            high-floor / young-flat bonuses; any other value selects the
            plain formula with a fixed 25000 deduction.

    Returns:
        An object with ``model_type``, ``n_features_in_``,
        ``feature_names_in_`` and ``predict(X)``.
    """
    # Local import keeps this definition usable regardless of where it sits
    # relative to the module's own imports.
    import numpy as np

    class RealisticDummyModel:
        """Heuristic price model using the same feature layout as the app."""

        def __init__(self, model_type):
            self.model_type = model_type
            self.n_features_in_ = 9
            self.feature_names_in_ = [
                'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
                'transaction_year', 'flat_type_encoded', 'town_encoded',
                'flat_model_encoded', 'dummy_feature',
            ]

        def predict(self, X):
            """Return one heuristic price per input row.

            Args:
                X: 2-D array-like whose columns follow ``feature_names_in_``;
                    a single 1-D row is also accepted.

            Returns:
                1-D numpy array of prices, each floored at 300000.
            """
            # Callers index the result with ``[0]``, so the return value must
            # be a sequence rather than a bare scalar.
            rows = np.atleast_2d(np.asarray(X, dtype=float))
            floor_area = rows[:, 0]
            storey_level = rows[:, 1]
            flat_age = rows[:, 2]
            town_encoded = rows[:, 6]

            base_price = floor_area * (4800 + town_encoded * 200)
            storey_bonus = storey_level * 2500
            age_discount = flat_age * 1800
            price = base_price + storey_bonus - age_discount

            if self.model_type == "xgboost":
                price = price + 35000
                price = price + np.where(storey_level > 20, 15000, 0)
                price = price + np.where(flat_age < 10, 20000, 0)
            else:
                price = price - 25000

            return np.maximum(300000, price)

    return RealisticDummyModel(model_type)
56
+
57
  # Load models using Hugging Face Hub (handles Xet pointers)
58
  def load_models():
59
  """Load models using Hugging Face Hub library"""
60
  models = {}
61
+
62
  try:
63
  # Download XGBoost model (handles Xet pointer automatically)
64
  xgboost_path = hf_hub_download(
 
69
  models['xgboost'] = joblib.load(xgboost_path)
70
  print("✅ XGBoost model loaded successfully via Hugging Face Hub")
71
  print(f" File size: {os.path.getsize(xgboost_path)} bytes")
72
+
73
  except Exception as e:
74
  print(f"❌ Error loading XGBoost model: {e}")
75
+ print("⚠️ This usually means xgboost package is not installed")
76
  models['xgboost'] = None
77
+
78
  try:
79
  # Download Linear Regression model
80
  linear_path = hf_hub_download(
 
82
  filename="linear_regression.joblib",
83
  repo_type="space"
84
  )
85
+ # Try to load without xgboost dependency
86
+ try:
87
+ models['linear_regression'] = joblib.load(linear_path)
88
+ print("✅ Linear Regression model loaded successfully via Hugging Face Hub")
89
+ except Exception as e:
90
+ if "xgboost" in str(e).lower():
91
+ print("❌ Linear Regression model also requires xgboost")
92
+ models['linear_regression'] = None
93
+ else:
94
+ raise e
95
+
96
  except Exception as e:
97
  print(f"❌ Error loading Linear Regression model: {e}")
98
  models['linear_regression'] = None
99
+
100
  return models
101
 
102
  def load_data():
 
111
  df = pd.read_csv(data_path)
112
  print("✅ Data loaded successfully via Hugging Face Hub")
113
  return df
114
+
115
  except Exception as e:
116
  print(f"❌ Error loading data: {e}")
117
  # Fallback to creating sample data
 
124
  towns = ['ANG MO KIO', 'BEDOK', 'TAMPINES', 'WOODLANDS', 'JURONG WEST']
125
  flat_types = ['4 ROOM', '5 ROOM', 'EXECUTIVE']
126
  flat_models = ['Improved', 'Model A', 'New Generation']
127
+
128
  data = []
129
  for _ in range(100):
130
  town = np.random.choice(towns)
 
133
  floor_area = np.random.randint(85, 150)
134
  storey = np.random.randint(1, 25)
135
  age = np.random.randint(0, 40)
136
+
137
  base_price = floor_area * 5000
138
  town_bonus = towns.index(town) * 20000
139
  storey_bonus = storey * 2000
140
  age_discount = age * 1500
141
  flat_type_bonus = flat_types.index(flat_type) * 30000
142
+
143
  resale_price = base_price + town_bonus + storey_bonus - age_discount + flat_type_bonus
144
  resale_price = max(300000, resale_price + np.random.randint(-20000, 20000))
145
+
146
  data.append({
147
  'town': town, 'flat_type': flat_type, 'flat_model': flat_model,
148
  'floor_area_sqm': floor_area, 'storey_level': storey,
149
  'flat_age': age, 'resale_price': resale_price
150
  })
151
+
152
  return pd.DataFrame(data)
153
 
154
  # Preload models and data
 
157
  data = load_data()
158
 
159
  # If models failed to load, create dummy ones
160
# Each entry pairs a model key with the notice printed when that model is
# missing; order matches the original XGBoost-then-Linear-Regression checks.
for model_key, notice in (
    ("xgboost", "⚠️ Creating dummy XGBoost model for demonstration"),
    ("linear_regression", "⚠️ Creating dummy Linear Regression model for demonstration"),
):
    if models.get(model_key) is None:
        print(notice)
        models[model_key] = create_dummy_model(model_key)
167
 
168
def preprocess_input(user_input, model_type='xgboost', transaction_year=2025):
    """Turn the user's form values into a single-row feature array.

    Args:
        user_input: Mapping with keys ``floor_area_sqm``, ``storey_level``,
            ``flat_age``, ``flat_type``, ``town`` and ``flat_model``.
        model_type: Accepted for interface compatibility; not used by the
            current feature layout.
        transaction_year: Value placed in the transaction-year feature.
            Defaults to 2025, the value previously hard-coded here.

    Returns:
        ``numpy`` array of shape ``(1, 9)``.

    Raises:
        KeyError: If ``user_input`` lacks one of the required keys.
    """
    flat_type_mapping = {
        '1 ROOM': 1, '2 ROOM': 2, '3 ROOM': 3, '4 ROOM': 4,
        '5 ROOM': 5, 'EXECUTIVE': 6, 'MULTI-GENERATION': 7,
    }
    town_mapping = {
        'SENGKANG': 0, 'WOODLANDS': 1, 'TAMPINES': 2, 'PUNGGOL': 3,
        'JURONG WEST': 4, 'YISHUN': 5, 'BEDOK': 6, 'HOUGANG': 7,
        'CHOA CHU KANG': 8, 'ANG MO KIO': 9,
    }
    flat_model_mapping = {
        'Model A': 0, 'Improved': 1, 'New Generation': 2,
        'Standard': 3, 'Premium': 4,
    }

    flat_age = user_input['flat_age']

    # Unknown categories fall back to fixed codes: 4 for flat type, 0 for
    # town and 0 for flat model.
    input_features = [
        user_input['floor_area_sqm'],                            # Feature 1
        user_input['storey_level'],                              # Feature 2
        flat_age,                                                # Feature 3
        99 - flat_age,                                           # Feature 4: remaining_lease
        transaction_year,                                        # Feature 5: transaction_year
        flat_type_mapping.get(user_input['flat_type'], 4),       # Feature 6: flat_type_ordinal
        town_mapping.get(user_input['town'], 0),                 # Feature 7: town_encoded
        flat_model_mapping.get(user_input['flat_model'], 0),     # Feature 8: flat_model_encoded
        1,                                                       # Feature 9: placeholder
    ]

    return np.array([input_features])
201
+
202
def create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr):
    """Build a scatter chart of comparable flats with both predictions marked.

    Comparable flats are those matching the user's flat type and town. If
    fewer than 5 match, the comparison widens to the same flat type in any
    town, and the chart title says so.

    Args:
        data: Table of transactions with ``flat_type``, ``town``,
            ``flat_model``, ``floor_area_sqm`` and ``resale_price`` columns,
            or ``None``.
        user_input: Mapping with at least ``flat_type``, ``town`` and
            ``floor_area_sqm``.
        predicted_price_xgb: XGBoost prediction, drawn as a red star.
        predicted_price_lr: Linear Regression prediction, drawn as a blue
            diamond.

    Returns:
        A Plotly figure, or ``None`` when there is no data to plot.
    """
    if data is None or len(data) == 0:
        return None

    same_type = data['flat_type'] == user_input['flat_type']
    similar_properties = data[same_type & (data['town'] == user_input['town'])]
    title = f"Market Position: {user_input['flat_type']} in {user_input['town']}"

    if len(similar_properties) < 5:
        # Too few local comparables: widen to every town and say so in the
        # title, so the chart does not claim to show only the chosen town.
        similar_properties = data[same_type]
        title += " (compared with all towns)"

    if len(similar_properties) == 0:
        return None

    fig = px.scatter(
        similar_properties,
        x='floor_area_sqm',
        y='resale_price',
        color='flat_model',
        title=title,
        labels={'floor_area_sqm': 'Floor Area (sqm)', 'resale_price': 'Resale Price (SGD)'},
    )

    # Overlay one marker per model at the user's floor area.
    fig.add_trace(go.Scatter(
        x=[user_input['floor_area_sqm']],
        y=[predicted_price_xgb],
        mode='markers',
        marker=dict(symbol='star', size=20, color='red',
                    line=dict(width=2, color='darkred')),
        name='XGBoost Prediction',
    ))
    fig.add_trace(go.Scatter(
        x=[user_input['floor_area_sqm']],
        y=[predicted_price_lr],
        mode='markers',
        marker=dict(symbol='diamond', size=20, color='blue',
                    line=dict(width=2, color='darkblue')),
        name='Linear Regression Prediction',
    ))

    fig.update_layout(template="plotly_white", height=400, showlegend=True)
    return fig
237
+
238
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice):
    """Main prediction function for Gradio.

    Runs both models on the user's inputs, reports the price from the
    selected one, and builds a Markdown summary plus a comparison chart.

    Args:
        town: Town name.
        flat_type: Flat type label, e.g. ``"4 ROOM"``.
        flat_model: Flat model label.
        floor_area_sqm: Floor area in square metres.
        storey_level: Storey number.
        flat_age: Age of the flat in years.
        model_choice: ``"XGBoost"`` uses the XGBoost price; any other value
            uses the Linear Regression price.

    Returns:
        Tuple ``(price_text, chart, insights_markdown)``. On failure the
        first and third items carry the error message and the chart is
        ``None``.
    """
    user_input = {
        'town': town,
        'flat_type': flat_type,
        'flat_model': flat_model,
        'floor_area_sqm': floor_area_sqm,
        'storey_level': storey_level,
        'flat_age': flat_age,
    }

    # .get() so a missing entry produces the friendly message below instead
    # of an uncaught KeyError (this check runs outside the try block).
    xgb_model = models.get('xgboost')
    lr_model = models.get('linear_regression')
    if xgb_model is None or lr_model is None:
        return "Error: Models not loaded", None, "Models failed to load. Please check the model files."

    try:
        processed_input = preprocess_input(user_input)

        # Both models are always evaluated so the summary can compare them.
        predicted_price_xgb = max(0, xgb_model.predict(processed_input)[0])
        predicted_price_lr = max(0, lr_model.predict(processed_input)[0])

        final_price = predicted_price_xgb if model_choice == "XGBoost" else predicted_price_lr

        remaining_lease = 99 - flat_age
        price_per_sqm = final_price / floor_area_sqm

        if remaining_lease >= 60:
            financing = "✅ Bank loan eligible"
        elif remaining_lease >= 20:
            financing = "⚠️ HDB loan eligible only"
        else:
            financing = "❌ Limited financing options"

        # Lines are joined without leading indentation so Markdown renders
        # them as headings and lists rather than an indented code block.
        insights = "\n".join([
            "",
            "**Property Summary:**",
            f"- Location: {town}",
            f"- Type: {flat_type}",
            f"- Model: {flat_model}",
            f"- Area: {floor_area_sqm} sqm",
            f"- Floor: Level {storey_level}",
            f"- Age: {flat_age} years",
            f"- Remaining Lease: {remaining_lease} years",
            f"- Price per sqm: ${price_per_sqm:,.0f}",
            "",
            "**Model Predictions:**",
            f"- XGBoost: ${predicted_price_xgb:,.0f}",
            f"- Linear Regression: ${predicted_price_lr:,.0f}",
            f"- Difference: ${abs(predicted_price_xgb - predicted_price_lr):,.0f}",
            "",
            f"**Selected Model: {model_choice}**",
            "",
            "**Financing Eligibility:**",
            financing,
        ])

        # The chart marks both predictions, not just the selected one.
        chart = create_market_insights_chart(data, user_input, predicted_price_xgb, predicted_price_lr)

        return f"${final_price:,.0f}", chart, insights

    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing.
        return f"Error: {str(e)}", None, f"Prediction failed. Error: {str(e)}"
306
 
307
  # Define Gradio interface
308
  towns_list = [
 
326
  floor_area_sqm = gr.Slider(label="Floor Area (sqm)", minimum=30, maximum=200, value=95, step=5)
327
  storey_level = gr.Slider(label="Storey Level", minimum=1, maximum=50, value=8, step=1)
328
  flat_age = gr.Slider(label="Flat Age (years)", minimum=0, maximum=99, value=15, step=1)
329
+ model_choice = gr.Radio(label="Select Model",
330
+ choices=["XGBoost", "Linear Regression"],
331
  value="XGBoost")
332
 
333
  predict_btn = gr.Button("🔮 Predict Price", variant="primary")