# File size: 17,878 Bytes (2855f37)
# -*- coding: utf-8 -*-
# ==================================================
# app.py - Gradio App for Hugging Face Spaces
# Campus Placement Prediction
# ==================================================
import gradio as gr
import pandas as pd
import joblib
import numpy as np
import matplotlib.pyplot as plt # Only needed for figure type hint potentially
import seaborn as sns # Not directly used if images are pre-generated
import os
import warnings
warnings.filterwarnings('ignore')
# --- Configuration: paths relative to the Hugging Face Space repository root ---
# Every file named below must be uploaded to the Space repository.

# Serialized model artifacts: pipeline, label encoder, feature-name list.
MODEL_FILENAME, LABEL_ENCODER_FILENAME, FEATURES_FILENAME = (
    'placement_model_pipeline.joblib',
    'placement_label_encoder.joblib',
    'placement_model_features.joblib',
)

# Original data file shown on the overview tab.
DATA_FILE = 'Campus_Selection.csv'

# Pre-generated plot images live in this subdirectory.
PLOT_DIR = 'plots'
FEATURE_IMPORTANCE_PLOT, PLACEMENT_PIE_CHART, CORRELATION_HEATMAP = (
    os.path.join(PLOT_DIR, plot_file)
    for plot_file in (
        'feature_importance.png',
        'placement_distribution.png',
        'correlation_heatmap.png',
    )
)
# --- Module-level placeholders, filled in by the loading steps further down ---
# Model artifacts and the raw dataset start out as None ("not loaded").
pipeline = label_encoder = feature_names = df_original = None

# Overview-tab fallbacks used when the dataset is unavailable.
df_head = pd.DataFrame()  # empty preview table
dataset_stats = "Dataset information not available."
# --- Load Model and Preprocessing Objects ---
def _load_artifact(path, description):
    """Load one joblib artifact, or return None when the file is absent.

    Args:
        path: Location of the serialized artifact.
        description: Human-readable artifact name used in the log message.

    Returns:
        The deserialized object, or None if ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f"Error: {description} file not found at {path}")
        return None
    # NOTE: joblib.load unpickles arbitrary objects; only ship artifact files
    # that come from a trusted training run.
    artifact = joblib.load(path)
    print(f"- Loaded: {path}")
    return artifact


print("Attempting to load model artifacts...")
try:
    pipeline = _load_artifact(MODEL_FILENAME, "Model")
    label_encoder = _load_artifact(LABEL_ENCODER_FILENAME, "Label encoder")
    feature_names = _load_artifact(FEATURES_FILENAME, "Feature names")

    # Compare against None explicitly: truth-testing an array-like feature
    # list (NumPy array, pandas Index) raises, and predict_placement also
    # uses `is None` to decide whether the artifacts are usable.
    if all(artifact is not None for artifact in (pipeline, label_encoder, feature_names)):
        print("All essential model artifacts loaded successfully.")
    else:
        print("Warning: One or more essential model artifacts failed to load. Prediction functionality may be limited.")
except Exception as e:
    # Keep the app importable even when an artifact is corrupt; whichever
    # object failed to load simply keeps its previous value.
    print(f"Error loading model artifacts: {e}")
# --- Load Original Data for Overview Tab ---
# Reads the CSV once at start-up; on any problem the overview tab falls back
# to a textual explanation stored in dataset_stats.
print("Attempting to load original dataset...")
try:
    if not os.path.exists(DATA_FILE):
        print(f"Warning: Original data file '{DATA_FILE}' not found for overview tab.")
        dataset_stats = f"Original dataset file '{DATA_FILE}' not found."
    else:
        df_original = pd.read_csv(DATA_FILE)
        df_head = df_original.head(10)  # preview rows for the overview table
        # shape is (row count, column count)
        dataset_stats = "**Number of Records:** {}\n\n**Columns:** {}".format(*df_original.shape)
        print(f"- Loaded: {DATA_FILE}")
except Exception as load_error:
    print(f"Error loading original dataset: {load_error}")
    dataset_stats = f"Error loading original dataset: {load_error}"
# --- Record which pre-generated plot images are present ---
# The UI consults this mapping to decide between showing an image and a notice.
plots_exist = {
    plot_key: os.path.exists(plot_path)
    for plot_key, plot_path in (
        ("feature_importance", FEATURE_IMPORTANCE_PLOT),
        ("pie_chart", PLACEMENT_PIE_CHART),
        ("heatmap", CORRELATION_HEATMAP),
    )
}
print(f"Plot file existence check: {plots_exist}")
# --- Define Prediction Function ---
# Display labels for the dataset columns shown in the profile summary.
# A lookup table keeps acronyms upper-case (str.title() would give "Ssc %").
_FEATURE_LABELS = {
    'gender': 'Gender',
    'ssc_p': 'SSC %',
    'ssc_b': 'SSC Board',
    'hsc_p': 'HSC %',
    'hsc_b': 'HSC Board',
    'hsc_s': 'HSC Stream',
    'degree_p': 'Degree %',
    'degree_t': 'Degree Type',
    'workex': 'Work Experience',
    'etest_p': 'Employability Test %',
    'specialisation': 'Specialisation',
    'mba_p': 'MBA %',
}

# Percentage columns that are coerced to numeric before prediction.
_NUMERIC_FEATURES = ('ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p')

# Bar colours, applied to the label encoder's classes in order.
_PROBABILITY_BAR_COLORS = ('#ff9999', '#66b3ff')


def _feature_label(feature):
    """Return the display label for a feature column name."""
    return _FEATURE_LABELS.get(feature, str(feature).replace('_', ' ').title())


def _build_probability_figure(statuses, probabilities):
    """Return a bar chart of the probability predicted for each class label."""
    # Build the figure without pyplot: pyplot keeps every figure it creates in
    # a global registry until it is closed, so creating one per request would
    # accumulate figures for the lifetime of the server.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(5, 3))  # small plot for the UI
    ax = fig.subplots()
    bar_colors = [
        _PROBABILITY_BAR_COLORS[i % len(_PROBABILITY_BAR_COLORS)]
        for i in range(len(statuses))
    ]
    bars = ax.bar(statuses, probabilities, color=bar_colors)
    ax.set_ylim(0, 1)
    ax.set_ylabel('Probability')
    ax.set_title('Placement Probability')
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height, f'{height:.2%}',
                ha='center', va='bottom', fontsize=9)
    fig.tight_layout()
    return fig


def predict_placement(*args):
    """Predict placement status for one student profile.

    Args:
        *args: One value per entry of the module-level ``feature_names``,
            in the same order.

    Returns:
        A 3-tuple ``(profile_md, result_md, figure)``:
        - profile_md: Markdown summary of the inputs, or an error message
          when the artifacts are missing or the inputs do not match.
        - result_md: Markdown prediction result, a prediction-error message,
          or ``""`` when no prediction was attempted.
        - figure: Matplotlib figure of class probabilities, or ``None`` when
          no prediction could be made.
    """
    # Check if essential objects are loaded
    if pipeline is None or label_encoder is None or feature_names is None:
        message = "⚠️ **Error:** Model artifacts not loaded correctly. Cannot perform prediction."
        print(message)
        return (message, "", None)

    # Create a one-row DataFrame with the column names the pipeline expects
    try:
        input_data = pd.DataFrame([args], columns=feature_names)
    except ValueError as e:
        message = f"⚠️ **Error:** Input data mismatch with expected features. Details: {e}"
        print(message)
        return (message, "", None)

    # Profile summary: one list item per feature, because single newlines
    # inside a Markdown paragraph would be merged into one line.
    summary_lines = ["### 🧑‍🎓 Student Profile Summary", ""]
    summary_lines.extend(
        f"- **{_feature_label(feature)}:** {value}"
        for feature, value in zip(feature_names, args)
    )
    profile_md = "\n".join(summary_lines) + "\n"

    # Convert numerical inputs (sliders/numbers) to numeric types
    try:
        for col in _NUMERIC_FEATURES:
            if col in input_data.columns:
                input_data[col] = pd.to_numeric(input_data[col])
    except ValueError as e:
        error_msg = f"Error: Invalid numeric value provided. Details: {e}"
        print(error_msg)
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)

    try:
        pred_proba = pipeline.predict_proba(input_data)[0]
        # NOTE(review): assumes the probability columns follow the order of
        # label_encoder.classes_ - confirm against the training code.
        predicted_class_index = int(np.argmax(pred_proba))
        predicted_status = label_encoder.inverse_transform([predicted_class_index])[0]
        confidence = pred_proba[predicted_class_index]

        if predicted_status == 'Placed':
            result_md = f"## ✅ Prediction: PLACED\n**Confidence:** {confidence:.2%}"
        else:
            result_md = f"## ❌ Prediction: NOT PLACED\n**Confidence:** {confidence:.2%}"

        fig = _build_probability_figure(label_encoder.classes_, pred_proba)
        return profile_md, result_md, fig
    except Exception as e:
        # Report the failure in the UI and log the inputs for debugging.
        error_msg = f"An error occurred during prediction: {e}"
        print(f"Error during prediction: {e}")
        print(f"Input data:\n{input_data.to_string()}")
        print(f"Input data types:\n{input_data.dtypes}")
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)
# --- Static text, styling and slider defaults for the Gradio interface ---
app_title = "π Campus Placement Predictor"

# Introductory text rendered under the page title.
app_description = (
    "\n"
    "Predict student placement based on academic performance, background, work experience, and MBA specialization.\n"
    "Input the details below and click 'Predict'. Explore other tabs for insights.\n"
)

# Custom CSS handed to gr.Blocks; one rule per line.
css = "\n".join([
    "",
    ".gradio-container { font-family: 'IBM Plex Sans', sans-serif; max-width: 1200px; margin: auto; }",
    ".gr-button { color: white; border-color: #007bff; background: #007bff; }",
    "footer { visibility: hidden; }",
    ".gr-label { font-weight: bold; }",
    "h1 { text-align: center; }",
    "",
])

# Initial slider positions (can be adjusted).
default_ssc_p = default_hsc_p = default_degree_p = default_etest_p = 70.0
default_mba_p = 65.0
# Start Gradio Blocks UI Definition
def _show_build_notice(message):
    """Render a warning as Markdown inside the layout currently being built.

    gr.Warning is aimed at the user who triggered an event; while the layout
    is being constructed there is no such event, so a gr.Warning call here
    would not put anything on the page.
    """
    gr.Markdown(f"**Warning:** {message}")


# NOTE(review): the leading glyphs in some button/tab labels below look like
# mis-decoded emoji. They are left untouched because the intended characters
# cannot be determined from this file - restore them from the original source.
app_ui = gr.Blocks(
    theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky),
    title=app_title,
    css=css,
)
with app_ui:
    gr.Markdown(f"<h1>{app_title}</h1>")
    gr.Markdown(app_description)

    # Input widgets, keyed by the dataset column each one feeds.
    input_components_map = {}
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("**Personal & Secondary**")
            input_components_map['gender'] = gr.Radio(label="Gender", choices=['M', 'F'], value='M')
            input_components_map['ssc_p'] = gr.Slider(label="SSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_ssc_p)
            input_components_map['ssc_b'] = gr.Dropdown(label="SSC Board", choices=['Central', 'Others'], value='Central')
        with gr.Column(scale=1):
            gr.Markdown("**Higher Secondary**")
            input_components_map['hsc_p'] = gr.Slider(label="HSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_hsc_p)
            input_components_map['hsc_b'] = gr.Dropdown(label="HSC Board", choices=['Central', 'Others'], value='Central')
            input_components_map['hsc_s'] = gr.Dropdown(label="HSC Stream", choices=['Commerce', 'Science', 'Arts'], value='Commerce')
        with gr.Column(scale=1):
            gr.Markdown("**Degree & Experience**")
            input_components_map['degree_p'] = gr.Slider(label="Degree Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_degree_p)
            input_components_map['degree_t'] = gr.Dropdown(label="Degree Type", choices=['Comm&Mgmt', 'Sci&Tech', 'Others'], value='Comm&Mgmt')
            input_components_map['workex'] = gr.Radio(label="Work Experience", choices=['No', 'Yes'], value='No')
        with gr.Column(scale=1):
            gr.Markdown("**Employability & MBA**")
            input_components_map['etest_p'] = gr.Slider(label="Employability Test %", minimum=0.0, maximum=100.0, step=0.1, value=default_etest_p)
            input_components_map['specialisation'] = gr.Dropdown(label="MBA Specialization", choices=['Mkt&Fin', 'Mkt&HR'], value='Mkt&Fin')
            input_components_map['mba_p'] = gr.Slider(label="MBA Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_mba_p)

    # --- Order Input Components based on loaded feature_names ---
    # list() also accepts array-like feature names, whose truth value cannot
    # be tested directly.
    known_feature_names = list(feature_names) if feature_names is not None else []
    ordered_input_components = []
    ordered_feature_names = []  # names matching ordered_input_components, same order
    if known_feature_names:
        missing_features = []
        for name in known_feature_names:
            component = input_components_map.get(name)
            if component is not None:
                ordered_input_components.append(component)
                ordered_feature_names.append(name)
            else:
                missing_features.append(name)
                print(f"Warning: UI component for feature '{name}' not defined in input_components_map.")
        if missing_features:
            _show_build_notice(f"Missing UI components for features: {', '.join(missing_features)}. Predictions might fail.")
    else:
        # Fallback if feature_names couldn't load - order might be wrong!
        ordered_input_components = list(input_components_map.values())
        _show_build_notice("Feature names file not loaded. Input order may be incorrect, predictions might fail.")

    predict_button = gr.Button("π Predict Placement Status")

    # Define Output Components within Tabs
    with gr.Tabs():
        with gr.TabItem("π Prediction Results"):
            with gr.Row():
                out_profile = gr.Markdown(label="Input Summary")
                with gr.Column():
                    out_prediction = gr.Markdown(label="Prediction")
                    out_plot = gr.Plot(label="Probability Distribution")  # displays the matplotlib figure

        with gr.TabItem("π‘ Feature Importance"):
            gr.Markdown("## Feature Importance Analysis")
            gr.Markdown("Shows which factors most influence the placement prediction (based on the trained model). Higher values indicate greater influence.")
            if plots_exist["feature_importance"]:
                gr.Image(FEATURE_IMPORTANCE_PLOT, label="Feature Importance Plot", show_label=False)
            else:
                _show_build_notice(f"Feature importance plot not found at '{FEATURE_IMPORTANCE_PLOT}'. Please ensure it was generated and uploaded.")
            gr.Markdown(
                "\n"
                "*Insights based on typical results for this type of problem:*\n"
                "- **Academic Performance:** SSC %, HSC %, and Degree % are often strong predictors.\n"
                "- **Employability Test:** Performance in standardized tests (etest_p) is usually critical.\n"
                "- **Work Experience:** Can provide a significant advantage.\n"
                "- **MBA Performance:** MBA % reinforces the importance of consistent academic achievement.\n"
            )

        with gr.TabItem("π Dataset Overview"):
            gr.Markdown("## Dataset Overview")
            gr.Markdown("A quick look at the data used to train the model.")
            with gr.Row():
                with gr.Column(scale=2):  # give more space to the dataframe
                    gr.Markdown("**Data Sample**")
                    if df_original is not None:
                        gr.DataFrame(df_head, label="First 10 Rows", row_count=(10, "fixed"), wrap=True, interactive=False)
                    else:
                        _show_build_notice(f"Original dataset '{DATA_FILE}' not found.")
                    gr.Markdown("**Basic Stats**")
                    gr.Markdown(dataset_stats)
                with gr.Column(scale=1):
                    gr.Markdown("**Placement Distribution**")
                    if plots_exist["pie_chart"]:
                        gr.Image(PLACEMENT_PIE_CHART, label="Placement Distribution", show_label=False)
                    else:
                        _show_build_notice(f"Placement distribution plot not found at '{PLACEMENT_PIE_CHART}'.")
                    gr.Markdown("**Correlation Analysis**")
                    if plots_exist["heatmap"]:
                        gr.Image(CORRELATION_HEATMAP, label="Correlation Heatmap", show_label=False)
                    else:
                        _show_build_notice(f"Correlation heatmap not found at '{CORRELATION_HEATMAP}'.")

    # --- Link Button Click to Function ---
    predict_button.click(
        fn=predict_placement,
        inputs=ordered_input_components,  # use the ordered list
        outputs=[out_profile, out_prediction, out_plot]
    )

    # --- Add Examples ---
    # Only offered when the feature order is known. Rows are written in the
    # column order below and then re-ordered by name, so they stay aligned
    # with ordered_input_components whatever order feature_names uses.
    example_columns = [
        'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',
        'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',
    ]
    example_rows = [
        ['M', 67.0, 'Others', 91.0, 'Others', 'Commerce', 58.0, 'Sci&Tech', 'No', 55.0, 'Mkt&HR', 58.8],  # Row 1 (Placed)
        ['M', 56.0, 'Central', 52.0, 'Central', 'Science', 52.0, 'Sci&Tech', 'No', 66.0, 'Mkt&HR', 59.43],  # Row 4 (Not Placed)
        ['F', 77.0, 'Central', 87.0, 'Central', 'Commerce', 59.0, 'Comm&Mgmt', 'No', 68.0, 'Mkt&Fin', 68.63],  # Row 14 (Placed)
        ['F', 52.0, 'Central', 64.0, 'Central', 'Commerce', 61.0, 'Comm&Mgmt', 'No', 55.0, 'Mkt&Fin', 62.93],  # Row 187 (Not Placed)
        ['M', 84.0, 'Others', 90.9, 'Others', 'Science', 64.5, 'Sci&Tech', 'No', 86.04, 'Mkt&Fin', 59.42],  # Row 79 (Placed)
    ]
    if ordered_feature_names and all(name in example_columns for name in ordered_feature_names):
        column_position = {name: index for index, name in enumerate(example_columns)}
        final_examples = [
            [row[column_position[name]] for name in ordered_feature_names]
            for row in example_rows
        ]
        gr.Examples(
            examples=final_examples,
            inputs=ordered_input_components,
            outputs=[out_profile, out_prediction, out_plot],
            fn=predict_placement,
            cache_examples=False
        )
# --- Launch the App ---
# The server is started only when this file is executed as a script;
# importing the module builds `app_ui` without launching it.
def _launch_app():
    """Announce the start-up on stdout and launch the UI in debug mode."""
    print("Launching Gradio app locally...")
    app_ui.launch(debug=True)


if __name__ == "__main__":
    _launch_app()