# File size: 17,878 Bytes
# 2855f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# -*- coding: utf-8 -*-
# ==================================================
# app.py - Gradio App for Hugging Face Spaces
# Campus Placement Prediction
# ==================================================

import gradio as gr
import pandas as pd
import joblib
import numpy as np
import matplotlib.pyplot as plt # Only needed for figure type hint potentially
import seaborn as sns # Not directly used if images are pre-generated
import os
import warnings

# Suppress all Python warnings process-wide.
warnings.filterwarnings('ignore')

# --- Configuration: Relative Paths for HF Spaces ---
# Every path is relative, so these files must be uploaded to the HF Space
# repository alongside this script.
MODEL_FILENAME = 'placement_model_pipeline.joblib'
LABEL_ENCODER_FILENAME = 'placement_label_encoder.joblib'
FEATURES_FILENAME = 'placement_model_features.joblib'
DATA_FILE = 'Campus_Selection.csv'  # Original data file
PLOT_DIR = 'plots'  # Subdirectory for plots
# The three plot images all live directly inside PLOT_DIR.
FEATURE_IMPORTANCE_PLOT, PLACEMENT_PIE_CHART, CORRELATION_HEATMAP = (
    os.path.join(PLOT_DIR, image_name)
    for image_name in (
        'feature_importance.png',
        'placement_distribution.png',
        'correlation_heatmap.png',
    )
)

# --- Global Variables to Hold Loaded Objects ---
# Placeholders only: the loading sections below overwrite them on success,
# and the rest of the script treats None as "not available".
pipeline = label_encoder = feature_names = df_original = None
df_head = pd.DataFrame()  # Default empty dataframe
dataset_stats = "Dataset information not available."

# --- Load Model and Preprocessing Objects ---
def _load_artifact(path, description):
    """Load one joblib artifact, or return None when its file is absent.

    Args:
        path: Location of the serialized artifact.
        description: Human-readable artifact name used in the log message
            (e.g. ``"Model"``).

    Returns:
        The deserialized object, or ``None`` if ``path`` does not exist.

    Raises:
        Exception: Whatever ``joblib.load`` raises for an unreadable file;
            the caller decides how to report it.
    """
    if not os.path.exists(path):
        print(f"Error: {description} file not found at {path}")
        return None
    # NOTE: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. Only load artifacts that ship with this repository.
    artifact = joblib.load(path)
    print(f"- Loaded: {path}")
    return artifact


print("Attempting to load model artifacts...")
try:
    pipeline = _load_artifact(MODEL_FILENAME, "Model")
    label_encoder = _load_artifact(LABEL_ENCODER_FILENAME, "Label encoder")
    feature_names = _load_artifact(FEATURES_FILENAME, "Feature names")
except Exception as e:
    # Best effort: the app still starts so the non-prediction tabs remain
    # usable; predict_placement reports the missing artifacts to the user.
    print(f"Error loading model artifacts: {e}")
else:
    # Compare against None instead of relying on truthiness: an array-like
    # feature list (numpy array / pandas Index) raises ValueError when used
    # in a boolean context, which would be misreported as a load failure.
    if all(artifact is not None for artifact in (pipeline, label_encoder, feature_names)):
        print("All essential model artifacts loaded successfully.")
    else:
        print("Warning: One or more essential model artifacts failed to load. Prediction functionality may be limited.")


# --- Load Original Data for Overview Tab ---
# Fills df_original, df_head and dataset_stats. On any failure dataset_stats
# carries a human-readable explanation instead of the statistics.
print("Attempting to load original dataset...")
try:
    if not os.path.exists(DATA_FILE):
        print(f"Warning: Original data file '{DATA_FILE}' not found for overview tab.")
        dataset_stats = f"Original dataset file '{DATA_FILE}' not found."
    else:
        df_original = pd.read_csv(DATA_FILE)
        df_head = df_original.head(10)  # sample rows shown in the overview tab
        record_count, column_count = df_original.shape
        dataset_stats = f"**Number of Records:** {record_count}\n\n**Columns:** {column_count}"
        print(f"- Loaded: {DATA_FILE}")
except Exception as load_error:
    # Best effort: the overview tab is optional, so report and carry on.
    print(f"Error loading original dataset: {load_error}")
    dataset_stats = f"Error loading original dataset: {load_error}"

# --- Check if Plot Files Exist (for warnings in UI) ---
# Maps a short plot key to whether its image file is present on disk; the UI
# consults this to decide between showing the image and showing a warning.
plots_exist = {
    plot_key: os.path.exists(plot_path)
    for plot_key, plot_path in (
        ("feature_importance", FEATURE_IMPORTANCE_PLOT),
        ("pie_chart", PLACEMENT_PIE_CHART),
        ("heatmap", CORRELATION_HEATMAP),
    )
}
print(f"Plot file existence check: {plots_exist}")


# --- Define Prediction Function ---
# Display text for the stem part of a feature name (e.g. the "ssc" in "ssc_p").
_STEM_LABELS = {
    'ssc': 'SSC',
    'hsc': 'HSC',
    'mba': 'MBA',
    'etest': 'Employability Test',
    'workex': 'Work Experience',
}
# Display text for a trailing one-letter suffix (e.g. the "p" in "ssc_p").
_SUFFIX_LABELS = {'p': '%', 'b': 'Board', 's': 'Stream', 't': 'Type'}
# Features that must be numeric before they reach the model pipeline.
_NUMERIC_FEATURES = ('ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p')
# Decoded class name that counts as a positive outcome, and the bar colours.
_PLACED_LABEL = 'Placed'
_PLACED_COLOR = '#66b3ff'
_NOT_PLACED_COLOR = '#ff9999'


def _feature_display_label(feature):
    """Turn a raw feature name such as ``ssc_p`` into a label like ``SSC %``."""
    tokens = [token for token in str(feature).split('_') if token]
    suffix = None
    if len(tokens) > 1 and tokens[-1] in _SUFFIX_LABELS:
        suffix = _SUFFIX_LABELS[tokens.pop()]
    # Known stems keep their exact spelling (acronyms stay upper-case);
    # anything else is simply capitalised.
    words = [_STEM_LABELS.get(token.lower(), token.capitalize()) for token in tokens]
    if suffix is not None:
        words.append(suffix)
    return ' '.join(words)


def _build_profile_markdown(values):
    """Return the Markdown summary listing every feature with its input value."""
    # The heading emoji is "student": person + zero-width joiner + graduation
    # cap. Escapes are used so the invisible joiner cannot be lost in editing.
    header = (
        "### \U0001F9D1\u200D\U0001F393 Student Profile Summary\n"
        + "-" * 25 + "\n"
    )
    rows = [
        f"**{_feature_display_label(feature)}:** {value}\n"
        for feature, value in zip(feature_names, values)
    ]
    return header + "".join(rows)


def _build_probability_figure(class_labels, probabilities):
    """Return a bar chart of the per-class probabilities as a Matplotlib figure."""
    # Build the figure with the object-oriented API instead of pyplot: pyplot
    # keeps a global reference to every figure it creates (a leak when one is
    # made per request) and its "current figure" state is shared by all
    # concurrent requests. Imported locally to keep this block self-contained.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(5, 3))  # Smaller plot for UI
    ax = fig.subplots()
    # Colour by class name rather than by position so the colours stay correct
    # whatever order the label encoder stores its classes in.
    bar_colors = [
        _PLACED_COLOR if label == _PLACED_LABEL else _NOT_PLACED_COLOR
        for label in class_labels
    ]
    bars = ax.bar(class_labels, probabilities, color=bar_colors)
    ax.set_ylim(0, 1)
    ax.set_ylabel('Probability')
    ax.set_title('Placement Probability')
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2.0, height, f'{height:.2%}',
                ha='center', va='bottom', fontsize=9)
    fig.tight_layout()
    return fig


def predict_placement(*args):
    """Predict placement status for one student.

    Args:
        *args: One value per entry of the module-level ``feature_names``,
            in that same order.

    Returns:
        A 3-tuple ``(profile_md, result_md, figure)``:

        - ``profile_md``: Markdown summary of the inputs (or an error message
          when the inputs could not be interpreted at all).
        - ``result_md``: Markdown with the predicted class and its confidence,
          an error description, or ``""``.
        - ``figure``: Matplotlib figure with the class probabilities, or
          ``None`` whenever no prediction was produced.

    The function never raises; every failure is reported through the tuple.
    """
    # Check if essential objects are loaded
    if pipeline is None or label_encoder is None or feature_names is None:
        message = "⚠️ **Error:** Model artifacts not loaded correctly. Cannot perform prediction."
        print(message)
        return (message, "", None)  # Return error message and no plot

    # Create a DataFrame from the inputs with correct column names
    try:
        input_data = pd.DataFrame([args], columns=feature_names)
    except ValueError as e:
        message = f"⚠️ **Error:** Input data mismatch with expected features. Details: {e}"
        print(message)
        return (message, "", None)

    profile_md = _build_profile_markdown(args)

    # Convert numerical inputs (sliders/numbers) to numeric types
    try:
        for col in _NUMERIC_FEATURES:
            if col in input_data.columns:
                input_data[col] = pd.to_numeric(input_data[col])
    except (ValueError, TypeError) as e:
        error_msg = f"Error: Invalid numeric value provided. Details: {e}"
        print(error_msg)
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)

    try:
        pred_proba = pipeline.predict_proba(input_data)[0]
        predicted_class_index = int(np.argmax(pred_proba))
        predicted_status = label_encoder.inverse_transform([predicted_class_index])[0]
        confidence = pred_proba[predicted_class_index]

        # Format prediction result
        if predicted_status == _PLACED_LABEL:
            result_md = f"## ✅ Prediction: PLACED\n**Confidence:** {confidence:.2%}"
        else:
            result_md = f"## ❌ Prediction: NOT PLACED\n**Confidence:** {confidence:.2%}"

        fig = _build_probability_figure(
            [str(label) for label in label_encoder.classes_], pred_proba
        )
        return profile_md, result_md, fig

    except Exception as e:
        # UI boundary: surface any model/plotting failure to the user instead
        # of letting the request crash, and log the inputs for diagnosis.
        error_msg = f"An error occurred during prediction: {e}"
        print(f"Error during prediction: {e}")
        print(f"Input data:\n{input_data.to_string()}")
        print(f"Input data types:\n{input_data.dtypes}")
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)


# --- Build Gradio Interface using Blocks ---
# Title shown in the page heading and as the browser-tab title. The leading
# emoji is the graduation cap (U+1F393); it had been stored as mis-decoded
# bytes ("mojibake") and is restored here.
app_title = "🎓 Campus Placement Predictor"
# Introductory Markdown rendered directly below the title.
app_description = """

Predict student placement based on academic performance, background, work experience, and MBA specialization.

Input the details below and click 'Predict'. Explore other tabs for insights.

"""

# Custom CSS handed to gr.Blocks.
css = """

.gradio-container { font-family: 'IBM Plex Sans', sans-serif; max-width: 1200px; margin: auto; }

.gr-button { color: white; border-color: #007bff; background: #007bff; }

footer { visibility: hidden; }

.gr-label { font-weight: bold; }

h1 { text-align: center; }

"""

# Initial slider positions for the percentage inputs (can be adjusted)
default_ssc_p = 70.0
default_hsc_p = 70.0
default_degree_p = 70.0
default_etest_p = 70.0
default_mba_p = 65.0

# Start Gradio Blocks UI Definition
def _show_ui_notice(message):
    """Render ``message`` as a visible warning in the page being built.

    Must be called inside an active ``gr.Blocks`` context. ``gr.Warning`` is
    not used for this: it only produces a pop-up when called from an event
    handler, so a call made while the layout is being built shows nothing.
    """
    gr.Markdown(f"⚠️ **Warning:** {message}")


app_ui = gr.Blocks(
    theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky),
    title=app_title,
    css=css,
)

with app_ui:
    gr.Markdown(f"<h1>{app_title}</h1>")
    gr.Markdown(app_description)

    # Define Input Components (organized), keyed by the model's feature name
    input_components_map = {}
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("**Personal & Secondary**")
            input_components_map['gender'] = gr.Radio(label="Gender", choices=['M', 'F'], value='M')
            input_components_map['ssc_p'] = gr.Slider(label="SSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_ssc_p)
            input_components_map['ssc_b'] = gr.Dropdown(label="SSC Board", choices=['Central', 'Others'], value='Central')
        with gr.Column(scale=1):
            gr.Markdown("**Higher Secondary**")
            input_components_map['hsc_p'] = gr.Slider(label="HSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_hsc_p)
            input_components_map['hsc_b'] = gr.Dropdown(label="HSC Board", choices=['Central', 'Others'], value='Central')
            input_components_map['hsc_s'] = gr.Dropdown(label="HSC Stream", choices=['Commerce', 'Science', 'Arts'], value='Commerce')
        with gr.Column(scale=1):
            gr.Markdown("**Degree & Experience**")
            input_components_map['degree_p'] = gr.Slider(label="Degree Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_degree_p)
            input_components_map['degree_t'] = gr.Dropdown(label="Degree Type", choices=['Comm&Mgmt', 'Sci&Tech', 'Others'], value='Comm&Mgmt')
            input_components_map['workex'] = gr.Radio(label="Work Experience", choices=['No', 'Yes'], value='No')
        with gr.Column(scale=1):
            gr.Markdown("**Employability & MBA**")
            input_components_map['etest_p'] = gr.Slider(label="Employability Test %", minimum=0.0, maximum=100.0, step=0.1, value=default_etest_p)
            input_components_map['specialisation'] = gr.Dropdown(label="MBA Specialization", choices=['Mkt&Fin', 'Mkt&HR'], value='Mkt&Fin')
            input_components_map['mba_p'] = gr.Slider(label="MBA Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_mba_p)

    # --- Order Input Components based on loaded feature_names ---
    # Explicit None/length checks instead of truthiness: an array-like feature
    # list (numpy array / pandas Index) cannot be used in a boolean context.
    has_feature_names = feature_names is not None and len(feature_names) > 0
    if has_feature_names:
        ordered_input_components = [
            input_components_map[name] for name in feature_names if name in input_components_map
        ]
        missing_features = [str(name) for name in feature_names if name not in input_components_map]
        for name in missing_features:
            print(f"Warning: UI component for feature '{name}' not defined in input_components_map.")
        if missing_features:
            _show_ui_notice(f"Missing UI components for features: {', '.join(missing_features)}. Predictions might fail.")
    else:
        # Fallback if feature_names couldn't load - order might be wrong!
        ordered_input_components = list(input_components_map.values())
        _show_ui_notice("Feature names file not loaded. Input order may be incorrect, predictions might fail.")

    predict_button = gr.Button("🚀 Predict Placement Status")

    # Define Output Components within Tabs
    with gr.Tabs():
        with gr.TabItem("📊 Prediction Results"):
            with gr.Row():
                out_profile = gr.Markdown(label="Input Summary")
                with gr.Column():
                    out_prediction = gr.Markdown(label="Prediction")
                    out_plot = gr.Plot(label="Probability Distribution")  # Displays the matplotlib fig

        with gr.TabItem("💡 Feature Importance"):
            gr.Markdown("## Feature Importance Analysis")
            gr.Markdown("Shows which factors most influence the placement prediction (based on the trained model). Higher values indicate greater influence.")
            if plots_exist["feature_importance"]:
                gr.Image(FEATURE_IMPORTANCE_PLOT, label="Feature Importance Plot", show_label=False)
            else:
                _show_ui_notice(f"Feature importance plot not found at '{FEATURE_IMPORTANCE_PLOT}'. Please ensure it was generated and uploaded.")
            gr.Markdown("""

            *Insights based on typical results for this type of problem:*

            - **Academic Performance:** SSC %, HSC %, and Degree % are often strong predictors.

            - **Employability Test:** Performance in standardized tests (etest_p) is usually critical.

            - **Work Experience:** Can provide a significant advantage.

            - **MBA Performance:** MBA % reinforces the importance of consistent academic achievement.

            """)

        with gr.TabItem("📈 Dataset Overview"):
            gr.Markdown("## Dataset Overview")
            gr.Markdown("A quick look at the data used to train the model.")
            with gr.Row():
                with gr.Column(scale=2):  # Give more space to dataframe
                    gr.Markdown("**Data Sample**")
                    if df_original is not None:
                        gr.DataFrame(df_head, label="First 10 Rows", row_count=(10, "fixed"), wrap=True, interactive=False)
                    else:
                        _show_ui_notice(f"Original dataset '{DATA_FILE}' not found.")
                    gr.Markdown("**Basic Stats**")
                    gr.Markdown(dataset_stats)
                with gr.Column(scale=1):
                    gr.Markdown("**Placement Distribution**")
                    if plots_exist["pie_chart"]:
                        gr.Image(PLACEMENT_PIE_CHART, label="Placement Distribution", show_label=False)
                    else:
                        _show_ui_notice(f"Placement distribution plot not found at '{PLACEMENT_PIE_CHART}'.")
                    gr.Markdown("**Correlation Analysis**")
                    if plots_exist["heatmap"]:
                        gr.Image(CORRELATION_HEATMAP, label="Correlation Heatmap", show_label=False)
                    else:
                        _show_ui_notice(f"Correlation heatmap not found at '{CORRELATION_HEATMAP}'.")

    # --- Link Button Click to Function ---
    predict_button.click(
        fn=predict_placement,
        inputs=ordered_input_components,  # Use the ordered list
        outputs=[out_profile, out_prediction, out_plot]
    )

    # --- Add Examples ---
    # Only add examples if we know the correct feature order and have inputs.
    if has_feature_names and ordered_input_components:
        # Column order in which the example rows below are written.
        example_feature_order = (
            'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',
            'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',
        )
        example_rows = [
            ['M', 67.0, 'Others', 91.0, 'Others', 'Commerce', 58.0, 'Sci&Tech', 'No', 55.0, 'Mkt&HR', 58.8],  # Row 1 (Placed)
            ['M', 56.0, 'Central', 52.0, 'Central', 'Science', 52.0, 'Sci&Tech', 'No', 66.0, 'Mkt&HR', 59.43],  # Row 4 (Not Placed)
            ['F', 77.0, 'Central', 87.0, 'Central', 'Commerce', 59.0, 'Comm&Mgmt', 'No', 68.0, 'Mkt&Fin', 68.63],  # Row 14 (Placed)
            ['F', 52.0, 'Central', 64.0, 'Central', 'Commerce', 61.0, 'Comm&Mgmt', 'No', 55.0, 'Mkt&Fin', 62.93],  # Row 187 (Not Placed)
            ['M', 84.0, 'Others', 90.9, 'Others', 'Science', 64.5, 'Sci&Tech', 'No', 86.04, 'Mkt&Fin', 59.42],  # Row 79 (Placed)
        ]
        # Re-order every row to follow feature_names, exactly as
        # ordered_input_components was built, so each value lands on its own
        # component even when the model's feature order differs from the
        # order the rows are written in.
        final_examples = []
        for row in example_rows:
            value_by_feature = dict(zip(example_feature_order, row))
            final_examples.append(
                [value_by_feature[name] for name in feature_names if name in value_by_feature]
            )

        gr.Examples(
            examples=final_examples,
            inputs=ordered_input_components,
            outputs=[out_profile, out_prediction, out_plot],
            fn=predict_placement,
            cache_examples=False  # Caching might be ok if function is pure
        )

# --- Launch the App ---
# The server is started only when this file is executed as a script;
# importing the module merely builds `app_ui`.
# NOTE(review): debug=True applies to every run that enters this guard,
# including a hosting platform that executes the file as __main__ - confirm
# that is intended for production.
if __name__ == "__main__":
    launch_options = {"debug": True}  # Use debug=True for local testing
    print("Launching Gradio app locally...")
    app_ui.launch(**launch_options)