|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import warnings

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.figure import Figure
|
|
|
|
|
|
# Silence every warning category for the whole process (library deprecation
# noise included). NOTE(review): this also hides warnings emitted by our own
# code, so problems surfaced only through `warnings.warn` will go unseen.
warnings.filterwarnings('ignore')

# --- Artifact and data locations (relative to the working directory) ---
MODEL_FILENAME = 'placement_model_pipeline.joblib'
LABEL_ENCODER_FILENAME = 'placement_label_encoder.joblib'
FEATURES_FILENAME = 'placement_model_features.joblib'
DATA_FILE = 'Campus_Selection.csv'

# --- Pre-rendered plot images shown in the insight tabs ---
PLOT_DIR = 'plots'
FEATURE_IMPORTANCE_PLOT = os.path.join(PLOT_DIR, 'feature_importance.png')
PLACEMENT_PIE_CHART = os.path.join(PLOT_DIR, 'placement_distribution.png')
CORRELATION_HEATMAP = os.path.join(PLOT_DIR, 'correlation_heatmap.png')
|
|
|
|
|
|
|
|
|
# Module-level state, populated by the loading steps further down the file.
# Each value keeps its placeholder when the corresponding file cannot be read.
pipeline = None          # fitted model pipeline loaded from MODEL_FILENAME
label_encoder = None     # target label encoder loaded from LABEL_ENCODER_FILENAME
feature_names = None     # model input column names loaded from FEATURES_FILENAME
df_original = None       # full dataset read from DATA_FILE
df_head = pd.DataFrame()  # first rows of the dataset, shown in the overview tab
dataset_stats = "Dataset information not available."  # Markdown summary text
|
|
|
|
|
|
|
|
|
def _load_artifact(path, description):
    """Load one joblib artifact, returning ``None`` when it is unavailable.

    Args:
        path: Location of the ``.joblib`` file.
        description: Human-readable name used in the "not found" message.

    Returns:
        The deserialised object, or ``None`` if the file is missing or
        cannot be loaded. Every outcome is reported on stdout.
    """
    if not os.path.exists(path):
        print(f"Error: {description} file not found at {path}")
        return None
    try:
        # NOTE: joblib.load unpickles arbitrary Python objects. Only point
        # these paths at artifacts produced by a trusted training run.
        artifact = joblib.load(path)
    except Exception as e:
        # Startup boundary: report and continue so the UI can still come up
        # and the remaining artifacts still get their chance to load.
        print(f"Error loading model artifacts: {e}")
        return None
    print(f"- Loaded: {path}")
    return artifact


print("Attempting to load model artifacts...")
pipeline = _load_artifact(MODEL_FILENAME, "Model")
label_encoder = _load_artifact(LABEL_ENCODER_FILENAME, "Label encoder")
feature_names = _load_artifact(FEATURES_FILENAME, "Feature names")

if feature_names is not None:
    # Normalise to a plain list: array-like containers (NumPy array, pandas
    # Index) raise on truth-testing, which the rest of the file relies on.
    try:
        feature_names = list(feature_names)
    except TypeError as e:
        print(f"Error loading model artifacts: {e}")
        feature_names = None

if pipeline is not None and label_encoder is not None and feature_names:
    print("All essential model artifacts loaded successfully.")
else:
    print("Warning: One or more essential model artifacts failed to load. Prediction functionality may be limited.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the raw dataset for the "Dataset Overview" tab. Failure is not fatal:
# the tab falls back to the message stored in `dataset_stats`.
print("Attempting to load original dataset...")
try:
    if not os.path.exists(DATA_FILE):
        print(f"Warning: Original data file '{DATA_FILE}' not found for overview tab.")
        dataset_stats = f"Original dataset file '{DATA_FILE}' not found."
    else:
        df_original = pd.read_csv(DATA_FILE)
        df_head = df_original.head(10)
        dataset_stats = (
            f"**Number of Records:** {len(df_original)}\n\n"
            f"**Columns:** {len(df_original.columns)}"
        )
        print(f"- Loaded: {DATA_FILE}")
except Exception as e:
    # Startup boundary: surface the reason in both the console and the UI text.
    print(f"Error loading original dataset: {e}")
    dataset_stats = f"Error loading original dataset: {e}"
|
|
|
|
|
|
|
|
|
# Record once, at startup, which pre-rendered plot images are present so the
# layout code can choose between showing the image and showing a notice.
plots_exist = {
    "feature_importance": os.path.exists(FEATURE_IMPORTANCE_PLOT),
    "pie_chart": os.path.exists(PLACEMENT_PIE_CHART),
    "heatmap": os.path.exists(CORRELATION_HEATMAP),
}
print(f"Plot file existence check: {plots_exist}")
|
|
|
|
|
|
|
|
|
|
|
|
# Ordered (old, new) substitutions that expand the dataset's terse column
# names into readable text; applied before title-casing.
_LABEL_SUBSTITUTIONS = (
    ('_p', ' %'),
    ('_b', ' Board'),
    ('_s', ' Stream'),
    ('_t', ' Type'),
    ('workex', 'Work Experience'),
    ('etest', 'Employability Test'),
    ('_', ' '),
)

# Acronyms that str.title() would otherwise flatten to "Ssc", "Hsc", "Mba".
_LABEL_ACRONYMS = ('SSC', 'HSC', 'MBA')

# Feature columns that must be numeric before they reach the pipeline.
_NUMERIC_FEATURES = ('ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p')

# Bar colours for the probability chart, assigned by class index.
_BAR_COLORS = ('#ff9999', '#66b3ff')


def _format_feature_label(feature):
    """Turn a raw feature name such as ``'ssc_p'`` into a label such as ``'SSC %'``."""
    label = feature
    for old, new in _LABEL_SUBSTITUTIONS:
        label = label.replace(old, new)
    label = label.title()
    # Restore the acronyms *after* title-casing; doing it before has no effect.
    for acronym in _LABEL_ACRONYMS:
        label = label.replace(acronym.title(), acronym)
    return label


def _build_probability_figure(statuses, probabilities):
    """Return a bar chart of class probabilities as a standalone Figure.

    The figure is built with ``matplotlib.figure.Figure`` rather than pyplot,
    so it is never registered in pyplot's global figure list and is released
    as soon as the caller drops its reference.
    """
    fig = Figure(figsize=(5, 3))
    ax = fig.subplots()
    bar_colors = [_BAR_COLORS[i % len(_BAR_COLORS)] for i in range(len(statuses))]
    bars = ax.bar(statuses, probabilities, color=bar_colors)
    ax.set_ylim(0, 1)
    ax.set_ylabel('Probability')
    ax.set_title('Placement Probability')
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.,
            height,
            f'{height:.2%}',
            ha='center',
            va='bottom',
            fontsize=9,
        )
    fig.tight_layout()
    return fig


def predict_placement(*args):
    """Predict placement status for one student.

    Args:
        *args: Feature values, positionally aligned with the module-level
            ``feature_names`` list.

    Returns:
        A 3-tuple ``(profile_md, result_md, figure)``:

        - Profile Summary (Markdown)
        - Prediction Result (Markdown); an error description on failure
        - Probability Plot (Matplotlib Figure), or ``None`` on failure

        Errors are reported through the returned Markdown; nothing is raised.
    """
    if pipeline is None or label_encoder is None or feature_names is None:
        message = "⚠️ **Error:** Model artifacts not loaded correctly. Cannot perform prediction."
        print(message)
        return (message, "", None)

    # A wrong number of inputs surfaces here as a ValueError from pandas.
    try:
        input_data = pd.DataFrame([args], columns=feature_names)
    except ValueError as e:
        message = f"⚠️ **Error:** Input data mismatch with expected features. Details: {e}"
        print(message)
        return (message, "", None)

    profile_md = "### 🧑‍🎓 Student Profile Summary\n" + "-" * 25 + "\n"
    profile_md += "".join(
        f"**{_format_feature_label(feature)}:** {value}\n"
        for feature, value in zip(feature_names, args)
    )

    try:
        for col in _NUMERIC_FEATURES:
            if col in input_data.columns:
                input_data[col] = pd.to_numeric(input_data[col])
    except ValueError as e:
        error_msg = f"Error: Invalid numeric value provided. Details: {e}"
        print(error_msg)
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)

    try:
        pred_proba = pipeline.predict_proba(input_data)[0]
        predicted_class_index = np.argmax(pred_proba)
        predicted_status = label_encoder.inverse_transform([predicted_class_index])[0]
        confidence = pred_proba[predicted_class_index]

        if predicted_status == 'Placed':
            result_md = f"## ✅ Prediction: PLACED\n**Confidence:** {confidence:.2%}"
        else:
            result_md = f"## ❌ Prediction: NOT PLACED\n**Confidence:** {confidence:.2%}"

        fig = _build_probability_figure(label_encoder.classes_, pred_proba)
        return profile_md, result_md, fig
    except Exception as e:
        # Handler boundary: keep the UI responsive and log enough to debug.
        error_msg = f"An error occurred during prediction: {e}"
        print(f"Error during prediction: {e}")
        print(f"Input data:\n{input_data.to_string()}")
        print(f"Input data types:\n{input_data.dtypes}")
        return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None)
|
|
|
|
|
|
|
|
|
|
|
|
# --- Static UI text and styling ---

# NOTE(review): the leading glyph is a mis-decoded emoji whose original code
# point cannot be recovered from this file; restore it from the upstream source.
app_title = "π Campus Placement Predictor"

app_description = """
Predict student placement based on academic performance, background, work experience, and MBA specialization.
Input the details below and click 'Predict'. Explore other tabs for insights.
"""

# Custom CSS passed to gr.Blocks: page width, button colours, hidden footer.
css = """
.gradio-container { font-family: 'IBM Plex Sans', sans-serif; max-width: 1200px; margin: auto; }
.gr-button { color: white; border-color: #007bff; background: #007bff; }
footer { visibility: hidden; }
.gr-label { font-weight: bold; }
h1 { text-align: center; }
"""

# Initial slider positions for the percentage inputs.
default_ssc_p = 70.0
default_hsc_p = 70.0
default_degree_p = 70.0
default_etest_p = 70.0
default_mba_p = 65.0
|
|
|
|
|
|
|
|
|
def _show_notice(message):
    """Add a visible warning line to the layout currently being built.

    ``gr.Warning`` only produces a pop-up when invoked from inside an event
    handler; at layout-build time it shows nothing, so build-time problems
    are rendered as Markdown instead.
    """
    gr.Markdown(f"**Warning:** {message}")


app_ui = gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.sky,
    ),
    title=app_title,
    css=css,
)

with app_ui:
    gr.Markdown(f"<h1>{app_title}</h1>")
    gr.Markdown(app_description)

    # --- Input widgets, keyed by the feature name they supply ---
    input_components_map = {}
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("**Personal & Secondary**")
            input_components_map['gender'] = gr.Radio(label="Gender", choices=['M', 'F'], value='M')
            input_components_map['ssc_p'] = gr.Slider(label="SSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_ssc_p)
            input_components_map['ssc_b'] = gr.Dropdown(label="SSC Board", choices=['Central', 'Others'], value='Central')
        with gr.Column(scale=1):
            gr.Markdown("**Higher Secondary**")
            input_components_map['hsc_p'] = gr.Slider(label="HSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_hsc_p)
            input_components_map['hsc_b'] = gr.Dropdown(label="HSC Board", choices=['Central', 'Others'], value='Central')
            input_components_map['hsc_s'] = gr.Dropdown(label="HSC Stream", choices=['Commerce', 'Science', 'Arts'], value='Commerce')
        with gr.Column(scale=1):
            gr.Markdown("**Degree & Experience**")
            input_components_map['degree_p'] = gr.Slider(label="Degree Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_degree_p)
            input_components_map['degree_t'] = gr.Dropdown(label="Degree Type", choices=['Comm&Mgmt', 'Sci&Tech', 'Others'], value='Comm&Mgmt')
            input_components_map['workex'] = gr.Radio(label="Work Experience", choices=['No', 'Yes'], value='No')
        with gr.Column(scale=1):
            gr.Markdown("**Employability & MBA**")
            input_components_map['etest_p'] = gr.Slider(label="Employability Test %", minimum=0.0, maximum=100.0, step=0.1, value=default_etest_p)
            input_components_map['specialisation'] = gr.Dropdown(label="MBA Specialization", choices=['Mkt&Fin', 'Mkt&HR'], value='Mkt&Fin')
            input_components_map['mba_p'] = gr.Slider(label="MBA Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_mba_p)

    # --- Order the widgets the way the model expects its columns ---
    # Explicit None/length test: truth-testing an array-like raises.
    has_feature_names = feature_names is not None and len(feature_names) > 0
    if has_feature_names:
        ordered_feature_names = [name for name in feature_names if name in input_components_map]
        missing_features = [name for name in feature_names if name not in input_components_map]
        for name in missing_features:
            print(f"Warning: UI component for feature '{name}' not defined in input_components_map.")
        if missing_features:
            _show_notice(f"Missing UI components for features: {', '.join(missing_features)}. Predictions might fail.")
    else:
        # Fall back to definition order so the form is at least usable.
        ordered_feature_names = list(input_components_map)
        _show_notice("Feature names file not loaded. Input order may be incorrect, predictions might fail.")
    ordered_input_components = [input_components_map[name] for name in ordered_feature_names]

    # NOTE(review): the leading glyphs on this button and on two tab titles
    # below are mis-decoded emoji whose original code points cannot be
    # recovered from this file; restore them from the upstream source.
    predict_button = gr.Button("π Predict Placement Status")

    with gr.Tabs():
        with gr.TabItem("π Prediction Results"):
            with gr.Row():
                out_profile = gr.Markdown(label="Input Summary")
                with gr.Column():
                    out_prediction = gr.Markdown(label="Prediction")
                    out_plot = gr.Plot(label="Probability Distribution")

        with gr.TabItem("💡 Feature Importance"):
            gr.Markdown("## Feature Importance Analysis")
            gr.Markdown("Shows which factors most influence the placement prediction (based on the trained model). Higher values indicate greater influence.")
            if plots_exist["feature_importance"]:
                gr.Image(FEATURE_IMPORTANCE_PLOT, label="Feature Importance Plot", show_label=False)
            else:
                _show_notice(f"Feature importance plot not found at '{FEATURE_IMPORTANCE_PLOT}'. Please ensure it was generated and uploaded.")
            gr.Markdown("""
*Insights based on typical results for this type of problem:*
- **Academic Performance:** SSC %, HSC %, and Degree % are often strong predictors.
- **Employability Test:** Performance in standardized tests (etest_p) is usually critical.
- **Work Experience:** Can provide a significant advantage.
- **MBA Performance:** MBA % reinforces the importance of consistent academic achievement.
""")

        with gr.TabItem("π Dataset Overview"):
            gr.Markdown("## Dataset Overview")
            gr.Markdown("A quick look at the data used to train the model.")
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("**Data Sample**")
                    if df_original is not None:
                        gr.DataFrame(df_head, label="First 10 Rows", row_count=(10, "fixed"), wrap=True, interactive=False)
                    else:
                        _show_notice(f"Original dataset '{DATA_FILE}' not found.")
                    gr.Markdown("**Basic Stats**")
                    gr.Markdown(dataset_stats)
                with gr.Column(scale=1):
                    gr.Markdown("**Placement Distribution**")
                    if plots_exist["pie_chart"]:
                        gr.Image(PLACEMENT_PIE_CHART, label="Placement Distribution", show_label=False)
                    else:
                        _show_notice(f"Placement distribution plot not found at '{PLACEMENT_PIE_CHART}'.")
                    gr.Markdown("**Correlation Analysis**")
                    if plots_exist["heatmap"]:
                        gr.Image(CORRELATION_HEATMAP, label="Correlation Heatmap", show_label=False)
                    else:
                        _show_notice(f"Correlation heatmap not found at '{CORRELATION_HEATMAP}'.")

    # --- Wire the button to the prediction handler ---
    predict_button.click(
        fn=predict_placement,
        inputs=ordered_input_components,
        outputs=[out_profile, out_prediction, out_plot],
    )

    # --- Clickable example rows (only when the model's column order is known) ---
    if has_feature_names and ordered_feature_names:
        # Column order in which the literal rows below are written.
        example_columns = (
            'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',
            'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',
        )
        example_list = [
            ['M', 67.0, 'Others', 91.0, 'Others', 'Commerce', 58.0, 'Sci&Tech', 'No', 55.0, 'Mkt&HR', 58.8],
            ['M', 56.0, 'Central', 52.0, 'Central', 'Science', 52.0, 'Sci&Tech', 'No', 66.0, 'Mkt&HR', 59.43],
            ['F', 77.0, 'Central', 87.0, 'Central', 'Commerce', 59.0, 'Comm&Mgmt', 'No', 68.0, 'Mkt&Fin', 68.63],
            ['F', 52.0, 'Central', 64.0, 'Central', 'Commerce', 61.0, 'Comm&Mgmt', 'No', 55.0, 'Mkt&Fin', 62.93],
            ['M', 84.0, 'Others', 90.9, 'Others', 'Science', 64.5, 'Sci&Tech', 'No', 86.04, 'Mkt&Fin', 59.42],
        ]
        # Re-order every row to follow the input widgets, so each value lands
        # in its own widget even when the model's column order differs from
        # the order the rows were written in.
        final_examples = [
            [dict(zip(example_columns, row))[name] for name in ordered_feature_names]
            for row in example_list
        ]

        gr.Examples(
            examples=final_examples,
            inputs=ordered_input_components,
            outputs=[out_profile, out_prediction, out_plot],
            fn=predict_placement,
            cache_examples=False,
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start the local development server only when the file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    print("Launching Gradio app locally...")
    app_ui.launch(debug=True)
|
|
|
|