Spaces:

piyusharma
/

Campus-Placement-Prediction

Sleeping

App Files Files Community

Campus-Placement-Prediction / app.py

piyusharma

Upload 11 files

2855f37 verified 7 months ago

raw

history blame contribute delete

17.9 kB

	# -*- coding: utf-8 -*-
	# ==================================================
	# app.py - Gradio App for Hugging Face Spaces
	# Campus Placement Prediction
	# ==================================================

	import gradio as gr
	import pandas as pd
	import joblib
	import numpy as np
	import matplotlib.pyplot as plt # Only needed for figure type hint potentially
	import seaborn as sns # Not directly used if images are pre-generated
	import os
	import warnings

	warnings.filterwarnings('ignore')

	# --- Artifact and data locations (relative paths, resolved from the working directory) ---
	# These files are expected to sit next to this script in the Space repository.
	MODEL_FILENAME = "placement_model_pipeline.joblib"
	LABEL_ENCODER_FILENAME = "placement_label_encoder.joblib"
	FEATURES_FILENAME = "placement_model_features.joblib"
	DATA_FILE = "Campus_Selection.csv"  # original training data, shown in the overview tab

	# Pre-generated plot images all live in one sub-directory.
	PLOT_DIR = "plots"
	FEATURE_IMPORTANCE_PLOT, PLACEMENT_PIE_CHART, CORRELATION_HEATMAP = (
	    os.path.join(PLOT_DIR, plot_file)
	    for plot_file in (
	        "feature_importance.png",
	        "placement_distribution.png",
	        "correlation_heatmap.png",
	    )
	)

	# --- Module-level placeholders, filled in by the loading steps further down ---
	pipeline = label_encoder = feature_names = df_original = None
	df_head = pd.DataFrame()  # empty frame until the dataset is read
	dataset_stats = "Dataset information not available."

	# --- Load Model and Preprocessing Objects ---
	def _load_artifact(path, description):
	    """Load one joblib artifact, returning ``None`` when it is unavailable.

	    Each artifact is handled independently so that a failure on one file
	    does not prevent the remaining files from being loaded.

	    Args:
	        path: Location of the serialized object.
	        description: Human-readable artifact name used in the "not found" log line.

	    Returns:
	        The deserialized object, or ``None`` if the file is missing or
	        loading raised an exception.
	    """
	    if not os.path.exists(path):
	        print(f"Error: {description} file not found at {path}")
	        return None
	    try:
	        # NOTE: joblib.load unpickles arbitrary objects; only point it at
	        # files that ship with the application, never at user uploads.
	        loaded = joblib.load(path)
	    except Exception as exc:
	        print(f"Error loading model artifacts: {exc}")
	        return None
	    print(f"- Loaded: {path}")
	    return loaded


	print("Attempting to load model artifacts...")
	pipeline = _load_artifact(MODEL_FILENAME, "Model")
	label_encoder = _load_artifact(LABEL_ENCODER_FILENAME, "Label encoder")
	feature_names = _load_artifact(FEATURES_FILENAME, "Feature names")

	if feature_names is not None:
	    # The stored object may be an array or pandas Index, whose truth value
	    # is ambiguous; a plain list makes later emptiness checks safe.
	    feature_names = list(feature_names)

	# Identity checks for the estimator objects: their truthiness is not a
	# reliable "was it loaded" signal.
	if pipeline is not None and label_encoder is not None and feature_names:
	    print("All essential model artifacts loaded successfully.")
	else:
	    print("Warning: One or more essential model artifacts failed to load. Prediction functionality may be limited.")


	# --- Read the training CSV that backs the "Dataset Overview" tab ---
	print("Attempting to load original dataset...")
	try:
	    if not os.path.exists(DATA_FILE):
	        # Missing file: keep the placeholders and record why there is no data.
	        print(f"Warning: Original data file '{DATA_FILE}' not found for overview tab.")
	        dataset_stats = f"Original dataset file '{DATA_FILE}' not found."
	    else:
	        df_original = pd.read_csv(DATA_FILE)
	        df_head = df_original.head(10)  # sample rows displayed in the UI
	        dataset_stats = (
	            f"Number of Records: {df_original.shape[0]}"
	            "\n\n"
	            f"Columns: {df_original.shape[1]}"
	        )
	        print(f"- Loaded: {DATA_FILE}")
	except Exception as load_error:
	    # Any read/parse failure is reported both on the console and in the UI text.
	    dataset_stats = f"Error loading original dataset: {load_error}"
	    print(dataset_stats)

	# --- Record which pre-generated plot images are present on disk ---
	# The UI consults this mapping to decide between showing an image and a notice.
	plots_exist = {
	    plot_key: os.path.exists(plot_path)
	    for plot_key, plot_path in (
	        ("feature_importance", FEATURE_IMPORTANCE_PLOT),
	        ("pie_chart", PLACEMENT_PIE_CHART),
	        ("heatmap", CORRELATION_HEATMAP),
	    )
	}
	print(f"Plot file existence check: {plots_exist}")


	# --- Define Prediction Function ---
	# Display labels for the known dataset columns. A lookup is used instead of
	# chained str.replace + title(), which lower-cased acronyms ("Ssc", "Mba").
	_FEATURE_LABELS = {
	    'gender': 'Gender',
	    'ssc_p': 'SSC %',
	    'ssc_b': 'SSC Board',
	    'hsc_p': 'HSC %',
	    'hsc_b': 'HSC Board',
	    'hsc_s': 'HSC Stream',
	    'degree_p': 'Degree %',
	    'degree_t': 'Degree Type',
	    'workex': 'Work Experience',
	    'etest_p': 'Employability Test %',
	    'specialisation': 'Specialisation',
	    'mba_p': 'MBA %',
	}

	# Columns that must be numeric before they are handed to the pipeline.
	_NUMERIC_FEATURES = ('ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p')

	# Bar colours by class name, with a positional fallback for unknown labels.
	_STATUS_COLORS = {'Not Placed': '#ff9999', 'Placed': '#66b3ff'}
	_FALLBACK_COLORS = ('#ff9999', '#66b3ff')


	def _feature_label(feature):
	    """Return a human-readable label for a feature/column name."""
	    name = str(feature)
	    return _FEATURE_LABELS.get(name, name.replace('_', ' ').title())


	def _build_profile_markdown(features, values):
	    """Render the submitted inputs as a Markdown bullet list.

	    List items are used because single newlines inside a Markdown paragraph
	    are collapsed, which would run all fields together on one line.
	    """
	    lines = ["### 🧑‍🎓 Student Profile Summary", "-" * 25, ""]
	    lines.extend(
	        f"- **{_feature_label(feature)}:** {value}"
	        for feature, value in zip(features, values)
	    )
	    return "\n".join(lines) + "\n"


	def _build_probability_figure(class_labels, probabilities):
	    """Build a bar chart of class probabilities as a standalone Figure.

	    The figure is created through ``matplotlib.figure.Figure`` rather than
	    ``pyplot`` so it is not registered in pyplot's global figure list; it is
	    garbage-collected once the caller drops it, and nothing has to be closed.
	    """
	    # Local import: keeps this block self-contained without touching the
	    # file's import section (matplotlib is already a dependency of the file).
	    from matplotlib.figure import Figure

	    labels = [str(label) for label in class_labels]
	    bar_colors = [
	        _STATUS_COLORS.get(label, _FALLBACK_COLORS[position % len(_FALLBACK_COLORS)])
	        for position, label in enumerate(labels)
	    ]

	    fig = Figure(figsize=(5, 3))  # small plot for the UI
	    ax = fig.subplots()
	    bars = ax.bar(labels, probabilities, color=bar_colors)
	    ax.set_ylim(0, 1)
	    ax.set_ylabel('Probability')
	    ax.set_title('Placement Probability')
	    for bar in bars:
	        height = bar.get_height()
	        ax.text(bar.get_x() + bar.get_width() / 2., height, f'{height:.2%}',
	                ha='center', va='bottom', fontsize=9)
	    fig.tight_layout()
	    return fig


	def predict_placement(*args):
	    """Predict placement status for one student.

	    Args:
	        *args: One value per entry of the module-level ``feature_names``,
	            in the same order.

	    Returns:
	        A 3-tuple ``(profile_md, result_md, figure)``:
	        - profile_md: Markdown summary of the inputs, or an error message
	          when the model artifacts or inputs are unusable.
	        - result_md: Markdown with the predicted status and confidence, or
	          an error description (empty string for the early error cases).
	        - figure: Matplotlib figure of class probabilities, or ``None`` on error.
	    """
	    # Check if essential objects are loaded
	    if pipeline is None or label_encoder is None or feature_names is None:
	        message = "⚠️ Error: Model artifacts not loaded correctly. Cannot perform prediction."
	        print(message)
	        return (message, "", None)

	    # Create a DataFrame from the inputs with correct column names
	    try:
	        input_data = pd.DataFrame([args], columns=feature_names)
	    except ValueError as e:
	        message = f"⚠️ Error: Input data mismatch with expected features. Details: {e}"
	        print(message)
	        return (message, "", None)

	    profile_md = _build_profile_markdown(feature_names, args)

	    # Convert numerical inputs (sliders/numbers) to numeric types
	    try:
	        for col in _NUMERIC_FEATURES:
	            if col in input_data.columns:
	                input_data[col] = pd.to_numeric(input_data[col])
	    except (ValueError, TypeError) as e:
	        error_msg = f"Error: Invalid numeric value provided. Details: {e}"
	        print(error_msg)
	        return (profile_md, f"⚠️ Prediction Error:\n{error_msg}", None)

	    try:
	        pred_proba = pipeline.predict_proba(input_data)[0]
	        predicted_class_index = int(np.argmax(pred_proba))
	        predicted_status = label_encoder.inverse_transform([predicted_class_index])[0]
	        confidence = pred_proba[predicted_class_index]

	        # Format prediction result
	        if predicted_status == 'Placed':
	            result_md = f"## ✅ Prediction: PLACED\nConfidence: {confidence:.2%}"
	        else:
	            result_md = f"## ❌ Prediction: NOT PLACED\nConfidence: {confidence:.2%}"

	        fig = _build_probability_figure(label_encoder.classes_, pred_proba)
	        return profile_md, result_md, fig

	    except Exception as e:
	        # Boundary handler: report the failure in the UI instead of crashing
	        # the request, and log the offending input for diagnosis.
	        error_msg = f"An error occurred during prediction: {e}"
	        print(f"Error during prediction: {e}")
	        print(f"Input data:\n{input_data.to_string()}")
	        print(f"Input data types:\n{input_data.dtypes}")
	        return (profile_md, f"⚠️ Prediction Error:\n{error_msg}", None)


	# --- Build Gradio Interface using Blocks ---
	# Static text, styling and default values consumed by the layout code below.

	# Used both as the Blocks `title` and as the <h1> heading at the top of the page.
	app_title = "🎓 Campus Placement Predictor"
	# Introductory text rendered as Markdown directly under the heading.
	app_description = """
	Predict student placement based on academic performance, background, work experience, and MBA specialization.
	Input the details below and click 'Predict'. Explore other tabs for insights.
	"""

	# Custom stylesheet passed to gr.Blocks(css=...): page width/font, button
	# colours, hidden footer, bold labels and a centred <h1>.
	css = """
	.gradio-container { font-family: 'IBM Plex Sans', sans-serif; max-width: 1200px; margin: auto; }
	.gr-button { color: white; border-color: #007bff; background: #007bff; }
	footer { visibility: hidden; }
	.gr-label { font-weight: bold; }
	h1 { text-align: center; }
	"""

	# Define default values (can be adjusted)
	# Initial positions of the percentage sliders (each slider ranges 0-100).
	default_ssc_p = 70.0
	default_hsc_p = 70.0
	default_degree_p = 70.0
	default_etest_p = 70.0
	default_mba_p = 65.0

	# Start Gradio Blocks UI Definition

	# Column order in which the example rows near the end of the layout are written.
	_EXAMPLE_COLUMNS = (
	    'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',
	    'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',
	)


	def _ui_warning(message):
	    """Log *message* and render it as a visible note in the current layout.

	    ``gr.Warning`` is meant to be called from inside an event handler; when
	    called while the layout is being built nothing reaches the page. Build-time
	    problems (missing files, missing components) are therefore rendered as
	    Markdown so the user actually sees them.

	    Returns:
	        The ``gr.Markdown`` component that was added to the layout.
	    """
	    print(f"Warning: {message}")
	    return gr.Markdown(f"⚠️ {message}")


	app_ui = gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky), title=app_title, css=css)

	with app_ui:
	    gr.Markdown(f"<h1>{app_title}</h1>")
	    gr.Markdown(app_description)

	    # Define Input Components (organized), keyed by the feature they feed.
	    input_components_map = {}
	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.Markdown("Personal & Secondary")
	            input_components_map['gender'] = gr.Radio(label="Gender", choices=['M', 'F'], value='M')
	            input_components_map['ssc_p'] = gr.Slider(label="SSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_ssc_p)
	            input_components_map['ssc_b'] = gr.Dropdown(label="SSC Board", choices=['Central', 'Others'], value='Central')
	        with gr.Column(scale=1):
	            gr.Markdown("Higher Secondary")
	            input_components_map['hsc_p'] = gr.Slider(label="HSC Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_hsc_p)
	            input_components_map['hsc_b'] = gr.Dropdown(label="HSC Board", choices=['Central', 'Others'], value='Central')
	            input_components_map['hsc_s'] = gr.Dropdown(label="HSC Stream", choices=['Commerce', 'Science', 'Arts'], value='Commerce')
	        with gr.Column(scale=1):
	            gr.Markdown("Degree & Experience")
	            input_components_map['degree_p'] = gr.Slider(label="Degree Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_degree_p)
	            input_components_map['degree_t'] = gr.Dropdown(label="Degree Type", choices=['Comm&Mgmt', 'Sci&Tech', 'Others'], value='Comm&Mgmt')
	            input_components_map['workex'] = gr.Radio(label="Work Experience", choices=['No', 'Yes'], value='No')
	        with gr.Column(scale=1):
	            gr.Markdown("Employability & MBA")
	            input_components_map['etest_p'] = gr.Slider(label="Employability Test %", minimum=0.0, maximum=100.0, step=0.1, value=default_etest_p)
	            input_components_map['specialisation'] = gr.Dropdown(label="MBA Specialization", choices=['Mkt&Fin', 'Mkt&HR'], value='Mkt&Fin')
	            input_components_map['mba_p'] = gr.Slider(label="MBA Percentage", minimum=0.0, maximum=100.0, step=0.1, value=default_mba_p)

	    # --- Order Input Components based on loaded feature_names ---
	    # Explicit None/length test: feature_names may be an array-like whose
	    # truth value is ambiguous.
	    features_loaded = feature_names is not None and len(feature_names) > 0
	    ordered_input_components = []
	    ordered_feature_names = []  # names matching ordered_input_components 1:1
	    if features_loaded:
	        missing_features = []
	        for name in feature_names:
	            component = input_components_map.get(name)
	            if component is None:
	                missing_features.append(str(name))
	                print(f"Warning: UI component for feature '{name}' not defined in input_components_map.")
	            else:
	                ordered_input_components.append(component)
	                ordered_feature_names.append(name)
	        if missing_features:
	            _ui_warning(f"Missing UI components for features: {', '.join(missing_features)}. Predictions might fail.")
	    else:
	        # Fallback if feature_names couldn't load - order might be wrong!
	        ordered_input_components = list(input_components_map.values())
	        ordered_feature_names = list(input_components_map)
	        _ui_warning("Feature names file not loaded. Input order may be incorrect, predictions might fail.")

	    predict_button = gr.Button("🚀 Predict Placement Status")

	    # Define Output Components within Tabs
	    with gr.Tabs():
	        with gr.TabItem("📊 Prediction Results"):
	            with gr.Row():
	                out_profile = gr.Markdown(label="Input Summary")
	                with gr.Column():
	                    out_prediction = gr.Markdown(label="Prediction")
	                    out_plot = gr.Plot(label="Probability Distribution")  # Displays the matplotlib fig

	        with gr.TabItem("💡 Feature Importance"):
	            gr.Markdown("## Feature Importance Analysis")
	            gr.Markdown("Shows which factors most influence the placement prediction (based on the trained model). Higher values indicate greater influence.")
	            if plots_exist["feature_importance"]:
	                gr.Image(FEATURE_IMPORTANCE_PLOT, label="Feature Importance Plot", show_label=False)
	            else:
	                _ui_warning(f"Feature importance plot not found at '{FEATURE_IMPORTANCE_PLOT}'. Please ensure it was generated and uploaded.")
	            gr.Markdown("""
	Insights based on typical results for this type of problem:
	- Academic Performance: SSC %, HSC %, and Degree % are often strong predictors.
	- Employability Test: Performance in standardized tests (etest_p) is usually critical.
	- Work Experience: Can provide a significant advantage.
	- MBA Performance: MBA % reinforces the importance of consistent academic achievement.
	""")

	        with gr.TabItem("📈 Dataset Overview"):
	            gr.Markdown("## Dataset Overview")
	            gr.Markdown("A quick look at the data used to train the model.")
	            with gr.Row():
	                with gr.Column(scale=2):  # Give more space to dataframe
	                    gr.Markdown("Data Sample")
	                    if df_original is not None:
	                        gr.DataFrame(df_head, label="First 10 Rows", row_count=(10, "fixed"), wrap=True, interactive=False)
	                    else:
	                        _ui_warning(f"Original dataset '{DATA_FILE}' not found.")
	                    gr.Markdown("Basic Stats")
	                    gr.Markdown(dataset_stats)
	                with gr.Column(scale=1):
	                    gr.Markdown("Placement Distribution")
	                    if plots_exist["pie_chart"]:
	                        gr.Image(PLACEMENT_PIE_CHART, label="Placement Distribution", show_label=False)
	                    else:
	                        _ui_warning(f"Placement distribution plot not found at '{PLACEMENT_PIE_CHART}'.")
	                    gr.Markdown("Correlation Analysis")
	                    if plots_exist["heatmap"]:
	                        gr.Image(CORRELATION_HEATMAP, label="Correlation Heatmap", show_label=False)
	                    else:
	                        _ui_warning(f"Correlation heatmap not found at '{CORRELATION_HEATMAP}'.")

	    # --- Link Button Click to Function ---
	    predict_button.click(
	        fn=predict_placement,
	        inputs=ordered_input_components,  # Use the ordered list
	        outputs=[out_profile, out_prediction, out_plot]
	    )

	    # --- Add Examples ---
	    # Examples are only offered when the model's feature order is known and
	    # every ordered feature has a value in the example rows.
	    examples_mappable = all(name in _EXAMPLE_COLUMNS for name in ordered_feature_names)
	    if features_loaded and ordered_feature_names and examples_mappable:
	        # Rows are written in _EXAMPLE_COLUMNS order.
	        example_list = [
	            ['M', 67.0, 'Others', 91.0, 'Others', 'Commerce', 58.0, 'Sci&Tech', 'No', 55.0, 'Mkt&HR', 58.8],  # Row 1 (Placed)
	            ['M', 56.0, 'Central', 52.0, 'Central', 'Science', 52.0, 'Sci&Tech', 'No', 66.0, 'Mkt&HR', 59.43],  # Row 4 (Not Placed)
	            ['F', 77.0, 'Central', 87.0, 'Central', 'Commerce', 59.0, 'Comm&Mgmt', 'No', 68.0, 'Mkt&Fin', 68.63],  # Row 14 (Placed)
	            ['F', 52.0, 'Central', 64.0, 'Central', 'Commerce', 61.0, 'Comm&Mgmt', 'No', 55.0, 'Mkt&Fin', 62.93],  # Row 187 (Not Placed)
	            ['M', 84.0, 'Others', 90.9, 'Others', 'Science', 64.5, 'Sci&Tech', 'No', 86.04, 'Mkt&Fin', 59.42],  # Row 79 (Placed)
	        ]
	        # Re-order every row to match the actual input order, so a value can
	        # never land in the wrong component when feature_names is ordered
	        # differently from the layout.
	        final_examples = []
	        for example_row in example_list:
	            values_by_column = dict(zip(_EXAMPLE_COLUMNS, example_row))
	            final_examples.append([values_by_column[name] for name in ordered_feature_names])

	        gr.Examples(
	            examples=final_examples,
	            inputs=ordered_input_components,
	            outputs=[out_profile, out_prediction, out_plot],
	            fn=predict_placement,
	            cache_examples=False
	        )
	    elif features_loaded:
	        print("Warning: Examples skipped because the loaded feature names do not match the example columns.")

	# --- Launch the App ---
	def _launch_app():
	    """Announce start-up on the console and serve ``app_ui`` with debug enabled."""
	    print("Launching Gradio app locally...")
	    app_ui.launch(debug=True)


	# Start the server only when this file is executed as a script, not on import.
	# NOTE(review): earlier comments suggested a plain `app_ui.launch()` (no debug)
	# for hosted deployments - confirm whether debug should be disabled there.
	if __name__ == "__main__":
	    _launch_app()