# -*- coding: utf-8 -*- # ================================================== # app.py - Gradio App for Hugging Face Spaces # Campus Placement Prediction # ================================================== import gradio as gr import pandas as pd import joblib import numpy as np import matplotlib.pyplot as plt # Only needed for figure type hint potentially import seaborn as sns # Not directly used if images are pre-generated import os import warnings warnings.filterwarnings('ignore') # --- Configuration: Relative Paths for HF Spaces --- # Ensure these files are uploaded to your HF Space repository MODEL_FILENAME = 'placement_model_pipeline.joblib' LABEL_ENCODER_FILENAME = 'placement_label_encoder.joblib' FEATURES_FILENAME = 'placement_model_features.joblib' DATA_FILE = 'Campus_Selection.csv' # Original data file PLOT_DIR = 'plots' # Subdirectory for plots FEATURE_IMPORTANCE_PLOT = os.path.join(PLOT_DIR, 'feature_importance.png') PLACEMENT_PIE_CHART = os.path.join(PLOT_DIR, 'placement_distribution.png') CORRELATION_HEATMAP = os.path.join(PLOT_DIR, 'correlation_heatmap.png') # --- Global Variables to Hold Loaded Objects --- pipeline = None label_encoder = None feature_names = None df_original = None df_head = pd.DataFrame() # Default empty dataframe dataset_stats = "Dataset information not available." # --- Load Model and Preprocessing Objects --- print("Attempting to load model artifacts...") try: if os.path.exists(MODEL_FILENAME): pipeline = joblib.load(MODEL_FILENAME) print(f"- Loaded: {MODEL_FILENAME}") else: print(f"Error: Model file not found at {MODEL_FILENAME}") # gr.Error(f"Model file '{MODEL_FILENAME}' not found. Cannot make predictions.") # Use if you want error banner on load if os.path.exists(LABEL_ENCODER_FILENAME): label_encoder = joblib.load(LABEL_ENCODER_FILENAME) print(f"- Loaded: {LABEL_ENCODER_FILENAME}") else: print(f"Error: Label encoder file not found at {LABEL_ENCODER_FILENAME}") # gr.Error(f"Label encoder file '{LABEL_ENCODER_FILENAME}' not found.") if os.path.exists(FEATURES_FILENAME): feature_names = joblib.load(FEATURES_FILENAME) print(f"- Loaded: {FEATURES_FILENAME}") else: print(f"Error: Feature names file not found at {FEATURES_FILENAME}") # gr.Error(f"Feature names file '{FEATURES_FILENAME}' not found.") if pipeline and label_encoder and feature_names: print("All essential model artifacts loaded successfully.") else: print("Warning: One or more essential model artifacts failed to load. Prediction functionality may be limited.") except Exception as e: print(f"Error loading model artifacts: {e}") # Optionally raise a Gradio error to be visible in the UI on load # gr.Error(f"Failed to load model artifacts: {e}") # --- Load Original Data for Overview Tab --- print("Attempting to load original dataset...") try: if os.path.exists(DATA_FILE): df_original = pd.read_csv(DATA_FILE) df_head = df_original.head(10) dataset_stats = f"**Number of Records:** {len(df_original)}\n\n**Columns:** {len(df_original.columns)}" print(f"- Loaded: {DATA_FILE}") else: print(f"Warning: Original data file '{DATA_FILE}' not found for overview tab.") dataset_stats = f"Original dataset file '{DATA_FILE}' not found." except Exception as e: print(f"Error loading original dataset: {e}") dataset_stats = f"Error loading original dataset: {e}" # --- Check if Plot Files Exist (for warnings in UI) --- plots_exist = { "feature_importance": os.path.exists(FEATURE_IMPORTANCE_PLOT), "pie_chart": os.path.exists(PLACEMENT_PIE_CHART), "heatmap": os.path.exists(CORRELATION_HEATMAP) } print(f"Plot file existence check: {plots_exist}") # --- Define Prediction Function --- def predict_placement(*args): """ Predicts placement status based on input features. Returns: - Profile Summary (Markdown) - Prediction Result (Markdown) - Probability Plot (Matplotlib Figure or None) """ # Check if essential objects are loaded if pipeline is None or label_encoder is None or feature_names is None: message = "⚠️ **Error:** Model artifacts not loaded correctly. Cannot perform prediction." print(message) return (message, "", None) # Return error message and no plot # Create a DataFrame from the inputs with correct column names try: input_data = pd.DataFrame([args], columns=feature_names) except ValueError as e: message = f"⚠️ **Error:** Input data mismatch with expected features. Details: {e}" print(message) return (message, "", None) # Prepare Profile Summary String profile_md = "### 🧑🎓 Student Profile Summary\n" + "-"*25 + "\n" for i, feature in enumerate(feature_names): label = feature.replace('_p', ' %').replace('_b', ' Board').replace('_s', ' Stream').replace('_t', ' Type').replace('workex', 'Work Experience').replace('etest', 'Employability Test').replace('ssc', 'SSC').replace('hsc', 'HSC').replace('mba', 'MBA').replace('degree','Degree').replace('specialisation','Specialisation').replace('gender','Gender').replace('_',' ').title() profile_md += f"**{label}:** {args[i]}\n" # Convert numerical inputs (sliders/numbers) to numeric types numerical_cols_in_features = [ 'ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p' ] try: for col in numerical_cols_in_features: if col in input_data.columns: input_data[col] = pd.to_numeric(input_data[col]) except ValueError as e: error_msg = f"Error: Invalid numeric value provided. Details: {e}" print(error_msg) return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None) # Make prediction probability try: pred_proba = pipeline.predict_proba(input_data)[0] predicted_class_index = np.argmax(pred_proba) predicted_status = label_encoder.inverse_transform([predicted_class_index])[0] confidence = pred_proba[predicted_class_index] # Format prediction result if predicted_status == 'Placed': result_md = f"## ✅ Prediction: PLACED\n**Confidence:** {confidence:.2%}" else: result_md = f"## ❌ Prediction: NOT PLACED\n**Confidence:** {confidence:.2%}" # Create probability bar chart fig, ax = plt.subplots(figsize=(5, 3)) # Smaller plot for UI statuses = label_encoder.classes_ probabilities = pred_proba colors = ['#ff9999', '#66b3ff'] # Ensure colors match labels if needed # Ensure correct color mapping if classes aren't always ['Not Placed', 'Placed'] status_color_map = {label_encoder.classes_[0]: colors[0], label_encoder.classes_[1]: colors[1]} bar_colors = [status_color_map[status] for status in statuses] bars = ax.bar(statuses, probabilities, color=bar_colors) ax.set_ylim(0, 1) ax.set_ylabel('Probability') ax.set_title('Placement Probability') for bar in bars: height = bar.get_height() ax.text(bar.get_x() + bar.get_width()/2., height, f'{height:.2%}', ha='center', va='bottom', fontsize=9) plt.tight_layout() # IMPORTANT: Close the plot to prevent it from displaying in logs or consuming memory # We return the figure object for Gradio to render # plt.close(fig) # DO NOT CLOSE HERE - Gradio needs the figure object return profile_md, result_md, fig # Return figure object except Exception as e: error_msg = f"An error occurred during prediction: {e}" print(f"Error during prediction: {e}") print(f"Input data:\n{input_data.to_string()}") print(f"Input data types:\n{input_data.dtypes}") # Ensure plot is closed if an error occurs before returning try: plt.close(fig) except NameError: pass # fig might not be defined if error happened early return (profile_md, f"⚠️ **Prediction Error:**\n{error_msg}", None) # --- Build Gradio Interface using Blocks --- app_title = "🎓 Campus Placement Predictor" app_description = """ Predict student placement based on academic performance, background, work experience, and MBA specialization. Input the details below and click 'Predict'. Explore other tabs for insights. """ css = """ .gradio-container { font-family: 'IBM Plex Sans', sans-serif; max-width: 1200px; margin: auto; } .gr-button { color: white; border-color: #007bff; background: #007bff; } footer { visibility: hidden; } .gr-label { font-weight: bold; } h1 { text-align: center; } """ # Define default values (can be adjusted) default_ssc_p = 70.0 default_hsc_p = 70.0 default_degree_p = 70.0 default_etest_p = 70.0 default_mba_p = 65.0 # Start Gradio Blocks UI Definition app_ui = gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky), title=app_title, css=css) with app_ui: gr.Markdown(f"