import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re

# Load the tokenizer and classifier once, at import time, so that every call
# to predict_url reuses the same objects instead of reloading them.
# MODEL_REPO is the identifier handed to both from_pretrained() calls.
MODEL_REPO = "fusingAIandSec/malicious-url-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)

# Class names, looked up by output index in predict_url.
# NOTE(review): this order is assumed to match the model's output indices
# (e.g. model.config.id2label) — confirm against the model card.
labels = ["benign", "defacement", "phishing", "malware"]

# URL normalization helper
def normalize_url(url: str) -> str:
    """Return *url* trimmed, given a scheme if it lacks one, and lower-cased.

    Surrounding whitespace is stripped. If the text does not already begin
    with ``http://`` or ``https://`` (compared case-insensitively),
    ``https://`` is prepended. The entire result is then lower-cased.

    Args:
        url: Raw URL text, e.g. as typed by a user.

    Returns:
        The normalized URL string. An empty or whitespace-only input yields
        the bare prefix ``"https://"``.
    """
    url = url.strip()
    # The scheme must be matched case-insensitively: "HTTP://host" already
    # has a scheme and must not be turned into "https://http://host".
    if not re.match(r"https?://", url, flags=re.IGNORECASE):
        url = "https://" + url
    return url.lower()

# Prediction function
def predict_url(url):
    """Classify a URL and report the per-class probabilities.

    The URL is normalized with ``normalize_url``, tokenized, and passed
    through the module-level ``model``. The logits are converted to
    probabilities with a softmax over the last dimension, and the first
    (only) row is used.

    Args:
        url: URL text to classify.

    Returns:
        A pair of display strings ``(prediction_line, confidence_line)``.
        For blank input (``None``, empty, or whitespace-only) a prompt
        message is returned and the model is not run.
    """
    # Blank input carries nothing to classify; without this guard the bare
    # "https://" prefix added by normalize_url would be scored instead.
    if not url or not url.strip():
        return "⚠️ Please enter a URL.", "Confidence: n/a"

    # A non-benign verdict below this probability is reported as benign.
    min_malicious_confidence = 0.85

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Human-readable mapping of class name -> probability (4 decimals).
    confidence = {
        label: round(float(prob), 4) for label, prob in zip(labels, probs)
    }
    pred_idx = torch.argmax(outputs.logits, dim=-1).item()
    pred_label = labels[pred_idx]
    max_prob = max(probs)

    # Downgrade low-confidence malicious verdicts to "benign" to reduce
    # false positives. Only the displayed label changes: the confidence
    # mapping still shows the raw probabilities, so its highest entry may
    # differ from the reported prediction.
    if (
        pred_label in ("phishing", "defacement", "malware")
        and max_prob < min_malicious_confidence
    ):
        pred_label = "benign"

    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"

# Web UI: a single URL text field feeds predict_url, whose two returned
# strings (prediction line, confidence line) fill the two text outputs.
demo = gr.Interface(
    title="🔍 Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
    inputs=gr.Textbox(placeholder="https://example.com", label="Enter a URL"),
    outputs=["text"] * 2,
    fn=predict_url,
)

# Start serving the interface.
demo.launch()