"""Gradio demo that classifies a URL with a Hugging Face sequence classifier.

The checkpoint named by ``MODEL_REPO`` is loaded once at import time. Each
request normalizes the submitted URL, runs a single forward pass, and reports
the predicted label together with the per-label probabilities.
"""

import re
from typing import Optional, Tuple

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load model & tokenizer once so every request reuses the same objects.
MODEL_REPO = "fusingAIandSec/malicious-url-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)

# Label names, indexed by position in the model's output logits.
# NOTE(review): this order is assumed to match the checkpoint's class order;
# confirm against model.config.id2label.
labels = ["benign", "defacement", "phishing", "malware"]

# A non-benign prediction is only reported when its probability reaches this
# value; below it the verdict falls back to "benign".
MALICIOUS_CONFIDENCE_THRESHOLD = 0.85

# Scheme detection is case-insensitive so "HTTP://..." is recognised as
# already having a scheme instead of getting a second one prepended.
_SCHEME_RE = re.compile(r"^https?://", re.IGNORECASE)


# URL normalization helper
def normalize_url(url: str) -> str:
    """Return *url* stripped of surrounding whitespace, with a scheme, lower-cased.

    If the stripped text does not start with ``http://`` or ``https://``
    (in any letter case), ``https://`` is prepended. The whole result is then
    lower-cased.

    Args:
        url: Raw URL text as typed by the user.

    Returns:
        The normalized URL string.
    """
    url = url.strip()
    if not _SCHEME_RE.match(url):
        url = "https://" + url
    return url.lower()


# Prediction function
def predict_url(url: Optional[str]) -> Tuple[str, str]:
    """Classify *url* and return two display strings for the Gradio outputs.

    Args:
        url: URL text from the input textbox. ``None``, empty, or
            whitespace-only input is not sent to the model.

    Returns:
        A ``(prediction, confidence)`` pair of strings. ``prediction`` names
        the chosen label; ``confidence`` shows every label's probability
        rounded to four decimal places. For blank input the pair is a prompt
        message and an empty string.
    """
    # Without this guard a blank submission would be classified as the bare
    # string "https://", which is not a meaningful URL.
    if url is None or not url.strip():
        return "⚠️ Please enter a URL.", ""

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    # Single input, so take row 0 of the softmax output.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Convert to readable dictionary
    confidence = {label: round(float(prob), 4) for label, prob in zip(labels, probs)}

    pred_idx = torch.argmax(outputs.logits, dim=-1).item()
    pred_label = labels[pred_idx]
    max_prob = probs[pred_idx]

    # Apply the confidence threshold to reduce false phishing/defacement/malware
    # verdicts: a low-confidence non-benign prediction is reported as benign.
    if pred_label != "benign" and max_prob < MALICIOUS_CONFIDENCE_THRESHOLD:
        pred_label = "benign"

    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"


# Gradio interface
demo = gr.Interface(
    fn=predict_url,
    inputs=gr.Textbox(label="Enter a URL", placeholder="https://example.com"),
    outputs=["text", "text"],
    title="🔍 Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse predict_url) does not launch the web UI.
if __name__ == "__main__":
    demo.launch()