# Venkata Nagasai Kesani
# Improve prediction logic and fix false positives
# b5ee9ec
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
# Load model & tokenizer
# Both objects are created once, at import time, and are shared by every
# call to predict_url below.
# NOTE(review): MODEL_REPO is presumably a Hugging Face Hub repo id, in which
# case the first run downloads the weights — confirm.
MODEL_REPO = "fusingAIandSec/malicious-url-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
# Class names, looked up by output index in predict_url (labels[i] is paired
# with the i-th probability).
# NOTE(review): assumes this order matches the model's own label mapping
# (e.g. model.config.id2label) — TODO confirm against the model repo.
labels = ["benign", "defacement", "phishing", "malware"]
# URL normalization helper
def normalize_url(url: str) -> str:
    """Return *url* trimmed, lower-cased and guaranteed to carry a scheme.

    Surrounding whitespace is stripped and the whole string is lower-cased.
    If the result does not already start with ``http://`` or ``https://``,
    ``https://`` is prepended.

    Args:
        url: Raw URL text as typed by the user.

    Returns:
        The normalized URL string.
    """
    # Lower-case *before* testing the scheme: URL schemes are
    # case-insensitive, so "HTTP://Example.com" must be recognised as already
    # having one instead of becoming "https://http://example.com".
    url = url.strip().lower()
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    return url
# Prediction function

# A non-benign top class is only reported when its probability reaches this
# value; below it the verdict falls back to "benign".
MALICIOUS_CONFIDENCE_THRESHOLD = 0.85
_MALICIOUS_LABELS = ("phishing", "defacement", "malware")


def predict_url(url):
    """Classify a URL and report the per-class probabilities.

    The URL is normalized with ``normalize_url``, tokenized, and run through
    the module-level ``model``. The top class is taken from the logits; if
    that class is one of the malicious labels but its softmax probability is
    below ``MALICIOUS_CONFIDENCE_THRESHOLD``, the reported label is downgraded
    to ``"benign"``. The confidence dictionary always shows the raw
    probabilities, even when the label was downgraded.

    Args:
        url: URL text entered by the user. ``None``, empty or
            whitespace-only input is rejected without running the model.

    Returns:
        A 2-tuple of strings: the prediction line and the confidence line,
        one per "text" output of the Gradio interface.
    """
    # Guard: a blank input would otherwise be normalized to a bare
    # "https://" and scored as if it were a real URL.
    if not url or not url.strip():
        return "Please enter a URL to classify.", "Confidence: n/a"

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Single input, so take row 0 of the batch.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Convert to readable dictionary
    confidence = {
        label: round(float(prob), 4) for label, prob in zip(labels, probs)
    }

    pred_idx = torch.argmax(outputs.logits, dim=-1).item()
    pred_label = labels[pred_idx]
    max_prob = max(probs)

    # Apply smart threshold to reduce false phishing/defacement
    if (
        pred_label in _MALICIOUS_LABELS
        and max_prob < MALICIOUS_CONFIDENCE_THRESHOLD
    ):
        pred_label = "benign"

    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"
# Gradio interface
# One text box in, two text fields out: predict_url returns a
# (prediction, confidence) pair of strings, one per output.
demo = gr.Interface(
    fn=predict_url,
    inputs=gr.Textbox(label="Enter a URL", placeholder="https://example.com"),
    outputs=["text", "text"],
    # The leading emoji was stored as mis-decoded bytes ("πŸ”"); restored to
    # the magnifying-glass character so the title renders correctly.
    title="🔍 Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
)
# Start the web server as soon as the script is executed.
demo.launch()