Spaces:

fusingAIandSec
/

malicious-url-demo

Sleeping

Venkata Nagasai Kesani

Improve prediction logic and fix false positives

b5ee9ec 26 days ago

1.67 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import re

	# Hugging Face Hub repository that supplies both the tokenizer and the
	# sequence-classification weights.
	MODEL_REPO = "fusingAIandSec/malicious-url-detector"

	# Class names, looked up by the model's predicted class index.
	# NOTE(review): the order is assumed to match the model's output order;
	# confirm against the model's config before changing it.
	labels = [
	    "benign",
	    "defacement",
	    "phishing",
	    "malware",
	]

	# Load model & tokenizer once, at import time.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)

	# URL normalization helper
	def normalize_url(url):
	    """Return *url* trimmed, scheme-prefixed and lower-cased.

	    Surrounding whitespace is stripped. If the text does not already start
	    with ``http://`` or ``https://`` (in any letter case), ``https://`` is
	    prepended. The whole result is then lower-cased.

	    Args:
	        url: The raw URL text.

	    Returns:
	        The normalized URL string.
	    """
	    url = url.strip()
	    # The scheme test runs before lower-casing, so it must ignore case;
	    # otherwise "HTTP://host" would be turned into "https://http://host".
	    if not re.match(r"https?://", url, flags=re.IGNORECASE):
	        url = "https://" + url
	    return url.lower()

	# Minimum winning probability required before a non-benign label is
	# reported; weaker non-benign predictions are shown as "benign".
	MALICIOUS_CONFIDENCE_THRESHOLD = 0.85

	# Prediction function
	def predict_url(url):
	    """Classify *url* and return two display strings.

	    The URL is normalized with ``normalize_url``, tokenized, and passed
	    through the module-level ``model``. The logits are turned into
	    probabilities with softmax, and the arg-max class is looked up in
	    ``labels``. A "phishing", "defacement" or "malware" prediction whose
	    probability is below ``MALICIOUS_CONFIDENCE_THRESHOLD`` is reported as
	    "benign" to reduce false positives; the confidence dictionary still
	    shows the raw per-class probabilities.

	    Args:
	        url: URL text from the user. ``None``, empty, or whitespace-only
	            input is not classified.

	    Returns:
	        A ``(prediction_text, confidence_text)`` tuple of strings.
	    """
	    # Guard: without this, blank input would be normalized to the bare
	    # string "https://" and classified as if it were a real URL.
	    if url is None or not url.strip():
	        return "⚠️ Please enter a URL to classify.", "Confidence: n/a"

	    url = normalize_url(url)
	    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
	    with torch.no_grad():  # inference only; no gradients needed
	        outputs = model(**inputs)
	    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

	    # Convert to readable dictionary
	    confidence = {
	        label: round(float(prob), 4) for label, prob in zip(labels, probs)
	    }
	    pred_idx = torch.argmax(outputs.logits, dim=-1).item()
	    pred_label = labels[pred_idx]
	    max_prob = max(probs)

	    # Apply smart threshold to reduce false phishing/defacement
	    is_malicious = pred_label in ("phishing", "defacement", "malware")
	    if is_malicious and max_prob < MALICIOUS_CONFIDENCE_THRESHOLD:
	        pred_label = "benign"

	    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"

	# Gradio interface: a single URL textbox in, two text fields out (the
	# prediction line and the confidence line returned by predict_url).
	_url_box = gr.Textbox(label="Enter a URL", placeholder="https://example.com")

	demo = gr.Interface(
	    fn=predict_url,
	    inputs=_url_box,
	    outputs=["text", "text"],
	    title="🔍 Malicious URL Detector",
	    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
	)

	# Start serving the interface.
	demo.launch()