Spaces:
Sleeping
Sleeping
File size: 1,674 Bytes
4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec 4f89adf b5ee9ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
# Load model & tokenizer
# Identifier handed to both from_pretrained() calls below
# (looks like a Hugging Face Hub repo id — confirm).
MODEL_REPO = "fusingAIandSec/malicious-url-detector"
# Tokenizer and classifier are created once, at import time, and reused as
# module-level globals by predict_url().
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
# Class names, looked up by predicted class index in predict_url().
# NOTE(review): assumes this order matches the model's output indices —
# confirm against the model's config before relying on it.
labels = ["benign", "defacement", "phishing", "malware"]
# URL normalization helper
def normalize_url(url):
    """Return *url* trimmed, lower-cased and carrying an HTTP(S) scheme.

    Surrounding whitespace is stripped and the whole string is lower-cased.
    If the result does not already start with ``http://`` or ``https://``,
    ``https://`` is prepended.

    The scheme test runs on the lower-cased text, so an input such as
    ``HTTP://Example.com`` is recognised as already having a scheme instead
    of being turned into ``https://http://example.com``.

    Args:
        url: Raw URL text.

    Returns:
        The normalized URL string.
    """
    # NOTE(review): lower-casing also affects the path and query string;
    # kept to preserve the existing behaviour of this helper.
    url = url.strip().lower()
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    return url
# Prediction function
def predict_url(url):
    """Classify *url* and return two display strings for the UI.

    The URL is normalized with ``normalize_url``, tokenized, and run through
    the module-level ``model``; the logits are turned into probabilities with
    a softmax. A non-benign top class is reported only when the highest
    probability is at least 0.85; otherwise the verdict falls back to
    "benign". The confidence string always shows the unmodified
    probabilities.

    Args:
        url: URL text typed by the user. ``None`` or blank text is rejected
            with a prompt message instead of being scored.

    Returns:
        A 2-tuple ``(prediction_text, confidence_text)`` of strings.
    """
    # Blank input would otherwise be normalized to the bare string
    # "https://" and scored as if it were a real URL.
    if url is None or not url.strip():
        return "Please enter a URL.", ""

    # Below this probability a non-benign verdict is treated as a likely
    # false positive and downgraded to "benign".
    min_confidence = 0.85

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Convert to readable dictionary
    # NOTE(review): assumes the model's output indices follow the order of
    # `labels` — confirm against the model config.
    confidence = {label: round(float(p), 4) for label, p in zip(labels, probs)}

    pred_idx = torch.argmax(outputs.logits, dim=-1).item()
    pred_label = labels[pred_idx]
    max_prob = max(probs)

    # Apply smart threshold to reduce false phishing/defacement
    if pred_label in ("phishing", "defacement", "malware") and max_prob < min_confidence:
        pred_label = "benign"

    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"
# Gradio interface
# Text field in which the user enters the URL to classify.
_url_input = gr.Textbox(label="Enter a URL", placeholder="https://example.com")

# One text input feeds predict_url(); its two returned strings are shown in
# two text outputs.
demo = gr.Interface(
    title="🔍 Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
    fn=predict_url,
    inputs=_url_input,
    outputs=["text", "text"],
)

# Start the web app as soon as the module is executed.
demo.launch()
|