Spaces:

fusingAIandSec
/

malicious-url-demo

Sleeping

File size: 1,674 Bytes

4f89adf
 
 
b5ee9ec
4f89adf
b5ee9ec
4f89adf
 
 
b5ee9ec
4f89adf
 
b5ee9ec
 
 
 
 
 
 
 
4f89adf
b5ee9ec
4f89adf
 
 
b5ee9ec
 
 
 
 
 
 
 
 
 
 
 
 
4f89adf
b5ee9ec
4f89adf
 
b5ee9ec
 
4f89adf
b5ee9ec
4f89adf
 
b5ee9ec

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re

# Load model & tokenizer.
# Both are created once at module import; predict_url reuses these same
# objects on every call instead of reloading them per request.
MODEL_REPO = "fusingAIandSec/malicious-url-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)

# Class names, indexed by output position: labels[i] is paired with the i-th
# probability produced by the model in predict_url.
# NOTE(review): assumes this order matches the label order the model was
# trained with — confirm against the model's config (id2label).
labels = ["benign", "defacement", "phishing", "malware"]

# URL normalization helper
def normalize_url(url):
    """Return *url* trimmed, given a scheme if it lacks one, and lower-cased.

    Args:
        url: Raw URL text (leading/trailing whitespace is ignored).

    Returns:
        The stripped URL, prefixed with ``https://`` unless it already starts
        with ``http://`` or ``https://`` in any letter case, with the whole
        string converted to lower case.
    """
    url = url.strip()
    # The scheme must be matched case-insensitively: with a case-sensitive
    # check, "HTTP://example.com" looks scheme-less and would be turned into
    # "https://http://example.com".
    if not re.match(r"^https?://", url, re.IGNORECASE):
        url = "https://" + url
    return url.lower()

# Prediction function
# Minimum top-class probability required before a non-benign label is reported.
_MALICIOUS_THRESHOLD = 0.85


def predict_url(url):
    """Classify *url* and return the two display strings shown in the UI.

    Args:
        url: URL text entered by the user; may be empty or ``None``.

    Returns:
        A ``(prediction, confidence)`` tuple of strings. ``prediction`` names
        the reported label; ``confidence`` shows the per-label probabilities
        rounded to four decimals. For blank input, a prompt message and an
        empty string are returned and the model is not called.
    """
    # Nothing to classify: without this guard, blank input would be scored as
    # the bare string "https://" and presented as a genuine prediction.
    if not url or not url.strip():
        return "Please enter a URL to classify.", ""

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)[0].tolist()

    # Convert to readable dictionary (label -> rounded probability).
    confidence = {label: round(prob, 4) for label, prob in zip(labels, probs)}
    pred_label = labels[torch.argmax(logits, dim=-1).item()]

    # Low-confidence non-benign predictions are reported as benign to reduce
    # false alarms. Only the reported label changes; the confidence dictionary
    # still shows the model's raw probabilities.
    # NOTE(review): this fails open — a real threat scored below the threshold
    # is shown as benign. Confirm that trade-off is intended.
    if (
        pred_label in ("phishing", "defacement", "malware")
        and max(probs) < _MALICIOUS_THRESHOLD
    ):
        pred_label = "benign"

    return f"🧠 Prediction: {pred_label}", f"Confidence: {confidence}"

# Gradio interface
# Single free-text field where the user types the URL to be checked.
url_box = gr.Textbox(label="Enter a URL", placeholder="https://example.com")

# Wire the input to predict_url; the two text outputs receive the two strings
# that predict_url returns, in order.
demo = gr.Interface(
    title="🔍 Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
    fn=predict_url,
    inputs=url_box,
    outputs=["text", "text"],
)

# Start the app.
demo.launch()