Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import re | |
# Hugging Face Hub repository that hosts the URL classifier and its tokenizer.
MODEL_REPO = "fusingAIandSec/malicious-url-detector"

# Both objects are created once at import time; predict_url reuses them on
# every call instead of reloading the weights.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)

# Class names, indexed by position in the model's output vector.
# NOTE(review): this order is assumed to match the model's logit order --
# confirm against the checkpoint's id2label mapping.
labels = [
    "benign",
    "defacement",
    "phishing",
    "malware",
]
# URL normalization helper
def normalize_url(url):
    """Return *url* trimmed, lower-cased and guaranteed to carry an http(s) scheme.

    Args:
        url: Raw URL text as typed by the user.

    Returns:
        The stripped, lower-cased URL. If it does not already start with
        ``http://`` or ``https://``, ``https://`` is prepended.
    """
    # Lower-case *before* testing the scheme so that inputs such as
    # "HTTP://Example.com" are recognised as already having one; checking the
    # raw text would turn them into "https://http://example.com".
    url = url.strip().lower()
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    return url
# Prediction function
def predict_url(url):
    """Classify a URL and return two display strings for the UI.

    Args:
        url: URL text entered by the user; may be empty.

    Returns:
        A ``(prediction, confidence)`` tuple of strings. ``prediction`` names
        the chosen label; ``confidence`` shows the per-label softmax
        probabilities rounded to four decimals. For blank input a prompt
        message is returned and the model is not invoked.
    """
    # Without this guard, blank input is normalized to the bare "https://"
    # and scored as if it were a real URL.
    if not url or not url.strip():
        return "Please enter a URL.", "Confidence: n/a"

    url = normalize_url(url)
    inputs = tokenizer(url, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    # Single input, so take row 0 of the softmax output as a plain list.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    # Convert to readable dictionary
    # NOTE(review): assumes `labels` is ordered like the model's outputs.
    confidence = {label: round(prob, 4) for label, prob in zip(labels, probs)}

    # Softmax is monotonic, so the most probable label is also the argmax of
    # the logits; picking it from the pairs avoids a second pass and indexing.
    pred_label, max_prob = max(zip(labels, probs), key=lambda pair: pair[1])

    # Apply smart threshold to reduce false phishing/defacement: a non-benign
    # verdict below 0.85 is reported as benign. The confidence dictionary is
    # left untouched, so it still shows the raw probabilities.
    if pred_label in ("phishing", "defacement", "malware") and max_prob < 0.85:
        pred_label = "benign"

    # NOTE(review): the characters before "Prediction" look like a
    # mis-encoded emoji -- confirm the intended glyph in the original file.
    return f"π§ Prediction: {pred_label}", f"Confidence: {confidence}"
# Gradio interface
# One text box in, two text fields out (the prediction line and the
# confidence line returned by predict_url).
# NOTE(review): the first character of the title looks like a mis-encoded
# emoji -- confirm the intended glyph in the original file.
demo = gr.Interface(
    title="π Malicious URL Detector",
    description="Classifies URLs as benign, defacement, phishing, or malware. Now with smart confidence logic!",
    fn=predict_url,
    inputs=gr.Textbox(placeholder="https://example.com", label="Enter a URL"),
    outputs=["text", "text"],
)

# Start the web server as soon as the module is executed.
demo.launch()