import io
from typing import List, Tuple, Dict, Any
from PIL import Image, ImageOps
import numpy as np
import torch
import gradio as gr

# Face detector
from facenet_pytorch import MTCNN

# HF image classifier
from transformers import AutoImageProcessor, AutoModelForImageClassification

# ========= Config =========
# Multi-model ensemble (Soft Voting). You can add 1~2 more deepfake binary classifiers here.
MODEL_IDS = [
    "prithivMLmods/Deep-Fake-Detector-v2-Model",
    # Example for additional model:
    # "HuggingFaceM4/dfdc_deit_base_patch16_224",
]
MAX_SIDE = 896           # Resize image, keep detail
FACE_MIN_SIZE = 112      # Faces smaller than this in pixels are skipped (avoid artifacts)
FACE_MARGIN = 0.20       # Margin added when cropping face (square crop)
DETECT_THRESH = 0.80     # Face detection confidence threshold
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SEED = 42                # For reproducibility
# =========================

torch.manual_seed(SEED)
np.random.seed(SEED)

# ---- Utilities ----
def resize_keep_ratio(img: Image.Image, max_side: int = MAX_SIDE) -> Image.Image:
    """Resize while preserving aspect ratio."""
    w, h = img.size
    m = max(w, h)
    if m <= max_side:
        return img
    scale = max_side / float(m)
    return img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)

def to_square_with_margin(box, W, H, margin=FACE_MARGIN):
    """Convert face bounding box to a square crop with margin."""
    x1, y1, x2, y2 = box
    w = x2 - x1
    h = y2 - y1
    cx = (x1 + x2) / 2.0
    cy = (y1 + y2) / 2.0
    side = max(w, h) * (1.0 + margin)
    x1s = int(max(0, cx - side/2))
    y1s = int(max(0, cy - side/2))
    x2s = int(min(W, cx + side/2))
    y2s = int(min(H, cy + side/2))

    # Make it square
    sq_w = x2s - x1s
    sq_h = y2s - y1s
    if sq_w != sq_h:
        diff = abs(sq_w - sq_h)
        if sq_w < sq_h:
            x1s = max(0, x1s - diff // 2)
            x2s = min(W, x2s + diff - diff // 2)
        else:
            y1s = max(0, y1s - diff // 2)
            y2s = min(H, y2s + diff - diff // 2)
    return (x1s, y1s, x2s, y2s)

def canonical_label(label: str) -> str:
    """Standardize label names into fake/real."""
    l = label.lower().strip()
    fake_keys = ["fake", "ai", "synthetic", "deepfake", "generated", "manipulated", "forged"]
    real_keys = ["real", "authentic", "genuine", "original", "live"]
    if any(k in l for k in fake_keys): return "fake"
    if any(k in l for k in real_keys): return "real"
    return label

def rank_probs(id2label: Dict[int, str], probs: np.ndarray) -> List[Tuple[str, float]]:
    """Sort probabilities by descending value."""
    pairs = [(id2label[i], float(probs[i])) for i in range(len(probs))]
    return sorted(pairs, key=lambda x: x[1], reverse=True)

def entropy(p: np.ndarray) -> float:
    """Probability entropy as uncertainty measure."""
    p = np.clip(p, 1e-8, 1.0)
    return float(-(p * np.log(p)).sum())

def jpeg_recompress(pil_img: Image.Image, quality: int = 85) -> Image.Image:
    """JPEG recompression to simulate compression noise (TTA)."""
    buf = io.BytesIO()
    pil_img.save(buf, format="JPEG", quality=quality, optimize=True)
    buf.seek(0)
    return Image.open(buf).convert("RGB")

def mild_center_crop_resize(pil_img: Image.Image, ratio: float = 0.92) -> Image.Image:
    """Slight center crop and resize (TTA)."""
    w, h = pil_img.size
    nw, nh = int(w * ratio), int(h * ratio)
    left = (w - nw) // 2
    top = (h - nh) // 2
    return pil_img.crop((left, top, left + nw, top + nh)).resize((w, h), Image.LANCZOS)

# ===== Image quality evaluation (to reduce false positives on webcam photos) =====
def _np_gray(pil_img: Image.Image) -> np.ndarray:
    return np.array(pil_img.convert("L"))

def _conv2d_same_reflect(img: np.ndarray, kernel: np.ndarray) -> np.ndarray:
    """Lightweight 2D convolution (reflect padding, no cv2/scipy)."""
    kh, kw = kernel.shape
    ph, pw = kh // 2, kw // 2
    img_pad = np.pad(img, ((ph, ph), (pw, pw)), mode="reflect")
    out = np.zeros_like(img, dtype=np.float32)
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            region = img_pad[i:i+kh, j:j+kw]
            out[i, j] = float((region * kernel).sum())
    return out

def variance_of_laplacian(gray: np.ndarray) -> float:
    """Sharpness measure (blur detection)."""
    k = np.array([[0, 1, 0],[1,-4, 1],[0, 1, 0]], dtype=np.float32)
    lap = _conv2d_same_reflect(gray.astype(np.float32), k)
    return float(lap.var())

def jpeg_size_ratio(pil_img: Image.Image, quality: int = 85) -> float:
    """Compression ratio proxy."""
    buf_png = io.BytesIO(); pil_img.save(buf_png, format="PNG"); s_png = len(buf_png.getvalue())
    buf_jpg = io.BytesIO(); pil_img.save(buf_jpg, format="JPEG", quality=quality, optimize=True); s_jpg = len(buf_jpg.getvalue())
    if s_png == 0: return 1.0
    return float(s_jpg) / float(s_png)

def image_quality_metrics(pil_img: Image.Image) -> Dict[str, float]:
    g = _np_gray(pil_img)
    return {
        "sharp": variance_of_laplacian(g),
        "bright": float(g.mean()),
        "contrast": float(g.std()),
        "comp": jpeg_size_ratio(pil_img, 85)
    }

def quality_bucket(m: Dict[str,float]) -> str:
    """Classify image quality level."""
    poor = (m["sharp"] < 60) or (m["bright"] < 40) or (m["bright"] > 215) or (m["contrast"] < 25)
    good = (m["sharp"] >= 120) and (50 <= m["bright"] <= 200) and (m["contrast"] >= 35)
    if poor: return "poor"
    if good: return "good"
    return "ok"

def logit(p, eps=1e-6): p = min(max(p, eps), 1 - eps); return float(np.log(p/(1-p)))
def sigmoid(x): return float(1/(1+np.exp(-x)))

def calibrate_fake_prob(p: float, qlvl: str) -> float:
    """Quality-adaptive probability scaling."""
    if qlvl == "poor":  t, b = 1.6, -0.4
    elif qlvl == "good": t, b = 0.9, +0.1
    else:                t, b = 1.2, -0.1
    z = (logit(p) + b) / t
    return sigmoid(z)

# ---- Load models ----
mtcnn = MTCNN(keep_all=True, device=DEVICE)
_models = []
for mid in MODEL_IDS:
    try: processor = AutoImageProcessor.from_pretrained(mid, use_fast=True)
    except Exception: processor = AutoImageProcessor.from_pretrained(mid)
    clf = AutoModelForImageClassification.from_pretrained(mid).to(DEVICE).eval()
    _models.append((mid, processor, clf))

# ---- Core inference ----
@torch.no_grad()
def classify_images_ensemble(pils: List[Image.Image]) -> Dict[str, Any]:
    """Run classification on multiple faces and return per-image predictions."""
    per_image_results = []

    def tta_views(img): return [
        img,
        ImageOps.mirror(img),
        jpeg_recompress(img, 85),
        mild_center_crop_resize(img, 0.92),
    ]

    use_amp = (DEVICE == "cuda")
    for img in pils:
        views = tta_views(img)
        model_probs_accum = []

        for (mid, processor, clf) in _models:
            inputs = processor(images=views, return_tensors="pt")
            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
            with torch.cuda.amp.autocast(enabled=use_amp):
                logits = clf(**inputs).logits
            probs = torch.softmax(logits, dim=-1)
            model_probs_accum.append(probs.mean(dim=0).unsqueeze(0))

        probs_ens = torch.cat(model_probs_accum, dim=0).mean(dim=0)
        probs_np = probs_ens.float().cpu().numpy()
        id2label = _models[0][2].config.id2label
        ranked = rank_probs(id2label, probs_np)

        fake_p = real_p = None
        for i in range(len(probs_np)):
            cat = canonical_label(id2label[i])
            if cat == "fake": fake_p = max(fake_p or 0, probs_np[i])
            elif cat == "real": real_p = max(real_p or 0, probs_np[i])

        if fake_p is None and real_p is None:
            top1 = ranked[0]
            if canonical_label(top1[0]) == "fake": fake_p, real_p = top1[1], 1-top1[1]
            else: real_p, fake_p = top1[1], 1-top1[1]

        per_image_results.append({
            "top": ranked[:3],
            "fake_prob": None if fake_p is None else round(fake_p, 4),
            "real_prob": None if real_p is None else round(real_p, 4),
            "entropy": round(entropy(probs_np), 4)
        })
    return {"per_image": per_image_results}

def analyze(img: Image.Image) -> Dict[str, Any]:
    """Detect deepfake in faces and full image, quality-aware fusion."""
    if img is None: return {"error": "No image uploaded."}

    img = resize_keep_ratio(img.convert("RGB"), MAX_SIDE)
    q_metrics = image_quality_metrics(img)
    q_level = quality_bucket(q_metrics)

    boxes, probs = mtcnn.detect(img)
    crops, crop_boxes, kept_scores = [], [], []

    # Face detection & filtering
    if boxes is not None and probs is not None:
        W, H = img.size
        for (b, sc) in zip(boxes, probs):
            if sc is None or sc < DETECT_THRESH: continue
            x1s, y1s, x2s, y2s = to_square_with_margin(b, W, H, FACE_MARGIN)
            if x2s<=x1s or y2s<=y1s: continue
            face = img.crop((x1s, y1s, x2s, y2s))
            if min(face.size) < FACE_MIN_SIZE: continue
            crops.append(face); crop_boxes.append((x1s,y1s,x2s,y2s)); kept_scores.append(float(sc))

    # Face inference
    per_face_results = []
    if crops:
        preds = classify_images_ensemble(crops)["per_image"]
        for i,(pred,box,sc) in enumerate(zip(preds, crop_boxes, kept_scores),1):
            fm = image_quality_metrics(crops[i-1])
            fl = quality_bucket(fm)
            fp = pred.get("fake_prob")
            if fp is not None:
                pred["fake_prob_raw"]=fp
                pred["fake_prob"]=round(calibrate_fake_prob(fp, fl),4)
            pred["quality"]={"level":fl,**fm}
            per_face_results.append({
                "face_index":i,"box":{"x1":box[0],"y1":box[1],"x2":box[2],"y2":box[3]},
                "det_score":round(sc,4),**pred
            })

    # Full-image inference (weak expert)
    full_pred = classify_images_ensemble([img])["per_image"][0]
    if full_pred.get("fake_prob") is not None:
        full_pred["fake_prob_raw"]=full_pred["fake_prob"]
        full_pred["fake_prob"]=round(calibrate_fake_prob(full_pred["fake_prob"], q_level),4)
    full_pred["quality"]={"level":q_level,**q_metrics}

    # Score fusion (faces > full image)
    face_scores=[r["fake_prob"] for r in per_face_results if r.get("fake_prob") is not None]
    if not face_scores and full_pred.get("fake_prob") is None:
        overall_fake=0.5
    else:
        faces_robust=float(np.median(face_scores)) if face_scores else None
        full_score=full_pred.get("fake_prob",None)

        if faces_robust and full_score:
            overall_fake=0.8*faces_robust+0.2*full_score
        elif faces_robust: overall_fake=faces_robust
        else: overall_fake=full_score

        if face_scores:
            q3=float(np.quantile(face_scores,0.75))
            overall_fake=float(0.7*overall_fake+0.3*q3)

    # Dynamic thresholding based on quality
    if q_level=="poor": th_fake,th_unc=0.85,0.65
    elif q_level=="good": th_fake,th_unc=0.70,0.55
    else: th_fake,th_unc=0.75,0.60

    if overall_fake>=th_fake: label="Likely Deepfake"
    elif overall_fake>=th_unc: label="Uncertain"
    else: label="Likely Real"

    return {
        "label":label,
        "overall_fake_probability":round(overall_fake,4),
        "faces_detected":len(per_face_results),
    }

def visualize_faces(img: Image.Image):
    """Return cropped faces for preview."""
    if img is None: return []
    img=resize_keep_ratio(img.convert("RGB"),MAX_SIDE)
    boxes,probs=mtcnn.detect(img)
    thumbs=[]
    if boxes is not None and probs is not None:
        W,H=img.size
        for(b,sc) in zip(boxes,probs):
            if sc is None or sc<DETECT_THRESH: continue
            x1s,y1s,x2s,y2s=to_square_with_margin(b,W,H,FACE_MARGIN)
            if x2s<=x1s or y2s<=y1s: continue
            face=img.crop((x1s,y1s,x2s,y2s))
            if min(face.size)<FACE_MIN_SIZE: continue
            thumbs.append(face.resize((160,160),Image.LANCZOS))
    return thumbs

# ---- Gradio UI ----
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🕵️ FakeSpotter — Deepfake Image Detector  
    **Ensemble + TTA + Quality Awareness**
    - Multi-model ensemble  
    - Face-focused detection + fallback to whole image  
    - Image quality guard to reduce false positives (e.g., webcam noise)  
    > Educational, not forensic-grade.
    """)

    with gr.Row():
        inp=gr.Image(type="pil",label="Upload Image")
        with gr.Column():
            thumbs=gr.Gallery(label="Detected Face Crops (preview)",columns=6,height="auto")
            out=gr.JSON(label="Results")

    btn=gr.Button("Analyze")
    btn.click(lambda im:visualize_faces(im),inputs=inp,outputs=thumbs)
    btn.click(analyze,inputs=inp,outputs=out)

if __name__=="__main__":
    demo.launch()