#!/usr/bin/env python3
#
# @File: filter-candidates.py
# @Date: 2026-06-01
#
# Step 2 of 3: Filter downloaded car images to keep only those where
# outdoor background is visible through the windows.
#
# WHY this matters for training:
#   If a photo shows no background through the glass (tinted windows, dark
#   interior, angle where glass appears solid), BiRefNet and ImgIX produce
#   nearly identical masks. The model has nothing to learn from those images.
#   Only images where ImgIX actually punches through the glass are useful.
#
# HOW it detects visible-through-glass without any ML model:
#   Car lot photos typically have the car centred. The window region sits in
#   roughly the upper-middle portion of the image. We sample that zone and
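#   measure three things:
#     1. Brightness — outdoor sky/background visible → brighter
#     2. Variance   — background content is more varied than flat dark interior
#     3. Ratio      — window-zone brightness relative to the lower body zone
#   The three are combined into one weighted 0..1 score. Images scoring at or
#   above --threshold (and passing the studio / interior checks) are kept.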
#   A contact-sheet review grid is also saved so you can visually confirm.
#
# Usage:
#   python3 tools/internal/filter-candidates.py --source candidates/source
#   python3 tools/internal/filter-candidates.py --source candidates/source --threshold 0.4

import argparse
import shutil
from pathlib import Path

import numpy as np
from PIL import Image, ImageDraw, ImageFont


# ──────────────────────────────────────────────────────
# Scoring
# ──────────────────────────────────────────────────────
def score_image(img_path: Path) -> dict:
    """
    Score an image for likelihood that outdoor background is visible through windows.

    Every measurement is a plain pixel statistic over a fixed fractional zone
    of the frame; no ML model is involved.

    Args:
      img_path — path of the image file to analyse

    Returns a dict with:
      score        — 0..1 combined score (higher = more likely to have visible windows)
      brightness   — mean brightness in window zone (0..255)
      variance     — colour std-dev in window zone
      body_bright  — mean brightness in lower body zone (reference)
      bright_ratio — window_zone / body_zone brightness ratio
      natural_bg   — True if the image has a natural outdoor background (not studio/white)
      is_exterior  — True if the frame looks like an exterior shot of a single car
      error        — False

    If the file cannot be opened/decoded, or the image is so small that a
    sampling zone contains no pixels, only {"score": 0.0, "error": True}
    is returned.
    """
    try:
        # The context manager releases the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(img_path) as src:
            img = src.convert("RGB")
    except Exception:
        # Deliberately broad: this is a best-effort batch filter and image
        # decoders raise many different exception types on corrupt input.
        return {"score": 0.0, "error": True}

    w, h = img.size
    arr  = np.array(img, dtype=np.float32)

    # ── Sampling zones ─────────────────────────────────────────────────────
    # Ground zone: bottom 18% of the image, central 84% of the width.
    ground_zone = arr[int(h * 0.82):, int(w * 0.08):int(w * 0.92)]

    # Window zone: upper-middle of image where windshield/windows typically are
    # y: 20-55% of height   x: 15-85% of width
    y0w, y1w = int(h * 0.20), int(h * 0.55)
    x0w, x1w = int(w * 0.15), int(w * 0.85)
    window_zone = arr[y0w:y1w, x0w:x1w]

    # Lower body zone: door / lower body area — definitely painted metal, not glass
    # y: 60-80% of height
    y0b, y1b = int(h * 0.60), int(h * 0.80)
    body_zone = arr[y0b:y1b, x0w:x1w]

    # A degenerate (1-pixel-wide or 1-pixel-tall) image leaves these zones
    # empty; mean()/std() of an empty array is NaN, and a NaN score cannot be
    # ordered against other scores. Report such images as unscorable instead.
    if ground_zone.size == 0 or window_zone.size == 0:
        return {"score": 0.0, "error": True}

    # ── Background type detection ──────────────────────────────────────────
    # Check 30x30 corners — studio white photos have near-zero std and near-255 brightness
    corners = [arr[0:30, 0:30], arr[0:30, w-30:], arr[h-30:, 0:30], arr[h-30:, w-30:]]
    corner_stds       = [c.std() for c in corners]
    avg_corner_bright = sum(c.mean() for c in corners) / 4
    studio_white = any(s < 5 for s in corner_stds) and avg_corner_bright > 230
    # Flat branded/gradient background: very low variance across the top strip
    top_strip_std = float(arr[0:10, :].std())
    studio_flat   = (not studio_white) and top_strip_std < 10
    natural_bg    = not studio_white and not studio_flat

    # ── Exterior full-car shot detection ──────────────────────────────────
    # Interior shots: camera is inside the car — dark lower half (dashboard),
    #   or the entire image is very dark.
    # Close-up shots: grille, badge, steering wheel — no ground visible at bottom.
    #
    # Ground/asphalt at the bottom of the image is the strongest exterior signal:
    #   moderate brightness (45–185), low colour saturation (channels close together).
    #   The lower bound is 45 rather than 35 because images with very dark
    #   bottoms (underground/tunnel/artificial stage) sneak in at 35.
    gnd_bright = float(ground_zone.mean())
    gnd_sat    = float((ground_zone.max(axis=2) - ground_zone.min(axis=2)).mean())
    has_ground = 45 < gnd_bright < 185 and gnd_sat < 45

    # Overall image brightness — very dark = interior or night shot
    overall_bright = float(arr.mean())
    is_too_dark    = overall_bright < 55

    # Bottom strip much darker than top strip → interior (dashboard below, sky above)
    top20 = arr[0:int(h * 0.20), :]
    # Images under 5 px tall have no top strip; 0.0 keeps the test below False.
    top20_bright = float(top20.mean()) if top20.size > 0 else 0.0
    bot20_bright = float(arr[int(h * 0.80):, :].mean())
    interior_dark_bottom = bot20_bright < 55 and top20_bright > bot20_bright * 2.5

    # Reject very low-saturation images: monochrome renders, black-and-white
    # press shots, or near-greyscale photos (e.g. the CIVIC overlay image, dark renders)
    overall_sat = float((arr.max(axis=2) - arr.min(axis=2)).mean())
    is_colorful = overall_sat > 12

    # ── Single-car shot detection ──────────────────────────────────────────
    # Lot/aerial shots have many small cars spread across the image; single-car
    # shots have one dominant foreground object.
    #
    # Heuristic: divide the image into a 3×3 grid and count the cells that
    # "light up" (moderate brightness AND colour variance).
    cell_h, cell_w = h // 3, w // 3
    bright_cells = 0
    for gy in range(3):
        for gx in range(3):
            cell = arr[gy*cell_h:(gy+1)*cell_h, gx*cell_w:(gx+1)*cell_w]
            # Cells are empty when the image is under 3 px on a side; skip them
            # rather than taking statistics of nothing.
            if cell.size > 0 and cell.mean() > 60 and cell.std() > 25:
                bright_cells += 1
    # Lot shots: all 9 cells light up.  Single car: typically 3-6 cells.
    is_single_car = bright_cells <= 7

    is_exterior = (
        has_ground
        and not is_too_dark
        and not interior_dark_bottom
        and is_colorful
        and is_single_car
    )

    # ── Window visibility score ────────────────────────────────────────────
    win_bright  = float(window_zone.mean())
    body_bright = float(body_zone.mean()) if body_zone.size > 0 else 128.0
    win_var     = float(window_zone.std())

    # Brightness ratio: how much brighter is the window zone vs the body?
    # Windows showing sky → higher ratio.  Dark interior → ratio ≈ 1 or < 1.
    bright_ratio = win_bright / max(body_bright, 1.0)

    # Normalise each signal to 0..1
    norm_brightness = min(win_bright / 200.0, 1.0)          # 200 = "bright enough" threshold
    norm_variance   = min(win_var / 50.0, 1.0)              # 50 = "varied enough"
    norm_ratio      = min(max(bright_ratio - 0.8, 0) / 0.6, 1.0)  # bonus if window > body

    # Combined score — weighted average (ratio most important for tinted glass)
    score = 0.35 * norm_brightness + 0.30 * norm_variance + 0.35 * norm_ratio

    return {
        "score":       round(score, 4),
        "brightness":  round(win_bright, 1),
        "variance":    round(win_var, 1),
        "body_bright": round(body_bright, 1),
        "bright_ratio":round(bright_ratio, 3),
        "natural_bg":  natural_bg,
        "is_exterior": is_exterior,
        "error":       False,
    }


# ──────────────────────────────────────────────────────
# Contact sheet
# ──────────────────────────────────────────────────────
# Thumbnail size in pixels. Every candidate is resized to exactly this size,
# so its original aspect ratio is not preserved.
THUMB_W  = 300
THUMB_H  = 200
# Number of thumbnails per row of the review grid.
COLS     = 5
# Height in pixels of the caption bar pasted directly under each thumbnail.
BAR_H    = 32


def make_contact_sheet(results: list, out_path: Path, threshold: float) -> None:
    """
    Save a visual grid of the given candidates, laid out in the order received.

    Each thumbnail gets a caption bar underneath, coloured by outcome:
      green  — passes (score ≥ threshold, natural background, exterior shot)
      orange — excluded because the background looks like a studio/flat backdrop
      red    — filtered out for any other reason (score too low, interior/close-up)

    Args:
      results   — list of (img_path, info) pairs, where info is a score dict
                  with at least a "score" key; this function does not sort, so
                  pass the list already ordered the way the sheet should read
      out_path  — destination file for the grid image
      threshold — minimum score that counts as a pass

    When results is empty nothing is written and a short notice is printed.
    """
    if not results:
        # Zero rows would give a zero-height sheet, which cannot be saved as
        # an image file; skip instead of failing inside save().
        print("  Review grid skipped — no images to show")
        return

    rows  = (len(results) + COLS - 1) // COLS
    sheet = Image.new("RGB", (COLS * THUMB_W, rows * (THUMB_H + BAR_H)), (30, 30, 30))

    for i, (img_path, info) in enumerate(results):
        row, col = divmod(i, COLS)
        x = col * THUMB_W
        y = row * (THUMB_H + BAR_H)

        try:
            # Close the source file as soon as the thumbnail has been built.
            with Image.open(img_path) as src:
                thumb = src.convert("RGB").resize((THUMB_W, THUMB_H), Image.LANCZOS)
        except Exception:
            # Unreadable file: keep its grid slot with a grey placeholder.
            thumb = Image.new("RGB", (THUMB_W, THUMB_H), (80, 80, 80))

        sheet.paste(thumb, (x, y))

        # Same three-part keep rule the filter applies; missing flags are
        # treated as passing so partial score dicts still render.
        natural  = info.get("natural_bg", True)
        exterior = info.get("is_exterior", True)
        passes   = info["score"] >= threshold and natural and exterior

        if passes:
            bar_col = (40, 160, 40)
        elif not natural:
            bar_col = (180, 120, 0)
        else:
            bar_col = (180, 40, 40)

        bar      = Image.new("RGB", (THUMB_W, BAR_H), bar_col)
        draw     = ImageDraw.Draw(bar)
        bg_label = "" if natural else " [studio]"
        label    = (
            f"{img_path.stem}  score={info['score']:.2f}"
            f"  br={info.get('brightness', 0.0):.0f}"
            f"  var={info.get('variance', 0.0):.0f}{bg_label}"
        )
        draw.text((4, 8), label, fill=(255, 255, 255))
        sheet.paste(bar, (x, y + THUMB_H))

    sheet.save(out_path, quality=88)
    print(f"  Review grid → {out_path}  ({len(results)} images)")


# ──────────────────────────────────────────────────────
# Main
# ──────────────────────────────────────────────────────
def main():
    """
    CLI entry point: score, filter and copy candidate images.

    Steps:
      1. Score every ``*.jpg`` in --source and print one table row per image.
      2. Keep images whose score is at least --threshold and that are flagged
         as natural-background exterior shots, best score first, truncated
         to --limit when that is greater than 0.
      3. Save a review grid of exactly the kept images next to the --out
         folder (skipped when nothing was kept).
      4. Copy the kept images into --out and print a summary.
    """
    parser = argparse.ArgumentParser(
        description="Filter downloaded car images for visible-through-glass background"
    )
    parser.add_argument("--source",    required=True,      help="Folder of downloaded JPEGs (from download-candidates.py)")
    parser.add_argument("--out",       default="candidates/filtered", help="Folder for qualifying images")
    parser.add_argument("--threshold", type=float, default=0.40,      help="Min score to keep (0..1, default 0.40)")
    parser.add_argument("--limit",     type=int,   default=0,         help="Max qualifying images to keep (0=unlimited)")
    args = parser.parse_args()

    source_dir = Path(args.source)
    out_dir    = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)

    images = sorted(source_dir.glob("*.jpg"))
    if not images:
        print(f"No JPEGs found in {source_dir}")
        return

    def qualifies(info: dict) -> bool:
        # Single definition of the keep rule, shared by the table and the copy step.
        return info["score"] >= args.threshold and info["natural_bg"] and info["is_exterior"]

    print(f"Scoring {len(images)} images from {source_dir}/ ...")
    print(f"Threshold : {args.threshold}  (tune with --threshold if too strict/loose)")
    print("Background: natural outdoor only (studio-white and flat backgrounds excluded)")
    print("Shot type : exterior full-car only (interior/close-up shots excluded)\n")
    print(f"{'File':<12} {'Score':>6}  {'WinBright':>9}  {'Variance':>8}  {'Ratio':>6}  {'BG':>7}  {'Shot':>8}  Result")
    print("-" * 85)

    scored = []
    unscored = 0
    for img_path in images:
        info = score_image(img_path)
        if info.get("error"):
            # Report the failure instead of dropping the file without a trace.
            unscored += 1
            print(f"{img_path.stem:<12} could not be scored — skipped")
            continue
        bg_tag  = "natural" if info["natural_bg"] else "studio"
        ext_tag = "ext" if info["is_exterior"] else "int/close"
        marker  = "KEEP" if qualifies(info) else "skip"
        print(
            f"{img_path.stem:<12} {info['score']:>6.3f}  "
            f"{info['brightness']:>9.1f}  {info['variance']:>8.1f}  "
            f"{info['bright_ratio']:>6.3f}  {bg_tag:>7}  {ext_tag:>8}  {marker}"
        )
        scored.append((img_path, info))

    # Sort by score descending
    scored.sort(key=lambda x: x[1]["score"], reverse=True)

    # Qualifying images — score threshold + natural outdoor bg + exterior full-car shot
    qualifying = [(p, i) for p, i in scored if qualifies(i)]
    # Apply --limit before building the grid so the grid shows exactly the
    # images that get copied.
    if args.limit > 0:
        qualifying = qualifying[:args.limit]

    # Save review grid — only the kept images (sorted by score descending).
    # An empty grid cannot be written, so skip it when nothing qualified.
    review_path = out_dir.parent / "review_grid.jpg"
    grid_written = bool(qualifying)
    if grid_written:
        make_contact_sheet(qualifying, review_path, args.threshold)
    else:
        print("  Review grid skipped — no qualifying images")

    for img_path, _ in qualifying:
        shutil.copy2(img_path, out_dir / img_path.name)
    kept = len(qualifying)

    studio_count   = sum(1 for _, i in scored if not i.get("natural_bg", True))
    interior_count = sum(1 for _, i in scored if i.get("natural_bg", True) and not i.get("is_exterior", True))
    low_score      = sum(1 for _, i in scored if i.get("natural_bg", True) and i.get("is_exterior", True) and i["score"] < args.threshold)
    print(f"\n{'='*55}")
    print(f"Total scored      : {len(scored)}")
    print(f"Kept (exterior, natural, ≥{args.threshold:.2f}): {kept}  → {out_dir}/")
    print(f"Excluded studio   : {studio_count}")
    print(f"Excluded interior/close-up: {interior_count}")
    print(f"Score too low     : {low_score}")
    print(f"Could not be scored: {unscored}")
    if grid_written:
        print("\nOpen the review grid to visually confirm quality:")
        print(f"  open {review_path}")
    print("\nIf threshold feels too strict or too loose, re-run with a different value:")
    print(f"  python3 tools/internal/filter-candidates.py --source {source_dir} --threshold 0.35")
    print("\nWhen happy with the filtered set, run ImgIX to get ground-truth masks:")
    print(f"  python3 tools/internal/fetch-imgix-masks.py --source {out_dir}")
    print("=" * 55)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
