#!/usr/bin/env python3
#
# @File: download-candidates.py
# @Date: 2026-06-01
#
# Step 1 of 3: Download raw candidate car images from the media server.
# No ImgIX calls. No processing. Just an SSH file listing plus plain HTTPS downloads.
#
# After this, run filter-candidates.py to check which images have
# background visible through windows before spending any ImgIX credits.
#
# Usage:
#   python3 tools/internal/download-candidates.py --count 300
#   python3 tools/internal/download-candidates.py --count 300 --out candidates/source

import sys
import argparse
import subprocess
import requests
from pathlib import Path

# SSH login used only to list candidate files on the media server.
SSH_SERVER = "root@165.227.32.132"

# Server-side directory that is searched for images; this prefix is stripped
# from each listed path to obtain the path relative to MEDIA_URL.
MEDIA_PATH = "/var/www/media.liftkit.click/public_html"

# Public HTTPS base URL the images are downloaded from (no SCP needed).
MEDIA_URL = "https://media.liftkit.click"

# Extra options passed to every ssh invocation.
# NOTE(review): StrictHostKeyChecking=no turns off host-key verification —
# confirm this is acceptable for this host.
SSH_OPTS = [
    "-o", "StrictHostKeyChecking=no",
    "-o", "ConnectTimeout=10",
]

# HTTP headers sent with every download request.
HEADERS = {"User-Agent": "Mozilla/5.0"}


def find_remote_images(count: int) -> list:
    """List up to *count* large JPEGs on the media server, shuffled for diversity.

    Runs a ``find | shuf | head`` pipeline on the server over SSH and returns
    the absolute server-side paths it prints, one per image.

    Args:
        count: Maximum number of paths to return.

    Returns:
        A list of absolute server-side file paths in shuffled order.

    Exits the process with status 1 when ssh cannot be started, times out,
    reports a non-zero exit status, or prints no paths.
    """
    # maxdepth 3 = site/vehicle_id/filename — avoids full-tree traversal
    find_cmd = (
        f'find {MEDIA_PATH} -maxdepth 3 -name "*-large.jpg" | shuf | head -{count}'
    )
    try:
        r = subprocess.run(
            ["ssh"] + SSH_OPTS + [SSH_SERVER, find_cmd],
            capture_output=True, text=True, timeout=180
        )
    except subprocess.TimeoutExpired:
        # Without this the script would die with a traceback instead of the
        # same clean "SSH error" exit used for every other failure.
        print("SSH error: listing timed out after 180 seconds")
        sys.exit(1)
    except OSError as exc:
        # Raised when the ssh executable is missing or cannot be launched.
        print(f"SSH error: {exc}")
        sys.exit(1)

    if r.returncode != 0 or not r.stdout.strip():
        # stderr is empty when the command succeeded but matched nothing, so
        # fall back to an explicit explanation rather than a blank message.
        detail = r.stderr.strip() or "no matching images returned by server"
        print(f"SSH error: {detail}")
        sys.exit(1)

    paths = [p.strip() for p in r.stdout.strip().splitlines() if p.strip()]
    print(f"Found {len(paths)} vehicles on server\n")
    return paths


def download(remote_path: str, dest: Path) -> bool:
    """Download one image via public HTTPS and store it at *dest*.

    The body is streamed into a sibling ``<dest>.part`` file and moved to
    *dest* only once it has been fully received and is larger than 5,000
    bytes. A failed or undersized download therefore never leaves a file at
    *dest* that resume logic could mistake for a good image.

    Args:
        remote_path: Absolute server-side path as listed under MEDIA_PATH.
        dest: Local file path the image is written to.

    Returns:
        True if the image was downloaded and stored; False on a non-200
        response, a network or filesystem error, or a too-small payload.
    """
    rel = remote_path.replace(MEDIA_PATH.rstrip("/") + "/", "")
    url = f"{MEDIA_URL}/{rel}"
    # ".part" suffix keeps the staging file out of any "*.jpg" glob.
    tmp = dest.with_name(dest.name + ".part")
    try:
        # The context manager releases the connection on every exit path,
        # including the early return for non-200 responses.
        with requests.get(url, timeout=30, headers=HEADERS, stream=True) as resp:
            if resp.status_code != 200:
                return False
            with open(tmp, "wb") as fh:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    fh.write(chunk)
        # Tiny payloads are treated as failures (same threshold as before).
        if tmp.stat().st_size <= 5_000:
            return False
        tmp.replace(dest)
        return True
    except (requests.RequestException, OSError):
        return False
    finally:
        # Best-effort cleanup: after a successful move the staging file is
        # already gone, and a failed removal must not mask the result.
        try:
            tmp.unlink()
        except OSError:
            pass


def _load_path_map(paths_file: Path) -> dict:
    """Read the ``stem<TAB>relative/path`` map left by a previous run, if any."""
    path_map = {}
    if paths_file.exists():
        for line in paths_file.read_text(encoding="utf-8").splitlines():
            if "\t" in line:
                stem, rel = line.split("\t", 1)
                path_map[stem] = rel.strip()
    return path_map


def _save_path_map(paths_file: Path, path_map: dict) -> None:
    """Write the path map sorted by stem, one ``stem<TAB>path`` entry per line."""
    with open(paths_file, "w", encoding="utf-8") as f:
        for stem, rel in sorted(path_map.items()):
            f.write(f"{stem}\t{rel}\n")


def main():
    """Download candidate images into the output folder and record their paths.

    Parses ``--count`` and ``--out``, lists candidate images on the server,
    downloads up to ``--count`` of them as ``NNNN.jpg`` and maintains
    ``paths.txt`` (``stem<TAB>server-relative path``) in the output folder.

    Resume behaviour: numbering continues after the ``*.jpg`` files already
    present, and images whose server path is already recorded in
    ``paths.txt`` are not downloaded again. The path map is written even when
    the run is interrupted, so completed downloads keep their mapping.
    """
    parser = argparse.ArgumentParser(description="Download candidate car images (no ImgIX)")
    parser.add_argument("--count", type=int, default=300, help="Images to download (default: 300)")
    parser.add_argument("--out",   default="candidates/source", help="Output folder")
    args = parser.parse_args()
    if args.count < 1:
        # A non-positive count would only surface later as a confusing
        # "SSH error" from the remote listing.
        parser.error("--count must be at least 1")

    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Check existing to support resume
    existing = len(list(out_dir.glob("*.jpg")))
    if existing:
        print(f"Resuming — {existing} images already downloaded to {out_dir}/")

    # Ask for extra candidates so enough remain after skipping known images.
    remote_paths = find_remote_images(args.count + existing)

    # Load existing path map to support resume
    paths_file = out_dir / "paths.txt"
    path_map = _load_path_map(paths_file)
    known = set(path_map.values())
    prefix = MEDIA_PATH.rstrip("/") + "/"

    ok = fail = skip = dupes = 0
    try:
        for remote_path in remote_paths:
            if ok >= args.count:
                break

            # The listing is shuffled, so a resumed run can be offered an
            # image that was already fetched under another index.
            rel = remote_path.replace(prefix, "")
            if rel in known:
                dupes += 1
                continue

            idx  = existing + ok + 1
            stem = f"{idx:04d}"
            dest = out_dir / f"{stem}.jpg"

            if dest.exists():
                ok += 1
                skip += 1
                continue

            name = Path(remote_path).name
            print(f"[{idx:4d}] {name}", end=" ... ", flush=True)

            if download(remote_path, dest):
                # Record server path → needed later by fetch-imgix-masks.py
                path_map[stem] = rel
                known.add(rel)
                print(f"OK  ({dest.stat().st_size // 1024} KB)")
                ok += 1
            else:
                print("FAILED")
                fail += 1
                # dest did not exist before this attempt, so anything there
                # now is a leftover of the failed download; remove it so it
                # is not counted as a good image on the next iteration.
                if dest.exists():
                    dest.unlink()
    finally:
        # Write/update path map — also on Ctrl-C or an unexpected error, so
        # images downloaded so far keep their server path.
        _save_path_map(paths_file, path_map)

    print(f"\n{'='*50}")
    print(f"Downloaded : {ok} images ({skip} already existed)")
    print(f"Failed     : {fail}")
    print(f"Duplicates : {dupes} (already in path map, skipped)")
    print(f"Location   : {out_dir}/")
    print(f"Path map   : {paths_file}")
    print("\nNext step:")
    print(f"  python3 tools/internal/filter-candidates.py --source {out_dir}")
    print("=" * 50)

# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
