Files
video_probe/video_yolo_player.py
accusys f3e2d2dca7 Initial implementation of video_probe (Rust)
Core modules:
- probe.rs: ffprobe execution logic
- parser.rs: JSON parsing logic
- output.rs: Output formatting
- lib.rs: Library interface
- main.rs: CLI entry point

Features:
- Extract video metadata using ffprobe
- Parse video/audio/subtitle streams
- Save to JSON file
- Console summary output

Documentation:
- Added QUICKSTART.md
- Added ENVIRONMENT_SETUP_REPORT.md
2026-03-07 10:10:19 +08:00

744 lines
27 KiB
Python

#!/usr/bin/env python3
"""
Video YOLO Player - Play video with YOLO object detection overlay
Shows two windows: Original Video and YOLO Detection
Usage:
python video_yolo_player.py <video_path> <yolo_model_path>
Controls:
y/Y - Toggle live YOLO detection (blue boxes)
p/P - Toggle pre-scanned YOLO data (green boxes)
i/I - Show video probe information
Space - Pause/Resume
s/S - Toggle sound
b/B - Toggle status bar
h/H - Hide current window
1/2/3 - Toggle windows
←/→ - Seek ±5s
Shift+←/→ - Seek ±30s
q/ESC - Quit
"""
import cv2
import numpy as np
import sys
import os
import re
import subprocess
import shutil
import json
import platform
from datetime import datetime
from typing import Tuple, Dict, Any, Optional
from ultralytics import YOLO
FFPLAY = shutil.which('ffplay') or '/opt/homebrew/bin/ffplay'
BUILD_VERSION = "2.0.0"
BUILD_TIME = "2026-03-06 12:00:00"
def get_window_rect(win_name: str) -> Tuple[int, int, int, int]:
    """Return the on-screen geometry of an OpenCV window as (x, y, w, h)."""
    x, y, w, h = cv2.getWindowImageRect(win_name)
    return (int(x), int(y), int(w), int(h))
def get_screen_resolution() -> Tuple[int, int]:
"""Detect screen resolution using platform-specific methods"""
system = platform.system()
if system == "Darwin": # macOS
try:
result = subprocess.run(
['system_profiler', 'SPDisplaysDataType'],
capture_output=True, text=True, timeout=5
)
output = result.stdout
for line in output.split('\n'):
if 'Resolution:' in line:
match = re.search(r'(\d+)\s*x\s*(\d+)', line)
if match:
return int(match.group(1)), int(match.group(2))
except Exception:
pass
elif system == "Linux":
try:
result = subprocess.run(
['xrandr'], capture_output=True, text=True, timeout=5
)
output = result.stdout
for line in output.split('\n'):
if ' connected' in line and '*' in line:
match = re.search(r'(\d+)x(\d+)', line)
if match:
return int(match.group(1)), int(match.group(2))
except Exception:
pass
elif system == "Windows":
try:
import ctypes
user32 = ctypes.windll.user32
return user32.GetSystemMetrics(0), user32.GetSystemMetrics(1)
except Exception:
pass
return 1920, 1080
# COCO 80-class label list; the list index corresponds to the YOLO class id.
# Used to map a numeric class id to a human-readable name (see
# get_detections_list, which falls back to "unknown" for out-of-range ids).
YOLO_NAMES = [
"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
"dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
"umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
"kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
"sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
"remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
def format_time(seconds: float) -> str:
    """Render a second count as a zero-padded HH:MM:SS string."""
    h = int(seconds // 3600)
    m = int(seconds % 3600 // 60)
    s = int(seconds % 60)
    return ":".join(f"{part:02d}" for part in (h, m, s))
def format_time_with_frame(seconds: float, frame_num: int, fps: float) -> Tuple[str, str]:
    """Format a timestamp as the pair ('HH:MM:SS.ff', 'Frame: N').

    The .ff component is the frame index within the current second
    (frame_num modulo fps), or 00 when fps is not positive.
    """
    hours = int(seconds // 3600)
    minutes = int(seconds % 3600 // 60)
    secs = int(seconds % 60)
    frame_in_sec = int(frame_num % fps) if fps > 0 else 0
    stamp = f"{hours:02d}:{minutes:02d}:{secs:02d}.{frame_in_sec:02d}"
    return stamp, f"Frame: {frame_num}"
def load_probe_data(video_path: str) -> Optional[Dict]:
    """Load the sidecar <video>.probe.json next to the video, if present.

    Returns the parsed JSON dict, or None when the file is missing or
    cannot be read/parsed (the error is printed, not raised).
    """
    folder = os.path.dirname(video_path)
    stem = os.path.splitext(os.path.basename(video_path))[0]
    probe_file = os.path.join(folder, f"{stem}.probe.json")
    if not os.path.exists(probe_file):
        return None
    try:
        with open(probe_file, 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except Exception as e:
        print(f"Error loading probe file: {e}")
        return None
def load_yolo_data(video_path: str) -> Optional[Dict]:
    """Load the sidecar <video>.yolo.json next to the video, if present.

    Returns the parsed JSON dict, or None when the file is missing or
    cannot be read/parsed (the error is printed, not raised).
    """
    folder = os.path.dirname(video_path)
    stem = os.path.splitext(os.path.basename(video_path))[0]
    yolo_file = os.path.join(folder, f"{stem}.yolo.json")
    if not os.path.exists(yolo_file):
        return None
    try:
        with open(yolo_file, 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except Exception as e:
        print(f"Error loading YOLO file: {e}")
        return None
def get_detections_list(result) -> list:
    """Convert an ultralytics result's boxes into plain detection dicts.

    Each dict carries 'class_id', 'class_name', 'confidence' and the xyxy
    corner coordinates as floats. Returns an empty list when the result
    has no boxes. Class ids beyond YOLO_NAMES map to "unknown".
    """
    if result.boxes is None:
        return []
    xyxy = result.boxes.xyxy.cpu().numpy()
    confs = result.boxes.conf.cpu().numpy()
    cls_ids = result.boxes.cls.cpu().numpy().astype(int)
    out = []
    for (x1, y1, x2, y2), conf, cid in zip(xyxy, confs, cls_ids):
        name = YOLO_NAMES[cid] if cid < len(YOLO_NAMES) else "unknown"
        out.append({
            'class_id': int(cid),
            'class_name': name,
            'confidence': float(conf),
            'x1': float(x1),
            'y1': float(y1),
            'x2': float(x2),
            'y2': float(y2),
        })
    return out
def draw_detections(frame: np.ndarray, detections: list, color: Tuple[int, int, int], label_prefix: str = "") -> np.ndarray:
    """Draw labelled detection boxes onto a copy of *frame* and return it.

    Each detection gets a 2px rectangle plus a filled label strip above it.
    Green boxes ((0, 255, 0)) get black label text; any other color gets
    white text, so the label stays readable on the filled background.
    """
    canvas = frame.copy()
    for det in detections:
        x1, y1 = int(det['x1']), int(det['y1'])
        x2, y2 = int(det['x2']), int(det['y2'])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        label = f"{label_prefix}{det['class_name']} {det['confidence']:.1%}"
        (label_w, label_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        # Solid strip behind the label text.
        cv2.rectangle(canvas, (x1, y1 - label_h - 10), (x1 + label_w, y1), color, -1)
        text_color = (0, 0, 0) if color == (0, 255, 0) else (255, 255, 255)
        cv2.putText(canvas, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2)
    return canvas
def draw_time_overlay(frame: np.ndarray, current_time: float, current_frame: int,
                      total_time: float, total_frames: int, fps: float,
                      object_count: int = 0, is_paused: bool = False,
                      sound_on: bool = False, live_yolo: bool = False,
                      pre_yolo: bool = False) -> np.ndarray:
    """Draw a semi-transparent status bar along the bottom edge of the video.

    Shows position/duration, frame counter, object count, active YOLO modes,
    sound state and a [PAUSED] marker. Note: draws on *frame* in place
    (via addWeighted/putText with frame as destination) and returns it.
    """
    height, width = frame.shape[:2]
    pos_clock, pos_frame = format_time_with_frame(current_time, current_frame, fps)
    end_clock, _ = format_time_with_frame(total_time, total_frames, fps)
    active_modes = [name for name, on in (("LIVE-YOLO", live_yolo), ("PRE-YOLO", pre_yolo)) if on]
    mode_str = f" [{'+'.join(active_modes)}]" if active_modes else ""
    sound_label = " [SOUND]" if sound_on else ""
    time_text = f"{pos_clock} / {end_clock} | {pos_frame}/{total_frames} | Objects: {object_count}{mode_str}{sound_label}"
    if is_paused:
        time_text = f"[PAUSED] {time_text}"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.55
    thickness = 2
    padding = 10
    (text_w, text_h), baseline = cv2.getTextSize(time_text, font, font_scale, thickness)
    bar_height = text_h + baseline + padding * 3
    # Blend a black strip (60% opacity) over the bottom of the frame.
    shade = frame.copy()
    cv2.rectangle(shade, (0, height - bar_height), (width, height), (0, 0, 0), -1)
    cv2.addWeighted(shade, 0.6, frame, 0.4, 0, frame)
    cv2.line(frame, (0, height - bar_height), (width, height - bar_height), (100, 100, 100), 1)
    # Center the text inside the bar; pause state tints it blue-ish.
    text_x = (width - text_w) // 2
    text_y = height - bar_height // 2 + text_h // 2
    text_color = (255, 100, 100) if is_paused else (255, 255, 255)
    # Drop shadow first, then the text itself.
    cv2.putText(frame, time_text, (text_x + 1, text_y + 1), font, font_scale, (0, 0, 0), thickness + 1)
    cv2.putText(frame, time_text, (text_x, text_y), font, font_scale, text_color, thickness)
    return frame
def play_video(video_path: str, model_path: str, probe_data: Optional[Dict], yolo_data: Optional[Dict]):
    """Main playback loop: original + YOLO-annotated windows plus a command panel.

    Opens the video with OpenCV, lays out three windows sized to the detected
    screen resolution, and runs an event loop that handles seeking (trackbar,
    arrow keys, typed commands), optional live YOLO inference, pre-scanned
    detection overlays from .yolo.json, audio playback via ffplay, and
    per-window show/hide toggles. Returns when the user quits (q/ESC).
    """
    print(f"\nOpening video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video: {video_path}")
        return
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0
    print(f"Video info: {width}x{height} @ {fps:.2f} fps, {total_frames} frames")
    # Load YOLO model (lazy loading - only when needed)
    model = None
    # Index pre-scanned detections by integer frame number for O(1) lookup.
    yolo_detections_by_frame = {}
    if yolo_data and 'frames' in yolo_data:
        for frame_num_str, frame_data in yolo_data['frames'].items():
            yolo_detections_by_frame[int(frame_num_str)] = frame_data.get('detections', [])
        print(f"Loaded {len(yolo_detections_by_frame)} frames from .yolo.json")
    # Screen resolution detection and window layout
    screen_w, screen_h = get_screen_resolution()
    print(f"Detected screen resolution: {screen_w}x{screen_h}")
    GAP = 10
    margin = 40
    available_width = screen_w - 3 * margin
    # Two video windows side by side; height clamped to a 16:9 shape that
    # still leaves room for the command panel below.
    w_vid = min(width, available_width // 2)
    h_vid = int(w_vid * 9 / 16)
    h_vid = min(h_vid, screen_h - margin * 2 - 200)
    w_cmd = w_vid * 2 + GAP  # command panel spans both video windows
    h_cmd = 280
    WIN_ORIGINAL = "1: Original Video"
    WIN_YOLO = "2: YOLO Detection"
    WIN_CMD = "3: Command"
    x_start = margin
    y_start = margin
    # Initial geometry (x, y, w, h) for each window.
    INIT_GEOM = {
        WIN_ORIGINAL: (x_start, y_start, w_vid, h_vid),
        WIN_YOLO: (x_start + w_vid + GAP, y_start, w_vid, h_vid),
        WIN_CMD: (x_start, y_start + h_vid + GAP + 30, w_cmd, h_cmd),
    }
    print(f"Window layout: Original={w_vid}x{h_vid}, YOLO={w_vid}x{h_vid}, Command={w_cmd}x{h_cmd}")

    def make_win(name):
        # Create a resizable window at its configured position and size.
        x, y, w, h = INIT_GEOM[name]
        cv2.namedWindow(name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(name, w, h)
        cv2.moveWindow(name, x, y)

    make_win(WIN_ORIGINAL)
    make_win(WIN_YOLO)
    make_win(WIN_CMD)
    # Trackbar
    tb_code_val = {"v": 0}          # last trackbar value pushed by code (not the user)
    seek_request = {"frame": None}  # frame the user dragged to, or None

    def on_progress(val):
        # Treat the callback as a user-initiated seek only when the value
        # differs from what we last set programmatically (avoids feedback loops).
        if val != tb_code_val["v"]:
            seek_request["frame"] = val

    for wn in (WIN_ORIGINAL, WIN_YOLO):
        cv2.createTrackbar("Progress", wn, 0, max(total_frames - 1, 1), on_progress)
    win_geom = dict(INIT_GEOM)  # remembered geometry for re-showing hidden windows
    win_visible = {WIN_ORIGINAL: True, WIN_YOLO: True, WIN_CMD: True}
    last_shown = WIN_ORIGINAL   # most recently drawn window; target of the 'h' key
    sound_process = None

    def start_audio(pos_secs):
        # Launch ffplay (audio only, no display) from the given position.
        # Returns the Popen handle, or None on failure.
        stop_audio()
        try:
            return subprocess.Popen(
                [FFPLAY, '-nodisp', '-autoexit',
                 '-ss', f'{max(0, pos_secs):.2f}', video_path],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        except Exception as e:
            print(f"Audio error: {e}")
            return None

    def stop_audio():
        # NOTE(review): pkill matches every ffplay on the machine, not just
        # the one we spawned — confirm this is acceptable.
        subprocess.run(['pkill', '-f', 'ffplay'],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def do_seek(target_frame):
        # Jump to target_frame (clamped to valid range), refresh the raw and
        # annotated frames, restart audio if enabled, and sync the trackbars.
        nonlocal frame_count, current_frame, annotated_frame, object_count, sound_process
        target_frame = max(0, min(total_frames - 1, int(target_frame)))
        cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
        ret, f = cap.read()
        if not ret:
            return
        frame_count = target_frame + 1
        current_frame = f.copy()
        # Update detections
        object_count = 0
        annotated_frame = f.copy()
        if pre_yolo_mode and frame_count in yolo_detections_by_frame:
            dets = yolo_detections_by_frame[frame_count]
            object_count += len(dets)
            annotated_frame = draw_detections(annotated_frame, dets, (0, 255, 0), "[PRE] ")
        if live_yolo_mode and model is not None:
            r = model(f, verbose=False)[0]
            live_dets = get_detections_list(r)
            object_count += len(live_dets)
            annotated_frame = draw_detections(annotated_frame, live_dets, (255, 0, 0), "[LIVE] ")
        if sound_on:
            sound_process = start_audio(frame_count / fps)
        # Update trackbar
        tb_code_val["v"] = frame_count
        for wn in (WIN_ORIGINAL, WIN_YOLO):
            if win_visible.get(wn):
                cv2.setTrackbarPos("Progress", wn, frame_count)
        print(f"Seek → frame {frame_count} ({frame_count/fps:.2f}s)")

    def seek_delta(delta_secs):
        # Relative seek in seconds (positive or negative).
        do_seek(frame_count + int(delta_secs * fps))

    # Command-line state
    cmd_input = ""
    cmd_log = []

    def cmd_log_add(line):
        # Append a line to the command-panel log, keeping only the last 12.
        cmd_log.append(line)
        if len(cmd_log) > 12:
            cmd_log.pop(0)

    def execute_command(s):
        # Parse a typed command: 'i'/'info'/'probe' (show metadata),
        # '+N'/'-N' (relative seconds), 'hh:mm:ss[.ff]' (absolute time),
        # or a bare number (absolute frame). Errors go to the panel log.
        s = s.strip()
        if not s:
            return
        try:
            if s.lower() in ('i', 'info', 'probe'):
                # Show probe information
                if probe_data:
                    cmd_log_add(">> Video Probe Info:")
                    fmt = probe_data.get('format', {})
                    cmd_log_add(f" Format: {fmt.get('format_long_name', 'N/A')}")
                    cmd_log_add(f" Duration: {fmt.get('duration', 0):.2f}s")
                    cmd_log_add(f" Size: {fmt.get('size', 0) / 1024 / 1024:.2f} MB")
                    vs = probe_data.get('video_stream', {})
                    if vs:
                        cmd_log_add(f" Video: {vs.get('codec_name')} {vs.get('width')}x{vs.get('height')}")
                    cmd_log_add(f" Audio: {len(probe_data.get('audio_streams', []))} streams")
                else:
                    cmd_log_add("!! No .probe.json found")
                return
            if s.startswith(('+', '-')):
                seek_delta(float(s))
                cmd_log_add(f">> seek {float(s):+.1f}s")
                return
            if ':' in s:
                # hh:mm:ss[.ff] — the .ff frame part is converted via fps.
                parts = s.split(':')
                hh = int(parts[0])
                mm = int(parts[1])
                ss_parts = parts[2].split('.')
                ss = int(ss_parts[0])
                ff = int(ss_parts[1]) if len(ss_parts) > 1 else 0
                total_s = hh*3600 + mm*60 + ss + ff/fps
                do_seek(int(total_s * fps))
                cmd_log_add(f">> seek {s}")
                return
            # Bare number → absolute frame.
            do_seek(int(float(s)))
            cmd_log_add(f">> seek frame {int(float(s))}")
        except Exception as e:
            cmd_log_add(f"!! {e}")

    print("\nPlaying video...")
    print("Keys: q/ESC=quit space=pause s=sound b=statusbar")
    print(" y=live YOLO p=pre YOLO i=probe info h=hide 1/2/3=toggle windows")
    print(" ←/→=±5s Shift+←/→=±30s")
    print("Command: <frame> | hh:mm:ss[.ff] | +/-secs | i (probe info)")
    # Playback state
    frame_count = 0
    is_paused = False
    sound_on = False
    show_statusbar = True
    current_frame = None    # last raw frame read from the capture
    annotated_frame = None  # last frame with detection overlays applied
    object_count = 0
    # YOLO modes
    live_yolo_mode = False
    pre_yolo_mode = False
    while True:
        if not is_paused:
            ret, frame = cap.read()
            if not ret:
                # Loop playback from the start when the stream is exhausted.
                print("End of video, looping...")
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                frame_count = 0
                continue
            current_frame = frame.copy()
            frame_count += 1
            # Process detections
            object_count = 0
            annotated_frame = frame.copy()
            if pre_yolo_mode and frame_count in yolo_detections_by_frame:
                dets = yolo_detections_by_frame[frame_count]
                object_count += len(dets)
                annotated_frame = draw_detections(annotated_frame, dets, (0, 255, 0), "[PRE] ")
            if live_yolo_mode:
                if model is None:
                    # Lazy-load the model the first time live detection is enabled.
                    print("Loading YOLO model for live detection...")
                    model = YOLO(model_path)
                    print("✓ Model loaded")
                results = model(frame, verbose=False)
                result = results[0]
                live_dets = get_detections_list(result)
                object_count += len(live_dets)
                annotated_frame = draw_detections(annotated_frame, live_dets, (255, 0, 0), "[LIVE] ")
        current_time = frame_count / fps if fps > 0 else 0
        # Console progress roughly once per second at 30fps.
        if frame_count % 30 == 0 and not is_paused:
            print(f"Frame: {frame_count}/{total_frames}, Objects: {object_count}")
        if current_frame is None:
            continue
        # Handle trackbar seek
        if seek_request["frame"] is not None:
            do_seek(seek_request["frame"])
            seek_request["frame"] = None
        # Sync trackbar
        # NOTE(review): at this point seek_request["frame"] is always None (or
        # falsy), so this guard is effectively always true — looks like it was
        # meant to be `is None`; confirm intent before changing.
        if not seek_request["frame"]:
            tb_code_val["v"] = frame_count
            for wn in (WIN_ORIGINAL, WIN_YOLO):
                if win_visible.get(wn):
                    cv2.setTrackbarPos("Progress", wn, frame_count)
        # Render status bar
        overlay_args = (current_time, frame_count, total_duration, total_frames, fps,
                        object_count, is_paused, sound_on, live_yolo_mode, pre_yolo_mode)
        if win_visible[WIN_ORIGINAL]:
            if show_statusbar:
                frame_out = draw_time_overlay(current_frame, *overlay_args)
            else:
                frame_out = current_frame.copy()
            cv2.imshow(WIN_ORIGINAL, frame_out)
            last_shown = WIN_ORIGINAL
        if win_visible[WIN_YOLO] and annotated_frame is not None:
            if show_statusbar:
                ann_out = draw_time_overlay(annotated_frame, *overlay_args)
            else:
                ann_out = annotated_frame.copy()
            cv2.imshow(WIN_YOLO, ann_out)
            last_shown = WIN_YOLO
        # Command window
        if win_visible[WIN_CMD]:
            cmd_h, cmd_w = 320, w_cmd
            panel = np.zeros((cmd_h, cmd_w, 3), dtype=np.uint8)
            # Title bar with build info
            cv2.rectangle(panel, (0, 0), (cmd_w, 28), (40, 40, 80), -1)
            title = f"3: Command | v{BUILD_VERSION} | {BUILD_TIME}"
            cv2.putText(panel, title, (6, 18), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (180, 220, 255), 1)
            # Examples section
            examples = [
                "Examples: 123 | 00:01:30 | +10 | -5 | i (probe info)"
            ]
            y = 50
            for ex in examples:
                cv2.putText(panel, ex, (8, y), cv2.FONT_HERSHEY_SIMPLEX, 0.42, (150, 150, 150), 1)
                y += 20
            # Separator
            cv2.line(panel, (0, y), (cmd_w, y), (60, 60, 60), 1)
            y += 15
            # Log lines (last 8; green for results, red-ish for errors)
            for line in cmd_log[-8:]:
                color = (80, 200, 80) if line.startswith(">>") else \
                    (80, 80, 200) if line.startswith("!!") else (180, 180, 180)
                cv2.putText(panel, line, (8, y), cv2.FONT_HERSHEY_SIMPLEX, 0.50, color, 1)
                y += 22
            # Status line
            mode_str = f"Live:{'Y' if live_yolo_mode else 'N'} Pre:{'Y' if pre_yolo_mode else 'N'}"
            s_line = (f" [{format_time(current_time)} f:{frame_count}/{total_frames}]"
                      f" {mode_str}"
                      f" Pause:{'Y' if is_paused else 'N'}"
                      f" Sound:{'Y' if sound_on else 'N'}")
            cv2.putText(panel, s_line, (6, cmd_h - 38),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.42, (120, 200, 255), 1)
            # Input prompt
            cv2.line(panel, (0, cmd_h - 28), (cmd_w, cmd_h - 28), (80, 80, 80), 1)
            prompt = f"> {cmd_input}_"
            cv2.putText(panel, prompt, (8, cmd_h - 8),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 200), 1)
            # Focus indicator
            if cmd_input:
                cv2.putText(panel, "[TYPING]", (cmd_w - 100, cmd_h - 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.40, (255, 200, 0), 1)
            cv2.imshow(WIN_CMD, panel)
            last_shown = WIN_CMD
        # Key handling
        key = cv2.waitKeyEx(30 if not is_paused else 100)
        key_char = key & 0xFF
        in_focus_mode = bool(cmd_input)
        if key_char == 13:  # Enter
            if cmd_input.strip():
                cmd_log_add(f"> {cmd_input}")
                execute_command(cmd_input)
            cmd_input = ""
        elif key_char in (8, 127):  # Backspace / Delete
            cmd_input = cmd_input[:-1]
        elif 32 <= key_char <= 126:
            # NOTE(review): this branch captures every printable key, so the
            # letter/space/digit hotkey handlers below (q, space, s, b, y, p,
            # h, 1/2/3) appear unreachable from the keyboard — only ESC and
            # the arrow-key codes bypass this range. Confirm the intended
            # guard (e.g. `in_focus_mode and 32 <= key_char <= 126`).
            if in_focus_mode or chr(key_char) in ('+', '-', '0','1','2','3','4','5','6','7','8','9', ':'):
                cmd_input += chr(key_char)
        elif key_char == ord('q') or key_char == ord('Q') or key_char == 27:
            print("Quitting...")
            break
        elif key_char == ord(' '):
            # Toggle pause; keep audio in sync with the paused state.
            is_paused = not is_paused
            if sound_on:
                if is_paused:
                    stop_audio()
                    sound_process = None
                else:
                    sound_process = start_audio(frame_count / fps)
            print(f"{'Paused' if is_paused else 'Resumed'}")
        elif key_char == ord('b') or key_char == ord('B'):
            show_statusbar = not show_statusbar
            print(f"Status bar {'ON' if show_statusbar else 'OFF'}")
        elif key_char == ord('s') or key_char == ord('S'):
            sound_on = not sound_on
            if sound_on:
                sound_process = start_audio(frame_count / fps)
                print(f"Sound ON (at {frame_count/fps:.1f}s)")
            else:
                stop_audio()
                sound_process = None
                print("Sound OFF")
        elif key_char == ord('y') or key_char == ord('Y'):
            live_yolo_mode = not live_yolo_mode
            print(f"Live YOLO {'ON' if live_yolo_mode else 'OFF'}")
        elif key_char == ord('p') or key_char == ord('P'):
            # Pre-scanned overlays require the .yolo.json sidecar file.
            if yolo_data:
                pre_yolo_mode = not pre_yolo_mode
                print(f"Pre-scanned YOLO {'ON' if pre_yolo_mode else 'OFF'}")
            else:
                print("No .yolo.json file found")
                cmd_log_add("!! No .yolo.json found")
        elif key_char == ord('h') or key_char == ord('H'):
            # Hide the most recently drawn window, remembering its geometry.
            target = last_shown
            if target and win_visible.get(target):
                win_geom[target] = get_window_rect(target)
                win_visible[target] = False
                cv2.destroyWindow(target)
                print(f"Hidden: {target}")
        elif key_char == ord('1'):
            # Toggle the original-video window; recreate its trackbar on re-show.
            win_visible[WIN_ORIGINAL] = not win_visible[WIN_ORIGINAL]
            if not win_visible[WIN_ORIGINAL]:
                win_geom[WIN_ORIGINAL] = get_window_rect(WIN_ORIGINAL)
                cv2.destroyWindow(WIN_ORIGINAL)
            else:
                g = win_geom.get(WIN_ORIGINAL, INIT_GEOM[WIN_ORIGINAL])
                cv2.namedWindow(WIN_ORIGINAL, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_ORIGINAL, g[2], g[3])
                cv2.moveWindow(WIN_ORIGINAL, g[0], g[1])
                cv2.createTrackbar("Progress", WIN_ORIGINAL,
                                   frame_count, max(total_frames-1,1), on_progress)
            print(f"[1] Original: {'ON' if win_visible[WIN_ORIGINAL] else 'OFF'}")
        elif key_char == ord('2'):
            # Toggle the YOLO-detection window; recreate its trackbar on re-show.
            win_visible[WIN_YOLO] = not win_visible[WIN_YOLO]
            if not win_visible[WIN_YOLO]:
                win_geom[WIN_YOLO] = get_window_rect(WIN_YOLO)
                cv2.destroyWindow(WIN_YOLO)
            else:
                g = win_geom.get(WIN_YOLO, INIT_GEOM[WIN_YOLO])
                cv2.namedWindow(WIN_YOLO, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_YOLO, g[2], g[3])
                cv2.moveWindow(WIN_YOLO, g[0], g[1])
                cv2.createTrackbar("Progress", WIN_YOLO,
                                   frame_count, max(total_frames-1,1), on_progress)
            print(f"[2] YOLO: {'ON' if win_visible[WIN_YOLO] else 'OFF'}")
        elif key_char == ord('3'):
            # Toggle the command panel (no trackbar on this window).
            win_visible[WIN_CMD] = not win_visible[WIN_CMD]
            if not win_visible[WIN_CMD]:
                win_geom[WIN_CMD] = get_window_rect(WIN_CMD)
                cv2.destroyWindow(WIN_CMD)
            else:
                g = win_geom.get(WIN_CMD, INIT_GEOM[WIN_CMD])
                cv2.namedWindow(WIN_CMD, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_CMD, g[2], g[3])
                cv2.moveWindow(WIN_CMD, g[0], g[1])
            print(f"[3] Command: {'ON' if win_visible[WIN_CMD] else 'OFF'}")
        # Arrow key seek — codes cover Windows/macOS/Linux waitKeyEx variants.
        elif key in (2424832, 63234, 65361):  # ←
            seek_delta(-5)
        elif key in (2555904, 63235, 65363):  # →
            seek_delta(5)
        elif key in (2162688, 63232, 65360):  # Shift+←
            seek_delta(-30)
        elif key in (2293760, 63233, 65367):  # Shift+→
            seek_delta(30)
    # Cleanup
    stop_audio()
    cap.release()
    cv2.destroyAllWindows()
    print("Done!")
def main():
    """CLI entry point: validate arguments, load sidecar data, start playback.

    Exits with status 1 and prints usage when fewer than two arguments are
    supplied. Missing .probe.json / .yolo.json sidecars are reported but
    not fatal — playback proceeds without them.
    """
    if len(sys.argv) < 3:
        print(f"Usage: python {sys.argv[0]} <video_path> <yolo_model_path>")
        print(f"Example: python {sys.argv[0]} video.mp4 yolov8n.pt")
        sys.exit(1)
    video_path, model_path = sys.argv[1], sys.argv[2]
    banner = "=" * 60
    print("\n" + banner)
    print("Video YOLO Player v" + BUILD_VERSION)
    print(banner)
    # Load probe data
    probe_data = load_probe_data(video_path)
    if probe_data:
        print(f"✓ Found .probe.json")
    else:
        print(f"⚠ No .probe.json found (run video_probe.py first)")
    # Load YOLO pre-scan data
    yolo_data = load_yolo_data(video_path)
    if yolo_data:
        print(f"✓ Found .yolo.json")
    else:
        print(f"⚠ No .yolo.json found (run video_yolo_object_prescan.py first)")
    print(banner)
    # Play video
    play_video(video_path, model_path, probe_data, yolo_data)


if __name__ == "__main__":
    main()