Core modules: - probe.rs: ffprobe execution logic - parser.rs: JSON parsing logic - output.rs: Output formatting - lib.rs: Library interface - main.rs: CLI entry point Features: - Extract video metadata using ffprobe - Parse video/audio/subtitle streams - Save to JSON file - Console summary output Documentation: - Added QUICKSTART.md - Added ENVIRONMENT_SETUP_REPORT.md
744 lines
27 KiB
Python
744 lines
27 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Video YOLO Player - Play video with YOLO object detection overlay
|
|
Shows two windows: Original Video and YOLO Detection
|
|
|
|
Usage:
|
|
python video_yolo_player.py <video_path> <yolo_model_path>
|
|
|
|
Controls:
|
|
y/Y - Toggle live YOLO detection (blue boxes)
|
|
p/P - Toggle pre-scanned YOLO data (green boxes)
|
|
i/I - Show video probe information
|
|
Space - Pause/Resume
|
|
s/S - Toggle sound
|
|
b/B - Toggle status bar
|
|
h/H - Hide current window
|
|
1/2/3 - Toggle windows
|
|
←/→ - Seek ±5s
|
|
Shift+←/→ - Seek ±30s
|
|
q/ESC - Quit
|
|
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import sys
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import shutil
|
|
import json
|
|
import platform
|
|
from datetime import datetime
|
|
from typing import Tuple, Dict, Any, Optional
|
|
from ultralytics import YOLO
|
|
|
|
# Path to the ffplay binary used for audio playback: prefer whatever is on
# PATH, fall back to the Homebrew location (Apple Silicon macOS default).
FFPLAY = shutil.which('ffplay') or '/opt/homebrew/bin/ffplay'

# Build metadata shown in the command-window title bar and the CLI banner.
BUILD_VERSION = "2.0.0"
BUILD_TIME = "2026-03-06 12:00:00"
|
|
|
|
|
|
def get_window_rect(win_name: str) -> Tuple[int, int, int, int]:
    """Return the current geometry of *win_name* as an (x, y, w, h) tuple."""
    x, y, w, h = cv2.getWindowImageRect(win_name)
    return (int(x), int(y), int(w), int(h))
|
|
|
|
|
|
def get_screen_resolution() -> Tuple[int, int]:
    """Detect the primary screen resolution.

    Uses `system_profiler` on macOS, `xrandr` on Linux and the Win32
    `GetSystemMetrics` API on Windows.  Any failure (missing tool, timeout,
    unparsable output) falls through to a 1920x1080 default.
    """
    os_name = platform.system()

    if os_name == "Darwin":  # macOS
        try:
            proc = subprocess.run(
                ['system_profiler', 'SPDisplaysDataType'],
                capture_output=True, text=True, timeout=5
            )
            for line in proc.stdout.split('\n'):
                if 'Resolution:' not in line:
                    continue
                found = re.search(r'(\d+)\s*x\s*(\d+)', line)
                if found:
                    return int(found.group(1)), int(found.group(2))
        except Exception:
            pass

    elif os_name == "Linux":
        try:
            proc = subprocess.run(
                ['xrandr'], capture_output=True, text=True, timeout=5
            )
            # NOTE(review): xrandr usually marks the active mode with '*'
            # on a separate line from ' connected' — confirm this match
            # ever fires; otherwise the fallback below is used.
            for line in proc.stdout.split('\n'):
                if ' connected' in line and '*' in line:
                    found = re.search(r'(\d+)x(\d+)', line)
                    if found:
                        return int(found.group(1)), int(found.group(2))
        except Exception:
            pass

    elif os_name == "Windows":
        try:
            import ctypes
            u32 = ctypes.windll.user32
            return u32.GetSystemMetrics(0), u32.GetSystemMetrics(1)
        except Exception:
            pass

    # Detection failed — assume Full HD.
    return 1920, 1080
|
|
|
|
|
|
# COCO class names in canonical order; the list index is the YOLO class id.
YOLO_NAMES = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]
|
|
|
|
|
|
def format_time(seconds: float) -> str:
    """Render *seconds* as a zero-padded ``HH:MM:SS`` string."""
    hh = int(seconds // 3600)
    mm = int(seconds % 3600 // 60)
    ss = int(seconds % 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d}"
|
|
|
|
|
|
def format_time_with_frame(seconds: float, frame_num: int, fps: float) -> Tuple[str, str]:
    """Render a position as (``HH:MM:SS.ff``, ``Frame: N``).

    The ``.ff`` part is the frame index within the current second
    (``frame_num % fps``), or 0 when *fps* is unknown.
    """
    hh = int(seconds // 3600)
    mm = int(seconds % 3600 // 60)
    ss = int(seconds % 60)
    sub_frame = int(frame_num % fps) if fps > 0 else 0
    return f"{hh:02d}:{mm:02d}:{ss:02d}.{sub_frame:02d}", f"Frame: {frame_num}"
|
|
|
|
|
|
def load_probe_data(video_path: str) -> Optional[Dict]:
    """Load the ``<name>.probe.json`` sidecar next to *video_path*.

    Returns the parsed dict, or None when the file is missing or unreadable.
    """
    base = os.path.splitext(os.path.basename(video_path))[0]
    candidate = os.path.join(os.path.dirname(video_path), f"{base}.probe.json")

    if not os.path.exists(candidate):
        return None

    try:
        with open(candidate, 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except Exception as exc:
        print(f"Error loading probe file: {exc}")
        return None
|
|
|
|
|
|
def load_yolo_data(video_path: str) -> Optional[Dict]:
    """Load the ``<name>.yolo.json`` sidecar next to *video_path*.

    Returns the parsed dict, or None when the file is missing or unreadable.
    """
    base = os.path.splitext(os.path.basename(video_path))[0]
    candidate = os.path.join(os.path.dirname(video_path), f"{base}.yolo.json")

    if not os.path.exists(candidate):
        return None

    try:
        with open(candidate, 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except Exception as exc:
        print(f"Error loading YOLO file: {exc}")
        return None
|
|
|
|
|
|
def get_detections_list(result) -> list:
    """Convert an ultralytics result object into plain detection dicts.

    Each dict carries ``class_id``, ``class_name``, ``confidence`` and the
    pixel box corners ``x1``/``y1``/``x2``/``y2`` as native Python types.
    Returns an empty list when the result has no boxes.
    """
    if result.boxes is None:
        return []

    xyxy = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    cls_ids = result.boxes.cls.cpu().numpy().astype(int)

    out = []
    for (x1, y1, x2, y2), score, cid in zip(xyxy, scores, cls_ids):
        # Guard against class ids beyond the known COCO table.
        name = YOLO_NAMES[cid] if cid < len(YOLO_NAMES) else "unknown"
        out.append({
            'class_id': int(cid),
            'class_name': name,
            'confidence': float(score),
            'x1': float(x1),
            'y1': float(y1),
            'x2': float(x2),
            'y2': float(y2),
        })

    return out
|
|
|
|
|
|
def draw_detections(frame: np.ndarray, detections: list, color: Tuple[int, int, int], label_prefix: str = "") -> np.ndarray:
    """Return a copy of *frame* with one labelled box drawn per detection.

    Each detection dict must carry ``x1``/``y1``/``x2``/``y2``,
    ``class_name`` and ``confidence`` (as produced by get_detections_list).
    """
    canvas = frame.copy()

    for det in detections:
        x1 = int(det['x1'])
        y1 = int(det['y1'])
        x2 = int(det['x2'])
        y2 = int(det['y2'])
        caption = f"{label_prefix}{det['class_name']} {det['confidence']:.1%}"

        # Bounding box.
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)

        # Filled label background sized to the rendered caption.
        (cap_w, cap_h), _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        cv2.rectangle(canvas, (x1, y1 - cap_h - 10), (x1 + cap_w, y1), color, -1)

        # Black text on green boxes for contrast, white otherwise.
        fg = (0, 0, 0) if color == (0, 255, 0) else (255, 255, 255)
        cv2.putText(canvas, caption, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, fg, 2)

    return canvas
|
|
|
|
|
|
def draw_time_overlay(frame: np.ndarray, current_time: float, current_frame: int,
                      total_time: float, total_frames: int, fps: float,
                      object_count: int = 0, is_paused: bool = False,
                      sound_on: bool = False, live_yolo: bool = False,
                      pre_yolo: bool = False) -> np.ndarray:
    """Draw a semi-transparent status bar with time code, frame counter,
    object count and mode flags at the bottom of *frame*.

    NOTE(review): this mutates *frame* in place (addWeighted writes its
    result back into `frame`) and also returns it — callers that pass the
    same array repeatedly (e.g. while paused) will stack bars; confirm
    callers always pass a fresh copy.
    """
    height, width = frame.shape[:2]

    time_str, frame_str = format_time_with_frame(current_time, current_frame, fps)
    # total_frame_str is unused; only the formatted total time is shown.
    total_time_str, total_frame_str = format_time_with_frame(total_time, total_frames, fps)

    # Build the " [LIVE-YOLO+PRE-YOLO]" tag from whichever modes are active.
    mode_parts = []
    if live_yolo:
        mode_parts.append("LIVE-YOLO")
    if pre_yolo:
        mode_parts.append("PRE-YOLO")
    mode_str = f" [{'+'.join(mode_parts)}]" if mode_parts else ""

    sound_label = " [SOUND]" if sound_on else ""
    time_text = f"{time_str} / {total_time_str} | {frame_str}/{total_frames} | Objects: {object_count}{mode_str}{sound_label}"
    if is_paused:
        time_text = f"[PAUSED] {time_text}"

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.55
    thickness = 2
    padding = 10

    # Size the bar to the rendered text height plus padding.
    (text_w, text_h), baseline = cv2.getTextSize(time_text, font, font_scale, thickness)
    bar_height = text_h + baseline + padding * 3

    # Semi-transparent black bar across the bottom, with a 1px top border.
    overlay = frame.copy()
    cv2.rectangle(overlay, (0, height - bar_height), (width, height), (0, 0, 0), -1)
    cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
    cv2.line(frame, (0, height - bar_height), (width, height - bar_height), (100, 100, 100), 1)

    # Centre the text horizontally and vertically within the bar.
    text_x = (width - text_w) // 2
    text_y = height - bar_height // 2 + text_h // 2

    # Reddish text while paused, white while playing.
    text_color = (255, 100, 100) if is_paused else (255, 255, 255)

    # Drop shadow first, then the text itself.
    cv2.putText(frame, time_text, (text_x + 1, text_y + 1), font, font_scale, (0, 0, 0), thickness + 1)
    cv2.putText(frame, time_text, (text_x, text_y), font, font_scale, text_color, thickness)

    return frame
|
|
|
|
|
|
def play_video(video_path: str, model_path: str, probe_data: Optional[Dict], yolo_data: Optional[Dict]):
    """Play *video_path* with optional live / pre-scanned YOLO overlays.

    Opens three windows (original video, YOLO detection, command console),
    wires up keyboard controls, a seek trackbar and an ffplay-based audio
    side channel, then loops until the user quits.

    Args:
        video_path: path to the video file.
        model_path: path to the ultralytics model weights (loaded lazily the
            first time live detection is toggled on).
        probe_data: parsed ``.probe.json`` sidecar, or None.
        yolo_data: parsed ``.yolo.json`` sidecar, or None.

    BUGFIX: single-letter hotkeys (q, space, s, b, y, p, h) were unreachable
    because the generic printable-character branch ``elif 32 <= key_char <=
    126:`` preceded them in the elif chain and swallowed every printable key.
    The printable branch now only claims a key it will actually append to the
    command input, so the hotkey branches below it are reachable again.
    (Digits and +/-/: still start command input, so the documented 1/2/3
    window toggles remain shadowed by the command line — unchanged design.)
    """

    print(f"\nOpening video: {video_path}")
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Cannot open video: {video_path}")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0

    print(f"Video info: {width}x{height} @ {fps:.2f} fps, {total_frames} frames")

    # Load YOLO model (lazy loading - only when needed)
    model = None

    # Index pre-scanned detections by integer frame number for O(1) lookup.
    yolo_detections_by_frame = {}
    if yolo_data and 'frames' in yolo_data:
        for frame_num_str, frame_data in yolo_data['frames'].items():
            yolo_detections_by_frame[int(frame_num_str)] = frame_data.get('detections', [])
        print(f"Loaded {len(yolo_detections_by_frame)} frames from .yolo.json")

    # Screen resolution detection and window layout
    screen_w, screen_h = get_screen_resolution()
    print(f"Detected screen resolution: {screen_w}x{screen_h}")

    GAP = 10
    margin = 40

    available_width = screen_w - 3 * margin
    w_vid = min(width, available_width // 2)
    h_vid = int(w_vid * 9 / 16)  # NOTE(review): assumes 16:9 source — confirm
    h_vid = min(h_vid, screen_h - margin * 2 - 200)

    w_cmd = w_vid * 2 + GAP
    h_cmd = 280

    WIN_ORIGINAL = "1: Original Video"
    WIN_YOLO = "2: YOLO Detection"
    WIN_CMD = "3: Command"

    x_start = margin
    y_start = margin

    INIT_GEOM = {
        WIN_ORIGINAL: (x_start, y_start, w_vid, h_vid),
        WIN_YOLO: (x_start + w_vid + GAP, y_start, w_vid, h_vid),
        WIN_CMD: (x_start, y_start + h_vid + GAP + 30, w_cmd, h_cmd),
    }

    print(f"Window layout: Original={w_vid}x{h_vid}, YOLO={w_vid}x{h_vid}, Command={w_cmd}x{h_cmd}")

    def make_win(name):
        # Create, size and position a window from INIT_GEOM.
        x, y, w, h = INIT_GEOM[name]
        cv2.namedWindow(name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(name, w, h)
        cv2.moveWindow(name, x, y)

    make_win(WIN_ORIGINAL)
    make_win(WIN_YOLO)
    make_win(WIN_CMD)

    # Trackbar state: tb_code_val holds the last value WE wrote, so the
    # callback can tell user drags apart from programmatic updates.
    tb_code_val = {"v": 0}
    seek_request = {"frame": None}

    def on_progress(val):
        if val != tb_code_val["v"]:
            seek_request["frame"] = val

    for wn in (WIN_ORIGINAL, WIN_YOLO):
        cv2.createTrackbar("Progress", wn, 0, max(total_frames - 1, 1), on_progress)

    win_geom = dict(INIT_GEOM)
    win_visible = {WIN_ORIGINAL: True, WIN_YOLO: True, WIN_CMD: True}
    last_shown = WIN_ORIGINAL

    sound_process = None

    def start_audio(pos_secs):
        # Spawn ffplay (audio only) starting at pos_secs; returns the Popen
        # handle or None on failure.
        stop_audio()
        try:
            return subprocess.Popen(
                [FFPLAY, '-nodisp', '-autoexit',
                 '-ss', f'{max(0, pos_secs):.2f}', video_path],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        except Exception as e:
            print(f"Audio error: {e}")
            return None

    def stop_audio():
        # NOTE(review): pkill -f kills EVERY ffplay on the system, not just
        # the one we spawned — consider terminating the Popen handle instead.
        subprocess.run(['pkill', '-f', 'ffplay'],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def do_seek(target_frame):
        # Jump to target_frame, refresh current/annotated frames and, when
        # sound is on, restart the audio at the new position.
        nonlocal frame_count, current_frame, annotated_frame, object_count, sound_process

        target_frame = max(0, min(total_frames - 1, int(target_frame)))
        cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
        ret, f = cap.read()
        if not ret:
            return

        frame_count = target_frame + 1
        current_frame = f.copy()

        # Update detections
        object_count = 0
        annotated_frame = f.copy()

        if pre_yolo_mode and frame_count in yolo_detections_by_frame:
            dets = yolo_detections_by_frame[frame_count]
            object_count += len(dets)
            annotated_frame = draw_detections(annotated_frame, dets, (0, 255, 0), "[PRE] ")

        if live_yolo_mode and model is not None:
            r = model(f, verbose=False)[0]
            live_dets = get_detections_list(r)
            object_count += len(live_dets)
            annotated_frame = draw_detections(annotated_frame, live_dets, (255, 0, 0), "[LIVE] ")

        if sound_on:
            sound_process = start_audio(frame_count / fps)

        # Update trackbar
        tb_code_val["v"] = frame_count
        for wn in (WIN_ORIGINAL, WIN_YOLO):
            if win_visible.get(wn):
                cv2.setTrackbarPos("Progress", wn, frame_count)

        print(f"Seek → frame {frame_count} ({frame_count/fps:.2f}s)")

    def seek_delta(delta_secs):
        # Relative seek in seconds (negative = backwards).
        do_seek(frame_count + int(delta_secs * fps))

    # Command-line state
    cmd_input = ""
    cmd_log = []

    def cmd_log_add(line):
        # Append to the on-screen log, keeping at most 12 lines.
        cmd_log.append(line)
        if len(cmd_log) > 12:
            cmd_log.pop(0)

    def execute_command(s):
        # Interpret a console command: 'i' (probe info), +/- seconds,
        # hh:mm:ss[.ff] timecode, or a bare frame number.
        s = s.strip()
        if not s:
            return

        try:
            if s.lower() in ('i', 'info', 'probe'):
                # Show probe information
                if probe_data:
                    cmd_log_add(">> Video Probe Info:")
                    fmt = probe_data.get('format', {})
                    cmd_log_add(f" Format: {fmt.get('format_long_name', 'N/A')}")
                    cmd_log_add(f" Duration: {fmt.get('duration', 0):.2f}s")
                    cmd_log_add(f" Size: {fmt.get('size', 0) / 1024 / 1024:.2f} MB")
                    vs = probe_data.get('video_stream', {})
                    if vs:
                        cmd_log_add(f" Video: {vs.get('codec_name')} {vs.get('width')}x{vs.get('height')}")
                    cmd_log_add(f" Audio: {len(probe_data.get('audio_streams', []))} streams")
                else:
                    cmd_log_add("!! No .probe.json found")
                return

            if s.startswith(('+', '-')):
                seek_delta(float(s))
                cmd_log_add(f">> seek {float(s):+.1f}s")
                return

            if ':' in s:
                parts = s.split(':')
                hh = int(parts[0])
                mm = int(parts[1])
                ss_parts = parts[2].split('.')
                ss = int(ss_parts[0])
                ff = int(ss_parts[1]) if len(ss_parts) > 1 else 0
                total_s = hh*3600 + mm*60 + ss + ff/fps
                do_seek(int(total_s * fps))
                cmd_log_add(f">> seek {s}")
                return

            do_seek(int(float(s)))
            cmd_log_add(f">> seek frame {int(float(s))}")

        except Exception as e:
            cmd_log_add(f"!! {e}")

    print("\nPlaying video...")
    print("Keys: q/ESC=quit space=pause s=sound b=statusbar")
    print(" y=live YOLO p=pre YOLO i=probe info h=hide 1/2/3=toggle windows")
    print(" ←/→=±5s Shift+←/→=±30s")
    print("Command: <frame> | hh:mm:ss[.ff] | +/-secs | i (probe info)")

    frame_count = 0
    is_paused = False
    sound_on = False
    show_statusbar = True
    current_frame = None      # last decoded frame (BGR)
    annotated_frame = None    # last frame with detection overlays drawn
    object_count = 0

    # YOLO modes
    live_yolo_mode = False    # run the model on every displayed frame
    pre_yolo_mode = False     # overlay pre-scanned .yolo.json detections

    while True:
        if not is_paused:
            ret, frame = cap.read()

            if not ret:
                print("End of video, looping...")
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                frame_count = 0
                continue

            current_frame = frame.copy()
            frame_count += 1

            # Process detections
            object_count = 0
            annotated_frame = frame.copy()

            if pre_yolo_mode and frame_count in yolo_detections_by_frame:
                dets = yolo_detections_by_frame[frame_count]
                object_count += len(dets)
                annotated_frame = draw_detections(annotated_frame, dets, (0, 255, 0), "[PRE] ")

            if live_yolo_mode:
                if model is None:
                    print("Loading YOLO model for live detection...")
                    model = YOLO(model_path)
                    print("✓ Model loaded")

                results = model(frame, verbose=False)
                result = results[0]
                live_dets = get_detections_list(result)
                object_count += len(live_dets)
                annotated_frame = draw_detections(annotated_frame, live_dets, (255, 0, 0), "[LIVE] ")

        current_time = frame_count / fps if fps > 0 else 0

        if frame_count % 30 == 0 and not is_paused:
            print(f"Frame: {frame_count}/{total_frames}, Objects: {object_count}")

        # Nothing decoded yet (e.g. paused before the first frame).
        if current_frame is None:
            continue

        # Handle trackbar seek
        if seek_request["frame"] is not None:
            do_seek(seek_request["frame"])
            seek_request["frame"] = None

        # Sync trackbar (seek_request was just cleared, so this always runs).
        if not seek_request["frame"]:
            tb_code_val["v"] = frame_count
            for wn in (WIN_ORIGINAL, WIN_YOLO):
                if win_visible.get(wn):
                    cv2.setTrackbarPos("Progress", wn, frame_count)

        # Render status bar
        overlay_args = (current_time, frame_count, total_duration, total_frames, fps,
                        object_count, is_paused, sound_on, live_yolo_mode, pre_yolo_mode)

        if win_visible[WIN_ORIGINAL]:
            if show_statusbar:
                frame_out = draw_time_overlay(current_frame, *overlay_args)
            else:
                frame_out = current_frame.copy()
            cv2.imshow(WIN_ORIGINAL, frame_out)
            last_shown = WIN_ORIGINAL

        if win_visible[WIN_YOLO] and annotated_frame is not None:
            if show_statusbar:
                ann_out = draw_time_overlay(annotated_frame, *overlay_args)
            else:
                ann_out = annotated_frame.copy()
            cv2.imshow(WIN_YOLO, ann_out)
            last_shown = WIN_YOLO

        # Command window
        if win_visible[WIN_CMD]:
            cmd_h, cmd_w = 320, w_cmd
            panel = np.zeros((cmd_h, cmd_w, 3), dtype=np.uint8)

            # Title bar with build info
            cv2.rectangle(panel, (0, 0), (cmd_w, 28), (40, 40, 80), -1)
            title = f"3: Command | v{BUILD_VERSION} | {BUILD_TIME}"
            cv2.putText(panel, title, (6, 18), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (180, 220, 255), 1)

            # Examples section
            examples = [
                "Examples: 123 | 00:01:30 | +10 | -5 | i (probe info)"
            ]
            y = 50
            for ex in examples:
                cv2.putText(panel, ex, (8, y), cv2.FONT_HERSHEY_SIMPLEX, 0.42, (150, 150, 150), 1)
                y += 20

            # Separator
            cv2.line(panel, (0, y), (cmd_w, y), (60, 60, 60), 1)
            y += 15

            # Log lines (most recent 8)
            for line in cmd_log[-8:]:
                color = (80, 200, 80) if line.startswith(">>") else \
                    (80, 80, 200) if line.startswith("!!") else (180, 180, 180)
                cv2.putText(panel, line, (8, y), cv2.FONT_HERSHEY_SIMPLEX, 0.50, color, 1)
                y += 22

            # Status line
            mode_str = f"Live:{'Y' if live_yolo_mode else 'N'} Pre:{'Y' if pre_yolo_mode else 'N'}"
            s_line = (f" [{format_time(current_time)} f:{frame_count}/{total_frames}]"
                      f" {mode_str}"
                      f" Pause:{'Y' if is_paused else 'N'}"
                      f" Sound:{'Y' if sound_on else 'N'}")
            cv2.putText(panel, s_line, (6, cmd_h - 38),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.42, (120, 200, 255), 1)

            # Input prompt
            cv2.line(panel, (0, cmd_h - 28), (cmd_w, cmd_h - 28), (80, 80, 80), 1)
            prompt = f"> {cmd_input}_"
            cv2.putText(panel, prompt, (8, cmd_h - 8),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 200), 1)

            # Focus indicator
            if cmd_input:
                cv2.putText(panel, "[TYPING]", (cmd_w - 100, cmd_h - 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.40, (255, 200, 0), 1)

            cv2.imshow(WIN_CMD, panel)
            last_shown = WIN_CMD

        # Key handling
        key = cv2.waitKeyEx(30 if not is_paused else 100)
        key_char = key & 0xFF

        in_focus_mode = bool(cmd_input)

        if key_char == 13:  # Enter: run the typed command
            if cmd_input.strip():
                cmd_log_add(f"> {cmd_input}")
                execute_command(cmd_input)
                cmd_input = ""

        elif key_char in (8, 127):  # Backspace / Delete
            cmd_input = cmd_input[:-1]

        # BUGFIX: only claim a printable key when it is actually appended to
        # the command input; otherwise fall through so the hotkey branches
        # below stay reachable (previously this branch swallowed all of
        # them, making q/space/s/b/y/p/h dead keys).
        elif 32 <= key_char <= 126 and (in_focus_mode or chr(key_char) in ('+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':')):
            cmd_input += chr(key_char)

        elif key_char == ord('q') or key_char == ord('Q') or key_char == 27:
            print("Quitting...")
            break

        elif key_char == ord(' '):
            is_paused = not is_paused
            if sound_on:
                if is_paused:
                    stop_audio()
                    sound_process = None
                else:
                    sound_process = start_audio(frame_count / fps)
            print(f"{'Paused' if is_paused else 'Resumed'}")

        elif key_char == ord('b') or key_char == ord('B'):
            show_statusbar = not show_statusbar
            print(f"Status bar {'ON' if show_statusbar else 'OFF'}")

        elif key_char == ord('s') or key_char == ord('S'):
            sound_on = not sound_on
            if sound_on:
                sound_process = start_audio(frame_count / fps)
                print(f"Sound ON (at {frame_count/fps:.1f}s)")
            else:
                stop_audio()
                sound_process = None
                print("Sound OFF")

        elif key_char == ord('y') or key_char == ord('Y'):
            live_yolo_mode = not live_yolo_mode
            print(f"Live YOLO {'ON' if live_yolo_mode else 'OFF'}")

        elif key_char == ord('p') or key_char == ord('P'):
            if yolo_data:
                pre_yolo_mode = not pre_yolo_mode
                print(f"Pre-scanned YOLO {'ON' if pre_yolo_mode else 'OFF'}")
            else:
                print("No .yolo.json file found")
                cmd_log_add("!! No .yolo.json found")

        elif key_char == ord('h') or key_char == ord('H'):
            # Hide whichever window was drawn last, remembering its geometry.
            target = last_shown
            if target and win_visible.get(target):
                win_geom[target] = get_window_rect(target)
                win_visible[target] = False
                cv2.destroyWindow(target)
                print(f"Hidden: {target}")

        # NOTE(review): the digit keys below are still shadowed by the
        # command-input branch above (digits start a command), matching the
        # original behavior.
        elif key_char == ord('1'):
            win_visible[WIN_ORIGINAL] = not win_visible[WIN_ORIGINAL]
            if not win_visible[WIN_ORIGINAL]:
                win_geom[WIN_ORIGINAL] = get_window_rect(WIN_ORIGINAL)
                cv2.destroyWindow(WIN_ORIGINAL)
            else:
                g = win_geom.get(WIN_ORIGINAL, INIT_GEOM[WIN_ORIGINAL])
                cv2.namedWindow(WIN_ORIGINAL, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_ORIGINAL, g[2], g[3])
                cv2.moveWindow(WIN_ORIGINAL, g[0], g[1])
                cv2.createTrackbar("Progress", WIN_ORIGINAL,
                                   frame_count, max(total_frames-1, 1), on_progress)
            print(f"[1] Original: {'ON' if win_visible[WIN_ORIGINAL] else 'OFF'}")

        elif key_char == ord('2'):
            win_visible[WIN_YOLO] = not win_visible[WIN_YOLO]
            if not win_visible[WIN_YOLO]:
                win_geom[WIN_YOLO] = get_window_rect(WIN_YOLO)
                cv2.destroyWindow(WIN_YOLO)
            else:
                g = win_geom.get(WIN_YOLO, INIT_GEOM[WIN_YOLO])
                cv2.namedWindow(WIN_YOLO, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_YOLO, g[2], g[3])
                cv2.moveWindow(WIN_YOLO, g[0], g[1])
                cv2.createTrackbar("Progress", WIN_YOLO,
                                   frame_count, max(total_frames-1, 1), on_progress)
            print(f"[2] YOLO: {'ON' if win_visible[WIN_YOLO] else 'OFF'}")

        elif key_char == ord('3'):
            win_visible[WIN_CMD] = not win_visible[WIN_CMD]
            if not win_visible[WIN_CMD]:
                win_geom[WIN_CMD] = get_window_rect(WIN_CMD)
                cv2.destroyWindow(WIN_CMD)
            else:
                g = win_geom.get(WIN_CMD, INIT_GEOM[WIN_CMD])
                cv2.namedWindow(WIN_CMD, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(WIN_CMD, g[2], g[3])
                cv2.moveWindow(WIN_CMD, g[0], g[1])
            print(f"[3] Command: {'ON' if win_visible[WIN_CMD] else 'OFF'}")

        # Arrow key seek (key codes vary per platform/backend — TODO confirm
        # the Shift+arrow codes on each OS)
        elif key in (2424832, 63234, 65361):  # ←
            seek_delta(-5)
        elif key in (2555904, 63235, 65363):  # →
            seek_delta(5)
        elif key in (2162688, 63232, 65360):  # Shift+←
            seek_delta(-30)
        elif key in (2293760, 63233, 65367):  # Shift+→
            seek_delta(30)

    # Cleanup
    stop_audio()
    cap.release()
    cv2.destroyAllWindows()
    print("Done!")
|
|
|
|
|
|
def main():
    """CLI entry point: parse argv, load sidecar JSON files, start playback.

    Exits with status 1 when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        print(f"Usage: python {sys.argv[0]} <video_path> <yolo_model_path>")
        print(f"Example: python {sys.argv[0]} video.mp4 yolov8n.pt")
        sys.exit(1)

    video_path, model_path = sys.argv[1], sys.argv[2]

    banner = "=" * 60
    print("\n" + banner)
    print("Video YOLO Player v" + BUILD_VERSION)
    print(banner)

    # Optional ffprobe metadata sidecar.
    probe_data = load_probe_data(video_path)
    if probe_data:
        print(f"✓ Found .probe.json")
    else:
        print(f"⚠ No .probe.json found (run video_probe.py first)")

    # Optional pre-scanned YOLO detections sidecar.
    yolo_data = load_yolo_data(video_path)
    if yolo_data:
        print(f"✓ Found .yolo.json")
    else:
        print(f"⚠ No .yolo.json found (run video_yolo_object_prescan.py first)")

    print(banner)

    # Play video
    play_video(video_path, model_path, probe_data, yolo_data)
|
|
|
|
|
|
# Script entry point guard: only run the CLI when executed directly.
if __name__ == "__main__":
    main()
|