Core modules: - probe.rs: ffprobe execution logic - parser.rs: JSON parsing logic - output.rs: Output formatting - lib.rs: Library interface - main.rs: CLI entry point Features: - Extract video metadata using ffprobe - Parse video/audio/subtitle streams - Save to JSON file - Console summary output Documentation: - Added QUICKSTART.md - Added ENVIRONMENT_SETUP_REPORT.md
429 lines
15 KiB
Python
429 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Video YOLO Object Prescan - Pre-process video for object detection
|
|
Saves detection results to .yolo.json file
|
|
|
|
Features:
|
|
- Pause/Resume: Press Ctrl+C to pause and save progress
|
|
- Resume from checkpoint: Automatically continues from last frame
|
|
- Auto-save: Configurable auto-save interval (default: 30 seconds)
|
|
|
|
Usage:
|
|
python video_yolo_object_prescan.py <video_path> <yolo_model_path> [--save-interval SECONDS]
|
|
|
|
Examples:
|
|
# Default auto-save every 30 seconds
|
|
python video_yolo_object_prescan.py video.mp4 yolov8n.pt
|
|
|
|
# Auto-save every 60 seconds
|
|
python video_yolo_object_prescan.py video.mp4 yolov8n.pt --save-interval 60
|
|
|
|
# Auto-save every 15 seconds (for long videos)
|
|
python video_yolo_object_prescan.py video.mp4 yolov8n.pt --save-interval 15
|
|
"""
|
|
|
|
import cv2
|
|
import sys
|
|
import os
|
|
import json
|
|
import time
|
|
import signal
|
|
import argparse
|
|
from datetime import datetime
|
|
from ultralytics import YOLO
|
|
|
|
|
|
# COCO class names indexed by class id (index == YOLO class id).
# NOTE(review): this is the legacy Darknet label set ("motorbike", "aeroplane",
# "sofa", "tvmonitor"), not the newer ultralytics spellings ("motorcycle",
# "airplane", "couch", "tv") — assumed to match the loaded model; verify
# against model.names if labels look wrong.
YOLO_NAMES = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
|
|
|
|
|
def format_time(seconds):
    """Render a duration in seconds as an HH:MM:SS string."""
    within_hour = seconds % 3600
    return "{:02d}:{:02d}:{:02d}".format(
        int(seconds // 3600),
        int(within_hour // 60),
        int(seconds % 60),
    )
|
|
|
|
|
|
def get_detections_list(result):
    """Convert a YOLO result's boxes into a list of plain-dict detections.

    Each dict carries class id/name, confidence, and xyxy pixel coordinates
    as JSON-serializable Python scalars.
    """
    if result.boxes is None:
        return []

    xyxy = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy().astype(int)

    out = []
    for (x1, y1, x2, y2), score, cid in zip(xyxy, scores, class_ids):
        # Fall back to "unknown" for ids beyond the known class table
        label = YOLO_NAMES[cid] if cid < len(YOLO_NAMES) else "unknown"
        out.append({
            'class_id': int(cid),
            'class_name': label,
            'confidence': float(score),
            'x1': float(x1),
            'y1': float(y1),
            'x2': float(x2),
            'y2': float(y2)
        })
    return out
|
|
|
|
|
|
def load_existing_data(output_file):
    """Load a previous run's detection JSON for resume support.

    Args:
        output_file: path to the .yolo.json file from a prior run.

    Returns:
        (data, last_frame): the parsed dict and the highest processed frame
        number, or (None, 0) when there is nothing usable to resume from
        (missing file, corrupt JSON, or no frames recorded).
    """
    if not os.path.exists(output_file):
        return None, 0

    try:
        with open(output_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Get last processed frame number
        frames = data.get('frames', {})
        if frames:
            last_frame = max(int(k) for k in frames.keys())
            return data, last_frame
    # Narrowed from `except Exception`, which also hid programming errors:
    # OSError covers read failures; ValueError covers json.JSONDecodeError
    # and non-numeric frame keys; AttributeError covers a non-object root.
    except (OSError, ValueError, AttributeError) as e:
        print(f"Warning: Could not load existing file: {e}")

    return None, 0
|
|
|
|
|
|
def save_detection_data(output_file, detection_data, is_interrupted=False, silent=False):
    """Write detection data to a JSON file, stamping save-time metadata.

    Args:
        output_file: destination path.
        detection_data: dict with optional 'metadata' and the 'frames' map.
        is_interrupted: when True, metadata status is set to 'interrupted'
            (Ctrl+C checkpoint); otherwise 'in_progress'.
        silent: kept for backward compatibility; no longer suppresses the
            size in the return value (see bug fix below).

    Returns:
        (success, file_size_bytes); (False, 0) on failure.
    """
    try:
        # Update metadata with the save timestamp and run status
        if 'metadata' in detection_data:
            detection_data['metadata']['last_saved_at'] = datetime.now().isoformat()
            if is_interrupted:
                detection_data['metadata']['status'] = 'interrupted'
            else:
                detection_data['metadata']['status'] = 'in_progress'

        # Write to file
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(detection_data, f, indent=2, ensure_ascii=False)

        # BUG FIX: silent=True used to return size 0, so the auto-save
        # progress message always reported "Size: 0.0 KB". Always return
        # the real on-disk size; callers that ignore it are unaffected.
        return True, os.path.getsize(output_file)
    # Narrowed from `except Exception`: OSError for write failures,
    # TypeError/ValueError for unserializable data.
    except (OSError, TypeError, ValueError) as e:
        print(f"Error saving data: {e}")
        return False, 0
|
|
|
|
|
|
# Global variables for signal handling.
# prescan_video() keeps these mirrors up to date so signal_handler() can
# save a checkpoint and report progress when Ctrl+C interrupts the run.
detection_data_global = None   # in-progress detection dict (metadata + frames)
output_file_global = None      # destination .yolo.json path for emergency save
frame_count_global = 0         # last frame number processed
total_frames_global = 0        # total frames reported for the current video
start_time_global = None       # time.time() when processing (re)started
|
|
|
|
|
|
def signal_handler(signum, frame):
    """Handle Ctrl+C (SIGINT): save a resumable checkpoint, then exit.

    Reads the module-level *_global mirrors maintained by prescan_video(),
    writes the current detection data with status 'interrupted', prints a
    resume hint, and terminates the process with exit code 0.
    """
    global detection_data_global, output_file_global, frame_count_global
    global total_frames_global, start_time_global

    print(f"\n\n{'=' * 60}")
    print("⏸ PAUSED - Saving progress...")
    print(f"{'=' * 60}")

    # Only save if processing actually started (globals populated)
    if detection_data_global and output_file_global:
        # Calculate stats
        elapsed = time.time() - start_time_global if start_time_global else 0
        # Recount detections across all stored frames rather than trusting
        # any cached counter — the frames map is the source of truth here
        total_detections = sum(
            len(f.get('detections', []))
            for f in detection_data_global.get('frames', {}).values()
        )

        # Update metadata (guarded divisions: frame_count may be 0)
        detection_data_global['metadata']['processing_time'] = elapsed
        detection_data_global['metadata']['total_detections'] = total_detections
        detection_data_global['metadata']['avg_detections_per_frame'] = (
            round(total_detections / frame_count_global, 2) if frame_count_global > 0 else 0
        )
        detection_data_global['metadata']['avg_time_per_frame'] = (
            round(elapsed / frame_count_global, 3) if frame_count_global > 0 else 0
        )

        # Save data with status 'interrupted' so a rerun offers to resume
        success, _ = save_detection_data(output_file_global, detection_data_global, is_interrupted=True)
        if success:
            print(f"✓ Progress saved to: {output_file_global}")
            print(f" Frames processed: {frame_count_global}/{total_frames_global}")
            print(f" Total detections: {total_detections}")
            print(f" Elapsed time: {elapsed:.1f}s")
            print(f"\n💡 Run the same command again to resume from frame {frame_count_global + 1}")

    print(f"{'=' * 60}\n")
    # Raise SystemExit out of the handler to stop the processing loop
    sys.exit(0)
|
|
|
|
|
|
def prescan_video(video_path, model_path, output_file, save_interval=30):
    """Run YOLO detection on every frame of a video and persist results to JSON.

    Supports resuming from an existing output file, periodic auto-saves, and
    Ctrl+C checkpointing (via signal_handler and the module-level globals).

    Args:
        video_path: path to the input video file.
        model_path: path to the YOLO weights file.
        output_file: destination .yolo.json path.
        save_interval: seconds between auto-saves (default: 30).

    Returns:
        True on success, False on setup failure (missing files or an
        unreadable video).
    """
    global detection_data_global, output_file_global, frame_count_global
    global total_frames_global, start_time_global

    if not os.path.exists(video_path):
        print(f"Error: Video file not found: {video_path}")
        return False

    if not os.path.exists(model_path):
        print(f"Error: YOLO model not found: {model_path}")
        return False

    # Trap Ctrl+C so a checkpoint is saved instead of losing progress
    signal.signal(signal.SIGINT, signal_handler)

    # Check for existing data (resume support)
    existing_data, last_processed_frame = load_existing_data(output_file)
    resume_mode = existing_data is not None and last_processed_frame > 0

    if resume_mode:
        print(f"\n{'=' * 60}")
        print(f"📂 Found existing data: {output_file}")
        print(f" Last processed frame: {last_processed_frame}")
        print(f"{'=' * 60}")

        response = input("\nResume from last checkpoint? (Y/n): ").strip().lower()
        if response == 'n':
            print("Starting from beginning...")
            resume_mode = False
            existing_data = None
            last_processed_frame = 0
        else:
            print("Resuming from checkpoint...")

    start_time = time.time()
    start_time_global = start_time

    # Load YOLO model
    print(f"\nLoading YOLO model from: {model_path}")
    model = YOLO(model_path)
    print("✓ Model loaded successfully")

    # Open video
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Cannot open video: {video_path}")
        return False

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # NOTE: CAP_PROP_FRAME_COUNT can be 0 for streams/broken containers,
    # so every division by total_frames below is guarded.
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0

    total_frames_global = total_frames

    print(f"\nVideo Info:")
    print(f" Path: {video_path}")
    print(f" Resolution: {width}x{height}")
    print(f" FPS: {fps:.2f}")
    print(f" Total frames: {total_frames}")
    print(f" Duration: {total_duration:.1f}s ({format_time(total_duration)})")

    if resume_mode:
        print(f" Resume from: frame {last_processed_frame + 1}")

    print(f"\nOutput: {output_file}")
    print(f"Auto-save interval: {save_interval} seconds")
    print("=" * 60)

    # Initialize or load detection data
    if resume_mode and existing_data:
        detection_data = existing_data
        frame_count = last_processed_frame
        total_detections = sum(
            len(f.get('detections', []))
            for f in detection_data.get('frames', {}).values()
        )

        # Seek to resume position
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    else:
        # Initialize new detection data
        detection_data = {
            "metadata": {
                "video_path": os.path.abspath(video_path),
                "model_path": os.path.abspath(model_path),
                "width": width,
                "height": height,
                "fps": fps,
                "total_frames": total_frames,
                "total_duration": total_duration,
                "processed_at": datetime.now().isoformat(),
                "auto_save_interval": save_interval,
                "status": "in_progress"
            },
            "frames": {}
        }
        frame_count = 0
        total_detections = 0

    # Share state with the Ctrl+C handler
    detection_data_global = detection_data
    output_file_global = output_file
    frame_count_global = frame_count

    print(f"\n{'Resuming' if resume_mode else 'Starting'} video processing...")
    print("💡 Press Ctrl+C to pause and save progress\n")

    # Process frames
    last_save_time = time.time()
    auto_save_count = 0

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        frame_count += 1
        frame_count_global = frame_count
        current_time = frame_count / fps if fps > 0 else 0

        # Run YOLO detection on this frame
        results = model(frame, verbose=False)
        result = results[0]
        detections = get_detections_list(result)

        total_detections += len(detections)

        # Store detection data keyed by frame number (string keys for JSON)
        detection_data["frames"][str(frame_count)] = {
            "frame_number": frame_count,
            "time_seconds": round(current_time, 3),
            "time_formatted": format_time(current_time),
            "detections": detections
        }

        # Progress indicator every 100 frames
        if frame_count % 100 == 0:
            elapsed = time.time() - start_time
            # BUG FIX: was an unguarded division — crashed when
            # total_frames == 0 (unknown frame count)
            progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0.0
            eta = (elapsed / frame_count) * (total_frames - frame_count) if frame_count > 0 else 0
            print(f" Progress: {frame_count}/{total_frames} frames ({progress:.1f}%) - "
                  f"{len(detections)} objects - Elapsed: {elapsed:.1f}s, ETA: {eta:.1f}s")

        # Auto-save every save_interval seconds
        current_time_val = time.time()
        if current_time_val - last_save_time >= save_interval:
            success, file_size = save_detection_data(output_file, detection_data, is_interrupted=False, silent=True)
            if success:
                auto_save_count += 1
                elapsed = time.time() - start_time
                # BUG FIX: same total_frames == 0 guard as above
                progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0.0
                print(f" 💾 Auto-saved (#{auto_save_count}): {frame_count}/{total_frames} frames ({progress:.1f}%) - "
                      f"Size: {file_size / 1024:.1f} KB - Elapsed: {elapsed:.1f}s")
            last_save_time = current_time_val

    cap.release()
    processing_time = time.time() - start_time

    # Update final metadata
    detection_data["metadata"]["processing_time"] = processing_time
    detection_data["metadata"]["total_detections"] = total_detections
    detection_data["metadata"]["avg_detections_per_frame"] = (
        round(total_detections / frame_count, 2) if frame_count > 0 else 0
    )
    detection_data["metadata"]["avg_time_per_frame"] = (
        round(processing_time / frame_count, 3) if frame_count > 0 else 0
    )
    detection_data["metadata"]["completed_at"] = datetime.now().isoformat()
    detection_data["metadata"]["status"] = "completed"
    detection_data["metadata"]["auto_save_count"] = auto_save_count

    # Save final data
    save_detection_data(output_file, detection_data, is_interrupted=False)

    # BUG FIX: the summary divided by frame_count unconditionally, raising
    # ZeroDivisionError on a video that opened but yielded no frames
    avg_objects = total_detections / frame_count if frame_count > 0 else 0.0
    avg_frame_time = processing_time / frame_count if frame_count > 0 else 0.0

    # Print summary
    print(f"\n{'=' * 60}")
    print(f"✓ Detection complete!")
    print(f" Total frames processed: {frame_count}")
    print(f" Total objects detected: {total_detections}")
    print(f" Average objects per frame: {avg_objects:.2f}")
    print(f" Total processing time: {processing_time:.2f} seconds")
    print(f" Average time per frame: {avg_frame_time:.3f} seconds")
    print(f" Auto-saves performed: {auto_save_count}")
    print(f" Results saved to: {output_file}")
    print(f" File size: {os.path.getsize(output_file) / 1024:.2f} KB")
    print("=" * 60)

    return True
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, derive the output path, run prescan."""
    parser = argparse.ArgumentParser(
        description='Video YOLO Object Prescan - Pre-process video for object detection',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Default auto-save every 30 seconds
  python %(prog)s video.mp4 yolov8n.pt

  # Auto-save every 60 seconds
  python %(prog)s video.mp4 yolov8n.pt --save-interval 60

  # Auto-save every 15 seconds (for long videos)
  python %(prog)s video.mp4 yolov8n.pt --save-interval 15

Features:
  - Press Ctrl+C to pause and save progress
  - Run again to resume from last checkpoint
  - Auto-save at configurable intervals (default: 30 seconds)
"""
    )
    parser.add_argument('video_path', help='Path to video file')
    parser.add_argument('model_path', help='Path to YOLO model file')
    parser.add_argument('--save-interval', type=int, default=30,
                        help='Auto-save interval in seconds (default: 30)')
    args = parser.parse_args()

    # Output file lives next to the video: <video basename>.yolo.json
    base_name = os.path.splitext(os.path.basename(args.video_path))[0]
    output_file = os.path.join(os.path.dirname(args.video_path), f"{base_name}.yolo.json")

    # Clamp the auto-save interval to a sane [5, 300] second range
    interval = args.save_interval
    if interval < 5:
        print("Warning: Save interval too small (minimum 5 seconds). Using 5 seconds.")
        interval = 5
    elif interval > 300:
        print("Warning: Save interval too large (maximum 300 seconds). Using 300 seconds.")
        interval = 300

    # Run the prescan; a False return means a setup/processing failure
    if not prescan_video(args.video_path, args.model_path, output_file, interval):
        print("\n✗ Error during detection processing")
        sys.exit(1)

    print("\n✓ Video YOLO prescan completed successfully!")
|
|
|
|
|
|
# Script entry point: only run the CLI when executed directly, not on import
if __name__ == "__main__":
    main()
|