Initial implementation of video_probe (Rust)

Core modules:
- probe.rs: ffprobe execution logic
- parser.rs: JSON parsing logic
- output.rs: Output formatting
- lib.rs: Library interface
- main.rs: CLI entry point

Features:
- Extract video metadata using ffprobe
- Parse video/audio/subtitle streams
- Save to JSON file
- Console summary output

Documentation:
- Added QUICKSTART.md
- Added ENVIRONMENT_SETUP_REPORT.md
2026-03-07 10:10:19 +08:00
commit f3e2d2dca7
464 changed files with 125611 additions and 0 deletions
--- a/video_yolo_object_prescan.py
+++ b/video_yolo_object_prescan.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python3
+"""
+Video YOLO Object Prescan - Pre-process video for object detection
+Saves detection results to .yolo.json file
+
+Features:
+- Pause/Resume: Press Ctrl+C to pause and save progress
+- Resume from checkpoint: On the next run, offers to continue after the last saved frame
+- Auto-save: Configurable auto-save interval (default: 30 seconds; clamped to 5-300)
+
+Usage:
+    python video_yolo_object_prescan.py <video_path> <yolo_model_path> [--save-interval SECONDS]
+
+Examples:
+    # Default auto-save every 30 seconds
+    python video_yolo_object_prescan.py video.mp4 yolov8n.pt
+    
+    # Auto-save every 60 seconds
+    python video_yolo_object_prescan.py video.mp4 yolov8n.pt --save-interval 60
+    
+    # Auto-save every 15 seconds (for long videos)
+    python video_yolo_object_prescan.py video.mp4 yolov8n.pt --save-interval 15
+"""
+
+import cv2
+import sys
+import os
+import json
+import time
+import signal
+import argparse
+from datetime import datetime
+from ultralytics import YOLO
+
+
+# Class-id -> label lookup table (80 entries). get_detections_list() indexes it
+# with the integer class id of each box and falls back to "unknown" for ids
+# past the end of the list.
+# NOTE(review): several spellings here ("motorbike", "aeroplane", "sofa",
+# "pottedplant", "diningtable", "tvmonitor") look like an older COCO label
+# file; confirm the table matches the labels of the model actually loaded,
+# especially for custom-trained weights.
+YOLO_NAMES = [
+    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
+    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
+    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
+    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
+    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
+    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
+    "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
+    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
+    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
+]
+
+
+def format_time(seconds):
+    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.
+
+    Fractional seconds are dropped, and the hour field is not wrapped at 24.
+    """
+    whole_hours, within_hour = divmod(seconds, 3600)
+    whole_minutes = within_hour // 60
+    leftover_secs = seconds % 60
+    return "{:02d}:{:02d}:{:02d}".format(
+        int(whole_hours), int(whole_minutes), int(leftover_secs)
+    )
+
+
+def get_detections_list(result):
+    """Flatten one detection result into a list of JSON-serialisable dicts.
+
+    Args:
+        result: Object exposing ``boxes`` whose ``xyxy``, ``conf`` and ``cls``
+            members each support ``.cpu().numpy()``.
+
+    Returns:
+        One dict per box holding ``class_id``, ``class_name``, ``confidence``
+        and the ``x1``/``y1``/``x2``/``y2`` corners as plain Python numbers.
+        The list is empty when ``result.boxes`` is ``None``.
+    """
+    box_set = result.boxes
+    if box_set is None:
+        return []
+
+    corners = box_set.xyxy.cpu().numpy()
+    scores = box_set.conf.cpu().numpy()
+    labels = box_set.cls.cpu().numpy().astype(int)
+
+    def _as_record(corner, score, label):
+        # Ids beyond the lookup table are reported as "unknown" instead of failing.
+        left, top, right, bottom = corner
+        label_text = "unknown" if label >= len(YOLO_NAMES) else YOLO_NAMES[label]
+        return {
+            'class_id': int(label),
+            'class_name': label_text,
+            'confidence': float(score),
+            'x1': float(left),
+            'y1': float(top),
+            'x2': float(right),
+            'y2': float(bottom),
+        }
+
+    return [
+        _as_record(corner, score, label)
+        for corner, score, label in zip(corners, scores, labels)
+    ]
+
+
+def load_existing_data(output_file):
+    """Load a previously saved checkpoint so processing can resume.
+
+    Args:
+        output_file: Path of the JSON file written by an earlier run.
+
+    Returns:
+        ``(data, last_frame)`` where ``data`` is the parsed JSON object and
+        ``last_frame`` is the highest frame number among the keys of
+        ``data['frames']``. ``(None, 0)`` is returned when the file is
+        missing, unreadable, malformed, or contains no frames.
+    """
+    try:
+        with open(output_file, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        # No checkpoint yet: the normal first-run case, so stay quiet.
+        return None, 0
+    except (OSError, ValueError) as e:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
+        print(f"Warning: Could not load existing file: {e}")
+        return None, 0
+
+    frames = data.get('frames', {}) if isinstance(data, dict) else None
+    if not isinstance(frames, dict):
+        print("Warning: Could not load existing file: unexpected JSON structure")
+        return None, 0
+    if not frames:
+        return None, 0
+
+    try:
+        last_frame = max(int(k) for k in frames)
+    except ValueError as e:
+        print(f"Warning: Could not load existing file: {e}")
+        return None, 0
+
+    # Code that resumes from this data indexes data['metadata'] directly, so
+    # guarantee the key holds a dict rather than failing late in a long run.
+    if not isinstance(data.get('metadata'), dict):
+        data['metadata'] = {}
+
+    return data, last_frame
+
+
+def save_detection_data(output_file, detection_data, is_interrupted=False, silent=False):
+    """Write the detection data to ``output_file`` as indented UTF-8 JSON.
+
+    When ``detection_data`` has a ``metadata`` dict, its ``last_saved_at``
+    timestamp is refreshed and its ``status`` is set to ``'interrupted'`` or
+    ``'in_progress'``. A status of ``'completed'`` is kept unless the save is
+    an interrupt, so the final save of a finished run is not relabelled.
+
+    The JSON is written to a sibling ``.tmp`` file and then moved over
+    ``output_file``, so a failed or interrupted write leaves the previous
+    checkpoint intact.
+
+    Args:
+        output_file: Destination path.
+        detection_data: Dict to serialise; its metadata is updated in place.
+        is_interrupted: True when saving because the run is being paused.
+        silent: Kept for backward compatibility; the written size is returned
+            either way and nothing is printed on success.
+
+    Returns:
+        ``(True, size_in_bytes)`` on success, ``(False, 0)`` on failure.
+    """
+    tmp_file = f"{output_file}.tmp"
+    try:
+        metadata = detection_data.get('metadata')
+        if isinstance(metadata, dict):
+            metadata['last_saved_at'] = datetime.now().isoformat()
+            if is_interrupted:
+                metadata['status'] = 'interrupted'
+            elif metadata.get('status') != 'completed':
+                metadata['status'] = 'in_progress'
+
+        with open(tmp_file, 'w', encoding='utf-8') as f:
+            json.dump(detection_data, f, indent=2, ensure_ascii=False)
+
+        # os.replace swaps the finished file into place in a single step.
+        os.replace(tmp_file, output_file)
+        return True, os.path.getsize(output_file)
+    except (OSError, TypeError, ValueError, AttributeError) as e:
+        print(f"Error saving data: {e}")
+        # Best-effort removal of a partially written temp file.
+        try:
+            os.remove(tmp_file)
+        except OSError:
+            pass
+        return False, 0
+
+
+# Global variables for signal handling.
+# signal_handler() is invoked with only (signum, frame), so prescan_video()
+# mirrors its working state here for the handler to read.
+detection_data_global = None  # dict holding metadata and per-frame results
+output_file_global = None  # path the JSON results are written to
+frame_count_global = 0  # number of the most recently processed frame
+total_frames_global = 0  # frame count OpenCV reports for the video
+start_time_global = None  # time.time() taken when this run started
+
+
+def signal_handler(signum, frame):
+    """SIGINT handler: store the progress made so far, then exit with code 0.
+
+    Reads the module-level ``*_global`` state. When detection data and an
+    output path are available, the run statistics in the metadata are
+    refreshed and the data is saved flagged as interrupted. The process is
+    terminated via ``sys.exit(0)`` in every case.
+
+    Args:
+        signum: Signal number (unused).
+        frame: Interrupted stack frame (unused).
+    """
+    banner = '=' * 60
+    print(f"\n\n{banner}")
+    print("⏸  PAUSED - Saving progress...")
+    print(banner)
+
+    data = detection_data_global
+    target = output_file_global
+
+    if data and target:
+        done = frame_count_global
+
+        # Statistics for the part of the video handled so far.
+        elapsed = (time.time() - start_time_global) if start_time_global else 0
+        total_detections = 0
+        for entry in data.get('frames', {}).values():
+            total_detections += len(entry.get('detections', []))
+
+        if done > 0:
+            per_frame_detections = round(total_detections / done, 2)
+            per_frame_time = round(elapsed / done, 3)
+        else:
+            per_frame_detections = 0
+            per_frame_time = 0
+
+        data['metadata'].update({
+            'processing_time': elapsed,
+            'total_detections': total_detections,
+            'avg_detections_per_frame': per_frame_detections,
+            'avg_time_per_frame': per_frame_time,
+        })
+
+        saved, _ = save_detection_data(target, data, is_interrupted=True)
+        if saved:
+            print(f"✓ Progress saved to: {target}")
+            print(f"  Frames processed: {done}/{total_frames_global}")
+            print(f"  Total detections: {total_detections}")
+            print(f"  Elapsed time: {elapsed:.1f}s")
+            print(f"\n💡 Run the same command again to resume from frame {done + 1}")
+
+    print(f"{banner}\n")
+    sys.exit(0)
+
+
+def prescan_video(video_path, model_path, output_file, save_interval=30):
+    """Run YOLO detection on every frame of a video and save the results as JSON.
+
+    If ``output_file`` already holds frames from an earlier run, the user is
+    asked whether to resume after the last saved frame (the default answer,
+    also used when stdin is closed, is to resume). While running, the data is
+    re-saved about every ``save_interval`` seconds, and SIGINT is routed to
+    ``signal_handler``.
+
+    Args:
+        video_path: Path to the input video.
+        model_path: Path to the YOLO model file.
+        output_file: Path of the JSON file to create or resume.
+        save_interval: Seconds between automatic saves.
+
+    Returns:
+        True when the video was processed and the final save succeeded.
+        False when an input file is missing, the video cannot be opened, or
+        the final save fails.
+    """
+    global detection_data_global, output_file_global, frame_count_global
+    global total_frames_global, start_time_global
+
+    if not os.path.exists(video_path):
+        print(f"Error: Video file not found: {video_path}")
+        return False
+
+    if not os.path.exists(model_path):
+        print(f"Error: YOLO model not found: {model_path}")
+        return False
+
+    # Set up signal handler
+    signal.signal(signal.SIGINT, signal_handler)
+
+    # Check for existing data (resume support)
+    existing_data, last_processed_frame = load_existing_data(output_file)
+    resume_mode = existing_data is not None and last_processed_frame > 0
+
+    if resume_mode:
+        print(f"\n{'=' * 60}")
+        print(f"📂 Found existing data: {output_file}")
+        print(f"   Last processed frame: {last_processed_frame}")
+        print(f"{'=' * 60}")
+
+        try:
+            response = input("\nResume from last checkpoint? (Y/n): ").strip().lower()
+        except EOFError:
+            # stdin is closed (non-interactive run): take the default answer.
+            response = ''
+        if response == 'n':
+            print("Starting from beginning...")
+            resume_mode = False
+            existing_data = None
+            last_processed_frame = 0
+        else:
+            print("Resuming from checkpoint...")
+
+    start_time = time.time()
+    start_time_global = start_time
+
+    # Load YOLO model
+    print(f"\nLoading YOLO model from: {model_path}")
+    model = YOLO(model_path)
+    print("✓ Model loaded successfully")
+
+    # Open video
+    cap = cv2.VideoCapture(video_path)
+
+    if not cap.isOpened():
+        cap.release()
+        print(f"Error: Cannot open video: {video_path}")
+        return False
+
+    # Everything that uses the capture runs under try/finally so it is
+    # released on errors and on the SystemExit raised by the Ctrl+C handler.
+    try:
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        total_duration = total_frames / fps if fps > 0 else 0
+
+        total_frames_global = total_frames
+
+        def percent_done(done):
+            # The reported frame count can be 0; avoid dividing by it.
+            return (done / total_frames) * 100 if total_frames > 0 else 0.0
+
+        print("\nVideo Info:")
+        print(f"  Path: {video_path}")
+        print(f"  Resolution: {width}x{height}")
+        print(f"  FPS: {fps:.2f}")
+        print(f"  Total frames: {total_frames}")
+        print(f"  Duration: {total_duration:.1f}s ({format_time(total_duration)})")
+
+        if resume_mode:
+            print(f"  Resume from: frame {last_processed_frame + 1}")
+
+        print(f"\nOutput: {output_file}")
+        print(f"Auto-save interval: {save_interval} seconds")
+        print("=" * 60)
+
+        # Initialize or load detection data
+        if resume_mode and existing_data:
+            detection_data = existing_data
+            # Later code indexes "metadata" and "frames" unconditionally.
+            detection_data.setdefault("metadata", {})
+            detection_data.setdefault("frames", {})
+            frame_count = last_processed_frame
+            total_detections = sum(
+                len(f.get('detections', []))
+                for f in detection_data.get('frames', {}).values()
+            )
+
+            # Seek to resume position
+            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
+        else:
+            # Initialize new detection data
+            detection_data = {
+                "metadata": {
+                    "video_path": os.path.abspath(video_path),
+                    "model_path": os.path.abspath(model_path),
+                    "width": width,
+                    "height": height,
+                    "fps": fps,
+                    "total_frames": total_frames,
+                    "total_duration": total_duration,
+                    "processed_at": datetime.now().isoformat(),
+                    "auto_save_interval": save_interval,
+                    "status": "in_progress"
+                },
+                "frames": {}
+            }
+            frame_count = 0
+            total_detections = 0
+
+        # Frames before this index were handled by an earlier run; the ETA is
+        # based only on frames processed in this run.
+        resume_start_frame = frame_count
+
+        # Set global variables for signal handler
+        detection_data_global = detection_data
+        output_file_global = output_file
+        frame_count_global = frame_count
+
+        print(f"\n{'Resuming' if resume_mode else 'Starting'} video processing...")
+        print("💡 Press Ctrl+C to pause and save progress\n")
+
+        # Process frames
+        last_save_time = time.time()
+        auto_save_count = 0
+
+        while True:
+            ret, frame = cap.read()
+
+            if not ret:
+                break
+
+            frame_count += 1
+            frame_count_global = frame_count
+            current_time = frame_count / fps if fps > 0 else 0
+
+            # Run YOLO detection
+            results = model(frame, verbose=False)
+            detections = get_detections_list(results[0])
+
+            total_detections += len(detections)
+
+            # Store detection data
+            detection_data["frames"][str(frame_count)] = {
+                "frame_number": frame_count,
+                "time_seconds": round(current_time, 3),
+                "time_formatted": format_time(current_time),
+                "detections": detections
+            }
+
+            # Progress indicator
+            if frame_count % 100 == 0:
+                elapsed = time.time() - start_time
+                frames_this_run = frame_count - resume_start_frame
+                remaining = max(total_frames - frame_count, 0)
+                eta = (elapsed / frames_this_run) * remaining if frames_this_run > 0 else 0
+                print(f"  Progress: {frame_count}/{total_frames} frames ({percent_done(frame_count):.1f}%) - "
+                      f"{len(detections)} objects - Elapsed: {elapsed:.1f}s, ETA: {eta:.1f}s")
+
+            # Auto-save every save_interval seconds
+            current_time_val = time.time()
+            if current_time_val - last_save_time >= save_interval:
+                success, file_size = save_detection_data(
+                    output_file, detection_data, is_interrupted=False, silent=True
+                )
+                if success:
+                    auto_save_count += 1
+                    # A silent save may report size 0; read the real size.
+                    file_size = file_size or os.path.getsize(output_file)
+                    elapsed = time.time() - start_time
+                    print(f"  💾 Auto-saved (#{auto_save_count}): {frame_count}/{total_frames} frames ({percent_done(frame_count):.1f}%) - "
+                          f"Size: {file_size / 1024:.1f} KB - Elapsed: {elapsed:.1f}s")
+                last_save_time = current_time_val
+    finally:
+        cap.release()
+
+    processing_time = time.time() - start_time
+
+    # Guard the averages: no frame may have been read at all.
+    avg_detections = total_detections / frame_count if frame_count > 0 else 0
+    avg_frame_time = processing_time / frame_count if frame_count > 0 else 0
+
+    # Update final metadata
+    metadata = detection_data["metadata"]
+    metadata["processing_time"] = processing_time
+    metadata["total_detections"] = total_detections
+    metadata["avg_detections_per_frame"] = round(avg_detections, 2) if frame_count > 0 else 0
+    metadata["avg_time_per_frame"] = round(avg_frame_time, 3) if frame_count > 0 else 0
+    metadata["completed_at"] = datetime.now().isoformat()
+    metadata["status"] = "completed"
+    metadata["auto_save_count"] = auto_save_count
+
+    # Save final data; do not report success if nothing was written.
+    saved, _ = save_detection_data(output_file, detection_data, is_interrupted=False)
+    if not saved:
+        return False
+
+    # Print summary
+    print(f"\n{'=' * 60}")
+    print("✓ Detection complete!")
+    print(f"  Total frames processed: {frame_count}")
+    print(f"  Total objects detected: {total_detections}")
+    print(f"  Average objects per frame: {avg_detections:.2f}")
+    print(f"  Total processing time: {processing_time:.2f} seconds")
+    print(f"  Average time per frame: {avg_frame_time:.3f} seconds")
+    print(f"  Auto-saves performed: {auto_save_count}")
+    print(f"  Results saved to: {output_file}")
+    print(f"  File size: {os.path.getsize(output_file) / 1024:.2f} KB")
+    print("=" * 60)
+
+    return True
+
+
+def main():
+    """Command-line entry point: parse arguments, clamp the interval, run the prescan.
+
+    The results file is placed beside the video as ``<video name>.yolo.json``.
+    The process exits with status 1 when ``prescan_video`` reports failure.
+    """
+    epilog_text = """
+Examples:
+  # Default auto-save every 30 seconds
+  python %(prog)s video.mp4 yolov8n.pt
+  
+  # Auto-save every 60 seconds
+  python %(prog)s video.mp4 yolov8n.pt --save-interval 60
+  
+  # Auto-save every 15 seconds (for long videos)
+  python %(prog)s video.mp4 yolov8n.pt --save-interval 15
+
+Features:
+  - Press Ctrl+C to pause and save progress
+  - Run again to resume from last checkpoint
+  - Auto-save at configurable intervals (default: 30 seconds)
+        """
+
+    parser = argparse.ArgumentParser(
+        description='Video YOLO Object Prescan - Pre-process video for object detection',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=epilog_text,
+    )
+    parser.add_argument('video_path', help='Path to video file')
+    parser.add_argument('model_path', help='Path to YOLO model file')
+    parser.add_argument(
+        '--save-interval',
+        type=int,
+        default=30,
+        help='Auto-save interval in seconds (default: 30)',
+    )
+    args = parser.parse_args()
+
+    # The results file sits next to the video, with its extension swapped out.
+    folder, filename = os.path.split(args.video_path)
+    stem = os.path.splitext(filename)[0]
+    output_file = os.path.join(folder, stem + ".yolo.json")
+
+    # Keep the interval within 5..300 seconds, telling the user when it is adjusted.
+    requested = args.save_interval
+    save_interval = min(max(requested, 5), 300)
+    if requested < 5:
+        print("Warning: Save interval too small (minimum 5 seconds). Using 5 seconds.")
+    elif requested > 300:
+        print("Warning: Save interval too large (maximum 300 seconds). Using 300 seconds.")
+
+    if not prescan_video(args.video_path, args.model_path, output_file, save_interval):
+        print("\n✗ Error during detection processing")
+        sys.exit(1)
+
+    print("\n✓ Video YOLO prescan completed successfully!")
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()