# video_probe/video_probe.py

#!/usr/bin/env python3
"""
Video Probe - Extract video metadata using ffprobe.
Saves the metadata next to the video as <video_name>.probe.json.

Usage:
    python video_probe.py <video_path>
"""

import subprocess
import json
import sys
import os
from datetime import datetime


def _to_float(value, default=None):
    """Return *value* as a float, or *default* if it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _to_int(value, default=None):
    """Return *value* as an int, or *default* if it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def probe_video(video_path):
    """Extract video metadata using ffprobe.

    Runs ``ffprobe -show_format -show_streams`` on *video_path* and condenses
    the JSON it prints into a plain dictionary.

    Args:
        video_path: Path of the video to inspect.

    Returns:
        A dict with the keys ``video_path``, ``probed_at``, ``format``,
        ``video_stream`` (the first video stream, or ``None``),
        ``audio_streams``, ``subtitle_streams`` and ``other_streams``.
        Returns ``None`` (after printing an error) when the path does not
        exist or is a directory, when ffprobe cannot be started, when ffprobe
        exits with a non-zero status, or when its output is not valid JSON.
    """

    if not os.path.exists(video_path):
        print(f"Error: Video file not found: {video_path}")
        return None

    if os.path.isdir(video_path):
        print(f"Error: Path is a directory, not a video file: {video_path}")
        return None

    # ffprobe command to get all streams and format info in JSON.
    # "-v error" keeps stdout pure JSON while still letting ffprobe explain a
    # failure on stderr; "-i" stops a path that begins with "-" from being
    # parsed as an option.
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-print_format', 'json',
        '-show_format',
        '-show_streams',
        '-i', video_path
    ]

    try:
        # Decode explicitly as UTF-8 instead of relying on the locale
        # encoding, which may not be able to decode non-ASCII tag values.
        result = subprocess.run(
            cmd,
            capture_output=True,
            encoding='utf-8',
            errors='replace',
            check=True,
        )
    except FileNotFoundError:
        print("Error: ffprobe executable not found. "
              "Install FFmpeg and make sure ffprobe is on PATH.")
        return None
    except OSError as e:
        print(f"Error launching ffprobe: {e}")
        return None
    except subprocess.CalledProcessError as e:
        detail = (e.stderr or "").strip()
        print(f"Error running ffprobe: {e}")
        if detail:
            print(detail)
        return None

    try:
        probe_data = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing ffprobe output: {e}")
        return None

    # Extract relevant information
    metadata = {
        "video_path": os.path.abspath(video_path),
        "probed_at": datetime.now().isoformat(),
        "format": {},
        "video_stream": None,
        "audio_streams": [],
        "subtitle_streams": [],
        "other_streams": []
    }

    # Format information (numeric fields fall back to 0 when absent/invalid)
    if 'format' in probe_data:
        fmt = probe_data['format']
        metadata['format'] = {
            "filename": fmt.get('filename'),
            "format_name": fmt.get('format_name'),
            "format_long_name": fmt.get('format_long_name'),
            "duration": _to_float(fmt.get('duration'), 0.0),
            "size": _to_int(fmt.get('size'), 0),
            "bit_rate": _to_int(fmt.get('bit_rate'), 0),
            "probe_score": fmt.get('probe_score'),
            "tags": fmt.get('tags', {})
        }

    # Stream information
    if 'streams' in probe_data:
        for stream in probe_data['streams']:
            codec_type = stream.get('codec_type')

            if codec_type == 'video':
                # Only the first video stream is recorded as the main one;
                # any further video streams are intentionally ignored.
                if metadata['video_stream'] is None:
                    metadata['video_stream'] = {
                        "index": stream.get('index'),
                        "codec_name": stream.get('codec_name'),
                        "codec_long_name": stream.get('codec_long_name'),
                        "profile": stream.get('profile'),
                        "level": stream.get('level'),
                        "width": stream.get('width'),
                        "height": stream.get('height'),
                        "coded_width": stream.get('coded_width'),
                        "coded_height": stream.get('coded_height'),
                        "aspect_ratio": stream.get('display_aspect_ratio'),
                        "pix_fmt": stream.get('pix_fmt'),
                        "field_order": stream.get('field_order'),
                        "r_frame_rate": stream.get('r_frame_rate'),
                        "avg_frame_rate": stream.get('avg_frame_rate'),
                        "time_base": stream.get('time_base'),
                        "start_pts": stream.get('start_pts'),
                        "start_time": _to_float(stream.get('start_time'), 0.0),
                        # Optional per-stream numbers are None when ffprobe
                        # does not report them (or reports a non-number).
                        "duration": _to_float(stream.get('duration')),
                        "bit_rate": _to_int(stream.get('bit_rate')),
                        "nb_frames": _to_int(stream.get('nb_frames')),
                        "tags": stream.get('tags', {})
                    }

            elif codec_type == 'audio':
                metadata['audio_streams'].append({
                    "index": stream.get('index'),
                    "codec_name": stream.get('codec_name'),
                    "codec_long_name": stream.get('codec_long_name'),
                    "profile": stream.get('profile'),
                    "channels": stream.get('channels'),
                    "channel_layout": stream.get('channel_layout'),
                    "sample_rate": stream.get('sample_rate'),
                    "sample_fmt": stream.get('sample_fmt'),
                    "bit_rate": _to_int(stream.get('bit_rate')),
                    "duration": _to_float(stream.get('duration')),
                    "tags": stream.get('tags', {})
                })

            elif codec_type == 'subtitle':
                metadata['subtitle_streams'].append({
                    "index": stream.get('index'),
                    "codec_name": stream.get('codec_name'),
                    "language": stream.get('tags', {}).get('language'),
                    "tags": stream.get('tags', {})
                })

            else:
                metadata['other_streams'].append({
                    "index": stream.get('index'),
                    "codec_type": codec_type,
                    "codec_name": stream.get('codec_name'),
                    "tags": stream.get('tags', {})
                })

    return metadata


def save_probe_metadata(video_path, metadata):
    """Write *metadata* as JSON beside the video and return the file path.

    The output file lives in the same directory as *video_path* and is named
    after the video with its final extension replaced by ``.probe.json``.
    """
    parent_dir, file_name = os.path.split(video_path)
    stem, _extension = os.path.splitext(file_name)
    output_file = os.path.join(parent_dir, stem + ".probe.json")

    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
    with open(output_file, 'w', encoding='utf-8') as handle:
        json.dump(metadata, handle, indent=2, ensure_ascii=False)

    return output_file


def main():
    """Command-line entry point: probe one video, save the JSON, print a summary.

    Reads the video path from ``sys.argv[1]``. Exits with status 1 when no
    path is given, when probing fails, or when the metadata file cannot be
    written.
    """
    if len(sys.argv) < 2:
        print(f"Usage: python {sys.argv[0]} <video_path>")
        print(f"Example: python {sys.argv[0]} video.mp4")
        sys.exit(1)

    video_path = sys.argv[1]

    print(f"Probing video: {video_path}")
    print("=" * 60)

    # Probe video
    metadata = probe_video(video_path)

    if metadata is None:
        print("Failed to probe video")
        sys.exit(1)

    # Save to JSON; report an unwritable destination (read-only directory,
    # full disk, ...) as a clean error instead of a traceback.
    try:
        output_file = save_probe_metadata(video_path, metadata)
    except OSError as e:
        print(f"Error saving metadata: {e}")
        sys.exit(1)

    # Print summary
    fmt_info = metadata['format']
    print("\n✓ Video probed successfully!")
    print(f"\nFile: {fmt_info.get('filename')}")
    print(f"Format: {fmt_info.get('format_long_name')}")
    print(f"Duration: {fmt_info.get('duration', 0):.2f} seconds")
    print(f"Size: {fmt_info.get('size', 0) / 1024 / 1024:.2f} MB")
    print(f"Bit rate: {fmt_info.get('bit_rate', 0) / 1000:.0f} kbps")

    if metadata['video_stream']:
        vs = metadata['video_stream']
        print("\nVideo Stream:")
        print(f"  Codec: {vs.get('codec_name')} ({vs.get('profile')})")
        print(f"  Resolution: {vs.get('width')}x{vs.get('height')}")
        print(f"  Frame rate: {vs.get('r_frame_rate')}")
        print(f"  Pixel format: {vs.get('pix_fmt')}")

    if metadata['audio_streams']:
        print(f"\nAudio Streams: {len(metadata['audio_streams'])}")
        for i, audio in enumerate(metadata['audio_streams'], 1):
            print(f"  [{i}] {audio.get('codec_name')} - {audio.get('channels')} channels @ {audio.get('sample_rate')} Hz")

    if metadata['subtitle_streams']:
        print(f"\nSubtitle Streams: {len(metadata['subtitle_streams'])}")
        for i, sub in enumerate(metadata['subtitle_streams'], 1):
            print(f"  [{i}] {sub.get('codec_name')} ({sub.get('language')})")

    print(f"\n✓ Metadata saved to: {output_file}")
    print("=" * 60)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()