Files
video_probe/video_probe.py
accusys f3e2d2dca7 Initial implementation of video_probe (Rust)
Core modules:
- probe.rs: ffprobe execution logic
- parser.rs: JSON parsing logic
- output.rs: Output formatting
- lib.rs: Library interface
- main.rs: CLI entry point

Features:
- Extract video metadata using ffprobe
- Parse video/audio/subtitle streams
- Save to JSON file
- Console summary output

Documentation:
- Added QUICKSTART.md
- Added ENVIRONMENT_SETUP_REPORT.md
2026-03-07 10:10:19 +08:00

201 lines
7.5 KiB
Python

#!/usr/bin/env python3
"""
Video Probe - Extract video metadata using ffprobe
Saves metadata to .probe.json file
Usage:
python video_probe.py <video_path>
"""
import subprocess
import json
import sys
import os
from datetime import datetime
def _to_float(value, default=None):
    """Return *value* as a float, or *default* if it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _to_int(value, default=None):
    """Return *value* as an int, or *default* if it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def _video_stream_info(stream):
    """Build the metadata dict recorded for a video stream entry."""
    return {
        "index": stream.get('index'),
        "codec_name": stream.get('codec_name'),
        "codec_long_name": stream.get('codec_long_name'),
        "profile": stream.get('profile'),
        "level": stream.get('level'),
        "width": stream.get('width'),
        "height": stream.get('height'),
        "coded_width": stream.get('coded_width'),
        "coded_height": stream.get('coded_height'),
        "aspect_ratio": stream.get('display_aspect_ratio'),
        "pix_fmt": stream.get('pix_fmt'),
        "field_order": stream.get('field_order'),
        "r_frame_rate": stream.get('r_frame_rate'),
        "avg_frame_rate": stream.get('avg_frame_rate'),
        "time_base": stream.get('time_base'),
        "start_pts": stream.get('start_pts'),
        # start_time falls back to 0.0; the fields below stay None when absent.
        "start_time": _to_float(stream.get('start_time'), 0.0),
        "duration": _to_float(stream.get('duration')),
        "bit_rate": _to_int(stream.get('bit_rate')),
        "nb_frames": _to_int(stream.get('nb_frames')),
        "tags": stream.get('tags', {}),
    }


def _audio_stream_info(stream):
    """Build the metadata dict recorded for an audio stream entry."""
    return {
        "index": stream.get('index'),
        "codec_name": stream.get('codec_name'),
        "codec_long_name": stream.get('codec_long_name'),
        "profile": stream.get('profile'),
        "channels": stream.get('channels'),
        "channel_layout": stream.get('channel_layout'),
        "sample_rate": stream.get('sample_rate'),
        "sample_fmt": stream.get('sample_fmt'),
        "bit_rate": _to_int(stream.get('bit_rate')),
        "duration": _to_float(stream.get('duration')),
        "tags": stream.get('tags', {}),
    }


def probe_video(video_path):
    """Extract video metadata using ffprobe.

    Runs ``ffprobe`` on *video_path* with JSON output and condenses the
    result into a dict with the keys ``video_path``, ``probed_at``,
    ``format``, ``video_stream``, ``audio_streams``, ``subtitle_streams``
    and ``other_streams``.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The metadata dict, or ``None`` when the file does not exist, ffprobe
        cannot be started, ffprobe exits with an error, or its output is not
        a JSON object. An error message is printed in each failure case.
    """
    if not os.path.exists(video_path):
        print(f"Error: Video file not found: {video_path}")
        return None

    # ffprobe command to get all streams and format info in JSON
    cmd = [
        'ffprobe',
        '-v', 'quiet',
        '-print_format', 'json',
        '-show_format',
        '-show_streams',
        video_path,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except FileNotFoundError:
        # Raised when the ffprobe executable itself cannot be located.
        print("Error: ffprobe not found. Install FFmpeg and make sure ffprobe is on PATH.")
        return None
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error running ffprobe: {e}")
        return None

    try:
        probe_data = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing ffprobe output: {e}")
        return None
    if not isinstance(probe_data, dict):
        print("Error parsing ffprobe output: expected a JSON object")
        return None

    # Extract relevant information
    metadata = {
        "video_path": os.path.abspath(video_path),
        "probed_at": datetime.now().isoformat(),
        "format": {},
        "video_stream": None,
        "audio_streams": [],
        "subtitle_streams": [],
        "other_streams": [],
    }

    # Format information
    if 'format' in probe_data:
        fmt = probe_data['format']
        metadata['format'] = {
            "filename": fmt.get('filename'),
            "format_name": fmt.get('format_name'),
            "format_long_name": fmt.get('format_long_name'),
            # Numeric fields default to zero so callers can format them
            # without checking for None.
            "duration": _to_float(fmt.get('duration'), 0.0),
            "size": _to_int(fmt.get('size'), 0),
            "bit_rate": _to_int(fmt.get('bit_rate'), 0),
            "probe_score": fmt.get('probe_score'),
            "tags": fmt.get('tags', {}),
        }

    # Stream information
    for stream in probe_data.get('streams', []):
        codec_type = stream.get('codec_type')
        if codec_type == 'video':
            # Only the first video stream is recorded; later video streams
            # are skipped.
            if metadata['video_stream'] is None:
                metadata['video_stream'] = _video_stream_info(stream)
        elif codec_type == 'audio':
            metadata['audio_streams'].append(_audio_stream_info(stream))
        elif codec_type == 'subtitle':
            metadata['subtitle_streams'].append({
                "index": stream.get('index'),
                "codec_name": stream.get('codec_name'),
                "language": stream.get('tags', {}).get('language'),
                "tags": stream.get('tags', {}),
            })
        else:
            metadata['other_streams'].append({
                "index": stream.get('index'),
                "codec_type": codec_type,
                "codec_name": stream.get('codec_name'),
                "tags": stream.get('tags', {}),
            })

    return metadata
def save_probe_metadata(video_path, metadata):
    """Write *metadata* as JSON next to the video and return the file path.

    The output file sits in the same directory as *video_path* and reuses
    the video's file name with its last extension replaced by
    ``.probe.json``. It is written as UTF-8, indented by two spaces, with
    non-ASCII characters kept as-is.
    """
    parent_dir, file_name = os.path.split(video_path)
    stem, _extension = os.path.splitext(file_name)
    output_file = os.path.join(parent_dir, f"{stem}.probe.json")

    with open(output_file, 'w', encoding='utf-8') as handle:
        json.dump(metadata, handle, indent=2, ensure_ascii=False)

    return output_file
def main():
    """Command-line entry point: probe one video and print a summary.

    Reads the video path from ``sys.argv[1]``. Exits with status 1 when no
    path is given, when probing fails, or when the ``.probe.json`` file
    cannot be written. On success, prints the container format, the video
    stream, the audio and subtitle streams, and the path of the saved
    metadata file.
    """
    if len(sys.argv) < 2:
        print(f"Usage: python {sys.argv[0]} <video_path>")
        print(f"Example: python {sys.argv[0]} video.mp4")
        sys.exit(1)

    video_path = sys.argv[1]
    print(f"Probing video: {video_path}")
    print("=" * 60)

    # Probe video
    metadata = probe_video(video_path)
    if metadata is None:
        print("Failed to probe video")
        sys.exit(1)

    # Save to JSON. The file is written next to the video, which may be in a
    # read-only or missing location, so report the failure instead of
    # crashing with a traceback.
    try:
        output_file = save_probe_metadata(video_path, metadata)
    except OSError as e:
        print(f"Error saving metadata: {e}")
        sys.exit(1)

    # Print summary
    fmt = metadata['format']
    print("\n✓ Video probed successfully!")
    print(f"\nFile: {fmt.get('filename')}")
    print(f"Format: {fmt.get('format_long_name')}")
    # "or 0" keeps the numeric formatting safe when a field is missing or None.
    print(f"Duration: {fmt.get('duration') or 0:.2f} seconds")
    print(f"Size: {(fmt.get('size') or 0) / 1024 / 1024:.2f} MB")
    print(f"Bit rate: {(fmt.get('bit_rate') or 0) / 1000:.0f} kbps")

    if metadata['video_stream']:
        vs = metadata['video_stream']
        print("\nVideo Stream:")
        print(f" Codec: {vs.get('codec_name')} ({vs.get('profile')})")
        print(f" Resolution: {vs.get('width')}x{vs.get('height')}")
        print(f" Frame rate: {vs.get('r_frame_rate')}")
        print(f" Pixel format: {vs.get('pix_fmt')}")

    if metadata['audio_streams']:
        print(f"\nAudio Streams: {len(metadata['audio_streams'])}")
        for i, audio in enumerate(metadata['audio_streams'], 1):
            print(f" [{i}] {audio.get('codec_name')} - {audio.get('channels')} channels @ {audio.get('sample_rate')} Hz")

    if metadata['subtitle_streams']:
        print(f"\nSubtitle Streams: {len(metadata['subtitle_streams'])}")
        for i, sub in enumerate(metadata['subtitle_streams'], 1):
            print(f" [{i}] {sub.get('codec_name')} ({sub.get('language')})")

    print(f"\n✓ Metadata saved to: {output_file}")
    print("=" * 60)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()