Core modules: - probe.rs: ffprobe execution logic - parser.rs: JSON parsing logic - output.rs: Output formatting - lib.rs: Library interface - main.rs: CLI entry point Features: - Extract video metadata using ffprobe - Parse video/audio/subtitle streams - Save to JSON file - Console summary output Documentation: - Added QUICKSTART.md - Added ENVIRONMENT_SETUP_REPORT.md
201 lines
7.5 KiB
Python
201 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Video Probe - Extract video metadata using ffprobe
|
|
Saves metadata to .probe.json file
|
|
|
|
Usage:
|
|
python video_probe.py <video_path>
|
|
"""
|
|
|
|
import subprocess
|
|
import json
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
|
|
|
|
def probe_video(video_path):
    """Extract video metadata using ffprobe.

    Runs ``ffprobe -show_format -show_streams`` on *video_path* and condenses
    the JSON report into a plain dictionary.

    Args:
        video_path: Path to the media file to inspect.

    Returns:
        A dict with the keys ``video_path``, ``probed_at``, ``format``,
        ``video_stream`` (the first video stream, or ``None``),
        ``audio_streams``, ``subtitle_streams`` and ``other_streams``.
        ``None`` is returned (after printing an error message) when the file
        does not exist, ffprobe cannot be started, ffprobe exits with a
        non-zero status, or its output is not valid JSON.
    """
    if not os.path.exists(video_path):
        print(f"Error: Video file not found: {video_path}")
        return None

    # ffprobe command to get all streams and format info in JSON
    cmd = [
        'ffprobe',
        '-v', 'quiet',
        '-print_format', 'json',
        '-show_format',
        '-show_streams',
        video_path,
    ]

    try:
        # Decode explicitly as UTF-8 instead of relying on the locale
        # encoding, so non-ASCII tags survive on non-UTF-8 consoles.
        result = subprocess.run(
            cmd,
            capture_output=True,
            encoding='utf-8',
            errors='replace',
            check=True,
        )
    except OSError as e:
        # Raised when the ffprobe executable is missing or not executable.
        print(f"Error: could not execute ffprobe (is FFmpeg installed and on PATH?): {e}")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe: {e}")
        return None

    try:
        probe_data = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"Error parsing ffprobe output: {e}")
        return None

    metadata = {
        "video_path": os.path.abspath(video_path),
        "probed_at": datetime.now().isoformat(),
        "format": {},
        "video_stream": None,
        "audio_streams": [],
        "subtitle_streams": [],
        "other_streams": [],
    }

    if 'format' in probe_data:
        metadata['format'] = _format_info(probe_data['format'])

    for stream in probe_data.get('streams', []):
        codec_type = stream.get('codec_type')

        if codec_type == 'video':
            # Only the first video stream is kept as the main one; any
            # further video streams are ignored.
            if metadata['video_stream'] is None:
                metadata['video_stream'] = _video_stream_info(stream)
        elif codec_type == 'audio':
            metadata['audio_streams'].append(_audio_stream_info(stream))
        elif codec_type == 'subtitle':
            tags = stream.get('tags', {})
            metadata['subtitle_streams'].append({
                "index": stream.get('index'),
                "codec_name": stream.get('codec_name'),
                "language": tags.get('language'),
                "tags": tags,
            })
        else:
            metadata['other_streams'].append({
                "index": stream.get('index'),
                "codec_type": codec_type,
                "codec_name": stream.get('codec_name'),
                "tags": stream.get('tags', {}),
            })

    return metadata


def _to_float(value, default=None):
    """Return *value* as a float, or *default* if it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _to_int(value, default=None):
    """Return *value* as an int, or *default* if it is missing or non-numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def _format_info(fmt):
    """Build the container-level summary from ffprobe's ``format`` section."""
    return {
        "filename": fmt.get('filename'),
        "format_name": fmt.get('format_name'),
        "format_long_name": fmt.get('format_long_name'),
        # Numeric fields arrive as strings; fall back to 0 when absent.
        "duration": _to_float(fmt.get('duration'), 0.0),
        "size": _to_int(fmt.get('size'), 0),
        "bit_rate": _to_int(fmt.get('bit_rate'), 0),
        "probe_score": fmt.get('probe_score'),
        "tags": fmt.get('tags', {}),
    }


def _video_stream_info(stream):
    """Build the summary dict for a single video stream entry."""
    return {
        "index": stream.get('index'),
        "codec_name": stream.get('codec_name'),
        "codec_long_name": stream.get('codec_long_name'),
        "profile": stream.get('profile'),
        "level": stream.get('level'),
        "width": stream.get('width'),
        "height": stream.get('height'),
        "coded_width": stream.get('coded_width'),
        "coded_height": stream.get('coded_height'),
        "aspect_ratio": stream.get('display_aspect_ratio'),
        "pix_fmt": stream.get('pix_fmt'),
        "field_order": stream.get('field_order'),
        "r_frame_rate": stream.get('r_frame_rate'),
        "avg_frame_rate": stream.get('avg_frame_rate'),
        "time_base": stream.get('time_base'),
        "start_pts": stream.get('start_pts'),
        "start_time": _to_float(stream.get('start_time'), 0.0),
        # These stay None when ffprobe does not report them.
        "duration": _to_float(stream.get('duration')),
        "bit_rate": _to_int(stream.get('bit_rate')),
        "nb_frames": _to_int(stream.get('nb_frames')),
        "tags": stream.get('tags', {}),
    }


def _audio_stream_info(stream):
    """Build the summary dict for a single audio stream entry."""
    return {
        "index": stream.get('index'),
        "codec_name": stream.get('codec_name'),
        "codec_long_name": stream.get('codec_long_name'),
        "profile": stream.get('profile'),
        "channels": stream.get('channels'),
        "channel_layout": stream.get('channel_layout'),
        "sample_rate": stream.get('sample_rate'),
        "sample_fmt": stream.get('sample_fmt'),
        # These stay None when ffprobe does not report them.
        "bit_rate": _to_int(stream.get('bit_rate')),
        "duration": _to_float(stream.get('duration')),
        "tags": stream.get('tags', {}),
    }
|
|
|
|
|
|
def save_probe_metadata(video_path, metadata):
    """Write *metadata* as JSON next to the video and return the file path.

    The output file sits in the same directory as *video_path* and reuses its
    base name with the final extension replaced by ``.probe.json``.
    """
    parent_dir, file_name = os.path.split(video_path)
    stem, _ext = os.path.splitext(file_name)
    target = os.path.join(parent_dir, stem + ".probe.json")

    # Keep non-ASCII characters readable in the file instead of \u escapes.
    with open(target, 'w', encoding='utf-8') as handle:
        json.dump(metadata, handle, indent=2, ensure_ascii=False)

    return target
|
|
|
|
|
|
def main():
    """Command-line entry point: probe a video, save the metadata, print a summary.

    The video path is read from ``sys.argv[1]``. The process exits with
    status 1 when the path argument is missing, when probing fails, or when
    the metadata file cannot be written.
    """
    if len(sys.argv) < 2:
        print(f"Usage: python {sys.argv[0]} <video_path>")
        print(f"Example: python {sys.argv[0]} video.mp4")
        sys.exit(1)

    video_path = sys.argv[1]

    print(f"Probing video: {video_path}")
    print("=" * 60)

    # Probe video
    metadata = probe_video(video_path)
    if metadata is None:
        print("Failed to probe video")
        sys.exit(1)

    # Save to JSON; report an unwritable destination (read-only directory,
    # missing permissions, full disk) cleanly instead of with a traceback.
    try:
        output_file = save_probe_metadata(video_path, metadata)
    except OSError as e:
        print(f"Error saving metadata: {e}")
        sys.exit(1)

    # Print summary
    fmt = metadata['format']
    # `or 0` keeps the numeric format specs safe if a field is absent or None.
    duration = fmt.get('duration') or 0
    size = fmt.get('size') or 0
    bit_rate = fmt.get('bit_rate') or 0

    print("\n✓ Video probed successfully!")
    print(f"\nFile: {fmt.get('filename')}")
    print(f"Format: {fmt.get('format_long_name')}")
    print(f"Duration: {duration:.2f} seconds")
    print(f"Size: {size / 1024 / 1024:.2f} MB")
    print(f"Bit rate: {bit_rate / 1000:.0f} kbps")

    vs = metadata['video_stream']
    if vs:
        print("\nVideo Stream:")
        print(f" Codec: {vs.get('codec_name')} ({vs.get('profile')})")
        print(f" Resolution: {vs.get('width')}x{vs.get('height')}")
        print(f" Frame rate: {vs.get('r_frame_rate')}")
        print(f" Pixel format: {vs.get('pix_fmt')}")

    audio_streams = metadata['audio_streams']
    if audio_streams:
        print(f"\nAudio Streams: {len(audio_streams)}")
        for i, audio in enumerate(audio_streams, 1):
            print(f" [{i}] {audio.get('codec_name')} - {audio.get('channels')} channels @ {audio.get('sample_rate')} Hz")

    subtitle_streams = metadata['subtitle_streams']
    if subtitle_streams:
        print(f"\nSubtitle Streams: {len(subtitle_streams)}")
        for i, sub in enumerate(subtitle_streams, 1):
            print(f" [{i}] {sub.get('codec_name')} ({sub.get('language')})")

    print(f"\n✓ Metadata saved to: {output_file}")
    print("=" * 60)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|