feat(player): implement SDL2 video playback with FFmpeg decoder

This commit is contained in:
accusys
2026-03-19 01:23:27 +08:00
parent 2d871a62c2
commit 0b75987fd0
10 changed files with 293 additions and 233 deletions

View File

@@ -19,16 +19,30 @@ pub struct Config {
#[arg(short = 'y', long = "yolo", help = "YOLO JSON file path")]
pub yolo: Option<PathBuf>,
#[arg(short = 'w', long = "width", default_value = "1280", help = "Window width")]
#[arg(
short = 'w',
long = "width",
default_value = "1280",
help = "Window width"
)]
pub width: u32,
#[arg(short = 'h', long = "height", default_value = "720", help = "Window height")]
// NOTE(review): `short = 'h'` collides with clap's auto-generated
// `-h`/`--help` short flag; with the derive API this panics at startup
// (debug assertion on conflicting shorts) or hijacks `-h`. Confirm help
// is remapped/disabled, or drop the short form for height.
#[arg(
short = 'h',
long = "height",
default_value = "720",
help = "Window height"
)]
pub height: u32,
#[arg(long = "fullscreen", help = "Start in fullscreen mode")]
pub fullscreen: bool,
#[arg(long = "locale", default_value = "en", help = "UI language (en, zh-TW, etc.)")]
#[arg(
long = "locale",
default_value = "en",
help = "UI language (en, zh-TW, etc.)"
)]
pub locale: String,
}

View File

@@ -3,8 +3,8 @@
//! Unified media player with ASR/YOLO/Chunks overlay support
use anyhow::Result;
use clap::Parser;
use log::{error, info};
use sdl2::pixels::PixelFormatEnum;
use std::path::Path;
mod config;
@@ -14,15 +14,13 @@ mod web;
use config::Config;
use overlay::{AsrLoader, YoloLoader};
use player::{Video, Renderer, PlaybackState};
use player::state::PlayerState;
use player::ffmpeg::FFmpegDecoder;
use player::state::{PlaybackState, PlayerState};
fn main() -> Result<()> {
env_logger::Builder::from_env(
env_logger::Env::default().default_filter_or("info")
).init();
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let config = Config::load();
let config = Config::load()?;
info!("MoMentry Playground starting...");
info!("Window: {}x{}", config.width, config.height);
@@ -35,19 +33,46 @@ fn main() -> Result<()> {
}
fn run(config: &Config) -> Result<()> {
let mut video = Video::new();
let mut renderer = Renderer::new("MoMentry Playground", config.width, config.height)?;
let sdl_context = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
let video_subsystem = sdl_context
.video()
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
let window = video_subsystem
.window("MoMentry Playground", config.width, config.height)
.position_centered()
.build()
.map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
let mut canvas = window
.into_canvas()
.build()
.map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
let texture_creator = canvas.texture_creator();
let mut decoder: Option<FFmpegDecoder> = None;
let mut texture: Option<sdl2::render::Texture> = None;
let mut asr: Option<AsrLoader> = None;
let mut yolo: Option<YoloLoader> = None;
if let Some(ref video_path) = config.video {
info!("Loading video: {:?}", video_path);
let info_data = video.open(video_path)?;
info!("Video info: {}x{} @ {:.2}fps, {} frames",
info_data.width, info_data.height, info_data.fps, info_data.total_frames);
let path = Path::new(video_path);
let mut dec = FFmpegDecoder::new(path)?;
let info = dec.get_info();
info!(
"Video info: {}x{} @ {:.2}fps, {} frames",
info.width, info.height, info.fps, info.frame_count
);
renderer.create_texture(info_data.width, info_data.height)?;
let tex = texture_creator
.create_texture_streaming(PixelFormatEnum::RGB24, info.width, info.height)
.map_err(|e| anyhow::anyhow!("Texture creation failed: {}", e))?;
texture = Some(tex);
dec.start_decoding(0)?;
decoder = Some(dec);
}
if let Some(ref asr_path) = config.asr {
@@ -76,65 +101,113 @@ fn run(config: &Config) -> Result<()> {
}
}
if config.fullscreen {
renderer.set_fullscreen(true)?;
}
let mut player_state = PlayerState::default();
if let Some(info) = video.get_info() {
player_state.total_frames = info.total_frames;
if let Some(ref dec) = decoder {
let info = dec.get_info();
player_state.total_frames = info.frame_count;
player_state.duration_ms = info.duration_ms;
player_state.fps = info.fps;
}
let mut event_pump = sdl_context
.event_pump()
.map_err(|e| anyhow::anyhow!("Event pump failed: {}", e))?;
info!("Main loop started - waiting for events...");
if let Some(ref video_path) = config.video {
video.play()?;
player_state.playback = PlaybackState::Playing;
run_playback_loop(&mut video, &mut renderer, &mut player_state, &mut asr, &mut yolo)?;
let mut running = true;
while running {
for event in event_pump.poll_iter() {
match event {
sdl2::event::Event::Quit { .. } => {
running = false;
}
sdl2::event::Event::KeyDown { keycode, .. } => {
if let Some(key) = keycode {
match key {
sdl2::keyboard::Keycode::Escape => running = false,
sdl2::keyboard::Keycode::Space => {
player_state.playback =
if player_state.playback == PlaybackState::Playing {
PlaybackState::Paused
} else {
PlaybackState::Playing
};
}
sdl2::keyboard::Keycode::S => {
player_state.show_subtitle = !player_state.show_subtitle;
}
sdl2::keyboard::Keycode::Y => {
player_state.show_yolo = !player_state.show_yolo;
}
sdl2::keyboard::Keycode::C => {
player_state.show_chunks = !player_state.show_chunks;
}
sdl2::keyboard::Keycode::M => {
player_state.muted = !player_state.muted;
}
sdl2::keyboard::Keycode::Left => {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame.saturating_sub(1);
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
}
sdl2::keyboard::Keycode::Right => {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame + 1;
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
}
_ => {}
}
}
}
_ => {}
}
}
loop {
std::thread::sleep(std::time::Duration::from_millis(100));
}
}
canvas.set_draw_color(sdl2::pixels::Color::BLACK);
canvas.clear();
fn run_playback_loop(
video: &mut Video,
renderer: &mut Renderer,
state: &mut PlayerState,
asr: &mut Option<AsrLoader>,
yolo: &mut Option<YoloLoader>,
) -> Result<()> {
let frame_duration = std::time::Duration::from_millis(16);
if player_state.playback == PlaybackState::Playing {
if let Some(ref mut dec) = decoder {
if let Some(ref mut tex) = texture {
match dec.read_frame() {
Ok(Some(data)) => {
let info = dec.get_info();
player_state.current_frame += 1;
player_state.current_time_ms =
((player_state.current_frame as f64 / info.fps) * 1000.0) as u64;
loop {
let start = std::time::Instant::now();
tex.update(None, &data, (info.width * 3) as usize)
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
match video.read_frame() {
Ok(Some(frame)) => {
state.current_frame = frame.frame_number;
state.current_time_ms = frame.timestamp_ms;
canvas
.copy(tex, None, None)
.map_err(|e| anyhow::anyhow!("Copy failed: {}", e))?;
renderer.update_texture(&frame.data)?;
if state.show_yolo {
if player_state.show_yolo {
if let Some(ref mut yolo_loader) = yolo {
let detections = yolo_loader.get_detections(frame.frame_number);
let detections =
yolo_loader.get_detections(player_state.current_frame);
for det in detections {
renderer.draw_bbox(
det.x1 as i32,
det.y1 as i32,
(det.x2 - det.x1) as u32,
(det.y2 - det.y1) as u32,
&det.class_name,
);
}
}
}
let x1 = det.x1 as i32;
let y1 = det.y1 as i32;
let w = (det.x2 - det.x1) as u32;
let h = (det.y2 - det.y1) as u32;
renderer.present();
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ =
canvas.draw_rect(sdl2::rect::Rect::new(x1, y1, w, h));
}
}
}
}
Ok(None) => {
info!("Playback ended");
@@ -145,12 +218,15 @@ fn run_playback_loop(
break;
}
}
let elapsed = start.elapsed();
if elapsed < frame_duration {
std::thread::sleep(frame_duration - elapsed);
}
}
}
canvas.present();
std::thread::sleep(std::time::Duration::from_millis(16));
}
info!("Application closed");
Ok(())
}

View File

@@ -28,17 +28,18 @@ impl AsrLoader {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASR file: {:?}", path))?;
let data: AsrData = serde_json::from_str(&content)
.with_context(|| "Failed to parse ASR JSON")?;
let data: AsrData =
serde_json::from_str(&content).with_context(|| "Failed to parse ASR JSON")?;
Ok(Self { data })
}
/// Returns the ASR segment that covers `time_ms` (milliseconds), if any.
///
/// Segment bounds are stored in seconds, so the query time is converted
/// first. The interval test is half-open (`start <= t < end`), so adjacent
/// segments never both match a boundary instant.
pub fn get_segment_at(&self, time_ms: f64) -> Option<&AsrSegment> {
    let time_sec = time_ms / 1000.0;
    self.data
        .segments
        .iter()
        .find(|seg| time_sec >= seg.start && time_sec < seg.end)
}
pub fn get_text_at(&self, time_ms: f64) -> Option<String> {

View File

@@ -5,7 +5,8 @@ use lru::LruCache;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::io::BufReader;
use std::num::NonZeroUsize;
use std::path::Path;
#[derive(Debug, Clone, Deserialize)]
@@ -52,53 +53,39 @@ pub struct YoloData {
pub struct YoloLoader {
data: YoloData,
cache: LruCache<u64, Vec<Detection>>,
frame_index: HashMap<u64, usize>,
file_path: String,
}
impl YoloLoader {
const CACHE_SIZE: usize = 60;
pub fn load(path: &Path) -> Result<Self> {
let file_path = path.to_string_lossy().to_string();
let file = File::open(path)
.with_context(|| format!("Failed to open YOLO file: {:?}", path))?;
let file =
File::open(path).with_context(|| format!("Failed to open YOLO file: {:?}", path))?;
let reader = BufReader::new(file);
let data: YoloData = serde_json::from_reader(reader)
.with_context(|| "Failed to parse YOLO JSON")?;
let data: YoloData =
serde_json::from_reader(reader).with_context(|| "Failed to parse YOLO JSON")?;
let mut frame_index = HashMap::new();
for (i, (key, frame)) in data.frames.iter().enumerate() {
if let Ok(frame_num) = key.parse::<u64>() {
frame_index.insert(frame_num, i);
}
let cache = LruCache::new(NonZeroUsize::new(Self::CACHE_SIZE).unwrap());
Ok(Self { data, cache })
}
Ok(Self {
data,
cache: LruCache::new(Self::CACHE_SIZE),
frame_index,
file_path,
})
}
pub fn get_detections(&mut self, frame: u64) -> Vec<&Detection> {
pub fn get_detections(&mut self, frame: u64) -> Vec<Detection> {
if let Some(dets) = self.cache.get(&frame) {
return dets.iter().collect();
return dets.clone();
}
if let Some(frame_data) = self.data.frames.get(&frame.to_string()) {
let dets: Vec<Detection> = frame_data.detections.clone();
let dets = frame_data.detections.clone();
self.cache.put(frame, dets.clone());
dets.iter().collect()
dets
} else {
Vec::new()
}
}
pub fn get_detections_at_time(&mut self, time_ms: u64) -> Vec<&Detection> {
pub fn get_detections_at_time(&mut self, time_ms: u64) -> Vec<Detection> {
let fps = self.data.metadata.fps;
let frame = ((time_ms as f64 / 1000.0) * fps) as u64;
self.get_detections(frame)

View File

@@ -1,10 +1,9 @@
//! FFmpeg 封裝
//! FFmpeg wrapper
use anyhow::{Context, Result};
use std::io::{BufReader, Read};
use std::path::Path;
use std::process::{Command, Stdio, Child, ChildStdout};
use std::io::{Read, BufReader};
use std::sync::{Arc, Mutex};
use std::process::{Child, ChildStdout, Command, Stdio};
#[derive(Debug, Clone)]
pub struct VideoInfo {
@@ -39,8 +38,10 @@ impl FFmpegDecoder {
fn probe(path: &Path) -> Result<VideoInfo> {
let output = Command::new("ffprobe")
.args([
"-v", "quiet",
"-print_format", "json",
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
path.to_str().unwrap_or(""),
@@ -48,14 +49,12 @@ impl FFmpegDecoder {
.output()
.context("Failed to run ffprobe")?;
let json: serde_json::Value = serde_json::from_slice(&output.stdout)
.context("Failed to parse ffprobe output")?;
let json: serde_json::Value =
serde_json::from_slice(&output.stdout).context("Failed to parse ffprobe output")?;
let video_stream = json["streams"]
.as_array()
.and_then(|streams| {
streams.iter().find(|s| s["codec_type"] == "video")
})
.and_then(|streams| streams.iter().find(|s| s["codec_type"] == "video"))
.context("No video stream found")?;
let width = video_stream["width"].as_u64().unwrap_or(0) as u32;
@@ -65,7 +64,10 @@ impl FFmpegDecoder {
let (num, den) = {
let parts: Vec<&str> = fps_str.split('/').collect();
if parts.len() == 2 {
(parts[0].parse::<f64>().unwrap_or(30.0), parts[1].parse::<f64>().unwrap_or(1.0))
(
parts[0].parse::<f64>().unwrap_or(30.0),
parts[1].parse::<f64>().unwrap_or(1.0),
)
} else {
(fps_str.parse::<f64>().unwrap_or(30.0), 1.0)
}
@@ -77,7 +79,10 @@ impl FFmpegDecoder {
let duration_ms = (duration_sec * 1000.0) as u64;
let frame_count = (duration_sec * fps) as u64;
let codec = video_stream["codec_name"].as_str().unwrap_or("unknown").to_string();
let codec = video_stream["codec_name"]
.as_str()
.unwrap_or("unknown")
.to_string();
Ok(VideoInfo {
width,
@@ -94,16 +99,20 @@ impl FFmpegDecoder {
}
pub fn start_decoding(&mut self, start_ms: u64) -> Result<()> {
self.stop()?;
self.stop();
let start_sec = start_ms as f64 / 1000.0;
let mut child = Command::new("ffmpeg")
.args([
"-ss", &format!("{}", start_sec),
"-i", &self.path,
"-f", "rawvideo",
"-pix_fmt", "rgb24",
"-ss",
&format!("{}", start_sec),
"-i",
&self.path,
"-f",
"rawvideo",
"-pix_fmt",
"rgb24",
"-",
])
.stdout(Stdio::piped())
@@ -111,8 +120,7 @@ impl FFmpegDecoder {
.spawn()
.context("Failed to start ffmpeg")?;
let stdout = child.stdout.take()
.context("Failed to capture stdout")?;
let stdout = child.stdout.take().context("Failed to capture stdout")?;
self.process = Some(child);
self.stdout = Some(BufReader::new(stdout));

View File

@@ -2,10 +2,10 @@
//!
//! Video playback, frame decoding, and rendering
pub mod video;
pub mod ffmpeg;
pub mod renderer;
pub mod state;
pub mod video;
pub use video::Video;
pub use state::{PlayerState, PlaybackState};
pub use state::{PlaybackState, PlayerState};
pub use video::VideoPlayer;

View File

@@ -3,58 +3,42 @@
use anyhow::Result;
use sdl2::pixels::PixelFormatEnum;
use sdl2::rect::Rect;
use std::sync::{Arc, Mutex};
pub struct Renderer {
sdl: sdl2::Sdl,
video_subsystem: sdl2::VideoSubsystem,
window: sdl2::video::Window,
canvas: sdl2::render::Canvas<sdl2::video::Window>,
texture_creator: sdl2::render::TextureCreator<sdl2::video::WindowContext>,
texture: Option<sdl2::render::Texture>,
width: u32,
height: u32,
}
impl Renderer {
pub fn new(title: &str, width: u32, height: u32) -> Result<Self> {
let sdl = sdl2::init()?;
let video_subsystem = sdl.video()?;
let sdl = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
let video_subsystem = sdl
.video()
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
let window = video_subsystem
.window(title, width, height)
.position_centered()
.build()?;
.build()
.map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
let canvas = window.into_canvas().build()?;
let texture_creator = canvas.texture_creator();
let canvas = window
.into_canvas()
.build()
.map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
Ok(Self {
sdl,
video_subsystem,
window,
canvas,
texture_creator,
texture: None,
width,
height,
})
}
pub fn create_texture(&mut self, width: u32, height: u32) -> Result<()> {
self.texture = Some(
self.texture_creator
.create_texture_streaming(PixelFormatEnum::RGB24, width, height)?
);
self.width = width;
self.height = height;
Ok(())
}
pub fn update_texture(&mut self, data: &[u8]) -> Result<()> {
if let Some(ref mut texture) = self.texture {
texture.update(None, data, self.width as usize * 3)?;
}
pub fn update_frame(&mut self, texture: &sdl2::render::Texture) -> Result<()> {
let _ = self.canvas.copy(texture, None, None);
Ok(())
}
@@ -63,53 +47,43 @@ impl Renderer {
self.canvas.clear();
}
pub fn draw_bbox(&mut self, x: i32, y: i32, w: u32, h: u32, label: &str) {
// Draw rectangle border
self.canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
pub fn draw_bbox(&mut self, x: i32, y: i32, w: u32, h: u32, _label: &str) {
self.canvas
.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ = self.canvas.draw_rect(Rect::new(x, y, w, h));
// Draw label background
let label_rect = Rect::new(x, y - 20, 100, 20);
self.canvas.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180));
self.canvas
.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180));
let _ = self.canvas.fill_rect(label_rect);
}
/// Flips the back buffer to the screen.
///
/// After this commit, frame textures are copied onto the canvas by the
/// caller (main loop owns decoder + texture), so this only presents
/// whatever has already been drawn.
pub fn present(&mut self) {
    self.canvas.present();
}
/// Switches the window between desktop-fullscreen and windowed mode.
///
/// NOTE(review): this reads `self.window`, but the refactor in this commit
/// builds the canvas by consuming the window (`window.into_canvas()`) —
/// confirm the struct still holds a usable `window` handle, or route this
/// through `self.canvas.window_mut()` instead.
pub fn set_fullscreen(&mut self, fullscreen: bool) -> Result<()> {
if fullscreen {
self.window.set_fullscreen(sdl2::video::FullscreenType::Desktop)?;
} else {
self.window.set_fullscreen(sdl2::video::FullscreenType::Off)?;
}
Ok(())
}
/// Resizes the window to `width`×`height` pixels.
///
/// NOTE(review): same `self.window` concern as `set_fullscreen` — verify
/// the window field survives the into_canvas() refactor.
pub fn resize(&mut self, width: u32, height: u32) -> Result<()> {
self.window.set_size(width, height)?;
Ok(())
}
pub fn poll_events(&mut self) -> Vec<sdl2::event::Event> {
let mut events = Vec::new();
let pump = self.sdl.event_pump();
if let Ok(pump) = pump {
if let Ok(mut pump) = self.sdl.event_pump() {
for event in pump.poll_iter() {
events.push(event);
}
}
events
}
/// Mutable access to the SDL canvas so callers can draw overlays directly.
pub fn canvas(&mut self) -> &mut sdl2::render::Canvas<sdl2::video::Window> {
&mut self.canvas
}
/// Current render-target width in pixels.
pub fn width(&self) -> u32 {
self.width
}
/// Current render-target height in pixels.
pub fn height(&self) -> u32 {
self.height
}
}
impl Drop for Renderer {
fn drop(&mut self) {
// Cleanup handled automatically
}
fn drop(&mut self) {}
}

View File

@@ -1,9 +1,9 @@
//! Core video playback implementation
use anyhow::{Context, Result};
use std::collections::VecDeque;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::collections::VecDeque;
use crate::player::ffmpeg::{FFmpegDecoder, VideoInfo};
@@ -73,8 +73,8 @@ impl VideoPlayer {
}
pub fn seek_frame(&mut self, frame: u64) -> Result<()> {
if let Some(ref decoder) = self.decoder {
if let Some(info) = &self.info {
if let Some(ref mut decoder) = self.decoder {
if let Some(ref info) = self.info {
let timestamp_ms = (frame * 1000) / info.fps as u64;
decoder.seek(timestamp_ms)?;
self.current_frame = frame;
@@ -84,9 +84,9 @@ impl VideoPlayer {
}
pub fn seek_time(&mut self, ms: u64) -> Result<()> {
if let Some(ref decoder) = self.decoder {
if let Some(ref mut decoder) = self.decoder {
decoder.seek(ms)?;
if let Some(info) = &self.info {
if let Some(ref info) = self.info {
self.current_frame = (ms * info.fps as u64) / 1000;
}
}