Compare commits

...

13 Commits

Author SHA1 Message Date
f3443867c5 feat(player): improve video scaling, YOLO positioning, and controls 2026-03-19 01:40:25 +08:00
64f217fdc4 feat: add sdl2 ttf feature for text rendering 2026-03-19 01:35:20 +08:00
d89057de5b feat(overlay): update YOLO loader with proper metadata fields 2026-03-19 01:35:00 +08:00
9cf4c55dd7 feat(player): add ASR/YOLO overlays, zoom, and text rendering
- Add TTF text rendering for subtitles and YOLO labels
- Implement ASR subtitle display with background
- Add YOLO bbox rendering with class labels
- Add zoom in/out (+/-) and reset (Backquote)
- Add frame/time info display
- Fix YOLO metadata parsing for actual file format
- Add Shift+Arrow for 1-second seek
2026-03-19 01:34:32 +08:00
abce13e17f feat(web): update bridge module 2026-03-19 01:26:29 +08:00
27e1434eed feat(overlay): update YOLO loader 2026-03-19 01:26:23 +08:00
55bb5d62f1 feat(overlay): update ASR loader 2026-03-19 01:26:16 +08:00
19d60de245 feat(overlay): update overlay module 2026-03-19 01:26:12 +08:00
025d62362d feat(player): update video player 2026-03-19 01:26:10 +08:00
487ada4c10 feat(player): update SDL2 renderer 2026-03-19 01:25:50 +08:00
105a6ce834 feat(player): update FFmpeg decoder 2026-03-19 01:25:44 +08:00
f55d623dca feat(player): update config module 2026-03-19 01:25:32 +08:00
5868f0da05 feat(player): implement SDL2 video playback with FFmpeg decoder 2026-03-19 01:25:05 +08:00
10 changed files with 448 additions and 248 deletions

View File

@@ -15,46 +15,30 @@ name = "momentry_playground"
path = "src/lib.rs" path = "src/lib.rs"
[dependencies] [dependencies]
# Desktop window management
tao = "0.30" tao = "0.30"
wry = "0.54" wry = "0.54"
# Video/Audio sdl2 = { version = "0.38", features = ["ttf"] }
sdl2 = "0.38"
ffmpeg-sidecar = "2.4" ffmpeg-sidecar = "2.4"
# Error handling
anyhow = "1.0" anyhow = "1.0"
thiserror = "2.0" thiserror = "2.0"
# Serialization
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
# Async
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }
# HTTP client
reqwest = { version = "0.12", features = ["json"] } reqwest = { version = "0.12", features = ["json"] }
# URL parsing
url = "2.5" url = "2.5"
# Markdown (for potential MD viewing)
pulldown-cmark = "0.10" pulldown-cmark = "0.10"
# Logging
log = "0.4" log = "0.4"
env_logger = "0.11" env_logger = "0.11"
# CLI
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
dirs = "6" dirs = "6"
# LRU Cache for YOLO frame caching
lru = "0.12" lru = "0.12"
# Time
chrono = "0.4" chrono = "0.4"
[dev-dependencies] [dev-dependencies]

View File

@@ -1,5 +1,5 @@
//! Configuration module //! Configuration module
//! //!
//! Command line arguments and runtime configuration //! Command line arguments and runtime configuration
use anyhow::Result; use anyhow::Result;
@@ -19,16 +19,30 @@ pub struct Config {
#[arg(short = 'y', long = "yolo", help = "YOLO JSON file path")] #[arg(short = 'y', long = "yolo", help = "YOLO JSON file path")]
pub yolo: Option<PathBuf>, pub yolo: Option<PathBuf>,
#[arg(short = 'w', long = "width", default_value = "1280", help = "Window width")] #[arg(
short = 'w',
long = "width",
default_value = "1280",
help = "Window width"
)]
pub width: u32, pub width: u32,
#[arg(short = 'h', long = "height", default_value = "720", help = "Window height")] #[arg(
short = 'h',
long = "height",
default_value = "720",
help = "Window height"
)]
pub height: u32, pub height: u32,
#[arg(long = "fullscreen", help = "Start in fullscreen mode")] #[arg(long = "fullscreen", help = "Start in fullscreen mode")]
pub fullscreen: bool, pub fullscreen: bool,
#[arg(long = "locale", default_value = "en", help = "UI language (en, zh-TW, etc.)")] #[arg(
long = "locale",
default_value = "en",
help = "UI language (en, zh-TW, etc.)"
)]
pub locale: String, pub locale: String,
} }

View File

@@ -1,10 +1,12 @@
//! MoMentry Playground - Main entry point //! MoMentry Playground - Main entry point
//! //!
//! Unified media player with ASR/YOLO/Chunks overlay support //! Unified media player with ASR/YOLO/Chunks overlay support
use anyhow::Result; use anyhow::Result;
use clap::Parser; use log::{error, info, warn};
use log::{error, info}; use sdl2::pixels::PixelFormatEnum;
use sdl2::rect::Rect;
use sdl2::ttf::{self, Font};
use std::path::Path; use std::path::Path;
mod config; mod config;
@@ -14,42 +16,74 @@ mod web;
use config::Config; use config::Config;
use overlay::{AsrLoader, YoloLoader}; use overlay::{AsrLoader, YoloLoader};
use player::{Video, Renderer, PlaybackState}; use player::ffmpeg::FFmpegDecoder;
use player::state::PlayerState; use player::state::{PlaybackState, PlayerState};
fn main() -> Result<()> { fn main() -> Result<()> {
env_logger::Builder::from_env( env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
env_logger::Env::default().default_filter_or("info")
).init(); let config = Config::load()?;
let config = Config::load();
info!("MoMentry Playground starting..."); info!("MoMentry Playground starting...");
info!("Window: {}x{}", config.width, config.height); info!("Window: {}x{}", config.width, config.height);
if let Err(e) = run(&config) { if let Err(e) = run(&config) {
error!("Application error: {}", e); error!("Application error: {}", e);
std::process::exit(1); std::process::exit(1);
} }
Ok(()) Ok(())
} }
fn run(config: &Config) -> Result<()> { fn run(config: &Config) -> Result<()> {
let mut video = Video::new(); let sdl_context = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
let mut renderer = Renderer::new("MoMentry Playground", config.width, config.height)?; let video_subsystem = sdl_context
.video()
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
let ttf_context = ttf::init().map_err(|e| anyhow::anyhow!("TTF init failed: {}", e))?;
let font: Option<Font> = ttf_context
.load_font("/System/Library/Fonts/Supplemental/Arial.ttf", 18)
.ok();
let window = video_subsystem
.window("MoMentry Playground", config.width, config.height)
.position_centered()
.build()
.map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
let mut canvas = window
.into_canvas()
.build()
.map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
let texture_creator = canvas.texture_creator();
let mut decoder: Option<FFmpegDecoder> = None;
let mut texture: Option<sdl2::render::Texture> = None;
let mut video_info = None;
let mut asr: Option<AsrLoader> = None; let mut asr: Option<AsrLoader> = None;
let mut yolo: Option<YoloLoader> = None; let mut yolo: Option<YoloLoader> = None;
if let Some(ref video_path) = config.video { if let Some(ref video_path) = config.video {
info!("Loading video: {:?}", video_path); info!("Loading video: {:?}", video_path);
let info_data = video.open(video_path)?; let path = Path::new(video_path);
info!("Video info: {}x{} @ {:.2}fps, {} frames", let mut dec = FFmpegDecoder::new(path)?;
info_data.width, info_data.height, info_data.fps, info_data.total_frames); let info = dec.get_info();
video_info = Some(info.clone());
renderer.create_texture(info_data.width, info_data.height)?; info!(
"Video info: {}x{} @ {:.2}fps, {} frames",
info.width, info.height, info.fps, info.frame_count
);
let tex = texture_creator
.create_texture_streaming(PixelFormatEnum::RGB24, info.width, info.height)
.map_err(|e| anyhow::anyhow!("Texture creation failed: {}", e))?;
texture = Some(tex);
dec.start_decoding(0)?;
decoder = Some(dec);
} }
if let Some(ref asr_path) = config.asr { if let Some(ref asr_path) = config.asr {
info!("Loading ASR: {:?}", asr_path); info!("Loading ASR: {:?}", asr_path);
match AsrLoader::load(asr_path) { match AsrLoader::load(asr_path) {
@@ -62,7 +96,7 @@ fn run(config: &Config) -> Result<()> {
} }
} }
} }
if let Some(ref yolo_path) = config.yolo { if let Some(ref yolo_path) = config.yolo {
info!("Loading YOLO: {:?}", yolo_path); info!("Loading YOLO: {:?}", yolo_path);
match YoloLoader::load(yolo_path) { match YoloLoader::load(yolo_path) {
@@ -75,82 +109,269 @@ fn run(config: &Config) -> Result<()> {
} }
} }
} }
if config.fullscreen {
renderer.set_fullscreen(true)?;
}
let mut player_state = PlayerState::default(); let mut player_state = PlayerState::default();
if let Some(info) = video.get_info() { if let Some(ref info) = video_info {
player_state.total_frames = info.total_frames; player_state.total_frames = info.frame_count;
player_state.duration_ms = info.duration_ms; player_state.duration_ms = info.duration_ms;
player_state.fps = info.fps; player_state.fps = info.fps;
} }
info!("Main loop started - waiting for events...");
if let Some(ref video_path) = config.video {
video.play()?;
player_state.playback = PlaybackState::Playing;
run_playback_loop(&mut video, &mut renderer, &mut player_state, &mut asr, &mut yolo)?;
}
loop {
std::thread::sleep(std::time::Duration::from_millis(100));
}
}
fn run_playback_loop( let mut event_pump = sdl_context
video: &mut Video, .event_pump()
renderer: &mut Renderer, .map_err(|e| anyhow::anyhow!("Event pump failed: {}", e))?;
state: &mut PlayerState,
asr: &mut Option<AsrLoader>, let info_height: i32 = 50;
yolo: &mut Option<YoloLoader>,
) -> Result<()> { info!("Main loop started - waiting for events...");
let frame_duration = std::time::Duration::from_millis(16);
let mut running = true;
loop { while running {
let start = std::time::Instant::now(); for event in event_pump.poll_iter() {
match event {
match video.read_frame() { sdl2::event::Event::Quit { .. } => {
Ok(Some(frame)) => { running = false;
state.current_frame = frame.frame_number; }
state.current_time_ms = frame.timestamp_ms; sdl2::event::Event::KeyDown {
keycode,
renderer.update_texture(&frame.data)?; keymod,
..
if state.show_yolo { } => {
if let Some(ref mut yolo_loader) = yolo { if let Some(key) = keycode {
let detections = yolo_loader.get_detections(frame.frame_number); let shift = keymod.intersects(sdl2::keyboard::Mod::LSHIFTMOD)
for det in detections { || keymod.intersects(sdl2::keyboard::Mod::RSHIFTMOD);
renderer.draw_bbox( match key {
det.x1 as i32, sdl2::keyboard::Keycode::Escape => running = false,
det.y1 as i32, sdl2::keyboard::Keycode::Space => {
(det.x2 - det.x1) as u32, player_state.playback =
(det.y2 - det.y1) as u32, if player_state.playback == PlaybackState::Playing {
&det.class_name, PlaybackState::Paused
); } else {
PlaybackState::Playing
};
}
sdl2::keyboard::Keycode::S => {
player_state.show_subtitle = !player_state.show_subtitle;
info!("Subtitle: {}", if player_state.show_subtitle { "ON" } else { "OFF" });
}
sdl2::keyboard::Keycode::Y => {
player_state.show_yolo = !player_state.show_yolo;
info!("YOLO: {}", if player_state.show_yolo { "ON" } else { "OFF" });
}
sdl2::keyboard::Keycode::C => {
player_state.show_chunks = !player_state.show_chunks;
}
sdl2::keyboard::Keycode::M => {
player_state.muted = !player_state.muted;
}
sdl2::keyboard::Keycode::Left => {
let step = if shift { 60 } else { 1 };
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame.saturating_sub(step);
dec.seek(((current as f64 / player_state.fps) * 1000.0) as u64)?;
player_state.current_frame = current;
}
}
sdl2::keyboard::Keycode::Right => {
let step = if shift { 60 } else { 1 };
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame + step;
dec.seek(((current as f64 / player_state.fps) * 1000.0) as u64)?;
player_state.current_frame = current;
}
}
sdl2::keyboard::Keycode::Up => {
if player_state.zoom > 1.0 {
player_state.pan_y = (player_state.pan_y - 50.0).max(-500.0);
}
}
sdl2::keyboard::Keycode::Down => {
if player_state.zoom > 1.0 {
player_state.pan_y = (player_state.pan_y + 50.0).min(500.0);
}
}
sdl2::keyboard::Keycode::Equals | sdl2::keyboard::Keycode::KpPlus => {
player_state.zoom = (player_state.zoom * 1.2).min(10.0);
}
sdl2::keyboard::Keycode::Minus | sdl2::keyboard::Keycode::KpMinus => {
player_state.zoom = (player_state.zoom / 1.2).max(0.5);
if player_state.zoom == 1.0 {
player_state.pan_x = 0.0;
player_state.pan_y = 0.0;
}
}
sdl2::keyboard::Keycode::Backquote | sdl2::keyboard::Keycode::R => {
player_state.zoom = 1.0;
player_state.pan_x = 0.0;
player_state.pan_y = 0.0;
}
_ => {}
} }
} }
} }
sdl2::event::Event::MouseWheel { y, .. } => {
renderer.present(); if y > 0 {
} player_state.zoom = (player_state.zoom * 1.1).min(10.0);
Ok(None) => { } else if y < 0 {
info!("Playback ended"); player_state.zoom = (player_state.zoom / 1.1).max(0.5);
break; if player_state.zoom == 1.0 {
} player_state.pan_x = 0.0;
Err(e) => { player_state.pan_y = 0.0;
error!("Frame read error: {}", e); }
break; }
}
_ => {}
} }
} }
let elapsed = start.elapsed(); canvas.set_draw_color(sdl2::pixels::Color::BLACK);
if elapsed < frame_duration { canvas.clear();
std::thread::sleep(frame_duration - elapsed);
if player_state.playback == PlaybackState::Playing {
if let Some(ref mut dec) = decoder {
if let Some(ref mut tex) = texture {
match dec.read_frame() {
Ok(Some(data)) => {
if let Some(ref info) = video_info {
player_state.current_frame += 1;
player_state.current_time_ms =
((player_state.current_frame as f64 / info.fps) * 1000.0) as u64;
tex.update(None, &data, (info.width * 3) as usize)
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
}
}
Ok(None) => {
info!("Playback ended");
break;
}
Err(e) => {
warn!("Frame read error: {}", e);
break;
}
}
}
}
} }
let (vid_width, vid_height) = video_info
.as_ref()
.map(|i| (i.width, i.height))
.unwrap_or((config.width, config.height));
let scale: f64 = if player_state.zoom != 1.0 {
player_state.zoom as f64
} else {
let scale_x = config.width as f64 / vid_width as f64;
let scale_y = (config.height as i32 - info_height) as f64 / vid_height as f64;
scale_x.min(scale_y).min(1.0)
};
let scaled_w = (vid_width as f64 * scale) as u32;
let scaled_h = (vid_height as f64 * scale) as u32;
let offset_x =
((config.width as i32 - scaled_w as i32) / 2) as i32 + player_state.pan_x as i32;
let offset_y = ((config.height as i32 - info_height - scaled_h as i32) / 2) as i32
+ player_state.pan_y as i32;
if let Some(ref mut tex) = texture {
let dst = Rect::new(offset_x, offset_y, scaled_w, scaled_h);
canvas.copy(tex, None, Some(dst)).ok();
}
if player_state.show_yolo {
if let Some(ref mut yolo_loader) = yolo {
let detections = yolo_loader.get_detections(player_state.current_frame);
for det in detections {
let x1 = (det.x1 * scale) as i32 + offset_x;
let y1 = (det.y1 * scale) as i32 + offset_y;
let w = ((det.x2 - det.x1) * scale) as u32;
let h = ((det.y2 - det.y1) * scale) as u32;
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ = canvas.draw_rect(Rect::new(x1, y1, w, h));
if w > 30 && h > 10 {
if let Some(ref f) = font {
let label =
format!("{} {:.0}%", det.class_name, det.confidence * 100.0);
if let Ok(surface) =
f.render(&label).solid(sdl2::pixels::Color::RGB(0, 255, 0))
{
if let Ok(tex_label) =
texture_creator.create_texture_from_surface(&surface)
{
let lw = surface.width().min(w as u32);
let label_rect = Rect::new(x1, y1.saturating_sub(20), lw, 18);
canvas.copy(&tex_label, None, Some(label_rect)).ok();
}
}
}
}
}
}
}
if player_state.show_subtitle {
if let Some(ref asr_loader) = asr {
if let Some(text) = asr_loader.get_text_at(player_state.current_time_ms as f64) {
if let Some(ref f) = font {
if let Ok(surface) = f
.render(&text)
.blended(sdl2::pixels::Color::RGBA(255, 255, 255, 255))
{
if let Ok(tex_label) = texture_creator.create_texture_from_surface(&surface) {
let query = tex_label.query();
let x = (config.width - query.width) / 2;
let y = config.height - query.height - 20;
let rect = Rect::new(x as i32, y as i32, query.width, query.height);
canvas.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 200));
let _ = canvas.fill_rect(Rect::new(rect.x() - 8, rect.y() - 4, rect.width() + 16, rect.height() + 8));
canvas.copy(&tex_label, None, Some(rect)).ok();
}
}
}
}
}
}
if let Some(ref f) = font {
let time_str = format_time(player_state.current_time_ms);
let line1 = format!("Time: {} Frame: {}/{} FPS: {:.1}",
time_str, player_state.current_frame, player_state.total_frames, player_state.fps);
let line2 = format!("[S]ubtitle:{} [Y]OLO:{} [C]hunks [M]ute [+/-]Zoom{:.1}x [`]Reset",
if player_state.show_subtitle { " ON " } else { " OFF" },
if player_state.show_yolo { " ON " } else { " OFF" },
player_state.zoom
);
for (i, line) in [line1, line2].iter().enumerate() {
if let Ok(surface) = f.render(line).solid(sdl2::pixels::Color::RGB(180, 180, 180)) {
if let Ok(tex_label) = texture_creator.create_texture_from_surface(&surface) {
let rect = Rect::new(10, 8 + (i as i32 * 20), surface.width(), surface.height());
canvas.copy(&tex_label, None, Some(rect)).ok();
}
}
}
}
canvas.present();
std::thread::sleep(std::time::Duration::from_millis(16));
} }
info!("Application closed");
Ok(()) Ok(())
} }
/// Renders a millisecond timestamp for the on-screen info line.
///
/// Produces `MM:SS.mmm` while the value is under one hour and
/// `HH:MM:SS.mmm` once it reaches an hour or more. Every field is
/// zero-padded (two digits for hours/minutes/seconds, three for millis).
fn format_time(ms: u64) -> String {
    const MS_PER_SEC: u64 = 1000;
    const SECS_PER_MIN: u64 = 60;
    const SECS_PER_HOUR: u64 = 3600;

    // Split into whole seconds and the leftover millisecond fraction.
    let (whole_secs, frac_ms) = (ms / MS_PER_SEC, ms % MS_PER_SEC);

    let h = whole_secs / SECS_PER_HOUR;
    let m = (whole_secs % SECS_PER_HOUR) / SECS_PER_MIN;
    let s = whole_secs % SECS_PER_MIN;

    // The hour field is only shown when it is non-zero.
    match h {
        0 => format!("{:02}:{:02}.{:03}", m, s, frac_ms),
        _ => format!("{:02}:{:02}:{:02}.{:03}", h, m, s, frac_ms),
    }
}

View File

@@ -27,18 +27,19 @@ impl AsrLoader {
pub fn load(path: &Path) -> Result<Self> { pub fn load(path: &Path) -> Result<Self> {
let content = std::fs::read_to_string(path) let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read ASR file: {:?}", path))?; .with_context(|| format!("Failed to read ASR file: {:?}", path))?;
let data: AsrData = serde_json::from_str(&content) let data: AsrData =
.with_context(|| "Failed to parse ASR JSON")?; serde_json::from_str(&content).with_context(|| "Failed to parse ASR JSON")?;
Ok(Self { data }) Ok(Self { data })
} }
pub fn get_segment_at(&self, time_ms: f64) -> Option<&AsrSegment> { pub fn get_segment_at(&self, time_ms: f64) -> Option<&AsrSegment> {
let time_sec = time_ms / 1000.0; let time_sec = time_ms / 1000.0;
self.data.segments.iter().find(|seg| { self.data
time_sec >= seg.start && time_sec < seg.end .segments
}) .iter()
.find(|seg| time_sec >= seg.start && time_sec < seg.end)
} }
pub fn get_text_at(&self, time_ms: f64) -> Option<String> { pub fn get_text_at(&self, time_ms: f64) -> Option<String> {

View File

@@ -1,5 +1,5 @@
//! Overlay module //! Overlay module
//! //!
//! ASR subtitle and YOLO bbox overlay management //! ASR subtitle and YOLO bbox overlay management
pub mod asr; pub mod asr;

View File

@@ -5,7 +5,8 @@ use lru::LruCache;
use serde::Deserialize; use serde::Deserialize;
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader}; use std::io::BufReader;
use std::num::NonZeroUsize;
use std::path::Path; use std::path::Path;
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
@@ -40,65 +41,62 @@ pub struct YoloMetadata {
pub status: Option<String>, pub status: Option<String>,
pub total_detections: u64, pub total_detections: u64,
pub avg_detections_per_frame: f64, pub avg_detections_per_frame: f64,
#[serde(default)]
pub auto_save_interval: Option<u32>,
#[serde(default)]
pub processing_time: Option<f64>,
#[serde(default)]
pub avg_time_per_frame: Option<f64>,
#[serde(default)]
pub last_saved_at: Option<String>,
#[serde(default)]
pub completed_at: Option<String>,
#[serde(default)]
pub auto_save_count: Option<u32>,
} }
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct YoloData { pub struct YoloData {
pub metadata: YoloMetadata, pub metadata: YoloMetadata,
#[serde(flatten)]
pub frames: HashMap<String, FrameData>, pub frames: HashMap<String, FrameData>,
} }
pub struct YoloLoader { pub struct YoloLoader {
data: YoloData, data: YoloData,
cache: LruCache<u64, Vec<Detection>>, cache: LruCache<u64, Vec<Detection>>,
frame_index: HashMap<u64, usize>,
file_path: String,
} }
impl YoloLoader { impl YoloLoader {
const CACHE_SIZE: usize = 60; const CACHE_SIZE: usize = 60;
pub fn load(path: &Path) -> Result<Self> { pub fn load(path: &Path) -> Result<Self> {
let file_path = path.to_string_lossy().to_string(); let file =
File::open(path).with_context(|| format!("Failed to open YOLO file: {:?}", path))?;
let file = File::open(path)
.with_context(|| format!("Failed to open YOLO file: {:?}", path))?;
let reader = BufReader::new(file); let reader = BufReader::new(file);
let data: YoloData = serde_json::from_reader(reader) let data: YoloData =
.with_context(|| "Failed to parse YOLO JSON")?; serde_json::from_reader(reader).with_context(|| "Failed to parse YOLO JSON")?;
let mut frame_index = HashMap::new(); let cache = LruCache::new(NonZeroUsize::new(Self::CACHE_SIZE).unwrap());
for (i, (key, frame)) in data.frames.iter().enumerate() {
if let Ok(frame_num) = key.parse::<u64>() { Ok(Self { data, cache })
frame_index.insert(frame_num, i);
}
}
Ok(Self {
data,
cache: LruCache::new(Self::CACHE_SIZE),
frame_index,
file_path,
})
} }
pub fn get_detections(&mut self, frame: u64) -> Vec<&Detection> { pub fn get_detections(&mut self, frame: u64) -> Vec<Detection> {
if let Some(dets) = self.cache.get(&frame) { if let Some(dets) = self.cache.get(&frame) {
return dets.iter().collect(); return dets.clone();
} }
if let Some(frame_data) = self.data.frames.get(&frame.to_string()) { if let Some(frame_data) = self.data.frames.get(&frame.to_string()) {
let dets: Vec<Detection> = frame_data.detections.clone(); let dets = frame_data.detections.clone();
self.cache.put(frame, dets.clone()); self.cache.put(frame, dets.clone());
dets.iter().collect() dets
} else { } else {
Vec::new() Vec::new()
} }
} }
pub fn get_detections_at_time(&mut self, time_ms: u64) -> Vec<&Detection> { pub fn get_detections_at_time(&mut self, time_ms: u64) -> Vec<Detection> {
let fps = self.data.metadata.fps; let fps = self.data.metadata.fps;
let frame = ((time_ms as f64 / 1000.0) * fps) as u64; let frame = ((time_ms as f64 / 1000.0) * fps) as u64;
self.get_detections(frame) self.get_detections(frame)

View File

@@ -1,10 +1,9 @@
//! FFmpeg 封裝 //! FFmpeg wrapper
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use std::io::{BufReader, Read};
use std::path::Path; use std::path::Path;
use std::process::{Command, Stdio, Child, ChildStdout}; use std::process::{Child, ChildStdout, Command, Stdio};
use std::io::{Read, BufReader};
use std::sync::{Arc, Mutex};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct VideoInfo { pub struct VideoInfo {
@@ -27,7 +26,7 @@ impl FFmpegDecoder {
pub fn new(path: &Path) -> Result<Self> { pub fn new(path: &Path) -> Result<Self> {
let path_str = path.to_string_lossy().to_string(); let path_str = path.to_string_lossy().to_string();
let info = Self::probe(path)?; let info = Self::probe(path)?;
Ok(Self { Ok(Self {
path: path_str, path: path_str,
process: None, process: None,
@@ -39,8 +38,10 @@ impl FFmpegDecoder {
fn probe(path: &Path) -> Result<VideoInfo> { fn probe(path: &Path) -> Result<VideoInfo> {
let output = Command::new("ffprobe") let output = Command::new("ffprobe")
.args([ .args([
"-v", "quiet", "-v",
"-print_format", "json", "quiet",
"-print_format",
"json",
"-show_format", "-show_format",
"-show_streams", "-show_streams",
path.to_str().unwrap_or(""), path.to_str().unwrap_or(""),
@@ -48,24 +49,25 @@ impl FFmpegDecoder {
.output() .output()
.context("Failed to run ffprobe")?; .context("Failed to run ffprobe")?;
let json: serde_json::Value = serde_json::from_slice(&output.stdout) let json: serde_json::Value =
.context("Failed to parse ffprobe output")?; serde_json::from_slice(&output.stdout).context("Failed to parse ffprobe output")?;
let video_stream = json["streams"] let video_stream = json["streams"]
.as_array() .as_array()
.and_then(|streams| { .and_then(|streams| streams.iter().find(|s| s["codec_type"] == "video"))
streams.iter().find(|s| s["codec_type"] == "video")
})
.context("No video stream found")?; .context("No video stream found")?;
let width = video_stream["width"].as_u64().unwrap_or(0) as u32; let width = video_stream["width"].as_u64().unwrap_or(0) as u32;
let height = video_stream["height"].as_u64().unwrap_or(0) as u32; let height = video_stream["height"].as_u64().unwrap_or(0) as u32;
let fps_str = video_stream["r_frame_rate"].as_str().unwrap_or("30/1"); let fps_str = video_stream["r_frame_rate"].as_str().unwrap_or("30/1");
let (num, den) = { let (num, den) = {
let parts: Vec<&str> = fps_str.split('/').collect(); let parts: Vec<&str> = fps_str.split('/').collect();
if parts.len() == 2 { if parts.len() == 2 {
(parts[0].parse::<f64>().unwrap_or(30.0), parts[1].parse::<f64>().unwrap_or(1.0)) (
parts[0].parse::<f64>().unwrap_or(30.0),
parts[1].parse::<f64>().unwrap_or(1.0),
)
} else { } else {
(fps_str.parse::<f64>().unwrap_or(30.0), 1.0) (fps_str.parse::<f64>().unwrap_or(30.0), 1.0)
} }
@@ -75,9 +77,12 @@ impl FFmpegDecoder {
let duration_str = json["format"]["duration"].as_str().unwrap_or("0"); let duration_str = json["format"]["duration"].as_str().unwrap_or("0");
let duration_sec: f64 = duration_str.parse().unwrap_or(0.0); let duration_sec: f64 = duration_str.parse().unwrap_or(0.0);
let duration_ms = (duration_sec * 1000.0) as u64; let duration_ms = (duration_sec * 1000.0) as u64;
let frame_count = (duration_sec * fps) as u64; let frame_count = (duration_sec * fps) as u64;
let codec = video_stream["codec_name"].as_str().unwrap_or("unknown").to_string(); let codec = video_stream["codec_name"]
.as_str()
.unwrap_or("unknown")
.to_string();
Ok(VideoInfo { Ok(VideoInfo {
width, width,
@@ -94,16 +99,20 @@ impl FFmpegDecoder {
} }
pub fn start_decoding(&mut self, start_ms: u64) -> Result<()> { pub fn start_decoding(&mut self, start_ms: u64) -> Result<()> {
self.stop()?; self.stop();
let start_sec = start_ms as f64 / 1000.0; let start_sec = start_ms as f64 / 1000.0;
let mut child = Command::new("ffmpeg") let mut child = Command::new("ffmpeg")
.args([ .args([
"-ss", &format!("{}", start_sec), "-ss",
"-i", &self.path, &format!("{}", start_sec),
"-f", "rawvideo", "-i",
"-pix_fmt", "rgb24", &self.path,
"-f",
"rawvideo",
"-pix_fmt",
"rgb24",
"-", "-",
]) ])
.stdout(Stdio::piped()) .stdout(Stdio::piped())
@@ -111,12 +120,11 @@ impl FFmpegDecoder {
.spawn() .spawn()
.context("Failed to start ffmpeg")?; .context("Failed to start ffmpeg")?;
let stdout = child.stdout.take() let stdout = child.stdout.take().context("Failed to capture stdout")?;
.context("Failed to capture stdout")?;
self.process = Some(child); self.process = Some(child);
self.stdout = Some(BufReader::new(stdout)); self.stdout = Some(BufReader::new(stdout));
Ok(()) Ok(())
} }
@@ -135,7 +143,7 @@ impl FFmpegDecoder {
pub fn read_frame(&mut self) -> Result<Option<Vec<u8>>> { pub fn read_frame(&mut self) -> Result<Option<Vec<u8>>> {
let frame_size = (self.info.width * self.info.height * 3) as usize; let frame_size = (self.info.width * self.info.height * 3) as usize;
let mut buffer = vec![0u8; frame_size]; let mut buffer = vec![0u8; frame_size];
if let Some(ref mut reader) = self.stdout { if let Some(ref mut reader) = self.stdout {
match reader.read_exact(&mut buffer) { match reader.read_exact(&mut buffer) {
Ok(_) => Ok(Some(buffer)), Ok(_) => Ok(Some(buffer)),

View File

@@ -3,58 +3,42 @@
use anyhow::Result; use anyhow::Result;
use sdl2::pixels::PixelFormatEnum; use sdl2::pixels::PixelFormatEnum;
use sdl2::rect::Rect; use sdl2::rect::Rect;
use std::sync::{Arc, Mutex};
pub struct Renderer { pub struct Renderer {
sdl: sdl2::Sdl, sdl: sdl2::Sdl,
video_subsystem: sdl2::VideoSubsystem,
window: sdl2::video::Window,
canvas: sdl2::render::Canvas<sdl2::video::Window>, canvas: sdl2::render::Canvas<sdl2::video::Window>,
texture_creator: sdl2::render::TextureCreator<sdl2::video::WindowContext>,
texture: Option<sdl2::render::Texture>,
width: u32, width: u32,
height: u32, height: u32,
} }
impl Renderer { impl Renderer {
pub fn new(title: &str, width: u32, height: u32) -> Result<Self> { pub fn new(title: &str, width: u32, height: u32) -> Result<Self> {
let sdl = sdl2::init()?; let sdl = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
let video_subsystem = sdl.video()?; let video_subsystem = sdl
.video()
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
let window = video_subsystem let window = video_subsystem
.window(title, width, height) .window(title, width, height)
.position_centered() .position_centered()
.build()?; .build()
.map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
let canvas = window.into_canvas().build()?;
let texture_creator = canvas.texture_creator(); let canvas = window
.into_canvas()
.build()
.map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
Ok(Self { Ok(Self {
sdl, sdl,
video_subsystem,
window,
canvas, canvas,
texture_creator,
texture: None,
width, width,
height, height,
}) })
} }
pub fn create_texture(&mut self, width: u32, height: u32) -> Result<()> { pub fn update_frame(&mut self, texture: &sdl2::render::Texture) -> Result<()> {
self.texture = Some( let _ = self.canvas.copy(texture, None, None);
self.texture_creator
.create_texture_streaming(PixelFormatEnum::RGB24, width, height)?
);
self.width = width;
self.height = height;
Ok(())
}
pub fn update_texture(&mut self, data: &[u8]) -> Result<()> {
if let Some(ref mut texture) = self.texture {
texture.update(None, data, self.width as usize * 3)?;
}
Ok(()) Ok(())
} }
@@ -63,53 +47,43 @@ impl Renderer {
self.canvas.clear(); self.canvas.clear();
} }
pub fn draw_bbox(&mut self, x: i32, y: i32, w: u32, h: u32, label: &str) { pub fn draw_bbox(&mut self, x: i32, y: i32, w: u32, h: u32, _label: &str) {
// Draw rectangle border self.canvas
self.canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0)); .set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ = self.canvas.draw_rect(Rect::new(x, y, w, h)); let _ = self.canvas.draw_rect(Rect::new(x, y, w, h));
// Draw label background
let label_rect = Rect::new(x, y - 20, 100, 20); let label_rect = Rect::new(x, y - 20, 100, 20);
self.canvas.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180)); self.canvas
.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180));
let _ = self.canvas.fill_rect(label_rect); let _ = self.canvas.fill_rect(label_rect);
} }
pub fn present(&mut self) { pub fn present(&mut self) {
// Draw texture if available
if let Some(ref texture) = self.texture {
self.canvas.copy(texture, None, None).ok();
}
self.canvas.present(); self.canvas.present();
} }
pub fn set_fullscreen(&mut self, fullscreen: bool) -> Result<()> {
if fullscreen {
self.window.set_fullscreen(sdl2::video::FullscreenType::Desktop)?;
} else {
self.window.set_fullscreen(sdl2::video::FullscreenType::Off)?;
}
Ok(())
}
pub fn resize(&mut self, width: u32, height: u32) -> Result<()> {
self.window.set_size(width, height)?;
Ok(())
}
pub fn poll_events(&mut self) -> Vec<sdl2::event::Event> { pub fn poll_events(&mut self) -> Vec<sdl2::event::Event> {
let mut events = Vec::new(); let mut events = Vec::new();
let pump = self.sdl.event_pump(); if let Ok(mut pump) = self.sdl.event_pump() {
if let Ok(pump) = pump {
for event in pump.poll_iter() { for event in pump.poll_iter() {
events.push(event); events.push(event);
} }
} }
events events
} }
pub fn canvas(&mut self) -> &mut sdl2::render::Canvas<sdl2::video::Window> {
&mut self.canvas
}
pub fn width(&self) -> u32 {
self.width
}
pub fn height(&self) -> u32 {
self.height
}
} }
impl Drop for Renderer { impl Drop for Renderer {
fn drop(&mut self) { fn drop(&mut self) {}
// Cleanup handled automatically
}
} }

View File

@@ -1,9 +1,9 @@
//! 視頻播放核心實現 //! 視頻播放核心實現
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use std::collections::VecDeque;
use std::path::Path; use std::path::Path;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::collections::VecDeque;
use crate::player::ffmpeg::{FFmpegDecoder, VideoInfo}; use crate::player::ffmpeg::{FFmpegDecoder, VideoInfo};
@@ -44,12 +44,12 @@ impl VideoPlayer {
pub fn open(&mut self, path: &str) -> Result<()> { pub fn open(&mut self, path: &str) -> Result<()> {
let decoder = FFmpegDecoder::new(Path::new(path)) let decoder = FFmpegDecoder::new(Path::new(path))
.with_context(|| format!("Failed to open video: {}", path))?; .with_context(|| format!("Failed to open video: {}", path))?;
self.info = Some(decoder.get_info()); self.info = Some(decoder.get_info());
self.decoder = Some(decoder); self.decoder = Some(decoder);
self.state = PlayState::Stopped; self.state = PlayState::Stopped;
self.current_frame = 0; self.current_frame = 0;
Ok(()) Ok(())
} }
@@ -73,8 +73,8 @@ impl VideoPlayer {
} }
pub fn seek_frame(&mut self, frame: u64) -> Result<()> { pub fn seek_frame(&mut self, frame: u64) -> Result<()> {
if let Some(ref decoder) = self.decoder { if let Some(ref mut decoder) = self.decoder {
if let Some(info) = &self.info { if let Some(ref info) = self.info {
let timestamp_ms = (frame * 1000) / info.fps as u64; let timestamp_ms = (frame * 1000) / info.fps as u64;
decoder.seek(timestamp_ms)?; decoder.seek(timestamp_ms)?;
self.current_frame = frame; self.current_frame = frame;
@@ -84,9 +84,9 @@ impl VideoPlayer {
} }
pub fn seek_time(&mut self, ms: u64) -> Result<()> { pub fn seek_time(&mut self, ms: u64) -> Result<()> {
if let Some(ref decoder) = self.decoder { if let Some(ref mut decoder) = self.decoder {
decoder.seek(ms)?; decoder.seek(ms)?;
if let Some(info) = &self.info { if let Some(ref info) = self.info {
self.current_frame = (ms * info.fps as u64) / 1000; self.current_frame = (ms * info.fps as u64) / 1000;
} }
} }

View File

@@ -13,25 +13,25 @@ pub enum BridgeMessage {
SeekTime(f64), SeekTime(f64),
StepForward, StepForward,
StepBackward, StepBackward,
// UI state // UI state
SetVolume(f32), SetVolume(f32),
SetMuted(bool), SetMuted(bool),
SetSpeed(f32), SetSpeed(f32),
// Overlay toggles // Overlay toggles
ToggleSubtitle, ToggleSubtitle,
ToggleYolo, ToggleYolo,
ToggleChunks, ToggleChunks,
// Zoom/Pan // Zoom/Pan
Zoom(f32), Zoom(f32),
Pan(f32, f32), Pan(f32, f32),
ResetView, ResetView,
// File operations // File operations
OpenFile(String), OpenFile(String),
// Queries // Queries
GetState, GetState,
GetSubtitle(f64), GetSubtitle(f64),