feat(player): add ASR/YOLO overlays, zoom, and text rendering

- Add TTF text rendering for subtitles and YOLO labels
- Implement ASR subtitle display with background
- Add YOLO bbox rendering with class labels
- Add zoom in/out (+/-) and reset (Backquote)
- Add frame/time info display
- Fix YOLO metadata parsing for actual file format
- Add Shift+Arrow for 1-second seek
This commit is contained in:
accusys
2026-03-19 01:33:46 +08:00
parent 0b75987fd0
commit 5587e6a67a
5272 changed files with 103480 additions and 45 deletions

View File

@@ -3,8 +3,10 @@
//! Unified media player with ASR/YOLO/Chunks overlay support
use anyhow::Result;
use log::{error, info};
use log::{error, info, warn};
use sdl2::pixels::PixelFormatEnum;
use sdl2::rect::Rect;
use sdl2::ttf::{self, Font};
use std::path::Path;
mod config;
@@ -38,6 +40,11 @@ fn run(config: &Config) -> Result<()> {
.video()
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
let ttf_context = ttf::init().map_err(|e| anyhow::anyhow!("TTF init failed: {}", e))?;
let font: Option<Font> = ttf_context
.load_font("/System/Library/Fonts/Supplemental/Arial.ttf", 20)
.ok();
let window = video_subsystem
.window("MoMentry Playground", config.width, config.height)
.position_centered()
@@ -53,6 +60,7 @@ fn run(config: &Config) -> Result<()> {
let mut decoder: Option<FFmpegDecoder> = None;
let mut texture: Option<sdl2::render::Texture> = None;
let mut video_info = None;
let mut asr: Option<AsrLoader> = None;
let mut yolo: Option<YoloLoader> = None;
@@ -61,6 +69,7 @@ fn run(config: &Config) -> Result<()> {
let path = Path::new(video_path);
let mut dec = FFmpegDecoder::new(path)?;
let info = dec.get_info();
video_info = Some(info.clone());
info!(
"Video info: {}x{} @ {:.2}fps, {} frames",
info.width, info.height, info.fps, info.frame_count
@@ -102,8 +111,7 @@ fn run(config: &Config) -> Result<()> {
}
let mut player_state = PlayerState::default();
if let Some(ref dec) = decoder {
let info = dec.get_info();
if let Some(ref info) = video_info {
player_state.total_frames = info.frame_count;
player_state.duration_ms = info.duration_ms;
player_state.fps = info.fps;
@@ -122,8 +130,12 @@ fn run(config: &Config) -> Result<()> {
sdl2::event::Event::Quit { .. } => {
running = false;
}
sdl2::event::Event::KeyDown { keycode, .. } => {
sdl2::event::Event::KeyDown {
keycode, keymod, ..
} => {
if let Some(key) = keycode {
let shift = keymod.intersects(sdl2::keyboard::Mod::LSHIFTMOD)
|| keymod.intersects(sdl2::keyboard::Mod::RSHIFTMOD);
match key {
sdl2::keyboard::Keycode::Escape => running = false,
sdl2::keyboard::Keycode::Space => {
@@ -136,9 +148,21 @@ fn run(config: &Config) -> Result<()> {
}
sdl2::keyboard::Keycode::S => {
player_state.show_subtitle = !player_state.show_subtitle;
info!(
"Subtitle: {}",
if player_state.show_subtitle {
"ON"
} else {
"OFF"
}
);
}
sdl2::keyboard::Keycode::Y => {
player_state.show_yolo = !player_state.show_yolo;
info!(
"YOLO: {}",
if player_state.show_yolo { "ON" } else { "OFF" }
);
}
sdl2::keyboard::Keycode::C => {
player_state.show_chunks = !player_state.show_chunks;
@@ -146,24 +170,63 @@ fn run(config: &Config) -> Result<()> {
sdl2::keyboard::Keycode::M => {
player_state.muted = !player_state.muted;
}
sdl2::keyboard::Keycode::F => {
// Fullscreen toggle - skip for now to avoid borrow issues
}
sdl2::keyboard::Keycode::Left => {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame.saturating_sub(1);
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
if shift {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame.saturating_sub(60);
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
} else {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame.saturating_sub(1);
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
}
}
sdl2::keyboard::Keycode::Right => {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame + 1;
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
if shift {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame + 60;
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
} else {
if let Some(ref mut dec) = decoder {
let current = player_state.current_frame + 1;
dec.seek(
((current as f64 / player_state.fps) * 1000.0) as u64,
)?;
player_state.current_frame = current;
}
}
}
sdl2::keyboard::Keycode::Equals | sdl2::keyboard::Keycode::KpPlus => {
player_state.zoom = (player_state.zoom * 1.2).min(5.0);
}
sdl2::keyboard::Keycode::Minus | sdl2::keyboard::Keycode::KpMinus => {
player_state.zoom = (player_state.zoom / 1.2).max(0.5);
}
sdl2::keyboard::Keycode::Backquote => {
player_state.zoom = 1.0;
player_state.pan_x = 0.0;
player_state.pan_y = 0.0;
}
sdl2::keyboard::Keycode::R => {
player_state.zoom = 1.0;
player_state.pan_x = 0.0;
player_state.pan_y = 0.0;
}
_ => {}
}
}
@@ -180,33 +243,14 @@ fn run(config: &Config) -> Result<()> {
if let Some(ref mut tex) = texture {
match dec.read_frame() {
Ok(Some(data)) => {
let info = dec.get_info();
player_state.current_frame += 1;
player_state.current_time_ms =
((player_state.current_frame as f64 / info.fps) * 1000.0) as u64;
if let Some(ref info) = video_info {
player_state.current_frame += 1;
player_state.current_time_ms =
((player_state.current_frame as f64 / info.fps) * 1000.0)
as u64;
tex.update(None, &data, (info.width * 3) as usize)
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
canvas
.copy(tex, None, None)
.map_err(|e| anyhow::anyhow!("Copy failed: {}", e))?;
if player_state.show_yolo {
if let Some(ref mut yolo_loader) = yolo {
let detections =
yolo_loader.get_detections(player_state.current_frame);
for det in detections {
let x1 = det.x1 as i32;
let y1 = det.y1 as i32;
let w = (det.x2 - det.x1) as u32;
let h = (det.y2 - det.y1) as u32;
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ =
canvas.draw_rect(sdl2::rect::Rect::new(x1, y1, w, h));
}
}
tex.update(None, &data, (info.width * 3) as usize)
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
}
}
Ok(None) => {
@@ -214,7 +258,7 @@ fn run(config: &Config) -> Result<()> {
break;
}
Err(e) => {
error!("Frame read error: {}", e);
warn!("Frame read error: {}", e);
break;
}
}
@@ -222,6 +266,140 @@ fn run(config: &Config) -> Result<()> {
}
}
if let Some(ref mut tex) = texture {
let dst = if player_state.zoom != 1.0 {
let info = video_info.as_ref().unwrap();
let w = (info.width as f32 * player_state.zoom) as u32;
let h = (info.height as f32 * player_state.zoom) as u32;
let x = ((config.width as i32 - w as i32) / 2) as i32 + player_state.pan_x as i32;
let y = ((config.height as i32 - h as i32) / 2) as i32 + player_state.pan_y as i32;
Rect::new(x, y, w, h)
} else {
Rect::new(0, 0, 0, 0)
};
if player_state.zoom == 1.0 {
canvas.copy(tex, None, None).ok();
} else {
canvas.copy(tex, None, Some(dst)).ok();
}
}
if player_state.show_yolo {
if let Some(ref mut yolo_loader) = yolo {
let detections = yolo_loader.get_detections(player_state.current_frame);
for det in detections {
let x1 = (det.x1 as f32 * player_state.zoom) as i32
+ player_state.pan_x as i32
+ ((config.width as i32
- video_info.as_ref().map(|i| i.width as i32).unwrap_or(0))
/ 2);
let y1 = (det.y1 as f32 * player_state.zoom) as i32
+ player_state.pan_y as i32
+ ((config.height as i32
- video_info.as_ref().map(|i| i.height as i32).unwrap_or(0))
/ 2);
let w = ((det.x2 - det.x1) as f32 * player_state.zoom) as u32;
let h = ((det.y2 - det.y1) as f32 * player_state.zoom) as u32;
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
let _ = canvas.draw_rect(Rect::new(x1, y1, w, h));
if let Some(ref f) = font {
let label = format!("{} {:.0}%", det.class_name, det.confidence * 100.0);
if let Ok(surface) =
f.render(&label).solid(sdl2::pixels::Color::RGB(0, 255, 0))
{
let tex_label =
texture_creator.create_texture_from_surface(&surface).ok();
if let Some(tex_label) = tex_label {
let label_rect = Rect::new(x1, y1 - 24, w.min(150), 24);
canvas.copy(&tex_label, None, Some(label_rect)).ok();
}
}
}
}
}
}
if player_state.show_subtitle {
if let Some(ref asr_loader) = asr {
if let Some(text) = asr_loader.get_text_at(player_state.current_time_ms as f64) {
if let Some(ref f) = font {
if let Ok(surface) = f
.render(&text)
.blended(sdl2::pixels::Color::RGBA(255, 255, 255, 255))
{
let tex_label =
texture_creator.create_texture_from_surface(&surface).ok();
if let Some(tex_label) = tex_label {
let query = tex_label.query();
let x = (config.width - query.width) / 2;
let y = config.height - query.height - 40;
let rect = Rect::new(x as i32, y as i32, query.width, query.height);
canvas.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180));
let _ = canvas.fill_rect(Rect::new(
rect.x() - 10,
rect.y() - 5,
rect.width() + 20,
rect.height() + 10,
));
canvas.copy(&tex_label, None, Some(rect)).ok();
}
}
}
}
}
}
if let Some(ref f) = font {
let time_str = format_time(player_state.current_time_ms);
let frame_str = format!(
"Frame: {}/{} ({:.1}fps)",
player_state.current_frame, player_state.total_frames, player_state.fps
);
let status_parts = vec![
format!("Time: {}", time_str),
frame_str,
if player_state.show_subtitle {
"Subtitle: ON".to_string()
} else {
String::new()
},
if player_state.show_yolo {
"YOLO: ON".to_string()
} else {
String::new()
},
if player_state.zoom != 1.0 {
format!("Zoom: {:.1}x", player_state.zoom)
} else {
String::new()
},
];
let y_offset = 10;
for (i, part) in status_parts.iter().enumerate() {
if !part.is_empty() {
if let Ok(surface) = f
.render(part)
.solid(sdl2::pixels::Color::RGB(200, 200, 200))
{
let tex_label = texture_creator.create_texture_from_surface(&surface).ok();
if let Some(tex_label) = tex_label {
let rect = Rect::new(
10,
y_offset + (i as i32 * 22),
surface.width(),
surface.height(),
);
canvas.copy(&tex_label, None, Some(rect)).ok();
}
}
}
}
}
canvas.present();
std::thread::sleep(std::time::Duration::from_millis(16));
@@ -230,3 +408,16 @@ fn run(config: &Config) -> Result<()> {
info!("Application closed");
Ok(())
}
/// Renders a millisecond timestamp as a zero-padded clock string.
///
/// Yields `HH:MM:SS.mmm` once the value reaches a full hour and the shorter
/// `MM:SS.mmm` form below that. The hour field is padded to at least two
/// digits and simply grows wider for values of 100 hours or more.
fn format_time(ms: u64) -> String {
    const MS_PER_SEC: u64 = 1000;
    const SECS_PER_MIN: u64 = 60;
    const SECS_PER_HOUR: u64 = 3600;

    // Split off the sub-second remainder first, then break the whole
    // seconds down into hour / minute / second fields.
    let frac_ms = ms % MS_PER_SEC;
    let whole_secs = ms / MS_PER_SEC;

    let hh = whole_secs / SECS_PER_HOUR;
    let mm = (whole_secs % SECS_PER_HOUR) / SECS_PER_MIN;
    let ss = whole_secs % SECS_PER_MIN;

    match hh {
        // Under an hour: drop the hour field entirely.
        0 => format!("{:02}:{:02}.{:03}", mm, ss, frac_ms),
        _ => format!("{:02}:{:02}:{:02}.{:03}", hh, mm, ss, frac_ms),
    }
}