feat(player): add ASR/YOLO overlays, zoom, and text rendering
- Add TTF text rendering for subtitles and YOLO labels
- Implement ASR subtitle display with background
- Add YOLO bbox rendering with class labels
- Add zoom in/out (+/-) and reset (Backquote)
- Add frame/time info display
- Fix YOLO metadata parsing for actual file format
- Add Shift+Arrow for 1-second seek
This commit is contained in:
277
src/main.rs
277
src/main.rs
@@ -3,8 +3,10 @@
|
||||
//! Unified media player with ASR/YOLO/Chunks overlay support
|
||||
|
||||
use anyhow::Result;
|
||||
use log::{error, info};
|
||||
use log::{error, info, warn};
|
||||
use sdl2::pixels::PixelFormatEnum;
|
||||
use sdl2::rect::Rect;
|
||||
use sdl2::ttf::{self, Font};
|
||||
use std::path::Path;
|
||||
|
||||
mod config;
|
||||
@@ -38,6 +40,11 @@ fn run(config: &Config) -> Result<()> {
|
||||
.video()
|
||||
.map_err(|e| anyhow::anyhow!("Video subsystem failed: {}", e))?;
|
||||
|
||||
let ttf_context = ttf::init().map_err(|e| anyhow::anyhow!("TTF init failed: {}", e))?;
|
||||
let font: Option<Font> = ttf_context
|
||||
.load_font("/System/Library/Fonts/Supplemental/Arial.ttf", 20)
|
||||
.ok();
|
||||
|
||||
let window = video_subsystem
|
||||
.window("MoMentry Playground", config.width, config.height)
|
||||
.position_centered()
|
||||
@@ -53,6 +60,7 @@ fn run(config: &Config) -> Result<()> {
|
||||
|
||||
let mut decoder: Option<FFmpegDecoder> = None;
|
||||
let mut texture: Option<sdl2::render::Texture> = None;
|
||||
let mut video_info = None;
|
||||
let mut asr: Option<AsrLoader> = None;
|
||||
let mut yolo: Option<YoloLoader> = None;
|
||||
|
||||
@@ -61,6 +69,7 @@ fn run(config: &Config) -> Result<()> {
|
||||
let path = Path::new(video_path);
|
||||
let mut dec = FFmpegDecoder::new(path)?;
|
||||
let info = dec.get_info();
|
||||
video_info = Some(info.clone());
|
||||
info!(
|
||||
"Video info: {}x{} @ {:.2}fps, {} frames",
|
||||
info.width, info.height, info.fps, info.frame_count
|
||||
@@ -102,8 +111,7 @@ fn run(config: &Config) -> Result<()> {
|
||||
}
|
||||
|
||||
let mut player_state = PlayerState::default();
|
||||
if let Some(ref dec) = decoder {
|
||||
let info = dec.get_info();
|
||||
if let Some(ref info) = video_info {
|
||||
player_state.total_frames = info.frame_count;
|
||||
player_state.duration_ms = info.duration_ms;
|
||||
player_state.fps = info.fps;
|
||||
@@ -122,8 +130,12 @@ fn run(config: &Config) -> Result<()> {
|
||||
sdl2::event::Event::Quit { .. } => {
|
||||
running = false;
|
||||
}
|
||||
sdl2::event::Event::KeyDown { keycode, .. } => {
|
||||
sdl2::event::Event::KeyDown {
|
||||
keycode, keymod, ..
|
||||
} => {
|
||||
if let Some(key) = keycode {
|
||||
let shift = keymod.intersects(sdl2::keyboard::Mod::LSHIFTMOD)
|
||||
|| keymod.intersects(sdl2::keyboard::Mod::RSHIFTMOD);
|
||||
match key {
|
||||
sdl2::keyboard::Keycode::Escape => running = false,
|
||||
sdl2::keyboard::Keycode::Space => {
|
||||
@@ -136,9 +148,21 @@ fn run(config: &Config) -> Result<()> {
|
||||
}
|
||||
sdl2::keyboard::Keycode::S => {
|
||||
player_state.show_subtitle = !player_state.show_subtitle;
|
||||
info!(
|
||||
"Subtitle: {}",
|
||||
if player_state.show_subtitle {
|
||||
"ON"
|
||||
} else {
|
||||
"OFF"
|
||||
}
|
||||
);
|
||||
}
|
||||
sdl2::keyboard::Keycode::Y => {
|
||||
player_state.show_yolo = !player_state.show_yolo;
|
||||
info!(
|
||||
"YOLO: {}",
|
||||
if player_state.show_yolo { "ON" } else { "OFF" }
|
||||
);
|
||||
}
|
||||
sdl2::keyboard::Keycode::C => {
|
||||
player_state.show_chunks = !player_state.show_chunks;
|
||||
@@ -146,24 +170,63 @@ fn run(config: &Config) -> Result<()> {
|
||||
sdl2::keyboard::Keycode::M => {
|
||||
player_state.muted = !player_state.muted;
|
||||
}
|
||||
sdl2::keyboard::Keycode::F => {
|
||||
// Fullscreen toggle - skip for now to avoid borrow issues
|
||||
}
|
||||
sdl2::keyboard::Keycode::Left => {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame.saturating_sub(1);
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
if shift {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame.saturating_sub(60);
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
}
|
||||
} else {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame.saturating_sub(1);
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
}
|
||||
}
|
||||
}
|
||||
sdl2::keyboard::Keycode::Right => {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame + 1;
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
if shift {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame + 60;
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
}
|
||||
} else {
|
||||
if let Some(ref mut dec) = decoder {
|
||||
let current = player_state.current_frame + 1;
|
||||
dec.seek(
|
||||
((current as f64 / player_state.fps) * 1000.0) as u64,
|
||||
)?;
|
||||
player_state.current_frame = current;
|
||||
}
|
||||
}
|
||||
}
|
||||
sdl2::keyboard::Keycode::Equals | sdl2::keyboard::Keycode::KpPlus => {
|
||||
player_state.zoom = (player_state.zoom * 1.2).min(5.0);
|
||||
}
|
||||
sdl2::keyboard::Keycode::Minus | sdl2::keyboard::Keycode::KpMinus => {
|
||||
player_state.zoom = (player_state.zoom / 1.2).max(0.5);
|
||||
}
|
||||
sdl2::keyboard::Keycode::Backquote => {
|
||||
player_state.zoom = 1.0;
|
||||
player_state.pan_x = 0.0;
|
||||
player_state.pan_y = 0.0;
|
||||
}
|
||||
sdl2::keyboard::Keycode::R => {
|
||||
player_state.zoom = 1.0;
|
||||
player_state.pan_x = 0.0;
|
||||
player_state.pan_y = 0.0;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
@@ -180,33 +243,14 @@ fn run(config: &Config) -> Result<()> {
|
||||
if let Some(ref mut tex) = texture {
|
||||
match dec.read_frame() {
|
||||
Ok(Some(data)) => {
|
||||
let info = dec.get_info();
|
||||
player_state.current_frame += 1;
|
||||
player_state.current_time_ms =
|
||||
((player_state.current_frame as f64 / info.fps) * 1000.0) as u64;
|
||||
if let Some(ref info) = video_info {
|
||||
player_state.current_frame += 1;
|
||||
player_state.current_time_ms =
|
||||
((player_state.current_frame as f64 / info.fps) * 1000.0)
|
||||
as u64;
|
||||
|
||||
tex.update(None, &data, (info.width * 3) as usize)
|
||||
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
|
||||
|
||||
canvas
|
||||
.copy(tex, None, None)
|
||||
.map_err(|e| anyhow::anyhow!("Copy failed: {}", e))?;
|
||||
|
||||
if player_state.show_yolo {
|
||||
if let Some(ref mut yolo_loader) = yolo {
|
||||
let detections =
|
||||
yolo_loader.get_detections(player_state.current_frame);
|
||||
for det in detections {
|
||||
let x1 = det.x1 as i32;
|
||||
let y1 = det.y1 as i32;
|
||||
let w = (det.x2 - det.x1) as u32;
|
||||
let h = (det.y2 - det.y1) as u32;
|
||||
|
||||
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
|
||||
let _ =
|
||||
canvas.draw_rect(sdl2::rect::Rect::new(x1, y1, w, h));
|
||||
}
|
||||
}
|
||||
tex.update(None, &data, (info.width * 3) as usize)
|
||||
.map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
@@ -214,7 +258,7 @@ fn run(config: &Config) -> Result<()> {
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Frame read error: {}", e);
|
||||
warn!("Frame read error: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -222,6 +266,140 @@ fn run(config: &Config) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref mut tex) = texture {
|
||||
let dst = if player_state.zoom != 1.0 {
|
||||
let info = video_info.as_ref().unwrap();
|
||||
let w = (info.width as f32 * player_state.zoom) as u32;
|
||||
let h = (info.height as f32 * player_state.zoom) as u32;
|
||||
let x = ((config.width as i32 - w as i32) / 2) as i32 + player_state.pan_x as i32;
|
||||
let y = ((config.height as i32 - h as i32) / 2) as i32 + player_state.pan_y as i32;
|
||||
Rect::new(x, y, w, h)
|
||||
} else {
|
||||
Rect::new(0, 0, 0, 0)
|
||||
};
|
||||
|
||||
if player_state.zoom == 1.0 {
|
||||
canvas.copy(tex, None, None).ok();
|
||||
} else {
|
||||
canvas.copy(tex, None, Some(dst)).ok();
|
||||
}
|
||||
}
|
||||
|
||||
if player_state.show_yolo {
|
||||
if let Some(ref mut yolo_loader) = yolo {
|
||||
let detections = yolo_loader.get_detections(player_state.current_frame);
|
||||
for det in detections {
|
||||
let x1 = (det.x1 as f32 * player_state.zoom) as i32
|
||||
+ player_state.pan_x as i32
|
||||
+ ((config.width as i32
|
||||
- video_info.as_ref().map(|i| i.width as i32).unwrap_or(0))
|
||||
/ 2);
|
||||
let y1 = (det.y1 as f32 * player_state.zoom) as i32
|
||||
+ player_state.pan_y as i32
|
||||
+ ((config.height as i32
|
||||
- video_info.as_ref().map(|i| i.height as i32).unwrap_or(0))
|
||||
/ 2);
|
||||
let w = ((det.x2 - det.x1) as f32 * player_state.zoom) as u32;
|
||||
let h = ((det.y2 - det.y1) as f32 * player_state.zoom) as u32;
|
||||
|
||||
canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 255, 0));
|
||||
let _ = canvas.draw_rect(Rect::new(x1, y1, w, h));
|
||||
|
||||
if let Some(ref f) = font {
|
||||
let label = format!("{} {:.0}%", det.class_name, det.confidence * 100.0);
|
||||
if let Ok(surface) =
|
||||
f.render(&label).solid(sdl2::pixels::Color::RGB(0, 255, 0))
|
||||
{
|
||||
let tex_label =
|
||||
texture_creator.create_texture_from_surface(&surface).ok();
|
||||
if let Some(tex_label) = tex_label {
|
||||
let label_rect = Rect::new(x1, y1 - 24, w.min(150), 24);
|
||||
canvas.copy(&tex_label, None, Some(label_rect)).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if player_state.show_subtitle {
|
||||
if let Some(ref asr_loader) = asr {
|
||||
if let Some(text) = asr_loader.get_text_at(player_state.current_time_ms as f64) {
|
||||
if let Some(ref f) = font {
|
||||
if let Ok(surface) = f
|
||||
.render(&text)
|
||||
.blended(sdl2::pixels::Color::RGBA(255, 255, 255, 255))
|
||||
{
|
||||
let tex_label =
|
||||
texture_creator.create_texture_from_surface(&surface).ok();
|
||||
if let Some(tex_label) = tex_label {
|
||||
let query = tex_label.query();
|
||||
let x = (config.width - query.width) / 2;
|
||||
let y = config.height - query.height - 40;
|
||||
let rect = Rect::new(x as i32, y as i32, query.width, query.height);
|
||||
canvas.set_draw_color(sdl2::pixels::Color::RGBA(0, 0, 0, 180));
|
||||
let _ = canvas.fill_rect(Rect::new(
|
||||
rect.x() - 10,
|
||||
rect.y() - 5,
|
||||
rect.width() + 20,
|
||||
rect.height() + 10,
|
||||
));
|
||||
canvas.copy(&tex_label, None, Some(rect)).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref f) = font {
|
||||
let time_str = format_time(player_state.current_time_ms);
|
||||
let frame_str = format!(
|
||||
"Frame: {}/{} ({:.1}fps)",
|
||||
player_state.current_frame, player_state.total_frames, player_state.fps
|
||||
);
|
||||
let status_parts = vec![
|
||||
format!("Time: {}", time_str),
|
||||
frame_str,
|
||||
if player_state.show_subtitle {
|
||||
"Subtitle: ON".to_string()
|
||||
} else {
|
||||
String::new()
|
||||
},
|
||||
if player_state.show_yolo {
|
||||
"YOLO: ON".to_string()
|
||||
} else {
|
||||
String::new()
|
||||
},
|
||||
if player_state.zoom != 1.0 {
|
||||
format!("Zoom: {:.1}x", player_state.zoom)
|
||||
} else {
|
||||
String::new()
|
||||
},
|
||||
];
|
||||
|
||||
let y_offset = 10;
|
||||
for (i, part) in status_parts.iter().enumerate() {
|
||||
if !part.is_empty() {
|
||||
if let Ok(surface) = f
|
||||
.render(part)
|
||||
.solid(sdl2::pixels::Color::RGB(200, 200, 200))
|
||||
{
|
||||
let tex_label = texture_creator.create_texture_from_surface(&surface).ok();
|
||||
if let Some(tex_label) = tex_label {
|
||||
let rect = Rect::new(
|
||||
10,
|
||||
y_offset + (i as i32 * 22),
|
||||
surface.width(),
|
||||
surface.height(),
|
||||
);
|
||||
canvas.copy(&tex_label, None, Some(rect)).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
canvas.present();
|
||||
|
||||
std::thread::sleep(std::time::Duration::from_millis(16));
|
||||
@@ -230,3 +408,16 @@ fn run(config: &Config) -> Result<()> {
|
||||
info!("Application closed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Formats a millisecond timestamp as a playback clock string.
///
/// Returns `MM:SS.mmm` when the timestamp is under one hour, and
/// `HH:MM:SS.mmm` once at least one full hour has elapsed. Every field is
/// zero-padded (two digits for hours/minutes/seconds, three for milliseconds);
/// an hour count above 99 simply widens the first field.
fn format_time(ms: u64) -> String {
    // Split off the sub-second remainder first; everything else works in whole seconds.
    let millis = ms % 1000;
    let total_secs = ms / 1000;

    let hours = total_secs / 3600;
    let minutes = (total_secs % 3600) / 60;
    let seconds = total_secs % 60;

    if hours > 0 {
        format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, seconds, millis)
    } else {
        // The hour field is omitted entirely for short timestamps.
        format!("{:02}:{:02}.{:03}", minutes, seconds, millis)
    }
}
|
||||
|
||||
Reference in New Issue
Block a user