feat(player): add natural language search with subtitle index
This commit is contained in:
96
src/search/mod.rs
Normal file
96
src/search/mod.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
use anyhow::{Context, Result};
|
||||
use log::{error, info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchResult {
|
||||
pub frame: u64,
|
||||
pub time_ms: u64,
|
||||
pub text: String,
|
||||
pub score: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct VectorSearcher {
|
||||
qdrant_url: String,
|
||||
collection: String,
|
||||
asr_loader: Option<super::overlay::AsrLoader>,
|
||||
}
|
||||
|
||||
impl VectorSearcher {
|
||||
pub fn new(qdrant_url: &str, collection: &str) -> Self {
|
||||
Self {
|
||||
qdrant_url: qdrant_url.to_string(),
|
||||
collection: collection.to_string(),
|
||||
asr_loader: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_asr(&mut self, path: &Path) -> Result<()> {
|
||||
let loader =
|
||||
super::overlay::AsrLoader::load(path).context("Failed to load ASR for search")?;
|
||||
info!(
|
||||
"Loaded ASR with {} segments for search",
|
||||
loader.segment_count()
|
||||
);
|
||||
self.asr_loader = Some(loader);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn search(&self, query: &str) -> Vec<SearchResult> {
|
||||
info!("Searching for: {}", query);
|
||||
|
||||
if let Some(ref asr) = self.asr_loader {
|
||||
let query_lower = query.to_lowercase();
|
||||
let mut results: Vec<SearchResult> = Vec::new();
|
||||
|
||||
for segment in asr.get_all_segments() {
|
||||
let text_lower = segment.text.to_lowercase();
|
||||
if text_lower.contains(&query_lower) {
|
||||
let score = self.calculate_score(&query_lower, &text_lower);
|
||||
results.push(SearchResult {
|
||||
frame: (segment.start * 60.0) as u64,
|
||||
time_ms: (segment.start * 1000.0) as u64,
|
||||
text: segment.text.clone(),
|
||||
score,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
results.truncate(10);
|
||||
|
||||
info!("Found {} results", results.len());
|
||||
return results;
|
||||
}
|
||||
|
||||
warn!("No ASR loaded for search");
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
fn calculate_score(&self, query: &str, text: &str) -> f32 {
|
||||
let query_words: Vec<&str> = query.split_whitespace().collect();
|
||||
let text_words: Vec<&str> = text.split_whitespace().collect();
|
||||
|
||||
let mut matches = 0;
|
||||
for qw in &query_words {
|
||||
for tw in &text_words {
|
||||
if tw.contains(qw) {
|
||||
matches += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(matches as f32) / (query_words.len() as f32)
|
||||
}
|
||||
|
||||
pub fn is_available(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_searcher() -> VectorSearcher {
|
||||
VectorSearcher::new("http://localhost:6333", "AccusysDB")
|
||||
}
|
||||
Reference in New Issue
Block a user