// Original listing metadata: 97 lines, 2.7 KiB, Rust.
use anyhow::{Context, Result};
|
|
use log::{error, info, warn};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::path::Path;
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct SearchResult {
|
|
pub frame: u64,
|
|
pub time_ms: u64,
|
|
pub text: String,
|
|
pub score: f32,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct VectorSearcher {
|
|
qdrant_url: String,
|
|
collection: String,
|
|
asr_loader: Option<super::overlay::AsrLoader>,
|
|
}
|
|
|
|
impl VectorSearcher {
|
|
pub fn new(qdrant_url: &str, collection: &str) -> Self {
|
|
Self {
|
|
qdrant_url: qdrant_url.to_string(),
|
|
collection: collection.to_string(),
|
|
asr_loader: None,
|
|
}
|
|
}
|
|
|
|
pub fn load_asr(&mut self, path: &Path) -> Result<()> {
|
|
let loader =
|
|
super::overlay::AsrLoader::load(path).context("Failed to load ASR for search")?;
|
|
info!(
|
|
"Loaded ASR with {} segments for search",
|
|
loader.segment_count()
|
|
);
|
|
self.asr_loader = Some(loader);
|
|
Ok(())
|
|
}
|
|
|
|
pub fn search(&self, query: &str) -> Vec<SearchResult> {
|
|
info!("Searching for: {}", query);
|
|
|
|
if let Some(ref asr) = self.asr_loader {
|
|
let query_lower = query.to_lowercase();
|
|
let mut results: Vec<SearchResult> = Vec::new();
|
|
|
|
for segment in asr.get_all_segments() {
|
|
let text_lower = segment.text.to_lowercase();
|
|
if text_lower.contains(&query_lower) {
|
|
let score = self.calculate_score(&query_lower, &text_lower);
|
|
results.push(SearchResult {
|
|
frame: (segment.start * 60.0) as u64,
|
|
time_ms: (segment.start * 1000.0) as u64,
|
|
text: segment.text.clone(),
|
|
score,
|
|
});
|
|
}
|
|
}
|
|
|
|
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
|
results.truncate(10);
|
|
|
|
info!("Found {} results", results.len());
|
|
return results;
|
|
}
|
|
|
|
warn!("No ASR loaded for search");
|
|
Vec::new()
|
|
}
|
|
|
|
fn calculate_score(&self, query: &str, text: &str) -> f32 {
|
|
let query_words: Vec<&str> = query.split_whitespace().collect();
|
|
let text_words: Vec<&str> = text.split_whitespace().collect();
|
|
|
|
let mut matches = 0;
|
|
for qw in &query_words {
|
|
for tw in &text_words {
|
|
if tw.contains(qw) {
|
|
matches += 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
(matches as f32) / (query_words.len() as f32)
|
|
}
|
|
|
|
pub fn is_available(&self) -> bool {
|
|
true
|
|
}
|
|
}
|
|
|
|
pub fn create_searcher() -> VectorSearcher {
|
|
VectorSearcher::new("http://localhost:6333", "AccusysDB")
|
|
}
|