Initial commit: rust-scribe transcription tool
This commit is contained in:
14
.cargo/config.toml
Normal file
14
.cargo/config.toml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[target.aarch64-apple-darwin]
|
||||||
|
rustflags = [
|
||||||
|
"-L", "/opt/homebrew/lib",
|
||||||
|
"-l", "c++",
|
||||||
|
# 正確寫法:使用 -C link-arg= 將參數傳遞給連結器
|
||||||
|
"-C", "link-arg=-framework",
|
||||||
|
"-C", "link-arg=Metal",
|
||||||
|
"-C", "link-arg=-framework",
|
||||||
|
"-C", "link-arg=Foundation",
|
||||||
|
"-C", "link-arg=-framework",
|
||||||
|
"-C", "link-arg=QuartzCore",
|
||||||
|
"-C", "link-arg=-framework",
|
||||||
|
"-C", "link-arg=CoreGraphics"
|
||||||
|
]
|
||||||
21
.cargo/config.toml.bak
Normal file
21
.cargo/config.toml.bak
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
[target.aarch64-apple-darwin]
|
||||||
|
rustflags = [
|
||||||
|
"-L", "/opt/homebrew/lib",
|
||||||
|
"-l", "c++",
|
||||||
|
# 新增:連結 Metal 相關框架 (解決 Undefined symbols 錯誤)
|
||||||
|
"-framework", "Metal",
|
||||||
|
"-framework", "Foundation",
|
||||||
|
"-framework", "QuartzCore",
|
||||||
|
"-framework", "CoreGraphics"
|
||||||
|
]
|
||||||
|
|
||||||
|
# 如果是 Intel Mac,取消下面註解並使用
|
||||||
|
# [target.x86_64-apple-darwin]
|
||||||
|
# rustflags = [
|
||||||
|
# "-L", "/usr/local/lib",
|
||||||
|
# "-l", "c++",
|
||||||
|
# "-framework", "Metal",
|
||||||
|
# "-framework", "Foundation",
|
||||||
|
# "-framework", "QuartzCore",
|
||||||
|
# "-framework", "CoreGraphics"
|
||||||
|
# ]
|
||||||
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/target
|
||||||
173
AGENTS.md
Normal file
173
AGENTS.md
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
# AGENTS.md - rust-scribe
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
rust-scribe is a high-performance video/audio transcriber with timestamps using Rust and Whisper. It extracts audio from video or audio files and transcribes it using the Whisper.cpp library.
|
||||||
|
|
||||||
|
## Build Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the project (debug mode)
|
||||||
|
cargo build
|
||||||
|
|
||||||
|
# Build release version
|
||||||
|
cargo build --release
|
||||||
|
|
||||||
|
# Run the application
|
||||||
|
cargo run --release -- <input_file> --model <model_path> [options]
|
||||||
|
|
||||||
|
# Example usage:
|
||||||
|
cargo run --release -- video.mp4 --model models/ggml-base.bin --language zh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run all tests
|
||||||
|
cargo test
|
||||||
|
|
||||||
|
# Run a single test by name
|
||||||
|
cargo test <test_name>
|
||||||
|
|
||||||
|
# Run tests with output
|
||||||
|
cargo test -- --nocapture
|
||||||
|
```
|
||||||
|
|
||||||
|
### Linting and Formatting
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run clippy for linting
|
||||||
|
cargo clippy
|
||||||
|
|
||||||
|
# Fix clippy suggestions automatically
|
||||||
|
cargo clippy --fix
|
||||||
|
|
||||||
|
# Format code
|
||||||
|
cargo fmt
|
||||||
|
|
||||||
|
# Check formatting
|
||||||
|
cargo fmt --check
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- **ffmpeg-next** (v8.0): FFmpeg bindings for audio extraction and resampling
|
||||||
|
- **whisper-rs** (v0.12): Rust bindings for Whisper.cpp
|
||||||
|
- **whisper-rs-sys** (v0.10): Low-level Whisper bindings
|
||||||
|
- **clap** (v4.5): CLI argument parsing
|
||||||
|
- **anyhow** (v1.0): Error handling
|
||||||
|
- **ndarray** (v0.15): Array operations
|
||||||
|
|
||||||
|
## Code Style Guidelines
|
||||||
|
|
||||||
|
### Formatting
|
||||||
|
- Use `cargo fmt` for consistent formatting
|
||||||
|
- 4-space indentation (Rust default)
|
||||||
|
- Maximum line length: 100 characters (default)
|
||||||
|
|
||||||
|
### Imports
|
||||||
|
- Group imports by crate: std → external → local
|
||||||
|
- Use absolute paths with `crate::` for internal modules
|
||||||
|
- Prefer bringing traits into scope when using them
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use std::path::Path;
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use clap::Parser;
|
||||||
|
use ffmpeg_next as ffmpeg;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Naming Conventions
|
||||||
|
- **Variables/functions**: snake_case (e.g., `extract_audio_to_f32`, `audio_data`)
|
||||||
|
- **Types/Enums**: PascalCase (e.g., `Args`, `WhisperContext`)
|
||||||
|
- **Constants**: SCREAMING_SNAKE_CASE (e.g., `WHISPER_SAMPLE_RATE`)
|
||||||
|
- **Files**: snake_case (e.g., `main.rs`)
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
- Use `anyhow::Result<T>` for application-level error handling
|
||||||
|
- Use `?` operator for propagating errors
|
||||||
|
- Use `Context` trait for adding context to errors
|
||||||
|
- Use `anyhow::bail!` for early returns with errors
|
||||||
|
- Provide descriptive error messages in Chinese or English
|
||||||
|
|
||||||
|
```rust
|
||||||
|
fn load_config() -> Result<Config> {
|
||||||
|
let file = File::open("config.toml")
|
||||||
|
.context("Failed to open config file")?;
|
||||||
|
// ...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Unsafe Code
|
||||||
|
- Minimize unsafe code; isolate it in small, well-documented functions
|
||||||
|
- Use `unsafe` blocks only when necessary (e.g., FFI callbacks)
|
||||||
|
- Document preconditions and invariants
|
||||||
|
|
||||||
|
```rust
|
||||||
|
unsafe extern "C" fn progress_callback(...) {
|
||||||
|
// Document what this callback does
|
||||||
|
// Keep unsafe block minimal
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
- Add doc comments (`///`) for public functions
|
||||||
|
- Document parameters and return values
|
||||||
|
- Include usage examples for complex functions
|
||||||
|
|
||||||
|
### Performance Considerations
|
||||||
|
- Use `AtomicU64`/`AtomicBool` for global state in callbacks
|
||||||
|
- Pre-allocate vectors with `Vec::with_capacity()` when size is known
|
||||||
|
- Use `saturating_*` operations to prevent overflow
|
||||||
|
- Reuse objects instead of creating new ones in loops
|
||||||
|
|
||||||
|
### Type Annotations
|
||||||
|
- Prefer explicit types for function signatures
|
||||||
|
- Use type inference for obvious local variables
|
||||||
|
- Use primitive types (`u32`, `f64`, etc.) over aliases
|
||||||
|
|
||||||
|
### Control Flow
|
||||||
|
- Use early returns to reduce nesting
|
||||||
|
- Prefer `?` over `match` for simple error propagation
|
||||||
|
- Use `if let` for optional values when pattern matching is simple
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
rust-scribe/
|
||||||
|
├── src/
|
||||||
|
│ └── main.rs # Main application code
|
||||||
|
├── models/ # Whisper model files
|
||||||
|
├── Cargo.toml # Project manifest
|
||||||
|
└── .cargo/
|
||||||
|
└── config.toml # Cargo configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### CLI Arguments
|
||||||
|
- `input_file` (positional): Path to video/audio file
|
||||||
|
- `--model`: Path to Whisper model file
|
||||||
|
- `--language`: Target language (optional, auto-detects if not specified)
|
||||||
|
- `--verbose`: Enable verbose output
|
||||||
|
|
||||||
|
### Model Requirements
|
||||||
|
Place Whisper model files (e.g., `ggml-base.bin`) in the `models/` directory.
|
||||||
|
|
||||||
|
## Common Tasks
|
||||||
|
|
||||||
|
### Adding a New Dependency
|
||||||
|
Add to `[dependencies]` section in `Cargo.toml`:
|
||||||
|
```toml
|
||||||
|
package_name = "version"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adding a New Feature
|
||||||
|
1. Implement the feature in a new function in `src/main.rs`
|
||||||
|
2. Add CLI argument if needed in `Args` struct
|
||||||
|
3. Test with sample audio/video files
|
||||||
|
|
||||||
|
### Debugging
|
||||||
|
- Use `eprintln!` for debug output (goes to stderr)
|
||||||
|
- Use `println!` for progress messages
|
||||||
|
- Enable `--verbose` flag for Whisper debug output
|
||||||
718
Cargo.lock
generated
Normal file
718
Cargo.lock
generated
Normal file
@@ -0,0 +1,718 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.6.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"once_cell_polyfill",
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.102"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.69.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools 0.12.1",
|
||||||
|
"lazy_static",
|
||||||
|
"lazycell",
|
||||||
|
"log",
|
||||||
|
"prettyplease",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash 1.1.0",
|
||||||
|
"shlex",
|
||||||
|
"syn",
|
||||||
|
"which",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.72.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"clang-sys",
|
||||||
|
"itertools 0.13.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash 2.1.1",
|
||||||
|
"shlex",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "2.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.2.56"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
|
||||||
|
dependencies = [
|
||||||
|
"find-msvc-tools",
|
||||||
|
"shlex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.5.60"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.60"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.5.55"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cmake"
|
||||||
|
version = "0.1.57"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "errno"
|
||||||
|
version = "0.3.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ffmpeg-next"
|
||||||
|
version = "8.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d658424d233cbd993a972dd73a66ca733acd12a494c68995c9ac32ae1fe65b40"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"ffmpeg-sys-next",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ffmpeg-sys-next"
|
||||||
|
version = "8.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9bca20aa4ee774fe384c2490096c122b0b23cf524a9910add0686691003d797b"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen 0.72.1",
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"num_cpus",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "find-msvc-tools"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fs_extra"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "home"
|
||||||
|
version = "0.5.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazycell"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.182"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linux-raw-sys"
|
||||||
|
version = "0.4.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.29"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "matrixmultiply"
|
||||||
|
version = "0.3.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"rawpointer",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "minimal-lexical"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndarray"
|
||||||
|
version = "0.15.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
|
||||||
|
dependencies = [
|
||||||
|
"matrixmultiply",
|
||||||
|
"num-complex",
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
"rawpointer",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "7.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"minimal-lexical",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-complex"
|
||||||
|
version = "0.4.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-integer"
|
||||||
|
version = "0.1.46"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.17.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.21.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell_polyfill"
|
||||||
|
version = "1.70.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "prettyplease"
|
||||||
|
version = "0.2.37"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.106"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rawpointer"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rust-scribe"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap",
|
||||||
|
"ffmpeg-next",
|
||||||
|
"ndarray",
|
||||||
|
"whisper-rs",
|
||||||
|
"whisper-rs-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustix"
|
||||||
|
version = "0.38.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shlex"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.117"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.24"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "vcpkg"
|
||||||
|
version = "0.2.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "which"
|
||||||
|
version = "4.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"home",
|
||||||
|
"once_cell",
|
||||||
|
"rustix",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "whisper-rs"
|
||||||
|
version = "0.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c597ac8a9d5c4719fee232abc871da184ea50a4fea38d2d00348fd95072b2b0"
|
||||||
|
dependencies = [
|
||||||
|
"whisper-rs-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "whisper-rs-sys"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d22f00ed0995463eecc34ef89905845f6bf6fd37ea70789fed180520050da8f8"
|
||||||
|
dependencies = [
|
||||||
|
"bindgen 0.69.5",
|
||||||
|
"cfg-if",
|
||||||
|
"cmake",
|
||||||
|
"fs_extra",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-link"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.61.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
34
Cargo.toml
Normal file
34
Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
[package]
|
||||||
|
name = "rust-scribe"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
description = "A high-performance video/audio transcriber with timestamps using Rust and Whisper."
|
||||||
|
authors = ["Warren Lo"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
# FFmpeg 綁定:用於音頻提取和重採樣
|
||||||
|
ffmpeg-next = "8.0"
|
||||||
|
|
||||||
|
# Whisper.cpp 的 Rust 綁定:核心轉寫引擎
|
||||||
|
whisper-rs = "0.12"
|
||||||
|
|
||||||
|
whisper-rs-sys = "0.10"
|
||||||
|
|
||||||
|
# 用於命令行參數解析 (比手動解析更專業)
|
||||||
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
|
|
||||||
|
# 用於處理錯誤
|
||||||
|
anyhow = "1.0"
|
||||||
|
|
||||||
|
# 可選:如果 whisper-rs 需要額外的數學運算支持
|
||||||
|
ndarray = "0.15"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# --- 新增以下內容 ---
|
||||||
|
[target.'cfg(target_os = "macos")'.dependencies]
|
||||||
|
# 如果未來需要顯式依賴某些 crate 可放在這裡
|
||||||
|
|
||||||
|
# 關鍵:告訴 cargo 在 macOS 上編譯 whisper-rs (當啟用 metal 時) 需要連結哪些框架
|
||||||
|
# 注意:whisper-rs 的 build script 通常會自動處理,但有時需要手動干預
|
||||||
|
# 更可靠的方法是在 .cargo/config.toml 中設置
|
||||||
73
README.md
Normal file
73
README.md
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# rust-scribe
|
||||||
|
|
||||||
|
`rust-scribe` 是一個高效能的影片/音頻逐字稿轉寫工具,使用 Rust 語言編寫,並結合了 [Whisper.cpp](https://github.com/ggerganov/whisper.cpp) 的強大轉寫能力與 [FFmpeg](https://ffmpeg.org/) 的音頻處理功能,可自動生成帶有時間碼的逐字稿。
|
||||||
|
|
||||||
|
## 功能特性
|
||||||
|
|
||||||
|
- **高效音頻提取**:利用 FFmpeg 直接從影片或音頻檔案中提取並重採樣為 Whisper 所需的格式 (16kHz, Mono, f32)。
|
||||||
|
- **精準轉寫**:基於 OpenAI Whisper 模型,提供高精度的語音轉文字功能。
|
||||||
|
- **時間碼支援**:自動產生精確到毫秒的逐字稿時間戳。
|
||||||
|
- **語言自動檢測**:若未指定語言,可自動偵測輸入音頻的語言。
|
||||||
|
- **優化進度顯示**:在轉寫過程中提供即時的處理進度、耗時與預估剩餘時間。
|
||||||
|
|
||||||
|
## 前置需求
|
||||||
|
|
||||||
|
在編譯與執行前,請確保系統已安裝以下環境:
|
||||||
|
|
||||||
|
1. **Rust 環境**:請安裝 [Rustup](https://rustup.rs/)。
|
||||||
|
2. **FFmpeg**:系統必須安裝 FFmpeg 開發庫。
|
||||||
|
- macOS: `brew install ffmpeg`
|
||||||
|
- Ubuntu/Debian: `sudo apt install clang pkg-config libavcodec-dev libavformat-dev libavutil-dev libavfilter-dev libavdevice-dev libswresample-dev libswscale-dev`
|
||||||
|
3. **Whisper 模型**:請準備 Whisper 的 `.bin` 模型檔案 (例如 `ggml-base.bin`),並放置於 `models/` 目錄中。
|
||||||
|
|
||||||
|
## 編譯與安裝
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 複製專案
|
||||||
|
git clone <repository-url>
|
||||||
|
cd rust-scribe
|
||||||
|
|
||||||
|
# 編譯 Release 版本 (建議)
|
||||||
|
cargo build --release
|
||||||
|
```
|
||||||
|
|
||||||
|
## 使用方式
|
||||||
|
|
||||||
|
執行程式時,需指定輸入檔案路徑及模型路徑:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./target/release/rust-scribe <input_file> --model models/<your-model.bin>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 參數說明
|
||||||
|
|
||||||
|
- `<input_file>`: 欲轉寫的影片或音頻檔案路徑 (位置參數)。
|
||||||
|
- `-m, --model <MODEL_PATH>`: Whisper 模型檔案路徑 (必填)。
|
||||||
|
- `-l, --language <LANG>`: 指定轉寫語言 (例如 `zh`, `en`)。若不指定,系統將自動偵測 (選填)。
|
||||||
|
- `-v, --verbose`: 開啟詳細轉寫進度輸出 (選填)。
|
||||||
|
|
||||||
|
**使用範例**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 指定使用 base 模型,自動檢測語言
|
||||||
|
cargo run --release -- video.mp4 --model models/ggml-base.bin
|
||||||
|
|
||||||
|
# 指定使用中文進行轉寫
|
||||||
|
cargo run --release -- lecture.mkv --model models/ggml-base.bin --language zh
|
||||||
|
```
|
||||||
|
|
||||||
|
## 專案結構
|
||||||
|
|
||||||
|
```
|
||||||
|
rust-scribe/
|
||||||
|
├── src/
|
||||||
|
│ └── main.rs # 核心邏輯 (音頻處理、Whisper 轉寫、CLI 介面)
|
||||||
|
├── models/ # 存放 Whisper 模型檔案
|
||||||
|
├── Cargo.toml # 專案依賴與配置
|
||||||
|
└── .cargo/
|
||||||
|
└── config.toml # Cargo 編譯配置
|
||||||
|
```
|
||||||
|
|
||||||
|
## 開發者規範
|
||||||
|
|
||||||
|
請參閱 `AGENTS.md` 了解詳細的代碼風格、編譯與測試指南。
|
||||||
2
models/.gitignore
vendored
Normal file
2
models/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
*.bin
|
||||||
|
*.ggml
|
||||||
432
src/main.rs
Normal file
432
src/main.rs
Normal file
@@ -0,0 +1,432 @@
|
|||||||
|
use anyhow::{Context, Result};
|
||||||
|
use clap::Parser;
|
||||||
|
use ffmpeg_next as ffmpeg;
|
||||||
|
use ffmpeg::format::input;
|
||||||
|
use ffmpeg::media::Type;
|
||||||
|
use ffmpeg::codec::context::Context as CodecContext;
|
||||||
|
use ffmpeg::frame::Audio as AudioFrame;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
use std::io::{self, Write};
|
||||||
|
use std::ffi::c_void;
|
||||||
|
|
||||||
|
// 導入 Whisper 相關類型
|
||||||
|
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
|
||||||
|
use whisper_rs_sys::{whisper_context, whisper_state};
|
||||||
|
|
||||||
|
const WHISPER_SAMPLE_RATE: u32 = 16000;
|
||||||
|
|
||||||
|
// --- 全局狀態 ---
|
||||||
|
static START_TIME_NANOS: AtomicU64 = AtomicU64::new(0);
|
||||||
|
static IS_FIRST_PROGRESS: AtomicBool = AtomicBool::new(true);
|
||||||
|
static TOTAL_DURATION_SEC_X100: AtomicU64 = AtomicU64::new(0);
|
||||||
|
|
||||||
|
// 用於優化預估的全局原子變量
|
||||||
|
// 存儲上一次的預估剩餘秒數 (x100),用於平滑處理
|
||||||
|
static LAST_REMAINING_SEC_X100: AtomicU64 = AtomicU64::new(0);
|
||||||
|
// 存儲上次更新時的進度,防止同一進度重複計算
|
||||||
|
static LAST_PROGRESS: AtomicU64 = AtomicU64::new(0);
|
||||||
|
// ----------------
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(author, version, about, long_about = None)]
|
||||||
|
struct Args {
|
||||||
|
#[arg(index = 1)]
|
||||||
|
input_file: String,
|
||||||
|
#[arg(short, long)]
|
||||||
|
model: String,
|
||||||
|
#[arg(short, long, default_value = None)]
|
||||||
|
language: Option<String>,
|
||||||
|
#[arg(short, long, default_value_t = false)]
|
||||||
|
verbose: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// C 語言風格的回調函數
|
||||||
|
unsafe extern "C" fn progress_callback(
|
||||||
|
_ctx: *mut whisper_context,
|
||||||
|
_state: *mut whisper_state,
|
||||||
|
progress: i32,
|
||||||
|
_user_data: *mut c_void,
|
||||||
|
) {
|
||||||
|
let now_nanos = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos() as u64;
|
||||||
|
|
||||||
|
// 初始化開始時間
|
||||||
|
let start_nanos = START_TIME_NANOS.load(Ordering::Relaxed);
|
||||||
|
if start_nanos == 0 {
|
||||||
|
if let Err(existing) = START_TIME_NANOS.compare_exchange(
|
||||||
|
0, now_nanos, Ordering::Relaxed, Ordering::Relaxed
|
||||||
|
) {
|
||||||
|
if existing == 0 { START_TIME_NANOS.store(now_nanos, Ordering::Relaxed); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let actual_start_nanos = START_TIME_NANOS.load(Ordering::Relaxed);
|
||||||
|
if actual_start_nanos == 0 { return; }
|
||||||
|
|
||||||
|
let elapsed_nanos = now_nanos.saturating_sub(actual_start_nanos);
|
||||||
|
let elapsed_sec = elapsed_nanos as f64 / 1_000_000_000.0;
|
||||||
|
|
||||||
|
let total_sec_x100 = TOTAL_DURATION_SEC_X100.load(Ordering::Relaxed);
|
||||||
|
let total_sec = if total_sec_x100 > 0 {
|
||||||
|
total_sec_x100 as f64 / 100.0
|
||||||
|
} else {
|
||||||
|
1.0
|
||||||
|
};
|
||||||
|
|
||||||
|
let current_percent = (progress as f64).min(100.0) / 100.0;
|
||||||
|
|
||||||
|
// 【優化點 1】忽略過早的進度 (< 5%),此時數據極不穩定
|
||||||
|
if current_percent < 0.05 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 計算原始預估
|
||||||
|
let raw_remaining_sec = if current_percent >= 0.99 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
let estimated_total = elapsed_sec / current_percent;
|
||||||
|
estimated_total - elapsed_sec
|
||||||
|
};
|
||||||
|
|
||||||
|
// 【優化點 2】讀取上一次的預估值進行平滑
|
||||||
|
let last_rem_x100 = LAST_REMAINING_SEC_X100.load(Ordering::Relaxed);
|
||||||
|
let last_rem_sec = last_rem_x100 as f64 / 100.0;
|
||||||
|
|
||||||
|
// 如果這是第一次有效計算,或者進度變化很小,直接使用原始值
|
||||||
|
let last_prog = LAST_PROGRESS.load(Ordering::Relaxed);
|
||||||
|
|
||||||
|
let mut final_remaining_sec = raw_remaining_sec;
|
||||||
|
|
||||||
|
if last_prog > 0 && (progress as u64) > last_prog {
|
||||||
|
// 【優化點 3】滑動平均:新預估 = 舊預估 * 0.7 + 新計算 * 0.3
|
||||||
|
// 這樣可以防止數字劇烈跳變
|
||||||
|
final_remaining_sec = last_rem_sec * 0.6 + raw_remaining_sec * 0.4;
|
||||||
|
|
||||||
|
// 【優化點 4】限制最大增長幅度:如果新預估比舊預估大很多,說明前面卡住了,
|
||||||
|
// 不要讓剩餘時間無限增加,設置一個上限(例如最多增加 30 秒)
|
||||||
|
if final_remaining_sec > last_rem_sec + 30.0 {
|
||||||
|
final_remaining_sec = last_rem_sec + 30.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 確保不為負數
|
||||||
|
if final_remaining_sec < 0.0 {
|
||||||
|
final_remaining_sec = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 更新全局狀態
|
||||||
|
LAST_REMAINING_SEC_X100.store((final_remaining_sec * 100.0) as u64, Ordering::Relaxed);
|
||||||
|
LAST_PROGRESS.store(progress as u64, Ordering::Relaxed);
|
||||||
|
|
||||||
|
let rem_min = final_remaining_sec as u32 / 60;
|
||||||
|
let rem_s = final_remaining_sec as u32 % 60;
|
||||||
|
|
||||||
|
let elapsed_min = elapsed_sec as u32 / 60;
|
||||||
|
let elapsed_s = elapsed_sec as u32 % 60;
|
||||||
|
|
||||||
|
// 計算已處理時長 (用於分子)
|
||||||
|
let processed_sec = total_sec * current_percent;
|
||||||
|
let proc_min = processed_sec as u32 / 60;
|
||||||
|
let proc_s = processed_sec as u32 % 60;
|
||||||
|
let tot_min = total_sec as u32 / 60;
|
||||||
|
let tot_s = total_sec as u32 % 60;
|
||||||
|
|
||||||
|
let percent_display = current_percent * 100.0;
|
||||||
|
|
||||||
|
let stderr = io::stderr();
|
||||||
|
let mut handle = stderr.lock();
|
||||||
|
|
||||||
|
if IS_FIRST_PROGRESS.load(Ordering::Relaxed) {
|
||||||
|
let _ = writeln!(handle);
|
||||||
|
IS_FIRST_PROGRESS.store(false, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 顯示邏輯:如果進度太低,顯示"計算中"
|
||||||
|
let remaining_str = if current_percent < 0.10 {
|
||||||
|
"計算中...".to_string()
|
||||||
|
} else {
|
||||||
|
format!("{:02}:{:02}", rem_min, rem_s)
|
||||||
|
};
|
||||||
|
|
||||||
|
let _ = write!(
|
||||||
|
handle,
|
||||||
|
"\r🔄 識別進度: [{:02}:{:02}/{:02}:{:02}] {:.1}% | 耗時: {:02}:{:02} | 預計剩餘: {} ",
|
||||||
|
proc_min, proc_s, tot_min, tot_s, percent_display,
|
||||||
|
elapsed_min, elapsed_s,
|
||||||
|
remaining_str
|
||||||
|
);
|
||||||
|
|
||||||
|
if percent_display >= 99.9 {
|
||||||
|
let _ = writeln!(handle);
|
||||||
|
} else {
|
||||||
|
let _ = handle.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let args = Args::parse();
|
||||||
|
|
||||||
|
if !Path::new(&args.input_file).exists() {
|
||||||
|
anyhow::bail!("錯誤:找不到輸入文件 '{}'", args.input_file);
|
||||||
|
}
|
||||||
|
if !Path::new(&args.model).exists() {
|
||||||
|
anyhow::bail!("錯誤:找不到模型文件 '{}'", args.model);
|
||||||
|
}
|
||||||
|
|
||||||
|
ffmpeg::init().context("Failed to initialize FFmpeg")?;
|
||||||
|
|
||||||
|
println!("🎬 正在處理文件:{}", args.input_file);
|
||||||
|
println!("🧠 載入模型:{}", args.model);
|
||||||
|
|
||||||
|
let audio_data = extract_audio_to_f32(&args.input_file)
|
||||||
|
.context("Failed to extract and process audio")?;
|
||||||
|
|
||||||
|
if audio_data.is_empty() {
|
||||||
|
anyhow::bail!("錯誤:未能從文件中提取有效音頻數據");
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_samples = audio_data.len() as f64;
|
||||||
|
let total_duration_sec = total_samples / WHISPER_SAMPLE_RATE as f64;
|
||||||
|
|
||||||
|
println!("✅ 音頻準備完成 (樣本數:{}, 時長:{:.2} 分鐘)", total_samples as usize, total_duration_sec / 60.0);
|
||||||
|
|
||||||
|
// 重置全局狀態
|
||||||
|
TOTAL_DURATION_SEC_X100.store((total_duration_sec * 100.0) as u64, Ordering::Relaxed);
|
||||||
|
START_TIME_NANOS.store(0, Ordering::Relaxed);
|
||||||
|
IS_FIRST_PROGRESS.store(true, Ordering::Relaxed);
|
||||||
|
LAST_REMAINING_SEC_X100.store(0, Ordering::Relaxed);
|
||||||
|
LAST_PROGRESS.store(0, Ordering::Relaxed);
|
||||||
|
|
||||||
|
println!("⏳ 正在初始化 Whisper 模型...");
|
||||||
|
let ctx = WhisperContext::new_with_params(
|
||||||
|
&args.model,
|
||||||
|
WhisperContextParameters::default()
|
||||||
|
).context("Failed to load Whisper model")?;
|
||||||
|
|
||||||
|
let mut state = ctx.create_state()
|
||||||
|
.context("Failed to create Whisper state")?;
|
||||||
|
|
||||||
|
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
|
||||||
|
params.set_print_special(false);
|
||||||
|
params.set_print_progress(args.verbose);
|
||||||
|
params.set_print_realtime(false);
|
||||||
|
params.set_print_timestamps(false);
|
||||||
|
params.set_single_segment(false);
|
||||||
|
|
||||||
|
if let Some(lang) = &args.language {
|
||||||
|
params.set_language(Some(lang.as_str()));
|
||||||
|
} else {
|
||||||
|
params.set_language(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
params.set_progress_callback(Some(progress_callback));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("🎙️ 正在進行語音識別 (這可能需要幾分鐘)...");
|
||||||
|
state.full(params, &audio_data)
|
||||||
|
.context("Whisper inference failed")?;
|
||||||
|
|
||||||
|
eprintln!("\n✅ 識別完成!");
|
||||||
|
|
||||||
|
println!("\n=== 逐字稿 (帶時間碼) ===\n");
|
||||||
|
|
||||||
|
let num_segments = state.full_n_segments()
|
||||||
|
.context("Failed to get segment count")?;
|
||||||
|
|
||||||
|
if num_segments == 0 {
|
||||||
|
println!("未檢測到任何語音內容。");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 0..num_segments {
|
||||||
|
let text = match state.full_get_segment_text(i) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("⚠️ 警告:串流 #{} 包含無效字符 (UTF-8 Error),已跳過。詳情:{}", i, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let start_ts = match state.full_get_segment_t0(i) {
|
||||||
|
Ok(t) => t as f64 / 100.0,
|
||||||
|
Err(_) => {
|
||||||
|
eprintln!("⚠️ 警告:無法獲取串流 #{} 的時間戳", i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let time_str = format_time(start_ts);
|
||||||
|
let clean_text = text.trim();
|
||||||
|
|
||||||
|
if !clean_text.is_empty() {
|
||||||
|
println!("[{}] {}", time_str, clean_text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.language.is_none() {
|
||||||
|
if let Ok(lang_id) = state.full_lang_id_from_state() {
|
||||||
|
let lang_name = get_language_name(lang_id);
|
||||||
|
println!("\n🌍 自動檢測語言:{} (ID: {})", lang_name, lang_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps a numeric language id to an English display name.
///
/// Ids `0..=99` index into the table below; anything else (including negative
/// ids) yields `"Unknown"`. `main` passes the id returned by
/// `full_lang_id_from_state()`; the table is presumably ordered like
/// Whisper's own language list — verify when upgrading the bindings.
fn get_language_name(lang_id: i32) -> &'static str {
    const NAMES: [&str; 100] = [
        "English", "Chinese", "German", "Spanish",
        "Russian", "Korean", "French", "Japanese",
        "Portuguese", "Turkish", "Polish", "Catalan",
        "Dutch", "Arabic", "Swedish", "Italian",
        "Indonesian", "Hindi", "Finnish", "Vietnamese",
        "Hebrew", "Ukrainian", "Greek", "Malay",
        "Czech", "Romanian", "Danish", "Hungarian",
        "Tamil", "Norwegian", "Thai", "Urdu",
        "Croatian", "Bulgarian", "Lithuanian", "Latin",
        "Maori", "Malayalam", "Welsh", "Slovak",
        "Telugu", "Persian", "Latvian", "Bengali",
        "Serbian", "Azerbaijani", "Slovenian", "Kannada",
        "Estonian", "Macedonian", "Breton", "Basque",
        "Icelandic", "Armenian", "Nepali", "Mongolian",
        "Bosnian", "Kazakh", "Albanian", "Swahili",
        "Galician", "Marathi", "Punjabi", "Sinhala",
        "Khmer", "Shona", "Yoruba", "Somali",
        "Afrikaans", "Occitan", "Georgian", "Belarusian",
        "Tajik", "Sindhi", "Gujarati", "Amharic",
        "Yiddish", "Lao", "Uzbek", "Faroese",
        "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
        "Maltese", "Sanskrit", "Luxembourgish", "Myanmar",
        "Tibetan", "Tagalog", "Malagasy", "Assamese",
        "Tatar", "Hawaiian", "Lingala", "Hausa",
        "Bashkir", "Javanese", "Sundanese", "Cantonese",
    ];

    // Negative ids fail the conversion; ids past the table fail the lookup.
    usize::try_from(lang_id)
        .ok()
        .and_then(|index| NAMES.get(index))
        .copied()
        .unwrap_or("Unknown")
}
|
||||||
|
|
||||||
|
/// Formats a duration given in seconds as `HH:MM:SS.mmm`.
///
/// The fractional part is rounded to the nearest millisecond; when that
/// rounding reaches a full second it is carried into the seconds field.
/// Negative or NaN inputs render as `00:00:00.000` because the float-to-integer
/// casts saturate at zero.
fn format_time(seconds: f64) -> String {
    let whole = seconds as u64;
    let frac_ms = ((seconds - whole as f64) * 1000.0).round() as u32;

    // Carry a rounded-up 1000 ms into the seconds counter.
    let (secs, ms) = match frac_ms {
        0..=999 => (whole, frac_ms),
        carried => (whole + 1, carried - 1000),
    };

    let hours = secs / 3600;
    let minutes = secs % 3600 / 60;
    let rest = secs % 60;
    format!("{hours:02}:{minutes:02}:{rest:02}.{ms:03}")
}
|
||||||
|
|
||||||
|
/// Decodes the best audio stream of `input_path` and returns it as packed
/// `f32` samples resampled to mono at `WHISPER_SAMPLE_RATE`.
///
/// The resampler is created lazily from the first decoded frame and is
/// re-created when `run` fails with an error whose text contains
/// "Output changed" or "Invalid". Most per-frame problems are logged to stderr
/// and skipped; `error_count` tallies resampler init failures and resets.
///
/// # Errors
///
/// Returns an error when the file cannot be opened, no audio stream or decoder
/// is found, the decoder cannot be configured, or the resampler fails with an
/// error that is not treated as a parameter change.
fn extract_audio_to_f32(input_path: &str) -> Result<Vec<f32>> {
    let mut ictx = input(&input_path)?;
    let stream_index = ictx.streams().best(Type::Audio)
        .ok_or_else(|| anyhow::anyhow!("未找到音頻串流"))?
        .index();
    // The index was just obtained from this same context, hence the `expect`.
    let stream = ictx.stream(stream_index).expect("Stream should exist");
    let codec_params = stream.parameters();
    let codec_id = codec_params.id();
    let codec_decoder = ffmpeg::codec::decoder::find(codec_id)
        .ok_or_else(|| anyhow::anyhow!("未找到對應的解碼器"))?;
    let mut context = CodecContext::new_with_codec(codec_decoder);
    context.set_parameters(codec_params)?;
    let mut decoder = context.decoder().audio()?;
    // Target format for every resampler built below: packed f32, mono, 16 kHz.
    let out_format = ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed);
    let out_channel_layout = ffmpeg::channel_layout::ChannelLayout::MONO;
    let out_sample_rate = WHISPER_SAMPLE_RATE;
    // Created on the first decoded frame, because the input parameters are
    // read from the frame itself.
    let mut resampler: Option<ffmpeg::software::resampling::Context> = None;
    let mut decoded_samples = Vec::new();
    let mut error_count = 0;
    for (stream, packet) in ictx.packets() {
        // Ignore packets that belong to other streams.
        if stream.index() != stream_index { continue; }
        if let Err(e) = decoder.send_packet(&packet) { eprintln!("⚠️ 發送包失敗:{}", e); continue; }
        let mut decoded_frame = AudioFrame::empty();
        // Any non-Ok result from `receive_frame` ends the loop for this packet.
        while decoder.receive_frame(&mut decoded_frame).is_ok() {
            let in_format = decoded_frame.format();
            let mut in_channel_layout = decoded_frame.channel_layout();
            let in_sample_rate = decoded_frame.rate();
            let channels = decoded_frame.channels();
            // Frames without a usable layout get a substitute: mono for one
            // channel, stereo otherwise (both remaining branches pick stereo).
            if channels == 0 || in_channel_layout.is_empty() {
                let safe_layout = if channels == 1 { ffmpeg::channel_layout::ChannelLayout::MONO } else if channels > 1 { ffmpeg::channel_layout::ChannelLayout::STEREO } else { ffmpeg::channel_layout::ChannelLayout::STEREO };
                in_channel_layout = safe_layout;
            }
            let mut resampled_frame = AudioFrame::empty();
            if resampler.is_none() {
                eprintln!("ℹ️ 初始化重採樣器...");
                match ffmpeg::software::resampling::Context::get(in_format, in_channel_layout, in_sample_rate, out_format, out_channel_layout, out_sample_rate) {
                    Ok(new_resampler) => {
                        resampler = Some(new_resampler);
                        if let Some(r) = resampler.as_mut() {
                            // Errors from `run` and from appending are silently
                            // ignored for this first frame.
                            if let Ok(_) = r.run(&decoded_frame, &mut resampled_frame) {
                                if resampled_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &resampled_frame); }
                            }
                        }
                    },
                    Err(init_err) => { eprintln!("❌ 無法初始化重採樣器:{}. 跳過此幀。", init_err); error_count += 1; continue; }
                }
            } else {
                let run_result = resampler.as_mut().unwrap().run(&decoded_frame, &mut resampled_frame);
                match run_result {
                    Ok(delay_opt) => {
                        if resampled_frame.samples() > 0 { if let Err(e) = append_frame_samples(&mut decoded_samples, &resampled_frame) { eprintln!("⚠️ 追加樣本失敗:{}", e); } }
                        // NOTE(review): the resampler is flushed in the middle
                        // of the stream whenever `run` reports a delay, and the
                        // loop body only executes while `flush` returns
                        // `Ok(Some(_))` — samples written by a call that
                        // returns `Ok(None)` are never appended. Confirm
                        // against ffmpeg-next's `flush` semantics.
                        if let Some(_delay) = delay_opt {
                            if let Some(r) = resampler.as_mut() {
                                let mut flush_frame = AudioFrame::empty();
                                while let Ok(Some(_)) = r.flush(&mut flush_frame) { if flush_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &flush_frame); } }
                            }
                        }
                    },
                    Err(e) => {
                        // Recoverability is decided by matching on the error's
                        // display text.
                        let err_msg = format!("{}", e);
                        if err_msg.contains("Output changed") || err_msg.contains("Invalid") {
                            eprintln!("\n⚠️ 檢測到音頻參數變化 ('{}'),重置重採樣器...", err_msg);
                            drop(resampler.take()); error_count += 1;
                            // Rebuild from the current frame's parameters and
                            // retry this frame once.
                            match ffmpeg::software::resampling::Context::get(in_format, in_channel_layout, in_sample_rate, out_format, out_channel_layout, out_sample_rate) {
                                Ok(new_resampler) => {
                                    resampler = Some(new_resampler);
                                    let mut retry_frame = AudioFrame::empty();
                                    if let Some(r) = resampler.as_mut() {
                                        if let Ok(_) = r.run(&decoded_frame, &mut retry_frame) { if retry_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &retry_frame); } }
                                    }
                                },
                                // `resampler` stays `None`, so the next frame
                                // goes through the lazy-init branch again.
                                Err(init_err) => { eprintln!("❌ 重置重採樣器失敗:{}. 跳過此幀。", init_err); }
                            }
                        } else { eprintln!("❌ 嚴重錯誤:{}. 停止處理。", e); return Err(e).context("Audio resampling failed unrecoverably"); }
                    }
                }
            }
        }
    }
    if error_count > 0 { eprintln!("⚠️ 總共跳過或重置了 {} 次音頻處理。", error_count); }
    // Signal end of stream and drain the frames the decoder still holds; a
    // failure of `send_eof` is ignored.
    decoder.send_eof().ok();
    let mut decoded_frame = AudioFrame::empty();
    while decoder.receive_frame(&mut decoded_frame).is_ok() {
        if let Some(r) = resampler.as_mut() {
            let mut resampled_frame = AudioFrame::empty();
            if let Ok(_) = r.run(&decoded_frame, &mut resampled_frame) { if resampled_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &resampled_frame); } }
        }
    }
    // Final flush of the resampler.
    // NOTE(review): same `Ok(Some(_))` loop condition as the mid-stream flush —
    // output of a flush call returning `Ok(None)` is not appended; confirm.
    if let Some(r) = resampler.as_mut() {
        let mut flush_frame = AudioFrame::empty();
        while let Ok(Some(_)) = r.flush(&mut flush_frame) { if flush_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &flush_frame); } }
    }
    Ok(decoded_samples)
}
|
||||||
|
|
||||||
|
fn append_frame_samples(buffer: &mut Vec<f32>, frame: &AudioFrame) -> Result<()> {
|
||||||
|
if frame.format() != ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed) { return Err(anyhow::anyhow!("Unexpected audio sample format")); }
|
||||||
|
let data = frame.data(0);
|
||||||
|
let len = frame.samples();
|
||||||
|
let byte_len = len * 4;
|
||||||
|
if data.len() < byte_len { return Err(anyhow::anyhow!("Audio frame data size mismatch")); }
|
||||||
|
let slice = &data[0..byte_len];
|
||||||
|
let ptr = slice.as_ptr() as *const f32;
|
||||||
|
let f32_slice = unsafe { std::slice::from_raw_parts(ptr, len) };
|
||||||
|
buffer.extend_from_slice(f32_slice);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
401
src/main.rs.bak
Normal file
401
src/main.rs.bak
Normal file
@@ -0,0 +1,401 @@
|
|||||||
|
use anyhow::{Context, Result};
|
||||||
|
use clap::Parser;
|
||||||
|
use ffmpeg_next as ffmpeg;
|
||||||
|
use ffmpeg::format::input;
|
||||||
|
use ffmpeg::media::Type;
|
||||||
|
use ffmpeg::codec::context::Context as CodecContext;
|
||||||
|
use ffmpeg::frame::Audio as AudioFrame;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
use std::io::{self, Write};
|
||||||
|
use std::ffi::c_void;
|
||||||
|
|
||||||
|
// 導入 Whisper 相關類型
|
||||||
|
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
|
||||||
|
use whisper_rs_sys::{whisper_context, whisper_state};
|
||||||
|
|
||||||
|
const WHISPER_SAMPLE_RATE: u32 = 16000;
|
||||||
|
|
||||||
|
// --- 全局狀態 (使用原子變量確保線程安全) ---
|
||||||
|
static START_TIME_NANOS: AtomicU64 = AtomicU64::new(0);
|
||||||
|
static IS_FIRST_PROGRESS: AtomicBool = AtomicBool::new(true);
|
||||||
|
// 存儲音頻總時長 (秒 * 100, 用於整數運算避免浮點數原子操作)
|
||||||
|
static TOTAL_DURATION_SEC_X100: AtomicU64 = AtomicU64::new(0);
|
||||||
|
// -----------------------------------------
|
||||||
|
|
||||||
|
// Command-line arguments, parsed by clap's derive API.
// NOTE: the `///` lines on the fields below are picked up by clap as the
// `--help` text, so they are user-facing runtime strings and are left as is.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// 輸入的視頻或音頻文件路徑
    // Positional argument: the media file to transcribe.
    #[arg(index = 1)]
    input_file: String,

    /// Whisper 模型文件路徑 (.bin 格式)
    #[arg(short, long)]
    model: String,

    /// 目標語言代碼 (例如:zh, en)。留空則自動檢測。
    // `None` makes `main` call `set_language(None)`.
    #[arg(short, long, default_value = None)]
    language: Option<String>,

    /// 是否顯示詳細日誌
    // Forwarded to `set_print_progress` in `main`.
    #[arg(short, long, default_value_t = false)]
    verbose: bool,
}
|
||||||
|
|
||||||
|
/// C 語言風格的回調函數
|
||||||
|
unsafe extern "C" fn progress_callback(
|
||||||
|
_ctx: *mut whisper_context,
|
||||||
|
_state: *mut whisper_state,
|
||||||
|
progress: i32,
|
||||||
|
_user_data: *mut c_void,
|
||||||
|
) {
|
||||||
|
let now_nanos = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos() as u64;
|
||||||
|
|
||||||
|
// 初始化開始時間
|
||||||
|
let start_nanos = START_TIME_NANOS.load(Ordering::Relaxed);
|
||||||
|
if start_nanos == 0 {
|
||||||
|
if let Err(existing) = START_TIME_NANOS.compare_exchange(
|
||||||
|
0, now_nanos, Ordering::Relaxed, Ordering::Relaxed
|
||||||
|
) {
|
||||||
|
if existing == 0 { START_TIME_NANOS.store(now_nanos, Ordering::Relaxed); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let actual_start_nanos = START_TIME_NANOS.load(Ordering::Relaxed);
|
||||||
|
if actual_start_nanos == 0 { return; }
|
||||||
|
|
||||||
|
let elapsed_nanos = now_nanos.saturating_sub(actual_start_nanos);
|
||||||
|
let elapsed_sec = elapsed_nanos as f64 / 1_000_000_000.0;
|
||||||
|
|
||||||
|
// 獲取總時長 (還原為 f64)
|
||||||
|
let total_sec_x100 = TOTAL_DURATION_SEC_X100.load(Ordering::Relaxed);
|
||||||
|
let total_sec = if total_sec_x100 > 0 {
|
||||||
|
total_sec_x100 as f64 / 100.0
|
||||||
|
} else {
|
||||||
|
1.0 // 防禦性默認值
|
||||||
|
};
|
||||||
|
|
||||||
|
// 計算當前已處理的時長 (基於百分比估算,因為 whisper 回調只給百分比)
|
||||||
|
// 注意:Whisper 的 progress 是基於編碼器處理的塊數,與時間大致成正比
|
||||||
|
let current_percent = (progress as f64).min(100.0) / 100.0;
|
||||||
|
let processed_sec = total_sec * current_percent;
|
||||||
|
|
||||||
|
if progress > 0 {
|
||||||
|
let percent = current_percent * 100.0;
|
||||||
|
|
||||||
|
// 計算剩餘時間
|
||||||
|
let rem_sec = if percent >= 99.9 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
let est_total = elapsed_sec / current_percent;
|
||||||
|
(est_total - elapsed_sec).max(0.0)
|
||||||
|
};
|
||||||
|
|
||||||
|
let rem_min = rem_sec as u32 / 60;
|
||||||
|
let rem_s = rem_sec as u32 % 60;
|
||||||
|
|
||||||
|
let elapsed_min = elapsed_sec as u32 / 60;
|
||||||
|
let elapsed_s = elapsed_sec as u32 % 60;
|
||||||
|
|
||||||
|
let stderr = io::stderr();
|
||||||
|
let mut handle = stderr.lock();
|
||||||
|
|
||||||
|
if IS_FIRST_PROGRESS.load(Ordering::Relaxed) {
|
||||||
|
let _ = writeln!(handle);
|
||||||
|
IS_FIRST_PROGRESS.store(false, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 顯示格式:[已處理時長 / 總時長] 百分比 | 耗時 | 剩餘
|
||||||
|
// 為了美觀,時長格式化為 MM:SS
|
||||||
|
let proc_min = processed_sec as u32 / 60;
|
||||||
|
let proc_s = processed_sec as u32 % 60;
|
||||||
|
let tot_min = total_sec as u32 / 60;
|
||||||
|
let tot_s = total_sec as u32 % 60;
|
||||||
|
|
||||||
|
let _ = write!(
|
||||||
|
handle,
|
||||||
|
"\r🔄 識別進度: [{:02}:{:02}/{:02}:{:02}] {:.1}% | 耗時: {:02}:{:02} | 預計剩餘: {:02}:{:02} ",
|
||||||
|
proc_min, proc_s, tot_min, tot_s, percent,
|
||||||
|
elapsed_min, elapsed_s,
|
||||||
|
rem_min, rem_s
|
||||||
|
);
|
||||||
|
|
||||||
|
if percent >= 99.9 {
|
||||||
|
let _ = writeln!(handle);
|
||||||
|
} else {
|
||||||
|
let _ = handle.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let args = Args::parse();
|
||||||
|
|
||||||
|
if !Path::new(&args.input_file).exists() {
|
||||||
|
anyhow::bail!("錯誤:找不到輸入文件 '{}'", args.input_file);
|
||||||
|
}
|
||||||
|
if !Path::new(&args.model).exists() {
|
||||||
|
anyhow::bail!("錯誤:找不到模型文件 '{}'", args.model);
|
||||||
|
}
|
||||||
|
|
||||||
|
ffmpeg::init().context("Failed to initialize FFmpeg")?;
|
||||||
|
|
||||||
|
println!("🎬 正在處理文件:{}", args.input_file);
|
||||||
|
println!("🧠 載入模型:{}", args.model);
|
||||||
|
|
||||||
|
let audio_data = extract_audio_to_f32(&args.input_file)
|
||||||
|
.context("Failed to extract and process audio")?;
|
||||||
|
|
||||||
|
if audio_data.is_empty() {
|
||||||
|
anyhow::bail!("錯誤:未能從文件中提取有效音頻數據");
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_samples = audio_data.len() as f64;
|
||||||
|
let total_duration_sec = total_samples / WHISPER_SAMPLE_RATE as f64;
|
||||||
|
|
||||||
|
println!("✅ 音頻準備完成 (樣本數:{}, 時長:{:.2} 分鐘)", total_samples as usize, total_duration_sec / 60.0);
|
||||||
|
|
||||||
|
// --- 將總時長存入全局原子變量,供回調使用 ---
|
||||||
|
// 存儲為 整数 (秒 * 100) 以避免浮點數原子操作的複雜性
|
||||||
|
TOTAL_DURATION_SEC_X100.store((total_duration_sec * 100.0) as u64, Ordering::Relaxed);
|
||||||
|
// -------------------------------------------
|
||||||
|
|
||||||
|
println!("⏳ 正在初始化 Whisper 模型...");
|
||||||
|
let ctx = WhisperContext::new_with_params(
|
||||||
|
&args.model,
|
||||||
|
WhisperContextParameters::default()
|
||||||
|
).context("Failed to load Whisper model")?;
|
||||||
|
|
||||||
|
let mut state = ctx.create_state()
|
||||||
|
.context("Failed to create Whisper state")?;
|
||||||
|
|
||||||
|
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
|
||||||
|
params.set_print_special(false);
|
||||||
|
params.set_print_progress(args.verbose);
|
||||||
|
params.set_print_realtime(false);
|
||||||
|
params.set_print_timestamps(false);
|
||||||
|
params.set_single_segment(false);
|
||||||
|
|
||||||
|
if let Some(lang) = &args.language {
|
||||||
|
params.set_language(Some(lang.as_str()));
|
||||||
|
} else {
|
||||||
|
params.set_language(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 重置全局狀態
|
||||||
|
START_TIME_NANOS.store(0, Ordering::Relaxed);
|
||||||
|
IS_FIRST_PROGRESS.store(true, Ordering::Relaxed);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
params.set_progress_callback(Some(progress_callback));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("🎙️ 正在進行語音識別 (這可能需要幾分鐘)...");
|
||||||
|
state.full(params, &audio_data)
|
||||||
|
.context("Whisper inference failed")?;
|
||||||
|
|
||||||
|
eprintln!("\n✅ 識別完成!");
|
||||||
|
|
||||||
|
println!("\n=== 逐字稿 (帶時間碼) ===\n");
|
||||||
|
|
||||||
|
let num_segments = state.full_n_segments()
|
||||||
|
.context("Failed to get segment count")?;
|
||||||
|
|
||||||
|
if num_segments == 0 {
|
||||||
|
println!("未檢測到任何語音內容。");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 0..num_segments {
|
||||||
|
let text = match state.full_get_segment_text(i) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("⚠️ 警告:串流 #{} 包含無效字符 (UTF-8 Error),已跳過。詳情:{}", i, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let start_ts = match state.full_get_segment_t0(i) {
|
||||||
|
Ok(t) => t as f64 / 100.0,
|
||||||
|
Err(_) => {
|
||||||
|
eprintln!("⚠️ 警告:無法獲取串流 #{} 的時間戳", i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let time_str = format_time(start_ts);
|
||||||
|
let clean_text = text.trim();
|
||||||
|
|
||||||
|
if !clean_text.is_empty() {
|
||||||
|
println!("[{}] {}", time_str, clean_text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.language.is_none() {
|
||||||
|
if let Ok(lang_id) = state.full_lang_id_from_state() {
|
||||||
|
let lang_name = get_language_name(lang_id);
|
||||||
|
println!("\n🌍 自動檢測語言:{} (ID: {})", lang_name, lang_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ... (其餘函數 get_language_name, format_time, extract_audio_to_f32, append_frame_samples 保持不變) ...
|
||||||
|
/// Returns the English display name for a language ID.
///
/// The caller passes the ID reported by the Whisper state after automatic
/// language detection. IDs `0..=99` are looked up in a fixed table; any other
/// value (negative or `>= 100`) yields `"Unknown"`.
fn get_language_name(lang_id: i32) -> &'static str {
    // The position of each entry is its language ID.
    const LANGUAGE_NAMES: [&str; 100] = [
        "English", "Chinese", "German", "Spanish",
        "Russian", "Korean", "French", "Japanese",
        "Portuguese", "Turkish", "Polish", "Catalan",
        "Dutch", "Arabic", "Swedish", "Italian",
        "Indonesian", "Hindi", "Finnish", "Vietnamese",
        "Hebrew", "Ukrainian", "Greek", "Malay",
        "Czech", "Romanian", "Danish", "Hungarian",
        "Tamil", "Norwegian", "Thai", "Urdu",
        "Croatian", "Bulgarian", "Lithuanian", "Latin",
        "Maori", "Malayalam", "Welsh", "Slovak",
        "Telugu", "Persian", "Latvian", "Bengali",
        "Serbian", "Azerbaijani", "Slovenian", "Kannada",
        "Estonian", "Macedonian", "Breton", "Basque",
        "Icelandic", "Armenian", "Nepali", "Mongolian",
        "Bosnian", "Kazakh", "Albanian", "Swahili",
        "Galician", "Marathi", "Punjabi", "Sinhala",
        "Khmer", "Shona", "Yoruba", "Somali",
        "Afrikaans", "Occitan", "Georgian", "Belarusian",
        "Tajik", "Sindhi", "Gujarati", "Amharic",
        "Yiddish", "Lao", "Uzbek", "Faroese",
        "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
        "Maltese", "Sanskrit", "Luxembourgish", "Myanmar",
        "Tibetan", "Tagalog", "Malagasy", "Assamese",
        "Tatar", "Hawaiian", "Lingala", "Hausa",
        "Bashkir", "Javanese", "Sundanese", "Cantonese",
    ];

    // Negative IDs cannot index the table; reject them before the cast.
    if lang_id < 0 {
        return "Unknown";
    }

    LANGUAGE_NAMES
        .get(lang_id as usize)
        .copied()
        .unwrap_or("Unknown")
}
|
||||||
|
|
||||||
|
/// Formats a duration given in seconds as `HH:MM:SS.mmm`.
///
/// The fractional part is rounded to the nearest millisecond; when it rounds
/// up to a full second the carry is applied to the seconds field. Negative and
/// NaN inputs render as `00:00:00.000`. Durations of 100 hours or more widen
/// the hour field instead of wrapping. Inputs too large for `u64` seconds
/// (including `+inf`) saturate rather than overflowing.
fn format_time(seconds: f64) -> String {
    // Float-to-integer `as` casts saturate and map NaN to 0, so this cannot panic.
    let whole_secs = seconds as u64;

    // `fract()` equals `seconds - whole_secs` whenever the whole part fits in a
    // u64, but stays below one second for huge values (and is NaN for
    // infinities, which casts to 0). Computing the remainder by hand against a
    // saturated `whole_secs` produced a bogus fraction and an overflowing carry.
    let millis = (seconds.fract() * 1000.0).round() as u32;

    // Rounding can produce exactly 1000 ms (e.g. x.9996 s): carry into seconds.
    let (millis, whole_secs) = if millis >= 1000 {
        (millis - 1000, whole_secs.saturating_add(1))
    } else {
        (millis, whole_secs)
    };

    let hours = whole_secs / 3600;
    let minutes = (whole_secs % 3600) / 60;
    let secs = whole_secs % 60;
    format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, secs, millis)
}
|
||||||
|
|
||||||
|
fn extract_audio_to_f32(input_path: &str) -> Result<Vec<f32>> {
|
||||||
|
let mut ictx = input(&input_path)?;
|
||||||
|
let stream_index = ictx.streams().best(Type::Audio)
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("未找到音頻串流"))?
|
||||||
|
.index();
|
||||||
|
let stream = ictx.stream(stream_index).expect("Stream should exist");
|
||||||
|
let codec_params = stream.parameters();
|
||||||
|
let codec_id = codec_params.id();
|
||||||
|
let codec_decoder = ffmpeg::codec::decoder::find(codec_id)
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("未找到對應的解碼器"))?;
|
||||||
|
let mut context = CodecContext::new_with_codec(codec_decoder);
|
||||||
|
context.set_parameters(codec_params)?;
|
||||||
|
let mut decoder = context.decoder().audio()?;
|
||||||
|
let out_format = ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed);
|
||||||
|
let out_channel_layout = ffmpeg::channel_layout::ChannelLayout::MONO;
|
||||||
|
let out_sample_rate = WHISPER_SAMPLE_RATE;
|
||||||
|
let mut resampler: Option<ffmpeg::software::resampling::Context> = None;
|
||||||
|
let mut decoded_samples = Vec::new();
|
||||||
|
let mut error_count = 0;
|
||||||
|
for (stream, packet) in ictx.packets() {
|
||||||
|
if stream.index() != stream_index { continue; }
|
||||||
|
if let Err(e) = decoder.send_packet(&packet) { eprintln!("⚠️ 發送包失敗:{}", e); continue; }
|
||||||
|
let mut decoded_frame = AudioFrame::empty();
|
||||||
|
while decoder.receive_frame(&mut decoded_frame).is_ok() {
|
||||||
|
let in_format = decoded_frame.format();
|
||||||
|
let mut in_channel_layout = decoded_frame.channel_layout();
|
||||||
|
let in_sample_rate = decoded_frame.rate();
|
||||||
|
let channels = decoded_frame.channels();
|
||||||
|
if channels == 0 || in_channel_layout.is_empty() {
|
||||||
|
let safe_layout = if channels == 1 { ffmpeg::channel_layout::ChannelLayout::MONO } else if channels > 1 { ffmpeg::channel_layout::ChannelLayout::STEREO } else { ffmpeg::channel_layout::ChannelLayout::STEREO };
|
||||||
|
in_channel_layout = safe_layout;
|
||||||
|
}
|
||||||
|
let mut resampled_frame = AudioFrame::empty();
|
||||||
|
if resampler.is_none() {
|
||||||
|
eprintln!("ℹ️ 初始化重採樣器...");
|
||||||
|
match ffmpeg::software::resampling::Context::get(in_format, in_channel_layout, in_sample_rate, out_format, out_channel_layout, out_sample_rate) {
|
||||||
|
Ok(new_resampler) => {
|
||||||
|
resampler = Some(new_resampler);
|
||||||
|
if let Some(r) = resampler.as_mut() {
|
||||||
|
if let Ok(_) = r.run(&decoded_frame, &mut resampled_frame) {
|
||||||
|
if resampled_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &resampled_frame); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(init_err) => { eprintln!("❌ 無法初始化重採樣器:{}. 跳過此幀。", init_err); error_count += 1; continue; }
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let run_result = resampler.as_mut().unwrap().run(&decoded_frame, &mut resampled_frame);
|
||||||
|
match run_result {
|
||||||
|
Ok(delay_opt) => {
|
||||||
|
if resampled_frame.samples() > 0 { if let Err(e) = append_frame_samples(&mut decoded_samples, &resampled_frame) { eprintln!("⚠️ 追加樣本失敗:{}", e); } }
|
||||||
|
if let Some(_delay) = delay_opt {
|
||||||
|
if let Some(r) = resampler.as_mut() {
|
||||||
|
let mut flush_frame = AudioFrame::empty();
|
||||||
|
while let Ok(Some(_)) = r.flush(&mut flush_frame) { if flush_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &flush_frame); } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
let err_msg = format!("{}", e);
|
||||||
|
if err_msg.contains("Output changed") || err_msg.contains("Invalid") {
|
||||||
|
eprintln!("\n⚠️ 檢測到音頻參數變化 ('{}'),重置重採樣器...", err_msg);
|
||||||
|
drop(resampler.take()); error_count += 1;
|
||||||
|
match ffmpeg::software::resampling::Context::get(in_format, in_channel_layout, in_sample_rate, out_format, out_channel_layout, out_sample_rate) {
|
||||||
|
Ok(new_resampler) => {
|
||||||
|
resampler = Some(new_resampler);
|
||||||
|
let mut retry_frame = AudioFrame::empty();
|
||||||
|
if let Some(r) = resampler.as_mut() {
|
||||||
|
if let Ok(_) = r.run(&decoded_frame, &mut retry_frame) { if retry_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &retry_frame); } }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(init_err) => { eprintln!("❌ 重置重採樣器失敗:{}. 跳過此幀。", init_err); }
|
||||||
|
}
|
||||||
|
} else { eprintln!("❌ 嚴重錯誤:{}. 停止處理。", e); return Err(e).context("Audio resampling failed unrecoverably"); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if error_count > 0 { eprintln!("⚠️ 總共跳過或重置了 {} 次音頻處理。", error_count); }
|
||||||
|
decoder.send_eof().ok();
|
||||||
|
let mut decoded_frame = AudioFrame::empty();
|
||||||
|
while decoder.receive_frame(&mut decoded_frame).is_ok() {
|
||||||
|
if let Some(r) = resampler.as_mut() {
|
||||||
|
let mut resampled_frame = AudioFrame::empty();
|
||||||
|
if let Ok(_) = r.run(&decoded_frame, &mut resampled_frame) { if resampled_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &resampled_frame); } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(r) = resampler.as_mut() {
|
||||||
|
let mut flush_frame = AudioFrame::empty();
|
||||||
|
while let Ok(Some(_)) = r.flush(&mut flush_frame) { if flush_frame.samples() > 0 { let _ = append_frame_samples(&mut decoded_samples, &flush_frame); } }
|
||||||
|
}
|
||||||
|
Ok(decoded_samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_frame_samples(buffer: &mut Vec<f32>, frame: &AudioFrame) -> Result<()> {
|
||||||
|
if frame.format() != ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed) { return Err(anyhow::anyhow!("Unexpected audio sample format")); }
|
||||||
|
let data = frame.data(0);
|
||||||
|
let len = frame.samples();
|
||||||
|
let byte_len = len * 4;
|
||||||
|
if data.len() < byte_len { return Err(anyhow::anyhow!("Audio frame data size mismatch")); }
|
||||||
|
let slice = &data[0..byte_len];
|
||||||
|
let ptr = slice.as_ptr() as *const f32;
|
||||||
|
let f32_slice = unsafe { std::slice::from_raw_parts(ptr, len) };
|
||||||
|
buffer.extend_from_slice(f32_slice);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user