feat(voice): add ai.json voice presets, pitch shift, and config path fix
- Voice settings per model in src/voice/ai.json (v2/v3 presets)
- Pitch shift via rodio speed() for higher-pitched voice (default 1.35)
- Load .env from $cfg/ai.syui.log/.env (fixed path)
- Remove Google STT dependency, ElevenLabs-only

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
16
src/voice/ai.json
Normal file
16
src/voice/ai.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"eleven_multilingual_v2": {
|
||||
"stability": 0.5,
|
||||
"similarity_boost": 0.8,
|
||||
"style": 0.2,
|
||||
"speed": 0.85,
|
||||
"pitch": 1.35
|
||||
},
|
||||
"eleven_v3": {
|
||||
"stability": 0.5,
|
||||
"similarity_boost": 0.8,
|
||||
"style": 0.2,
|
||||
"speed": 0.75,
|
||||
"pitch": 1.35
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,9 @@
|
||||
pub mod tts;
|
||||
pub mod stt;
|
||||
|
||||
/// Load .env file from cwd, setting vars that aren't already set.
|
||||
/// Load .env file from $cfg/ai.syui.log/.env
|
||||
fn load_dotenv() {
|
||||
for dir in &[".", env!("CARGO_MANIFEST_DIR")] {
|
||||
let path = std::path::Path::new(dir).join(".env");
|
||||
let path = format!("{}/ai.syui.log/.env", crate::config::config_dir());
|
||||
if let Ok(content) = std::fs::read_to_string(&path) {
|
||||
for line in content.lines() {
|
||||
let line = line.trim();
|
||||
@@ -17,8 +16,6 @@ fn load_dotenv() {
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +65,7 @@ impl VoiceSystem {
|
||||
Ok(data) => data,
|
||||
Err(e) => { eprintln!("tts error: {e}"); return; }
|
||||
};
|
||||
if let Err(e) = tts::play_audio(&audio) {
|
||||
if let Err(e) = tts::play_audio(&audio, &self.config.tts_model) {
|
||||
eprintln!("audio play error: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,19 @@
|
||||
use std::io::{Cursor, Read};
|
||||
use std::io::Cursor;
|
||||
use rodio::Source;
|
||||
use crate::voice::VoiceConfig;
|
||||
|
||||
const VOICE_SETTINGS: &str = include_str!("ai.json");
|
||||
|
||||
/// Load voice settings for the current model from ai.json.
|
||||
fn load_settings(model: &str) -> serde_json::Value {
|
||||
let all: serde_json::Value = serde_json::from_str(VOICE_SETTINGS).unwrap_or_default();
|
||||
all.get(model).cloned().unwrap_or_else(|| all["eleven_v3"].clone())
|
||||
}
|
||||
|
||||
/// Synthesize text to audio bytes via ElevenLabs API.
|
||||
pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
||||
let settings = load_settings(&config.tts_model);
|
||||
|
||||
let url = format!(
|
||||
"https://api.elevenlabs.io/v1/text-to-speech/{}",
|
||||
config.tts_voice_id
|
||||
@@ -12,8 +23,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
||||
"text": text,
|
||||
"model_id": config.tts_model,
|
||||
"voice_settings": {
|
||||
"stability": 0.5,
|
||||
"similarity_boost": 0.75
|
||||
"stability": settings["stability"],
|
||||
"similarity_boost": settings["similarity_boost"],
|
||||
"style": settings["style"],
|
||||
"speed": settings["speed"]
|
||||
}
|
||||
});
|
||||
|
||||
@@ -37,8 +50,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
||||
.map_err(|e| format!("TTS read error: {e}"))
|
||||
}
|
||||
|
||||
/// Play audio bytes (MP3) using rodio.
|
||||
pub fn play_audio(data: &[u8]) -> Result<(), String> {
|
||||
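/// Play audio bytes (MP3) using rodio, applying a playback-speed factor (the pitch shift)
/// taken from the `TTS_PITCH` env var, else the model's `pitch` in ai.json, else 1.35.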
|
||||
pub fn play_audio(data: &[u8], model: &str) -> Result<(), String> {
|
||||
let settings = load_settings(model);
|
||||
|
||||
let (_stream, handle) = rodio::OutputStream::try_default()
|
||||
.map_err(|e| format!("audio output error: {e}"))?;
|
||||
let sink = rodio::Sink::try_new(&handle)
|
||||
@@ -48,7 +63,12 @@ pub fn play_audio(data: &[u8]) -> Result<(), String> {
|
||||
let source = rodio::Decoder::new(cursor)
|
||||
.map_err(|e| format!("audio decode error: {e}"))?;
|
||||
|
||||
sink.append(source);
|
||||
let pitch = std::env::var("TTS_PITCH")
|
||||
.ok()
|
||||
.and_then(|v| v.parse::<f32>().ok())
|
||||
.unwrap_or_else(|| settings["pitch"].as_f64().unwrap_or(1.35) as f32);
|
||||
|
||||
sink.append(source.speed(pitch));
|
||||
sink.sleep_until_end();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user