feat(voice): add ai.json voice presets, pitch shift, and config path fix
- Voice settings per model in src/voice/ai.json (v2/v3 presets)
- Pitch shift via rodio speed() for higher-pitched voice (default 1.35)
- Load .env from $cfg/ai.syui.log/.env (fixed path)
- Remove Google STT dependency, ElevenLabs-only

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
16
src/voice/ai.json
Normal file
16
src/voice/ai.json
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"eleven_multilingual_v2": {
|
||||||
|
"stability": 0.5,
|
||||||
|
"similarity_boost": 0.8,
|
||||||
|
"style": 0.2,
|
||||||
|
"speed": 0.85,
|
||||||
|
"pitch": 1.35
|
||||||
|
},
|
||||||
|
"eleven_v3": {
|
||||||
|
"stability": 0.5,
|
||||||
|
"similarity_boost": 0.8,
|
||||||
|
"style": 0.2,
|
||||||
|
"speed": 0.75,
|
||||||
|
"pitch": 1.35
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,23 +1,20 @@
|
|||||||
pub mod tts;
|
pub mod tts;
|
||||||
pub mod stt;
|
pub mod stt;
|
||||||
|
|
||||||
/// Load .env file from cwd, setting vars that aren't already set.
|
/// Load .env file from $cfg/ai.syui.log/.env, setting vars that aren't already set.
|
||||||
fn load_dotenv() {
|
fn load_dotenv() {
|
||||||
for dir in &[".", env!("CARGO_MANIFEST_DIR")] {
|
let path = format!("{}/ai.syui.log/.env", crate::config::config_dir());
|
||||||
let path = std::path::Path::new(dir).join(".env");
|
if let Ok(content) = std::fs::read_to_string(&path) {
|
||||||
if let Ok(content) = std::fs::read_to_string(&path) {
|
for line in content.lines() {
|
||||||
for line in content.lines() {
|
let line = line.trim();
|
||||||
let line = line.trim();
|
if line.is_empty() || line.starts_with('#') { continue; }
|
||||||
if line.is_empty() || line.starts_with('#') { continue; }
|
if let Some((key, val)) = line.split_once('=') {
|
||||||
if let Some((key, val)) = line.split_once('=') {
|
let key = key.trim();
|
||||||
let key = key.trim();
|
let val = val.trim();
|
||||||
let val = val.trim();
|
if std::env::var(key).is_err() {
|
||||||
if std::env::var(key).is_err() {
|
std::env::set_var(key, val);
|
||||||
std::env::set_var(key, val);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -68,7 +65,7 @@ impl VoiceSystem {
|
|||||||
Ok(data) => data,
|
Ok(data) => data,
|
||||||
Err(e) => { eprintln!("tts error: {e}"); return; }
|
Err(e) => { eprintln!("tts error: {e}"); return; }
|
||||||
};
|
};
|
||||||
if let Err(e) = tts::play_audio(&audio) {
|
if let Err(e) = tts::play_audio(&audio, &self.config.tts_model) {
|
||||||
eprintln!("audio play error: {e}");
|
eprintln!("audio play error: {e}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,19 @@
|
|||||||
use std::io::{Cursor, Read};
|
use std::io::Cursor;
|
||||||
|
use rodio::Source;
|
||||||
use crate::voice::VoiceConfig;
|
use crate::voice::VoiceConfig;
|
||||||
|
|
||||||
|
const VOICE_SETTINGS: &str = include_str!("ai.json");
|
||||||
|
|
||||||
|
/// Load voice settings for the given model from ai.json, falling back to the "eleven_v3" preset when the model has no entry.
|
||||||
|
fn load_settings(model: &str) -> serde_json::Value {
|
||||||
|
let all: serde_json::Value = serde_json::from_str(VOICE_SETTINGS).unwrap_or_default();
|
||||||
|
all.get(model).cloned().unwrap_or_else(|| all["eleven_v3"].clone())
|
||||||
|
}
|
||||||
|
|
||||||
/// Synthesize text to audio bytes via ElevenLabs API.
|
/// Synthesize text to audio bytes via ElevenLabs API.
|
||||||
pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
||||||
|
let settings = load_settings(&config.tts_model);
|
||||||
|
|
||||||
let url = format!(
|
let url = format!(
|
||||||
"https://api.elevenlabs.io/v1/text-to-speech/{}",
|
"https://api.elevenlabs.io/v1/text-to-speech/{}",
|
||||||
config.tts_voice_id
|
config.tts_voice_id
|
||||||
@@ -12,8 +23,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
|||||||
"text": text,
|
"text": text,
|
||||||
"model_id": config.tts_model,
|
"model_id": config.tts_model,
|
||||||
"voice_settings": {
|
"voice_settings": {
|
||||||
"stability": 0.5,
|
"stability": settings["stability"],
|
||||||
"similarity_boost": 0.75
|
"similarity_boost": settings["similarity_boost"],
|
||||||
|
"style": settings["style"],
|
||||||
|
"speed": settings["speed"]
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -37,8 +50,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
|
|||||||
.map_err(|e| format!("TTS read error: {e}"))
|
.map_err(|e| format!("TTS read error: {e}"))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Play audio bytes (MP3) using rodio.
|
/// Play audio bytes (MP3) using rodio, pitch-shifted by the TTS_PITCH env var or, if unset/unparsable, the model's "pitch" from ai.json (default 1.35).
|
||||||
pub fn play_audio(data: &[u8]) -> Result<(), String> {
|
pub fn play_audio(data: &[u8], model: &str) -> Result<(), String> {
|
||||||
|
let settings = load_settings(model);
|
||||||
|
|
||||||
let (_stream, handle) = rodio::OutputStream::try_default()
|
let (_stream, handle) = rodio::OutputStream::try_default()
|
||||||
.map_err(|e| format!("audio output error: {e}"))?;
|
.map_err(|e| format!("audio output error: {e}"))?;
|
||||||
let sink = rodio::Sink::try_new(&handle)
|
let sink = rodio::Sink::try_new(&handle)
|
||||||
@@ -48,7 +63,12 @@ pub fn play_audio(data: &[u8]) -> Result<(), String> {
|
|||||||
let source = rodio::Decoder::new(cursor)
|
let source = rodio::Decoder::new(cursor)
|
||||||
.map_err(|e| format!("audio decode error: {e}"))?;
|
.map_err(|e| format!("audio decode error: {e}"))?;
|
||||||
|
|
||||||
sink.append(source);
|
let pitch = std::env::var("TTS_PITCH")
|
||||||
|
.ok()
|
||||||
|
.and_then(|v| v.parse::<f32>().ok())
|
||||||
|
.unwrap_or_else(|| settings["pitch"].as_f64().unwrap_or(1.35) as f32);
|
||||||
|
|
||||||
|
sink.append(source.speed(pitch));
|
||||||
sink.sleep_until_end();
|
sink.sleep_until_end();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user