feat(voice): add ai.json voice presets, pitch shift, and config path fix

- Voice settings per model in src/voice/ai.json (v2/v3 presets)
- Pitch shift via rodio speed() for higher-pitched voice (default 1.35)
- Load .env from $cfg/ai.syui.log/.env (fixed path)
- Remove Google STT dependency, ElevenLabs-only

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 08:47:16 +09:00
parent aaebea081d
commit 2b5fbef6cd
3 changed files with 54 additions and 21 deletions
--- a/src/voice/ai.json
+++ b/src/voice/ai.json
@@ -0,0 +1,16 @@
+{
+  "eleven_multilingual_v2": {
+    "stability": 0.5,
+    "similarity_boost": 0.8,
+    "style": 0.2,
+    "speed": 0.85,
+    "pitch": 1.35
+  },
+  "eleven_v3": {
+    "stability": 0.5,
+    "similarity_boost": 0.8,
+    "style": 0.2,
+    "speed": 0.75,
+    "pitch": 1.35
+  }
+}
--- a/src/voice/mod.rs
+++ b/src/voice/mod.rs
@@ -1,10 +1,9 @@
 pub mod tts;
 pub mod stt;

-/// Load .env file from cwd, setting vars that aren't already set.
+/// Load .env file from $cfg/ai.syui.log/.env
 fn load_dotenv() {
-    for dir in &[".", env!("CARGO_MANIFEST_DIR")] {
-        let path = std::path::Path::new(dir).join(".env");
+    let path = format!("{}/ai.syui.log/.env", crate::config::config_dir());
    if let Ok(content) = std::fs::read_to_string(&path) {
        for line in content.lines() {
            let line = line.trim();
@@ -17,8 +16,6 @@ fn load_dotenv() {
                }
            }
        }
-            break;
-        }
    }
 }

@@ -68,7 +65,7 @@ impl VoiceSystem {
            Ok(data) => data,
            Err(e) => { eprintln!("tts error: {e}"); return; }
        };
-        if let Err(e) = tts::play_audio(&audio) {
+        if let Err(e) = tts::play_audio(&audio, &self.config.tts_model) {
            eprintln!("audio play error: {e}");
        }
    }
--- a/src/voice/tts.rs
+++ b/src/voice/tts.rs
@@ -1,8 +1,19 @@
-use std::io::{Cursor, Read};
+use std::io::Cursor;
+use rodio::Source;
 use crate::voice::VoiceConfig;

+const VOICE_SETTINGS: &str = include_str!("ai.json");
+
+/// Load voice settings for `model` from the embedded ai.json, falling back to the "eleven_v3" preset.
+fn load_settings(model: &str) -> serde_json::Value {
+    let all: serde_json::Value = serde_json::from_str(VOICE_SETTINGS).unwrap_or_default();
+    all.get(model).cloned().unwrap_or_else(|| all["eleven_v3"].clone())
+}
+
 /// Synthesize text to audio bytes via ElevenLabs API.
 pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
+    let settings = load_settings(&config.tts_model);
+
    let url = format!(
        "https://api.elevenlabs.io/v1/text-to-speech/{}",
        config.tts_voice_id
@@ -12,8 +23,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
        "text": text,
        "model_id": config.tts_model,
        "voice_settings": {
-            "stability": 0.5,
-            "similarity_boost": 0.75
+            "stability": settings["stability"],
+            "similarity_boost": settings["similarity_boost"],
+            "style": settings["style"],
+            "speed": settings["speed"]
        }
    });

@@ -37,8 +50,10 @@ pub fn synthesize(config: &VoiceConfig, text: &str) -> Result<Vec<u8>, String> {
        .map_err(|e| format!("TTS read error: {e}"))
 }

-/// Play audio bytes (MP3) using rodio.
-pub fn play_audio(data: &[u8]) -> Result<(), String> {
+/// Play audio bytes (MP3) using rodio, pitch-shifted via playback speed (TTS_PITCH env var, else ai.json "pitch", else 1.35).
+pub fn play_audio(data: &[u8], model: &str) -> Result<(), String> {
+    let settings = load_settings(model);
+
    let (_stream, handle) = rodio::OutputStream::try_default()
        .map_err(|e| format!("audio output error: {e}"))?;
    let sink = rodio::Sink::try_new(&handle)
@@ -48,7 +63,12 @@ pub fn play_audio(data: &[u8]) -> Result<(), String> {
    let source = rodio::Decoder::new(cursor)
        .map_err(|e| format!("audio decode error: {e}"))?;

-    sink.append(source);
+    let pitch = std::env::var("TTS_PITCH")
+        .ok()
+        .and_then(|v| v.parse::<f32>().ok())
+        .unwrap_or_else(|| settings["pitch"].as_f64().unwrap_or(1.35) as f32);
+
+    sink.append(source.speed(pitch));
    sink.sleep_until_end();
    Ok(())
 }