From 16d946315616c9d79d873a4630eb71b98f5e5f3e Mon Sep 17 00:00:00 2001
From: syui
Date: Sun, 18 Jan 2026 16:43:42 +0900
Subject: [PATCH] add translate

---
Review note: src/lms/translate.rs was revised after the original commit
(restored generic parameters, per-file outcome reporting, shared client);
the blob id on its index line has not been regenerated.

 .env.example         |   3 +
 Cargo.toml           |   1 +
 src/commands/lang.rs |  10 ++
 src/commands/mod.rs  |   1 +
 src/lms/mod.rs       |   1 +
 src/lms/translate.rs | 280 +++++++++++++++++++++++++++++++++++++++++++
 src/main.rs          |  21 +++-
 7 files changed, 316 insertions(+), 1 deletion(-)
 create mode 100644 .env.example
 create mode 100644 src/commands/lang.rs
 create mode 100644 src/lms/mod.rs
 create mode 100644 src/lms/translate.rs

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..647e7fa
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+# LMS Translation API
+TRANSLATE_URL=http://127.0.0.1:1234/v1
+TRANSLATE_MODEL=plamo-2-translate
diff --git a/Cargo.toml b/Cargo.toml
index d4485b1..79929ea 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,3 +21,4 @@ anyhow = "1.0"
 dirs = "5.0"
 chrono = { version = "0.4", features = ["serde"] }
 rand = "0.8"
+dotenvy = "0.15"
diff --git a/src/commands/lang.rs b/src/commands/lang.rs
new file mode 100644
index 0000000..84d677b
--- /dev/null
+++ b/src/commands/lang.rs
@@ -0,0 +1,10 @@
+use anyhow::Result;
+use std::path::Path;
+
+use crate::lms;
+
+/// Translate content files from one language to another
+pub async fn translate(input: &str, from: &str, to: &str) -> Result<()> {
+    let path = Path::new(input);
+    lms::translate::run(path, from, to).await
+}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
index 78d475d..f454245 100644
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -2,3 +2,4 @@ pub mod auth;
 pub mod token;
 pub mod post;
 pub mod gen;
+pub mod lang;
diff --git a/src/lms/mod.rs b/src/lms/mod.rs
new file mode 100644
index 0000000..d6edf7c
--- /dev/null
+++ b/src/lms/mod.rs
@@ -0,0 +1 @@
+pub mod translate;
diff --git a/src/lms/translate.rs b/src/lms/translate.rs
new file mode 100644
index 0000000..4b9928e
--- /dev/null
+++ b/src/lms/translate.rs
@@ -0,0 +1,280 @@
+use anyhow::{anyhow, Result};
+use serde::{Deserialize, Serialize};
+use std::env;
+use std::fs;
+use std::path::Path;
+
+/// Base URL used when `TRANSLATE_URL` is not set.
+const DEFAULT_TRANSLATE_URL: &str = "http://127.0.0.1:1234/v1";
+/// Model name used when `TRANSLATE_MODEL` is not set.
+const DEFAULT_TRANSLATE_MODEL: &str = "plamo-2-translate";
+
+/// One message of a chat-completions request.
+#[derive(Debug, Serialize)]
+struct ChatMessage {
+    role: String,
+    content: String,
+}
+
+/// Request body posted to the chat-completions endpoint.
+#[derive(Debug, Serialize)]
+struct ChatRequest {
+    model: String,
+    messages: Vec<ChatMessage>,
+}
+
+/// One entry of the `choices` array in the response.
+#[derive(Debug, Deserialize)]
+struct ChatChoice {
+    message: ChatMessageResponse,
+}
+
+/// Message carried by a [`ChatChoice`].
+#[derive(Debug, Deserialize)]
+struct ChatMessageResponse {
+    content: String,
+}
+
+/// Response body; only the fields read by this module are declared.
+#[derive(Debug, Deserialize)]
+struct ChatResponse {
+    choices: Vec<ChatChoice>,
+}
+
+/// HTTP client and endpoint settings shared by every request of one run.
+struct Translator {
+    client: reqwest::Client,
+    url: String,
+    model: String,
+}
+
+impl Translator {
+    /// Read `TRANSLATE_URL` and `TRANSLATE_MODEL`, falling back to the defaults.
+    fn from_env() -> Self {
+        let base =
+            env::var("TRANSLATE_URL").unwrap_or_else(|_| DEFAULT_TRANSLATE_URL.to_string());
+        let model =
+            env::var("TRANSLATE_MODEL").unwrap_or_else(|_| DEFAULT_TRANSLATE_MODEL.to_string());
+
+        Self {
+            client: reqwest::Client::new(),
+            // A trailing slash in TRANSLATE_URL must not yield "//chat/completions".
+            url: format!("{}/chat/completions", base.trim_end_matches('/')),
+            model,
+        }
+    }
+}
+
+/// What `translate_file` did with a file it processed without error.
+enum Outcome {
+    /// A new translation was requested and written back.
+    Translated,
+    /// The file already had a translation for the target language.
+    Skipped,
+}
+
+/// Translate a file or folder
+///
+/// A directory is processed through `translate_folder`; any other path is
+/// handed to `translate_file` as a single JSON file.
+pub async fn run(input: &Path, from: &str, to: &str) -> Result<()> {
+    let translator = Translator::from_env();
+
+    if input.is_dir() {
+        translate_folder(&translator, input, from, to).await
+    } else {
+        translate_file(&translator, input, from, to).await?;
+        Ok(())
+    }
+}
+
+/// Send `text` to the chat-completions endpoint and return the trimmed reply.
+///
+/// Fails on transport errors, non-success HTTP statuses, undecodable bodies,
+/// and responses whose `choices` array is empty.
+async fn translate_text(
+    client: &reqwest::Client,
+    url: &str,
+    model: &str,
+    text: &str,
+    from: &str,
+    to: &str,
+) -> Result<String> {
+    let from_lang = lang_name(from);
+    let to_lang = lang_name(to);
+
+    let system_content = "<|plamo:op|>dataset\ntranslation".to_string();
+    let user_content = format!(
+        "<|plamo:op|>input lang={}\n{}\n<|plamo:op|>output lang={}",
+        from_lang, text, to_lang
+    );
+
+    let req = ChatRequest {
+        model: model.to_string(),
+        messages: vec![
+            ChatMessage {
+                role: "system".to_string(),
+                content: system_content,
+            },
+            ChatMessage {
+                role: "user".to_string(),
+                content: user_content,
+            },
+        ],
+    };
+
+    let res = client.post(url).json(&req).send().await?;
+
+    if !res.status().is_success() {
+        let status = res.status();
+        // Still report the status when the error body itself cannot be read.
+        let body = res.text().await.unwrap_or_default();
+        return Err(anyhow!("Translation failed ({}): {}", status, body));
+    }
+
+    let chat_res: ChatResponse = res.json().await?;
+    chat_res
+        .choices
+        .first()
+        .map(|c| c.message.content.trim().to_string())
+        .ok_or_else(|| anyhow!("No translation result"))
+}
+
+/// Translate one JSON file in place.
+///
+/// The record is either the document itself or wraps it in a `"value"` field.
+/// The result is stored under `translations.<to>` with a `content` key and,
+/// when the source has a non-empty `title`, a `title` key.
+async fn translate_file(
+    translator: &Translator,
+    input: &Path,
+    from: &str,
+    to: &str,
+) -> Result<Outcome> {
+    let Translator { client, url, model } = translator;
+
+    println!("Translating: {}", input.display());
+
+    // Read input JSON
+    let content = fs::read_to_string(input)?;
+    let mut record: serde_json::Value = serde_json::from_str(&content)?;
+
+    // Handle both direct format and wrapped format (with "value" field).
+    // The presence check and the mutable lookup stay separate so the fallback
+    // branch is free to borrow `record` itself.
+    let value = if record.get("value").is_some() {
+        record
+            .get_mut("value")
+            .expect("\"value\" was found by the check above")
+    } else {
+        &mut record
+    };
+
+    // Check if already translated
+    if value.get("translations").and_then(|t| t.get(to)).is_some() {
+        println!(" Skipped (already has {} translation)", to);
+        return Ok(Outcome::Skipped);
+    }
+
+    // Validate the required field first so a malformed file costs no request.
+    let text = value
+        .get("content")
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| anyhow!("No 'content' field in JSON"))?;
+    let title = value
+        .get("title")
+        .and_then(|v| v.as_str())
+        .filter(|t| !t.is_empty());
+
+    // Translate title if exists
+    let translated_title = match title {
+        Some(title) => Some(translate_text(client, url, model, title, from, to).await?),
+        None => None,
+    };
+    let translated_content = translate_text(client, url, model, text, from, to).await?;
+
+    let mut entry = serde_json::Map::new();
+    entry.insert("content".to_string(), translated_content.into());
+    if let Some(title) = translated_title {
+        entry.insert("title".to_string(), title.into());
+    }
+
+    // Add translation to value
+    value
+        .as_object_mut()
+        .ok_or_else(|| anyhow!("Invalid JSON"))?
+        .entry("translations")
+        .or_insert_with(|| serde_json::json!({}))
+        .as_object_mut()
+        .ok_or_else(|| anyhow!("Invalid translations field"))?
+        .insert(to.to_string(), serde_json::Value::Object(entry));
+
+    // Write back
+    let output = serde_json::to_string_pretty(&record)?;
+    fs::write(input, output)?;
+
+    println!(" OK");
+
+    Ok(Outcome::Translated)
+}
+
+/// Translate every `*.json` file directly inside `dir`, in path order.
+///
+/// A failing file is reported on stderr and counted; it does not stop the
+/// remaining files from being processed.
+async fn translate_folder(
+    translator: &Translator,
+    dir: &Path,
+    from: &str,
+    to: &str,
+) -> Result<()> {
+    let mut files: Vec<_> = fs::read_dir(dir)?
+        .filter_map(|e| e.ok())
+        .map(|e| e.path())
+        .filter(|p| p.extension().map(|ext| ext == "json").unwrap_or(false))
+        .collect();
+
+    files.sort();
+
+    println!("Translating {} files ({} -> {})", files.len(), from, to);
+
+    let mut success = 0;
+    let mut skipped = 0;
+    let mut failed = 0;
+
+    for path in &files {
+        // The outcome comes straight from `translate_file`; re-reading the file
+        // cannot tell a fresh translation from a pre-existing one.
+        match translate_file(translator, path, from, to).await {
+            Ok(Outcome::Translated) => success += 1,
+            Ok(Outcome::Skipped) => skipped += 1,
+            Err(e) => {
+                eprintln!(" ERROR {}: {}", path.display(), e);
+                failed += 1;
+            }
+        }
+    }
+
+    println!(
+        "\nDone: {} translated, {} skipped, {} failed",
+        success, skipped, failed
+    );
+
+    Ok(())
+}
+
+/// Map a language code to the English language name used in the prompt.
+///
+/// Unknown codes are passed through unchanged.
+fn lang_name(code: &str) -> &str {
+    match code {
+        "ja" => "Japanese",
+        "en" => "English",
+        "zh" => "Chinese",
+        "ko" => "Korean",
+        "fr" => "French",
+        "de" => "German",
+        "es" => "Spanish",
+        _ => code,
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 2cff85a..48f74af 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
 mod commands;
 mod lexicons;
+mod lms;
 
 use anyhow::Result;
 use clap::{Parser, Subcommand};
@@ -72,7 +73,7 @@ enum Commands {
     #[command(alias = "s")]
     Sync {
         /// Output directory
-        #[arg(short, long, default_value = "content")]
+        #[arg(short, long, default_value = "public/content")]
         output: String,
     },
 
@@ -85,10 +86,25 @@ enum Commands {
         #[arg(short, long, default_value = "./src/lexicons")]
         output: String,
     },
+
+    /// Translate content files
+    Lang {
+        /// Input file or directory
+        input: String,
+        /// Source language
+        #[arg(short, long, default_value = "ja")]
+        from: String,
+        /// Target language
+        #[arg(short, long, default_value = "en")]
+        to: String,
+    },
 }
 
 #[tokio::main]
 async fn main() -> Result<()> {
+    // Load .env file if exists
+    dotenvy::dotenv().ok();
+
     let cli = Cli::parse();
 
     match cli.command {
@@ -113,6 +129,9 @@ async fn main() -> Result<()> {
         Commands::Gen { input, output } => {
             commands::gen::generate(&input, &output)?;
         }
+        Commands::Lang { input, from, to } => {
+            commands::lang::translate(&input, &from, &to).await?;
+        }
     }
 
     Ok(())