add translate

This commit is contained in:
2026-01-18 16:43:42 +09:00
parent 8f8b2b7d28
commit 16d9463156
7 changed files with 272 additions and 1 deletions

3
.env.example Normal file
View File

@@ -0,0 +1,3 @@
# LMS Translation API
TRANSLATE_URL=http://127.0.0.1:1234/v1
TRANSLATE_MODEL=plamo-2-translate

View File

@@ -21,3 +21,4 @@ anyhow = "1.0"
dirs = "5.0"
chrono = { version = "0.4", features = ["serde"] }
rand = "0.8"
dotenvy = "0.15"

10
src/commands/lang.rs Normal file
View File

@@ -0,0 +1,10 @@
use anyhow::Result;
use std::path::Path;
use crate::lms;
/// CLI entry point for the `lang` subcommand.
///
/// Interprets `input` as a filesystem path (file or directory) and delegates
/// to [`lms::translate::run`], translating from language `from` to `to`.
pub async fn translate(input: &str, from: &str, to: &str) -> Result<()> {
    lms::translate::run(Path::new(input), from, to).await
}

View File

@@ -2,3 +2,4 @@ pub mod auth;
pub mod token;
pub mod post;
pub mod gen;
pub mod lang;

1
src/lms/mod.rs Normal file
View File

@@ -0,0 +1 @@
pub mod translate;

236
src/lms/translate.rs Normal file
View File

@@ -0,0 +1,236 @@
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::env;
use std::fs;
use std::path::Path;
/// One message of the outgoing chat request.
///
/// Serialized as a JSON object with the keys `role` and `content`.
#[derive(Debug, Serialize)]
struct ChatMessage {
// Speaker role; this file sends "system" and "user" messages.
role: String,
// Text payload of the message.
content: String,
}
/// JSON body posted to the translation endpoint.
#[derive(Debug, Serialize)]
struct ChatRequest {
// Name of the model that should handle the request.
model: String,
// Messages sent to the model, in order.
messages: Vec<ChatMessage>,
}
/// One entry of the `choices` array in the endpoint's response.
#[derive(Debug, Deserialize)]
struct ChatChoice {
// The generated message for this choice.
message: ChatMessageResponse,
}
/// Message object nested inside a response choice; only `content` is read.
#[derive(Debug, Deserialize)]
struct ChatMessageResponse {
// Generated text (the translation, as used by `translate_text`).
content: String,
}
/// Top-level response body; only the `choices` array is deserialized.
#[derive(Debug, Deserialize)]
struct ChatResponse {
// Candidate completions; `translate_text` uses the first one.
choices: Vec<ChatChoice>,
}
/// Translate `input` from language `from` to language `to`.
///
/// A directory is handed to `translate_folder`; anything else is treated as a
/// single file and handed to `translate_file`.
pub async fn run(input: &Path, from: &str, to: &str) -> Result<()> {
    match input.is_dir() {
        true => translate_folder(input, from, to).await,
        false => translate_file(input, from, to).await,
    }
}
/// Send `text` to the translation model and return the translated string.
///
/// Builds a two-message chat request (a fixed system message plus a user
/// message that wraps `text` in `<|plamo:op|>` input/output language markers),
/// posts it as JSON to `url`, and returns the trimmed content of the first
/// choice in the response.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the server answers with a
/// non-success status (the status and response body are included in the
/// error), when the body cannot be decoded, or when `choices` is empty.
async fn translate_text(
    client: &reqwest::Client,
    url: &str,
    model: &str,
    text: &str,
    from: &str,
    to: &str,
) -> Result<String> {
    let prompt = format!(
        "<|plamo:op|>input lang={}\n{}\n<|plamo:op|>output lang={}",
        lang_name(from),
        text,
        lang_name(to)
    );

    // Small constructor so the message list below stays readable.
    let message = |role: &str, content: String| ChatMessage {
        role: role.to_string(),
        content,
    };
    let payload = ChatRequest {
        model: model.to_string(),
        messages: vec![
            message("system", "<|plamo:op|>dataset\ntranslation".to_string()),
            message("user", prompt),
        ],
    };

    let response = client.post(url).json(&payload).send().await?;
    let status = response.status();
    if !status.is_success() {
        let body = response.text().await?;
        return Err(anyhow!("Translation failed ({}): {}", status, body));
    }

    let parsed: ChatResponse = response.json().await?;
    match parsed.choices.first() {
        Some(choice) => Ok(choice.message.content.trim().to_string()),
        None => Err(anyhow!("No translation result")),
    }
}
/// Translate a single JSON content file in place.
///
/// The file must contain a JSON object with a string `content` field and an
/// optional string `title` field, either at the top level or nested under a
/// top-level `value` key. The translated text is stored under
/// `translations.<to>` (as `content` and, when a non-empty title exists,
/// `title`) and the file is rewritten as pretty-printed JSON.
///
/// If `translations.<to>` already exists the file is left untouched.
///
/// The endpoint base URL and model name are read from the `TRANSLATE_URL` and
/// `TRANSLATE_MODEL` environment variables, falling back to
/// `http://127.0.0.1:1234/v1` and `plamo-2-translate`.
///
/// # Errors
///
/// Fails when the file cannot be read or parsed, when `content` is missing,
/// when an existing `translations` field is not an object, when a translation
/// request fails, or when the file cannot be written back.
async fn translate_file(input: &Path, from: &str, to: &str) -> Result<()> {
    let translate_url =
        env::var("TRANSLATE_URL").unwrap_or_else(|_| "http://127.0.0.1:1234/v1".to_string());
    let model =
        env::var("TRANSLATE_MODEL").unwrap_or_else(|_| "plamo-2-translate".to_string());

    println!("Translating: {}", input.display());

    // Read input JSON
    let content = fs::read_to_string(input)?;
    let mut record: serde_json::Value = serde_json::from_str(&content)?;

    // Handle both direct format and wrapped format (with "value" field)
    let value = if record.get("value").is_some() {
        record
            .get_mut("value")
            .expect("\"value\" key was just checked to exist")
    } else {
        &mut record
    };

    // Check if already translated
    if value
        .get("translations")
        .and_then(|t| t.get(to))
        .is_some()
    {
        println!("  Skipped (already has {} translation)", to);
        return Ok(());
    }

    // Validate the document BEFORE any network request, so a malformed file
    // fails fast instead of spending a translation call first.
    let text = value
        .get("content")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow!("No 'content' field in JSON"))?;
    if let Some(existing) = value.get("translations") {
        if !existing.is_object() {
            return Err(anyhow!("Invalid translations field"));
        }
    }

    let client = reqwest::Client::new();
    // Tolerate a trailing slash in TRANSLATE_URL ("…/v1/") so the joined URL
    // never contains "//chat/completions".
    let url = format!(
        "{}/chat/completions",
        translate_url.trim_end_matches('/')
    );

    // Translate title if it exists and is non-empty
    let title = value
        .get("title")
        .and_then(|v| v.as_str())
        .filter(|t| !t.is_empty());
    let translated_title = match title {
        Some(title) => Some(translate_text(&client, &url, &model, title, from, to).await?),
        None => None,
    };

    // Translate content
    let translated_content = translate_text(&client, &url, &model, text, from, to).await?;

    // Build the entry stored under translations.<to>
    let mut translation_entry = serde_json::Map::new();
    translation_entry.insert(
        "content".to_string(),
        serde_json::Value::String(translated_content),
    );
    if let Some(title) = translated_title {
        translation_entry.insert("title".to_string(), serde_json::Value::String(title));
    }

    // Add translation to value, creating the `translations` object on demand
    value
        .as_object_mut()
        .ok_or_else(|| anyhow!("Invalid JSON"))?
        .entry("translations")
        .or_insert_with(|| serde_json::json!({}))
        .as_object_mut()
        .ok_or_else(|| anyhow!("Invalid translations field"))?
        .insert(to.to_string(), serde_json::Value::Object(translation_entry));

    // Write back
    let output = serde_json::to_string_pretty(&record)?;
    fs::write(input, output)?;

    println!("  OK");
    Ok(())
}
/// Translate every `.json` file directly inside `dir` (non-recursive).
///
/// Files are processed in path order via `translate_file`. A failure on one
/// file is reported on stderr and counted, but does not stop the run. A
/// summary of translated / skipped / failed counts is printed at the end.
///
/// # Errors
///
/// Fails only when `dir` itself cannot be listed.
async fn translate_folder(dir: &Path, from: &str, to: &str) -> Result<()> {
    let mut paths: Vec<_> = fs::read_dir(dir)?
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().map(|ext| ext == "json").unwrap_or(false))
        .collect();
    paths.sort();

    println!("Translating {} files ({} -> {})", paths.len(), from, to);

    let mut success = 0;
    let mut skipped = 0;
    let mut failed = 0;

    for path in &paths {
        // Decide "skipped" BEFORE calling translate_file. Afterwards the
        // target entry exists both for a skipped file and for a freshly
        // translated one, so a post-check can never tell them apart.
        // An unreadable or unparsable file counts as "not yet translated";
        // translate_file will then report the real error for it.
        let already_translated = fs::read_to_string(path)
            .ok()
            .and_then(|raw| serde_json::from_str::<serde_json::Value>(&raw).ok())
            .map(|record| {
                record
                    .get("value")
                    .unwrap_or(&record)
                    .get("translations")
                    .and_then(|t| t.get(to))
                    .is_some()
            })
            .unwrap_or(false);

        match translate_file(path, from, to).await {
            Ok(()) if already_translated => skipped += 1,
            Ok(()) => success += 1,
            Err(e) => {
                eprintln!("  ERROR {}: {}", path.display(), e);
                failed += 1;
            }
        }
    }

    println!(
        "\nDone: {} translated, {} skipped, {} failed",
        success, skipped, failed
    );
    Ok(())
}
/// Map a two-letter language code to the English language name used in the
/// translation prompt.
///
/// Codes that are not in the table are returned unchanged.
fn lang_name(code: &str) -> &str {
    const KNOWN: [(&str, &str); 7] = [
        ("ja", "Japanese"),
        ("en", "English"),
        ("zh", "Chinese"),
        ("ko", "Korean"),
        ("fr", "French"),
        ("de", "German"),
        ("es", "Spanish"),
    ];

    KNOWN
        .iter()
        .find(|(abbr, _)| *abbr == code)
        .map_or(code, |&(_, name)| name)
}

View File

@@ -1,5 +1,6 @@
mod commands;
mod lexicons;
mod lms;
use anyhow::Result;
use clap::{Parser, Subcommand};
@@ -72,7 +73,7 @@ enum Commands {
#[command(alias = "s")]
Sync {
/// Output directory
#[arg(short, long, default_value = "content")]
#[arg(short, long, default_value = "public/content")]
output: String,
},
@@ -85,10 +86,25 @@ enum Commands {
#[arg(short, long, default_value = "./src/lexicons")]
output: String,
},
/// Translate content files
Lang {
/// Input file or directory
input: String,
/// Source language
#[arg(short, long, default_value = "ja")]
from: String,
/// Target language
#[arg(short, long, default_value = "en")]
to: String,
},
}
#[tokio::main]
async fn main() -> Result<()> {
// Load .env file if exists
dotenvy::dotenv().ok();
let cli = Cli::parse();
match cli.command {
@@ -113,6 +129,9 @@ async fn main() -> Result<()> {
Commands::Gen { input, output } => {
commands::gen::generate(&input, &output)?;
}
Commands::Lang { input, from, to } => {
commands::lang::translate(&input, &from, &to).await?;
}
}
Ok(())