This commit is contained in:
2026-01-18 18:08:53 +09:00
commit 5fe9e0a3f9
56 changed files with 6679 additions and 0 deletions

244
src/lms/translate.rs Normal file
View File

@@ -0,0 +1,244 @@
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::env;
use std::fs;
use std::path::Path;
#[derive(Debug, Serialize)]
struct ChatMessage {
role: String,
content: String,
}
#[derive(Debug, Serialize)]
struct ChatRequest {
model: String,
messages: Vec<ChatMessage>,
}
#[derive(Debug, Deserialize)]
struct ChatChoice {
message: ChatMessageResponse,
}
#[derive(Debug, Deserialize)]
struct ChatMessageResponse {
content: String,
}
#[derive(Debug, Deserialize)]
struct ChatResponse {
choices: Vec<ChatChoice>,
}
/// Translate a file or folder
pub async fn run(input: &Path, from: &str, to: &str) -> Result<()> {
if input.is_dir() {
translate_folder(input, from, to).await
} else {
translate_file(input, from, to).await
}
}
async fn translate_text(
client: &reqwest::Client,
url: &str,
model: &str,
text: &str,
from: &str,
to: &str,
) -> Result<String> {
let from_lang = lang_name(from);
let to_lang = lang_name(to);
let system_content = "<|plamo:op|>dataset\ntranslation".to_string();
let user_content = format!(
"<|plamo:op|>input lang={}\n{}\n<|plamo:op|>output lang={}",
from_lang, text, to_lang
);
let req = ChatRequest {
model: model.to_string(),
messages: vec![
ChatMessage {
role: "system".to_string(),
content: system_content,
},
ChatMessage {
role: "user".to_string(),
content: user_content,
},
],
};
let res = client.post(url).json(&req).send().await?;
if !res.status().is_success() {
let status = res.status();
let body = res.text().await?;
return Err(anyhow!("Translation failed ({}): {}", status, body));
}
let chat_res: ChatResponse = res.json().await?;
chat_res
.choices
.first()
.map(|c| c.message.content.trim().to_string())
.ok_or_else(|| anyhow!("No translation result"))
}
async fn translate_file(input: &Path, from: &str, to: &str) -> Result<()> {
let translate_url =
env::var("TRANSLATE_URL").unwrap_or_else(|_| "http://127.0.0.1:1234/v1".to_string());
let model =
env::var("TRANSLATE_MODEL").unwrap_or_else(|_| "plamo-2-translate".to_string());
println!("Translating: {}", input.display());
// Read input JSON
let content = fs::read_to_string(input)?;
let mut record: serde_json::Value = serde_json::from_str(&content)?;
// Handle both direct format and wrapped format (with "value" field)
let value = if record.get("value").is_some() {
record.get_mut("value").unwrap()
} else {
&mut record
};
// Check if already translated
if value
.get("translations")
.and_then(|t| t.get(to))
.is_some()
{
println!(" Skipped (already has {} translation)", to);
return Ok(());
}
let client = reqwest::Client::new();
let url = format!("{}/chat/completions", translate_url);
// Translate title if exists
let translated_title = if let Some(title) = value.get("title").and_then(|v| v.as_str()) {
if !title.is_empty() {
Some(translate_text(&client, &url, &model, title, from, to).await?)
} else {
None
}
} else {
None
};
// Get and translate content
let text = value
.get("content")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow!("No 'content' field in JSON"))?;
let translated_content = translate_text(&client, &url, &model, text, from, to).await?;
// Add translation to value
let translations = value
.as_object_mut()
.ok_or_else(|| anyhow!("Invalid JSON"))?
.entry("translations")
.or_insert_with(|| serde_json::json!({}));
let mut translation_entry = serde_json::json!({
"content": translated_content
});
if let Some(title) = translated_title {
translation_entry
.as_object_mut()
.unwrap()
.insert("title".to_string(), serde_json::json!(title));
}
translations
.as_object_mut()
.ok_or_else(|| anyhow!("Invalid translations field"))?
.insert(to.to_string(), translation_entry);
// Write back
let output = serde_json::to_string_pretty(&record)?;
fs::write(input, output)?;
println!(" OK");
Ok(())
}
fn collect_json_files(dir: &Path, files: &mut Vec<std::path::PathBuf>) -> Result<()> {
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
collect_json_files(&path, files)?;
} else if path.extension().map(|e| e == "json").unwrap_or(false) {
// Skip non-post files (describe.json, self.json, index.json)
let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if filename != "describe.json" && filename != "self.json" && filename != "index.json" {
files.push(path);
}
}
}
Ok(())
}
async fn translate_folder(dir: &Path, from: &str, to: &str) -> Result<()> {
let mut files = Vec::new();
collect_json_files(dir, &mut files)?;
files.sort();
println!("Translating {} files ({} -> {})", files.len(), from, to);
let mut success = 0;
let mut skipped = 0;
let mut failed = 0;
for path in &files {
match translate_file(path, from, to).await {
Ok(_) => {
// Check if it was actually translated or skipped
let content = fs::read_to_string(&path)?;
let record: serde_json::Value = serde_json::from_str(&content)?;
let value = record.get("value").unwrap_or(&record);
if value
.get("translations")
.and_then(|t| t.get(to))
.is_some()
{
success += 1;
} else {
skipped += 1;
}
}
Err(e) => {
eprintln!(" ERROR {}: {}", path.display(), e);
failed += 1;
}
}
}
println!(
"\nDone: {} translated, {} skipped, {} failed",
success, skipped, failed
);
Ok(())
}
fn lang_name(code: &str) -> &str {
match code {
"ja" => "Japanese",
"en" => "English",
"zh" => "Chinese",
"ko" => "Korean",
"fr" => "French",
"de" => "German",
"es" => "Spanish",
_ => code,
}
}