add translate
This commit is contained in:
3
.env.example
Normal file
3
.env.example
Normal file
@@ -0,0 +1,3 @@
|
||||
# LMS Translation API
|
||||
TRANSLATE_URL=http://127.0.0.1:1234/v1
|
||||
TRANSLATE_MODEL=plamo-2-translate
|
||||
@@ -21,3 +21,4 @@ anyhow = "1.0"
|
||||
dirs = "5.0"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
rand = "0.8"
|
||||
dotenvy = "0.15"
|
||||
|
||||
10
src/commands/lang.rs
Normal file
10
src/commands/lang.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
use anyhow::Result;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::lms;
|
||||
|
||||
/// Translate content files from one language to another
|
||||
pub async fn translate(input: &str, from: &str, to: &str) -> Result<()> {
|
||||
let path = Path::new(input);
|
||||
lms::translate::run(path, from, to).await
|
||||
}
|
||||
@@ -2,3 +2,4 @@ pub mod auth;
|
||||
pub mod token;
|
||||
pub mod post;
|
||||
pub mod gen;
|
||||
pub mod lang;
|
||||
|
||||
1
src/lms/mod.rs
Normal file
1
src/lms/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod translate;
|
||||
236
src/lms/translate.rs
Normal file
236
src/lms/translate.rs
Normal file
@@ -0,0 +1,236 @@
|
||||
use anyhow::{anyhow, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ChatMessage {
|
||||
role: String,
|
||||
content: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ChatRequest {
|
||||
model: String,
|
||||
messages: Vec<ChatMessage>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChatChoice {
|
||||
message: ChatMessageResponse,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChatMessageResponse {
|
||||
content: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChatResponse {
|
||||
choices: Vec<ChatChoice>,
|
||||
}
|
||||
|
||||
/// Translate a file or folder
|
||||
pub async fn run(input: &Path, from: &str, to: &str) -> Result<()> {
|
||||
if input.is_dir() {
|
||||
translate_folder(input, from, to).await
|
||||
} else {
|
||||
translate_file(input, from, to).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn translate_text(
|
||||
client: &reqwest::Client,
|
||||
url: &str,
|
||||
model: &str,
|
||||
text: &str,
|
||||
from: &str,
|
||||
to: &str,
|
||||
) -> Result<String> {
|
||||
let from_lang = lang_name(from);
|
||||
let to_lang = lang_name(to);
|
||||
|
||||
let system_content = "<|plamo:op|>dataset\ntranslation".to_string();
|
||||
let user_content = format!(
|
||||
"<|plamo:op|>input lang={}\n{}\n<|plamo:op|>output lang={}",
|
||||
from_lang, text, to_lang
|
||||
);
|
||||
|
||||
let req = ChatRequest {
|
||||
model: model.to_string(),
|
||||
messages: vec![
|
||||
ChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: system_content,
|
||||
},
|
||||
ChatMessage {
|
||||
role: "user".to_string(),
|
||||
content: user_content,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let res = client.post(url).json(&req).send().await?;
|
||||
|
||||
if !res.status().is_success() {
|
||||
let status = res.status();
|
||||
let body = res.text().await?;
|
||||
return Err(anyhow!("Translation failed ({}): {}", status, body));
|
||||
}
|
||||
|
||||
let chat_res: ChatResponse = res.json().await?;
|
||||
chat_res
|
||||
.choices
|
||||
.first()
|
||||
.map(|c| c.message.content.trim().to_string())
|
||||
.ok_or_else(|| anyhow!("No translation result"))
|
||||
}
|
||||
|
||||
async fn translate_file(input: &Path, from: &str, to: &str) -> Result<()> {
|
||||
let translate_url =
|
||||
env::var("TRANSLATE_URL").unwrap_or_else(|_| "http://127.0.0.1:1234/v1".to_string());
|
||||
let model =
|
||||
env::var("TRANSLATE_MODEL").unwrap_or_else(|_| "plamo-2-translate".to_string());
|
||||
|
||||
println!("Translating: {}", input.display());
|
||||
|
||||
// Read input JSON
|
||||
let content = fs::read_to_string(input)?;
|
||||
let mut record: serde_json::Value = serde_json::from_str(&content)?;
|
||||
|
||||
// Handle both direct format and wrapped format (with "value" field)
|
||||
let value = if record.get("value").is_some() {
|
||||
record.get_mut("value").unwrap()
|
||||
} else {
|
||||
&mut record
|
||||
};
|
||||
|
||||
// Check if already translated
|
||||
if value
|
||||
.get("translations")
|
||||
.and_then(|t| t.get(to))
|
||||
.is_some()
|
||||
{
|
||||
println!(" Skipped (already has {} translation)", to);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{}/chat/completions", translate_url);
|
||||
|
||||
// Translate title if exists
|
||||
let translated_title = if let Some(title) = value.get("title").and_then(|v| v.as_str()) {
|
||||
if !title.is_empty() {
|
||||
Some(translate_text(&client, &url, &model, title, from, to).await?)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Get and translate content
|
||||
let text = value
|
||||
.get("content")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow!("No 'content' field in JSON"))?;
|
||||
|
||||
let translated_content = translate_text(&client, &url, &model, text, from, to).await?;
|
||||
|
||||
// Add translation to value
|
||||
let translations = value
|
||||
.as_object_mut()
|
||||
.ok_or_else(|| anyhow!("Invalid JSON"))?
|
||||
.entry("translations")
|
||||
.or_insert_with(|| serde_json::json!({}));
|
||||
|
||||
let mut translation_entry = serde_json::json!({
|
||||
"content": translated_content
|
||||
});
|
||||
|
||||
if let Some(title) = translated_title {
|
||||
translation_entry
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.insert("title".to_string(), serde_json::json!(title));
|
||||
}
|
||||
|
||||
translations
|
||||
.as_object_mut()
|
||||
.ok_or_else(|| anyhow!("Invalid translations field"))?
|
||||
.insert(to.to_string(), translation_entry);
|
||||
|
||||
// Write back
|
||||
let output = serde_json::to_string_pretty(&record)?;
|
||||
fs::write(input, output)?;
|
||||
|
||||
println!(" OK");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn translate_folder(dir: &Path, from: &str, to: &str) -> Result<()> {
|
||||
let mut files: Vec<_> = fs::read_dir(dir)?
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.path()
|
||||
.extension()
|
||||
.map(|ext| ext == "json")
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
|
||||
files.sort_by_key(|e| e.path());
|
||||
|
||||
println!("Translating {} files ({} -> {})", files.len(), from, to);
|
||||
|
||||
let mut success = 0;
|
||||
let mut skipped = 0;
|
||||
let mut failed = 0;
|
||||
|
||||
for entry in files {
|
||||
let path = entry.path();
|
||||
match translate_file(&path, from, to).await {
|
||||
Ok(_) => {
|
||||
// Check if it was actually translated or skipped
|
||||
let content = fs::read_to_string(&path)?;
|
||||
let record: serde_json::Value = serde_json::from_str(&content)?;
|
||||
let value = record.get("value").unwrap_or(&record);
|
||||
if value
|
||||
.get("translations")
|
||||
.and_then(|t| t.get(to))
|
||||
.is_some()
|
||||
{
|
||||
success += 1;
|
||||
} else {
|
||||
skipped += 1;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" ERROR {}: {}", path.display(), e);
|
||||
failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!(
|
||||
"\nDone: {} translated, {} skipped, {} failed",
|
||||
success, skipped, failed
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Expands a two-letter language code into the English language name used
/// in the translation prompt.
///
/// The match is exact (case-sensitive); a code that is not in the table is
/// returned unchanged.
fn lang_name(code: &str) -> &str {
    const NAMES: [(&str, &str); 7] = [
        ("ja", "Japanese"),
        ("en", "English"),
        ("zh", "Chinese"),
        ("ko", "Korean"),
        ("fr", "French"),
        ("de", "German"),
        ("es", "Spanish"),
    ];

    NAMES
        .iter()
        .find(|&&(abbr, _)| abbr == code)
        .map_or(code, |&(_, name)| name)
}
|
||||
21
src/main.rs
21
src/main.rs
@@ -1,5 +1,6 @@
|
||||
mod commands;
|
||||
mod lexicons;
|
||||
mod lms;
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::{Parser, Subcommand};
|
||||
@@ -72,7 +73,7 @@ enum Commands {
|
||||
#[command(alias = "s")]
|
||||
Sync {
|
||||
/// Output directory
|
||||
#[arg(short, long, default_value = "content")]
|
||||
#[arg(short, long, default_value = "public/content")]
|
||||
output: String,
|
||||
},
|
||||
|
||||
@@ -85,10 +86,25 @@ enum Commands {
|
||||
#[arg(short, long, default_value = "./src/lexicons")]
|
||||
output: String,
|
||||
},
|
||||
|
||||
/// Translate content files
|
||||
Lang {
|
||||
/// Input file or directory
|
||||
input: String,
|
||||
/// Source language
|
||||
#[arg(short, long, default_value = "ja")]
|
||||
from: String,
|
||||
/// Target language
|
||||
#[arg(short, long, default_value = "en")]
|
||||
to: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Load .env file if exists
|
||||
dotenvy::dotenv().ok();
|
||||
|
||||
let cli = Cli::parse();
|
||||
|
||||
match cli.command {
|
||||
@@ -113,6 +129,9 @@ async fn main() -> Result<()> {
|
||||
Commands::Gen { input, output } => {
|
||||
commands::gen::generate(&input, &output)?;
|
||||
}
|
||||
Commands::Lang { input, from, to } => {
|
||||
commands::lang::translate(&input, &from, &to).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user