From 867c2209b1fa56224445380b28fee4bd4a0f7340 Mon Sep 17 00:00:00 2001 From: Gabe Date: Sat, 27 Sep 2025 23:33:33 +0800 Subject: [PATCH] feat: support batch langdetect --- src/apis/index.js | 53 +++++++++++++++++++++----------------- src/apis/trans.js | 44 +++++++++++++++++++++++-------- src/config/rules.js | 2 +- src/config/url.js | 1 + src/libs/batchQueue.js | 17 +++++------- src/views/Options/Rules.js | 2 +- 6 files changed, 71 insertions(+), 48 deletions(-) diff --git a/src/apis/index.js b/src/apis/index.js index 7a57c22..9cfeb67 100644 --- a/src/apis/index.js +++ b/src/apis/index.js @@ -2,16 +2,17 @@ import queryString from "query-string"; import { fetchData } from "../libs/fetch"; import { URL_CACHE_TRAN, + URL_CACHE_DELANG, KV_SALT_SYNC, OPT_LANGS_TO_SPEC, OPT_LANGS_SPEC_DEFAULT, API_SPE_TYPES, DEFAULT_API_SETTING, + OPT_TRANS_MICROSOFT, } from "../config"; import { sha256 } from "../libs/utils"; -import { msAuth } from "../libs/auth"; import { kissLog } from "../libs/log"; -import { handleTranslate } from "./trans"; +import { handleTranslate, handleMicrosoftLangdetect } from "./trans"; import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache"; import { getBatchQueue } from "../libs/batchQueue"; @@ -83,24 +84,25 @@ export const apiGoogleLangdetect = async (text) => { * @returns */ export const apiMicrosoftLangdetect = async (text) => { - const token = await msAuth(); - const input = - "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0"; - const init = { - headers: { - "Content-type": "application/json", - Authorization: `Bearer ${token}`, - }, - method: "POST", - body: JSON.stringify([{ Text: text }]), - }; - const res = await fetchData(input, init, { - useCache: true, - }); + const cacheOpts = { text, detector: OPT_TRANS_MICROSOFT }; + const cacheInput = `${URL_CACHE_DELANG}?${queryString.stringify(cacheOpts)}`; + const cache = await getHttpCachePolyfill(cacheInput); + if (cache) { + return cache; + } - if (res?.[0]?.language) { - await putHttpCachePolyfill(input, init, res); - return res[0].language; + const key = `${URL_CACHE_DELANG}_${OPT_TRANS_MICROSOFT}`; + const queue = getBatchQueue(key, handleMicrosoftLangdetect, { + batchInterval: 500, + batchSize: 20, + batchLength: 100000, + }); + const lang = await queue.addTask(text); + + if (lang) { + putHttpCachePolyfill(cacheInput, null, lang); + console.log("handleMicrosoftLangdetect", { text, lang }); + return lang; } return ""; @@ -255,7 +257,9 @@ export const apiTranslate = async ({ let trText = ""; let srLang = ""; if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) { - const queue = getBatchQueue({ + const { apiSlug, batchInterval, batchSize, batchLength } = apiSetting; + const key = `${apiSlug}_${fromLang}_${toLang}`; + const queue = getBatchQueue(key, handleTranslate, { from, to, fromLang, @@ -264,17 +268,18 @@ export const apiTranslate = async ({ docInfo, apiSetting, usePool, - taskFn: handleTranslate, + batchInterval, + batchSize, + batchLength, }); - const tranlation = await queue.addTask({ text }); + const tranlation = await queue.addTask(text); if (Array.isArray(tranlation)) { [trText, srLang = ""] = tranlation; } else if (typeof tranlation === "string") { trText = tranlation; } } else { - const translations = await handleTranslate({ - texts: [text], + const translations = await handleTranslate([text], { from, to, fromLang, diff --git a/src/apis/trans.js b/src/apis/trans.js index 23127e0..76526fb 100644 --- a/src/apis/trans.js +++ b/src/apis/trans.js @@ -782,17 +782,10 @@ export const parseTransRes = async ( * @param {*} param0 * @returns */ -export const handleTranslate = async ({ - texts, - from, - to, - fromLang, - toLang, - langMap, - docInfo, - apiSetting, - usePool, -}) => { +export const handleTranslate = async ( + texts = [], + { from, to, fromLang, toLang, langMap, docInfo, apiSetting, usePool } +) => { let history = null; let hisMsgs = []; const { @@ -850,3 +843,32 @@ export const handleTranslate = async ({ ...apiSetting, }); }; + +/** + * Microsoft语言识别聚合及解析 + * @param {*} texts + * @returns + */ +export const handleMicrosoftLangdetect = async (texts = []) => { + const token = await msAuth(); + const input = + "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0"; + const init = { + headers: { + "Content-type": "application/json", + Authorization: `Bearer ${token}`, + }, + method: "POST", + body: JSON.stringify(texts.map((text) => ({ Text: text }))), + }; + + const res = await fetchData(input, init, { + useCache: false, + }); + + if (Array.isArray(res)) { + return res.map((r) => r.language); + } + + return []; +}; diff --git a/src/config/rules.js b/src/config/rules.js index 3b3c438..5929395 100644 --- a/src/config/rules.js +++ b/src/config/rules.js @@ -132,7 +132,7 @@ export const GLOBLA_RULE = { transTag: DEFAULT_TRANS_TAG, // 译文元素标签 transTitle: "false", // 是否同时翻译页面标题 transSelected: "true", // 是否启用划词翻译 - detectRemote: "false", // 是否使用远程语言检测 + detectRemote: "true", // 是否使用远程语言检测 skipLangs: [], // 不翻译的语言 // fixerSelector: "", // 修复函数选择器 (暂时作废) // fixerFunc: "-", // 修复函数 (暂时作废) diff --git a/src/config/url.js b/src/config/url.js index 03b646a..9b03e60 100644 --- a/src/config/url.js +++ b/src/config/url.js @@ -1,6 +1,7 @@ import { APP_LCNAME } from "./app"; export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`; +export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`; export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker"; export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy"; diff --git a/src/libs/batchQueue.js b/src/libs/batchQueue.js index 204725f..bc2ad3b 100644 --- a/src/libs/batchQueue.js +++ b/src/libs/batchQueue.js @@ -11,11 +11,12 @@ import { * @returns */ const BatchQueue = ( - { taskFn, ...args }, + taskFn, { batchInterval = DEFAULT_BATCH_INTERVAL, batchSize = DEFAULT_BATCH_SIZE, batchLength = DEFAULT_BATCH_LENGTH, + ...args } = {} ) => { const queue = []; @@ -23,11 +24,7 @@ const BatchQueue = ( let timer = null; const sendBatchRequest = async (payloads) => { - const texts = payloads.map((item) => item.text); - return taskFn({ - ...args, - texts, - }); + return taskFn(payloads, args); }; const processQueue = async () => { @@ -47,7 +44,7 @@ const BatchQueue = ( let endIndex = 0; for (const task of queue) { - const textLength = task.payload.text?.length || 0; + const textLength = task.payload?.length || 0; if ( endIndex >= batchSize || (currentBatchLength + textLength > batchLength && endIndex > 0) @@ -135,14 +132,12 @@ const queueMap = new Map(); /** * 获取批处理实例 */ -export const getBatchQueue = (args) => { - const { from, to, apiSetting } = args; - const key = `${apiSetting.apiSlug}_${from}_${to}`; +export const getBatchQueue = (key, taskFn, args) => { if (queueMap.has(key)) { return queueMap.get(key); } - const queue = BatchQueue(args, apiSetting); + const queue = BatchQueue(taskFn, args); queueMap.set(key, queue); return queue; }; diff --git a/src/views/Options/Rules.js b/src/views/Options/Rules.js index e548a72..23cd482 100644 --- a/src/views/Options/Rules.js +++ b/src/views/Options/Rules.js @@ -113,7 +113,7 @@ function RuleFields({ rule, rules, setShow, setKeyword }) { transTag = DEFAULT_TRANS_TAG, transTitle = "false", transSelected = "true", - detectRemote = "false", + detectRemote = "true", skipLangs = [], // fixerSelector = "", // fixerFunc = "-",