feat: support batch langdetect

This commit is contained in:
Gabe
2025-09-27 23:33:33 +08:00
parent fffa448425
commit 867c2209b1
6 changed files with 71 additions and 48 deletions

View File

@@ -2,16 +2,17 @@ import queryString from "query-string";
import { fetchData } from "../libs/fetch"; import { fetchData } from "../libs/fetch";
import { import {
URL_CACHE_TRAN, URL_CACHE_TRAN,
URL_CACHE_DELANG,
KV_SALT_SYNC, KV_SALT_SYNC,
OPT_LANGS_TO_SPEC, OPT_LANGS_TO_SPEC,
OPT_LANGS_SPEC_DEFAULT, OPT_LANGS_SPEC_DEFAULT,
API_SPE_TYPES, API_SPE_TYPES,
DEFAULT_API_SETTING, DEFAULT_API_SETTING,
OPT_TRANS_MICROSOFT,
} from "../config"; } from "../config";
import { sha256 } from "../libs/utils"; import { sha256 } from "../libs/utils";
import { msAuth } from "../libs/auth";
import { kissLog } from "../libs/log"; import { kissLog } from "../libs/log";
import { handleTranslate } from "./trans"; import { handleTranslate, handleMicrosoftLangdetect } from "./trans";
import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache"; import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache";
import { getBatchQueue } from "../libs/batchQueue"; import { getBatchQueue } from "../libs/batchQueue";
@@ -83,24 +84,25 @@ export const apiGoogleLangdetect = async (text) => {
* @returns * @returns
*/ */
export const apiMicrosoftLangdetect = async (text) => { export const apiMicrosoftLangdetect = async (text) => {
const token = await msAuth(); const cacheOpts = { text, detector: OPT_TRANS_MICROSOFT };
const input = const cacheInput = `${URL_CACHE_DELANG}?${queryString.stringify(cacheOpts)}`;
"https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0"; const cache = await getHttpCachePolyfill(cacheInput);
const init = { if (cache) {
headers: { return cache;
"Content-type": "application/json", }
Authorization: `Bearer ${token}`,
},
method: "POST",
body: JSON.stringify([{ Text: text }]),
};
const res = await fetchData(input, init, {
useCache: true,
});
if (res?.[0]?.language) { const key = `${URL_CACHE_DELANG}_${OPT_TRANS_MICROSOFT}`;
await putHttpCachePolyfill(input, init, res); const queue = getBatchQueue(key, handleMicrosoftLangdetect, {
return res[0].language; batchInterval: 500,
batchSize: 20,
batchLength: 100000,
});
const lang = await queue.addTask(text);
if (lang) {
putHttpCachePolyfill(cacheInput, null, lang);
console.log("handleMicrosoftLangdetect", { text, lang });
return lang;
} }
return ""; return "";
@@ -255,7 +257,9 @@ export const apiTranslate = async ({
let trText = ""; let trText = "";
let srLang = ""; let srLang = "";
if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) { if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) {
const queue = getBatchQueue({ const { apiSlug, batchInterval, batchSize, batchLength } = apiSetting;
const key = `${apiSlug}_${fromLang}_${toLang}`;
const queue = getBatchQueue(key, handleTranslate, {
from, from,
to, to,
fromLang, fromLang,
@@ -264,17 +268,18 @@ export const apiTranslate = async ({
docInfo, docInfo,
apiSetting, apiSetting,
usePool, usePool,
taskFn: handleTranslate, batchInterval,
batchSize,
batchLength,
}); });
const tranlation = await queue.addTask({ text }); const tranlation = await queue.addTask(text);
if (Array.isArray(tranlation)) { if (Array.isArray(tranlation)) {
[trText, srLang = ""] = tranlation; [trText, srLang = ""] = tranlation;
} else if (typeof tranlation === "string") { } else if (typeof tranlation === "string") {
trText = tranlation; trText = tranlation;
} }
} else { } else {
const translations = await handleTranslate({ const translations = await handleTranslate([text], {
texts: [text],
from, from,
to, to,
fromLang, fromLang,

View File

@@ -782,17 +782,10 @@ export const parseTransRes = async (
* @param {*} param0 * @param {*} param0
* @returns * @returns
*/ */
export const handleTranslate = async ({ export const handleTranslate = async (
texts, texts = [],
from, { from, to, fromLang, toLang, langMap, docInfo, apiSetting, usePool }
to, ) => {
fromLang,
toLang,
langMap,
docInfo,
apiSetting,
usePool,
}) => {
let history = null; let history = null;
let hisMsgs = []; let hisMsgs = [];
const { const {
@@ -850,3 +843,32 @@ export const handleTranslate = async ({
...apiSetting, ...apiSetting,
}); });
}; };
/**
 * Microsoft language detection: batches the given texts into one request
 * and parses the detected language of each result.
 *
 * All texts are sent in a single POST to the Translator "detect" endpoint,
 * and the `language` field of each response entry is returned at the same
 * index as that entry.
 * NOTE(review): callers presumably rely on the response entries being in
 * the same order as `texts` — confirm against the Detect API reference.
 *
 * @param {string[]} texts Texts whose language should be detected.
 * @returns {Promise<Array<string|undefined>>} The `language` value of each
 *   response entry (`undefined` for an entry without one), or an empty
 *   array when `texts` is empty or the response is not an array.
 */
export const handleMicrosoftLangdetect = async (texts = []) => {
  // Nothing to detect: skip the token fetch and the network round trip.
  if (texts.length === 0) {
    return [];
  }

  const token = await msAuth();
  const input =
    "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
  const init = {
    headers: {
      "Content-type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    method: "POST",
    body: JSON.stringify(texts.map((text) => ({ Text: text }))),
  };
  const res = await fetchData(input, init, {
    useCache: false,
  });

  if (!Array.isArray(res)) {
    return [];
  }

  // Optional chaining keeps one malformed (null/undefined) entry from
  // throwing and failing the whole batch; the slot stays so indexes line up.
  return res.map((item) => item?.language);
};

View File

@@ -132,7 +132,7 @@ export const GLOBLA_RULE = {
transTag: DEFAULT_TRANS_TAG, // 译文元素标签 transTag: DEFAULT_TRANS_TAG, // 译文元素标签
transTitle: "false", // 是否同时翻译页面标题 transTitle: "false", // 是否同时翻译页面标题
transSelected: "true", // 是否启用划词翻译 transSelected: "true", // 是否启用划词翻译
detectRemote: "false", // 是否使用远程语言检测 detectRemote: "true", // 是否使用远程语言检测
skipLangs: [], // 不翻译的语言 skipLangs: [], // 不翻译的语言
// fixerSelector: "", // 修复函数选择器 (暂时作废) // fixerSelector: "", // 修复函数选择器 (暂时作废)
// fixerFunc: "-", // 修复函数 (暂时作废) // fixerFunc: "-", // 修复函数 (暂时作废)

View File

@@ -1,6 +1,7 @@
import { APP_LCNAME } from "./app"; import { APP_LCNAME } from "./app";
export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`; export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`;
export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`;
export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker"; export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker";
export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy"; export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy";

View File

@@ -11,11 +11,12 @@ import {
* @returns * @returns
*/ */
const BatchQueue = ( const BatchQueue = (
{ taskFn, ...args }, taskFn,
{ {
batchInterval = DEFAULT_BATCH_INTERVAL, batchInterval = DEFAULT_BATCH_INTERVAL,
batchSize = DEFAULT_BATCH_SIZE, batchSize = DEFAULT_BATCH_SIZE,
batchLength = DEFAULT_BATCH_LENGTH, batchLength = DEFAULT_BATCH_LENGTH,
...args
} = {} } = {}
) => { ) => {
const queue = []; const queue = [];
@@ -23,11 +24,7 @@ const BatchQueue = (
let timer = null; let timer = null;
const sendBatchRequest = async (payloads) => { const sendBatchRequest = async (payloads) => {
const texts = payloads.map((item) => item.text); return taskFn(payloads, args);
return taskFn({
...args,
texts,
});
}; };
const processQueue = async () => { const processQueue = async () => {
@@ -47,7 +44,7 @@ const BatchQueue = (
let endIndex = 0; let endIndex = 0;
for (const task of queue) { for (const task of queue) {
const textLength = task.payload.text?.length || 0; const textLength = task.payload?.length || 0;
if ( if (
endIndex >= batchSize || endIndex >= batchSize ||
(currentBatchLength + textLength > batchLength && endIndex > 0) (currentBatchLength + textLength > batchLength && endIndex > 0)
@@ -135,14 +132,12 @@ const queueMap = new Map();
/** /**
* 获取批处理实例 * 获取批处理实例
*/ */
export const getBatchQueue = (args) => { export const getBatchQueue = (key, taskFn, args) => {
const { from, to, apiSetting } = args;
const key = `${apiSetting.apiSlug}_${from}_${to}`;
if (queueMap.has(key)) { if (queueMap.has(key)) {
return queueMap.get(key); return queueMap.get(key);
} }
const queue = BatchQueue(args, apiSetting); const queue = BatchQueue(taskFn, args);
queueMap.set(key, queue); queueMap.set(key, queue);
return queue; return queue;
}; };

View File

@@ -113,7 +113,7 @@ function RuleFields({ rule, rules, setShow, setKeyword }) {
transTag = DEFAULT_TRANS_TAG, transTag = DEFAULT_TRANS_TAG,
transTitle = "false", transTitle = "false",
transSelected = "true", transSelected = "true",
detectRemote = "false", detectRemote = "true",
skipLangs = [], skipLangs = [],
// fixerSelector = "", // fixerSelector = "",
// fixerFunc = "-", // fixerFunc = "-",