feat: support batch langdetect

This commit is contained in:
Gabe
2025-09-27 23:33:33 +08:00
parent fffa448425
commit 867c2209b1
6 changed files with 71 additions and 48 deletions

View File

@@ -2,16 +2,17 @@ import queryString from "query-string";
import { fetchData } from "../libs/fetch";
import {
URL_CACHE_TRAN,
URL_CACHE_DELANG,
KV_SALT_SYNC,
OPT_LANGS_TO_SPEC,
OPT_LANGS_SPEC_DEFAULT,
API_SPE_TYPES,
DEFAULT_API_SETTING,
OPT_TRANS_MICROSOFT,
} from "../config";
import { sha256 } from "../libs/utils";
import { msAuth } from "../libs/auth";
import { kissLog } from "../libs/log";
import { handleTranslate } from "./trans";
import { handleTranslate, handleMicrosoftLangdetect } from "./trans";
import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache";
import { getBatchQueue } from "../libs/batchQueue";
@@ -83,24 +84,25 @@ export const apiGoogleLangdetect = async (text) => {
* @returns
*/
export const apiMicrosoftLangdetect = async (text) => {
const token = await msAuth();
const input =
"https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
const init = {
headers: {
"Content-type": "application/json",
Authorization: `Bearer ${token}`,
},
method: "POST",
body: JSON.stringify([{ Text: text }]),
};
const res = await fetchData(input, init, {
useCache: true,
});
const cacheOpts = { text, detector: OPT_TRANS_MICROSOFT };
const cacheInput = `${URL_CACHE_DELANG}?${queryString.stringify(cacheOpts)}`;
const cache = await getHttpCachePolyfill(cacheInput);
if (cache) {
return cache;
}
if (res?.[0]?.language) {
await putHttpCachePolyfill(input, init, res);
return res[0].language;
const key = `${URL_CACHE_DELANG}_${OPT_TRANS_MICROSOFT}`;
const queue = getBatchQueue(key, handleMicrosoftLangdetect, {
batchInterval: 500,
batchSize: 20,
batchLength: 100000,
});
const lang = await queue.addTask(text);
if (lang) {
putHttpCachePolyfill(cacheInput, null, lang);
console.log("handleMicrosoftLangdetect", { text, lang });
return lang;
}
return "";
@@ -255,7 +257,9 @@ export const apiTranslate = async ({
let trText = "";
let srLang = "";
if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) {
const queue = getBatchQueue({
const { apiSlug, batchInterval, batchSize, batchLength } = apiSetting;
const key = `${apiSlug}_${fromLang}_${toLang}`;
const queue = getBatchQueue(key, handleTranslate, {
from,
to,
fromLang,
@@ -264,17 +268,18 @@ export const apiTranslate = async ({
docInfo,
apiSetting,
usePool,
taskFn: handleTranslate,
batchInterval,
batchSize,
batchLength,
});
const tranlation = await queue.addTask({ text });
const tranlation = await queue.addTask(text);
if (Array.isArray(tranlation)) {
[trText, srLang = ""] = tranlation;
} else if (typeof tranlation === "string") {
trText = tranlation;
}
} else {
const translations = await handleTranslate({
texts: [text],
const translations = await handleTranslate([text], {
from,
to,
fromLang,

View File

@@ -782,17 +782,10 @@ export const parseTransRes = async (
* @param {*} param0
* @returns
*/
export const handleTranslate = async ({
texts,
from,
to,
fromLang,
toLang,
langMap,
docInfo,
apiSetting,
usePool,
}) => {
export const handleTranslate = async (
texts = [],
{ from, to, fromLang, toLang, langMap, docInfo, apiSetting, usePool }
) => {
let history = null;
let hisMsgs = [];
const {
@@ -850,3 +843,32 @@ export const handleTranslate = async ({
...apiSetting,
});
};
/**
 * Batched Microsoft language detection: sends all texts in a single request
 * and extracts the detected language of each response entry.
 * @param {string[]} texts Texts to detect; each one becomes a `{ Text }` item
 *   of the JSON request body.
 * @returns {Promise<Array<string|undefined>>} The `language` field of every
 *   response entry, in response order (`undefined` for an entry without one);
 *   an empty array when `texts` is empty or the response is not an array.
 */
export const handleMicrosoftLangdetect = async (texts = []) => {
  // Nothing to detect: skip the token fetch and the network round trip.
  if (texts.length === 0) {
    return [];
  }

  const token = await msAuth();
  const input =
    "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
  const init = {
    headers: {
      "Content-type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    method: "POST",
    body: JSON.stringify(texts.map((text) => ({ Text: text }))),
  };
  // The batched request itself is sent with the fetch-level cache disabled.
  const res = await fetchData(input, init, {
    useCache: false,
  });
  if (!Array.isArray(res)) {
    return [];
  }

  // Tolerate null/malformed entries so that one bad item does not reject the
  // whole batch with a TypeError.
  return res.map((item) => item?.language);
};

View File

@@ -132,7 +132,7 @@ export const GLOBLA_RULE = {
transTag: DEFAULT_TRANS_TAG, // 译文元素标签
transTitle: "false", // 是否同时翻译页面标题
transSelected: "true", // 是否启用划词翻译
detectRemote: "false", // 是否使用远程语言检测
detectRemote: "true", // 是否使用远程语言检测
skipLangs: [], // 不翻译的语言
// fixerSelector: "", // 修复函数选择器 (暂时作废)
// fixerFunc: "-", // 修复函数 (暂时作废)

View File

@@ -1,6 +1,7 @@
import { APP_LCNAME } from "./app";
// URL built on the app's own name; presumably the cache-key base for
// translation results — TODO confirm against callers.
export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`;
// URL built on the app's own name, used as the base of cache keys for
// language-detection results.
export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`;
// GitHub repository URLs of the kiss-worker and kiss-proxy projects.
export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker";
export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy";

View File

@@ -11,11 +11,12 @@ import {
* @returns
*/
const BatchQueue = (
{ taskFn, ...args },
taskFn,
{
batchInterval = DEFAULT_BATCH_INTERVAL,
batchSize = DEFAULT_BATCH_SIZE,
batchLength = DEFAULT_BATCH_LENGTH,
...args
} = {}
) => {
const queue = [];
@@ -23,11 +24,7 @@ const BatchQueue = (
let timer = null;
const sendBatchRequest = async (payloads) => {
const texts = payloads.map((item) => item.text);
return taskFn({
...args,
texts,
});
return taskFn(payloads, args);
};
const processQueue = async () => {
@@ -47,7 +44,7 @@ const BatchQueue = (
let endIndex = 0;
for (const task of queue) {
const textLength = task.payload.text?.length || 0;
const textLength = task.payload?.length || 0;
if (
endIndex >= batchSize ||
(currentBatchLength + textLength > batchLength && endIndex > 0)
@@ -135,14 +132,12 @@ const queueMap = new Map();
/**
* 获取批处理实例
*/
export const getBatchQueue = (args) => {
const { from, to, apiSetting } = args;
const key = `${apiSetting.apiSlug}_${from}_${to}`;
export const getBatchQueue = (key, taskFn, args) => {
if (queueMap.has(key)) {
return queueMap.get(key);
}
const queue = BatchQueue(args, apiSetting);
const queue = BatchQueue(taskFn, args);
queueMap.set(key, queue);
return queue;
};

View File

@@ -113,7 +113,7 @@ function RuleFields({ rule, rules, setShow, setKeyword }) {
transTag = DEFAULT_TRANS_TAG,
transTitle = "false",
transSelected = "true",
detectRemote = "false",
detectRemote = "true",
skipLangs = [],
// fixerSelector = "",
// fixerFunc = "-",