feat: support batch langdetect
This commit is contained in:
@@ -2,16 +2,17 @@ import queryString from "query-string";
|
||||
import { fetchData } from "../libs/fetch";
|
||||
import {
|
||||
URL_CACHE_TRAN,
|
||||
URL_CACHE_DELANG,
|
||||
KV_SALT_SYNC,
|
||||
OPT_LANGS_TO_SPEC,
|
||||
OPT_LANGS_SPEC_DEFAULT,
|
||||
API_SPE_TYPES,
|
||||
DEFAULT_API_SETTING,
|
||||
OPT_TRANS_MICROSOFT,
|
||||
} from "../config";
|
||||
import { sha256 } from "../libs/utils";
|
||||
import { msAuth } from "../libs/auth";
|
||||
import { kissLog } from "../libs/log";
|
||||
import { handleTranslate } from "./trans";
|
||||
import { handleTranslate, handleMicrosoftLangdetect } from "./trans";
|
||||
import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache";
|
||||
import { getBatchQueue } from "../libs/batchQueue";
|
||||
|
||||
@@ -83,24 +84,25 @@ export const apiGoogleLangdetect = async (text) => {
|
||||
* @returns
|
||||
*/
|
||||
export const apiMicrosoftLangdetect = async (text) => {
|
||||
const token = await msAuth();
|
||||
const input =
|
||||
"https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
|
||||
const init = {
|
||||
headers: {
|
||||
"Content-type": "application/json",
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
method: "POST",
|
||||
body: JSON.stringify([{ Text: text }]),
|
||||
};
|
||||
const res = await fetchData(input, init, {
|
||||
useCache: true,
|
||||
});
|
||||
const cacheOpts = { text, detector: OPT_TRANS_MICROSOFT };
|
||||
const cacheInput = `${URL_CACHE_DELANG}?${queryString.stringify(cacheOpts)}`;
|
||||
const cache = await getHttpCachePolyfill(cacheInput);
|
||||
if (cache) {
|
||||
return cache;
|
||||
}
|
||||
|
||||
if (res?.[0]?.language) {
|
||||
await putHttpCachePolyfill(input, init, res);
|
||||
return res[0].language;
|
||||
const key = `${URL_CACHE_DELANG}_${OPT_TRANS_MICROSOFT}`;
|
||||
const queue = getBatchQueue(key, handleMicrosoftLangdetect, {
|
||||
batchInterval: 500,
|
||||
batchSize: 20,
|
||||
batchLength: 100000,
|
||||
});
|
||||
const lang = await queue.addTask(text);
|
||||
|
||||
if (lang) {
|
||||
putHttpCachePolyfill(cacheInput, null, lang);
|
||||
console.log("handleMicrosoftLangdetect", { text, lang });
|
||||
return lang;
|
||||
}
|
||||
|
||||
return "";
|
||||
@@ -255,7 +257,9 @@ export const apiTranslate = async ({
|
||||
let trText = "";
|
||||
let srLang = "";
|
||||
if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) {
|
||||
const queue = getBatchQueue({
|
||||
const { apiSlug, batchInterval, batchSize, batchLength } = apiSetting;
|
||||
const key = `${apiSlug}_${fromLang}_${toLang}`;
|
||||
const queue = getBatchQueue(key, handleTranslate, {
|
||||
from,
|
||||
to,
|
||||
fromLang,
|
||||
@@ -264,17 +268,18 @@ export const apiTranslate = async ({
|
||||
docInfo,
|
||||
apiSetting,
|
||||
usePool,
|
||||
taskFn: handleTranslate,
|
||||
batchInterval,
|
||||
batchSize,
|
||||
batchLength,
|
||||
});
|
||||
const tranlation = await queue.addTask({ text });
|
||||
const tranlation = await queue.addTask(text);
|
||||
if (Array.isArray(tranlation)) {
|
||||
[trText, srLang = ""] = tranlation;
|
||||
} else if (typeof tranlation === "string") {
|
||||
trText = tranlation;
|
||||
}
|
||||
} else {
|
||||
const translations = await handleTranslate({
|
||||
texts: [text],
|
||||
const translations = await handleTranslate([text], {
|
||||
from,
|
||||
to,
|
||||
fromLang,
|
||||
|
||||
@@ -782,17 +782,10 @@ export const parseTransRes = async (
|
||||
* @param {*} param0
|
||||
* @returns
|
||||
*/
|
||||
export const handleTranslate = async ({
|
||||
texts,
|
||||
from,
|
||||
to,
|
||||
fromLang,
|
||||
toLang,
|
||||
langMap,
|
||||
docInfo,
|
||||
apiSetting,
|
||||
usePool,
|
||||
}) => {
|
||||
export const handleTranslate = async (
|
||||
texts = [],
|
||||
{ from, to, fromLang, toLang, langMap, docInfo, apiSetting, usePool }
|
||||
) => {
|
||||
let history = null;
|
||||
let hisMsgs = [];
|
||||
const {
|
||||
@@ -850,3 +843,32 @@ export const handleTranslate = async ({
|
||||
...apiSetting,
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Batched Microsoft language detection: sends every text in a single POST to
 * the Microsoft Translator `detect` endpoint and parses the response into a
 * list of language codes.
 *
 * @param {string[]} texts - Texts to detect; each one becomes a `{ Text }`
 *   entry of the JSON request body, in the same order.
 * @returns {Promise<Array<string|undefined>>} The `language` field of each
 *   response entry, in response order (`undefined` for an entry that carries
 *   none). Resolves to an empty array when there is nothing to detect or when
 *   the response is not an array.
 */
export const handleMicrosoftLangdetect = async (texts = []) => {
  // Nothing to detect: skip the token fetch and the network round trip
  // instead of posting an empty batch.
  if (!Array.isArray(texts) || texts.length === 0) {
    return [];
  }

  const token = await msAuth();
  const input =
    "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
  const init = {
    headers: {
      "Content-type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    method: "POST",
    body: JSON.stringify(texts.map((text) => ({ Text: text }))),
  };

  // The request is issued with `useCache: false`; presumably any caching of
  // individual results is left to the caller — confirm against fetchData.
  const res = await fetchData(input, init, {
    useCache: false,
  });

  if (!Array.isArray(res)) {
    return [];
  }

  // Null-safe access so one malformed entry cannot reject the whole batch.
  return res.map((item) => item?.language);
};
|
||||
|
||||
@@ -132,7 +132,7 @@ export const GLOBLA_RULE = {
|
||||
transTag: DEFAULT_TRANS_TAG, // 译文元素标签
|
||||
transTitle: "false", // 是否同时翻译页面标题
|
||||
transSelected: "true", // 是否启用划词翻译
|
||||
detectRemote: "false", // 是否使用远程语言检测
|
||||
detectRemote: "true", // 是否使用远程语言检测
|
||||
skipLangs: [], // 不翻译的语言
|
||||
// fixerSelector: "", // 修复函数选择器 (暂时作废)
|
||||
// fixerFunc: "-", // 修复函数 (暂时作废)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { APP_LCNAME } from "./app";
|
||||
|
||||
export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`;
|
||||
export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`;
|
||||
|
||||
export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker";
|
||||
export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy";
|
||||
|
||||
@@ -11,11 +11,12 @@ import {
|
||||
* @returns
|
||||
*/
|
||||
const BatchQueue = (
|
||||
{ taskFn, ...args },
|
||||
taskFn,
|
||||
{
|
||||
batchInterval = DEFAULT_BATCH_INTERVAL,
|
||||
batchSize = DEFAULT_BATCH_SIZE,
|
||||
batchLength = DEFAULT_BATCH_LENGTH,
|
||||
...args
|
||||
} = {}
|
||||
) => {
|
||||
const queue = [];
|
||||
@@ -23,11 +24,7 @@ const BatchQueue = (
|
||||
let timer = null;
|
||||
|
||||
const sendBatchRequest = async (payloads) => {
|
||||
const texts = payloads.map((item) => item.text);
|
||||
return taskFn({
|
||||
...args,
|
||||
texts,
|
||||
});
|
||||
return taskFn(payloads, args);
|
||||
};
|
||||
|
||||
const processQueue = async () => {
|
||||
@@ -47,7 +44,7 @@ const BatchQueue = (
|
||||
let endIndex = 0;
|
||||
|
||||
for (const task of queue) {
|
||||
const textLength = task.payload.text?.length || 0;
|
||||
const textLength = task.payload?.length || 0;
|
||||
if (
|
||||
endIndex >= batchSize ||
|
||||
(currentBatchLength + textLength > batchLength && endIndex > 0)
|
||||
@@ -135,14 +132,12 @@ const queueMap = new Map();
|
||||
/**
|
||||
* 获取批处理实例
|
||||
*/
|
||||
export const getBatchQueue = (args) => {
|
||||
const { from, to, apiSetting } = args;
|
||||
const key = `${apiSetting.apiSlug}_${from}_${to}`;
|
||||
export const getBatchQueue = (key, taskFn, args) => {
|
||||
if (queueMap.has(key)) {
|
||||
return queueMap.get(key);
|
||||
}
|
||||
|
||||
const queue = BatchQueue(args, apiSetting);
|
||||
const queue = BatchQueue(taskFn, args);
|
||||
queueMap.set(key, queue);
|
||||
return queue;
|
||||
};
|
||||
|
||||
@@ -113,7 +113,7 @@ function RuleFields({ rule, rules, setShow, setKeyword }) {
|
||||
transTag = DEFAULT_TRANS_TAG,
|
||||
transTitle = "false",
|
||||
transSelected = "true",
|
||||
detectRemote = "false",
|
||||
detectRemote = "true",
|
||||
skipLangs = [],
|
||||
// fixerSelector = "",
|
||||
// fixerFunc = "-",
|
||||
|
||||
Reference in New Issue
Block a user