feat: support batch langdetect
This commit is contained in:
@@ -2,16 +2,17 @@ import queryString from "query-string";
|
|||||||
import { fetchData } from "../libs/fetch";
|
import { fetchData } from "../libs/fetch";
|
||||||
import {
|
import {
|
||||||
URL_CACHE_TRAN,
|
URL_CACHE_TRAN,
|
||||||
|
URL_CACHE_DELANG,
|
||||||
KV_SALT_SYNC,
|
KV_SALT_SYNC,
|
||||||
OPT_LANGS_TO_SPEC,
|
OPT_LANGS_TO_SPEC,
|
||||||
OPT_LANGS_SPEC_DEFAULT,
|
OPT_LANGS_SPEC_DEFAULT,
|
||||||
API_SPE_TYPES,
|
API_SPE_TYPES,
|
||||||
DEFAULT_API_SETTING,
|
DEFAULT_API_SETTING,
|
||||||
|
OPT_TRANS_MICROSOFT,
|
||||||
} from "../config";
|
} from "../config";
|
||||||
import { sha256 } from "../libs/utils";
|
import { sha256 } from "../libs/utils";
|
||||||
import { msAuth } from "../libs/auth";
|
|
||||||
import { kissLog } from "../libs/log";
|
import { kissLog } from "../libs/log";
|
||||||
import { handleTranslate } from "./trans";
|
import { handleTranslate, handleMicrosoftLangdetect } from "./trans";
|
||||||
import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache";
|
import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache";
|
||||||
import { getBatchQueue } from "../libs/batchQueue";
|
import { getBatchQueue } from "../libs/batchQueue";
|
||||||
|
|
||||||
@@ -83,24 +84,25 @@ export const apiGoogleLangdetect = async (text) => {
|
|||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export const apiMicrosoftLangdetect = async (text) => {
|
export const apiMicrosoftLangdetect = async (text) => {
|
||||||
const token = await msAuth();
|
const cacheOpts = { text, detector: OPT_TRANS_MICROSOFT };
|
||||||
const input =
|
const cacheInput = `${URL_CACHE_DELANG}?${queryString.stringify(cacheOpts)}`;
|
||||||
"https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
|
const cache = await getHttpCachePolyfill(cacheInput);
|
||||||
const init = {
|
if (cache) {
|
||||||
headers: {
|
return cache;
|
||||||
"Content-type": "application/json",
|
}
|
||||||
Authorization: `Bearer ${token}`,
|
|
||||||
},
|
|
||||||
method: "POST",
|
|
||||||
body: JSON.stringify([{ Text: text }]),
|
|
||||||
};
|
|
||||||
const res = await fetchData(input, init, {
|
|
||||||
useCache: true,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res?.[0]?.language) {
|
const key = `${URL_CACHE_DELANG}_${OPT_TRANS_MICROSOFT}`;
|
||||||
await putHttpCachePolyfill(input, init, res);
|
const queue = getBatchQueue(key, handleMicrosoftLangdetect, {
|
||||||
return res[0].language;
|
batchInterval: 500,
|
||||||
|
batchSize: 20,
|
||||||
|
batchLength: 100000,
|
||||||
|
});
|
||||||
|
const lang = await queue.addTask(text);
|
||||||
|
|
||||||
|
if (lang) {
|
||||||
|
putHttpCachePolyfill(cacheInput, null, lang);
|
||||||
|
console.log("handleMicrosoftLangdetect", { text, lang });
|
||||||
|
return lang;
|
||||||
}
|
}
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
@@ -255,7 +257,9 @@ export const apiTranslate = async ({
|
|||||||
let trText = "";
|
let trText = "";
|
||||||
let srLang = "";
|
let srLang = "";
|
||||||
if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) {
|
if (useBatchFetch && API_SPE_TYPES.batch.has(apiType)) {
|
||||||
const queue = getBatchQueue({
|
const { apiSlug, batchInterval, batchSize, batchLength } = apiSetting;
|
||||||
|
const key = `${apiSlug}_${fromLang}_${toLang}`;
|
||||||
|
const queue = getBatchQueue(key, handleTranslate, {
|
||||||
from,
|
from,
|
||||||
to,
|
to,
|
||||||
fromLang,
|
fromLang,
|
||||||
@@ -264,17 +268,18 @@ export const apiTranslate = async ({
|
|||||||
docInfo,
|
docInfo,
|
||||||
apiSetting,
|
apiSetting,
|
||||||
usePool,
|
usePool,
|
||||||
taskFn: handleTranslate,
|
batchInterval,
|
||||||
|
batchSize,
|
||||||
|
batchLength,
|
||||||
});
|
});
|
||||||
const tranlation = await queue.addTask({ text });
|
const tranlation = await queue.addTask(text);
|
||||||
if (Array.isArray(tranlation)) {
|
if (Array.isArray(tranlation)) {
|
||||||
[trText, srLang = ""] = tranlation;
|
[trText, srLang = ""] = tranlation;
|
||||||
} else if (typeof tranlation === "string") {
|
} else if (typeof tranlation === "string") {
|
||||||
trText = tranlation;
|
trText = tranlation;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const translations = await handleTranslate({
|
const translations = await handleTranslate([text], {
|
||||||
texts: [text],
|
|
||||||
from,
|
from,
|
||||||
to,
|
to,
|
||||||
fromLang,
|
fromLang,
|
||||||
|
|||||||
@@ -782,17 +782,10 @@ export const parseTransRes = async (
|
|||||||
* @param {*} param0
|
* @param {*} param0
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export const handleTranslate = async ({
|
export const handleTranslate = async (
|
||||||
texts,
|
texts = [],
|
||||||
from,
|
{ from, to, fromLang, toLang, langMap, docInfo, apiSetting, usePool }
|
||||||
to,
|
) => {
|
||||||
fromLang,
|
|
||||||
toLang,
|
|
||||||
langMap,
|
|
||||||
docInfo,
|
|
||||||
apiSetting,
|
|
||||||
usePool,
|
|
||||||
}) => {
|
|
||||||
let history = null;
|
let history = null;
|
||||||
let hisMsgs = [];
|
let hisMsgs = [];
|
||||||
const {
|
const {
|
||||||
@@ -850,3 +843,32 @@ export const handleTranslate = async ({
|
|||||||
...apiSetting,
|
...apiSetting,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Microsoft language detection: sends a batch of texts in a single request
 * and extracts the detected language code of each response entry.
 *
 * @param {string[]} texts - Texts to detect; each one becomes a `{ Text }`
 *   element of the JSON request body, in the same order.
 * @returns {Promise<string[]>} The `language` field of every element of the
 *   response array ("" for an element that has none). Resolves to [] when
 *   `texts` is empty / not an array, or when the response is not an array.
 */
export const handleMicrosoftLangdetect = async (texts = []) => {
  // Nothing to detect: skip both the token fetch and the network round trip.
  if (!Array.isArray(texts) || texts.length === 0) {
    return [];
  }

  const token = await msAuth();
  const input =
    "https://api-edge.cognitive.microsofttranslator.com/detect?api-version=3.0";
  const init = {
    headers: {
      "Content-type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    method: "POST",
    body: JSON.stringify(texts.map((text) => ({ Text: text }))),
  };

  // NOTE(review): `useCache: false` is handed to fetchData as-is; presumably
  // result caching is left to the caller — confirm against fetchData.
  const res = await fetchData(input, init, {
    useCache: false,
  });

  if (!Array.isArray(res)) {
    return [];
  }

  // A null or malformed entry must not throw and take the rest of the batch
  // down with it; it simply yields an empty (falsy) language.
  return res.map((item) => item?.language ?? "");
};
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ export const GLOBLA_RULE = {
|
|||||||
transTag: DEFAULT_TRANS_TAG, // 译文元素标签
|
transTag: DEFAULT_TRANS_TAG, // 译文元素标签
|
||||||
transTitle: "false", // 是否同时翻译页面标题
|
transTitle: "false", // 是否同时翻译页面标题
|
||||||
transSelected: "true", // 是否启用划词翻译
|
transSelected: "true", // 是否启用划词翻译
|
||||||
detectRemote: "false", // 是否使用远程语言检测
|
detectRemote: "true", // 是否使用远程语言检测
|
||||||
skipLangs: [], // 不翻译的语言
|
skipLangs: [], // 不翻译的语言
|
||||||
// fixerSelector: "", // 修复函数选择器 (暂时作废)
|
// fixerSelector: "", // 修复函数选择器 (暂时作废)
|
||||||
// fixerFunc: "-", // 修复函数 (暂时作废)
|
// fixerFunc: "-", // 修复函数 (暂时作废)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { APP_LCNAME } from "./app";
|
import { APP_LCNAME } from "./app";
|
||||||
|
|
||||||
export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`;
|
export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`;
|
||||||
|
export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`;
|
||||||
|
|
||||||
export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker";
|
export const URL_KISS_WORKER = "https://github.com/fishjar/kiss-worker";
|
||||||
export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy";
|
export const URL_KISS_PROXY = "https://github.com/fishjar/kiss-proxy";
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ import {
|
|||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
const BatchQueue = (
|
const BatchQueue = (
|
||||||
{ taskFn, ...args },
|
taskFn,
|
||||||
{
|
{
|
||||||
batchInterval = DEFAULT_BATCH_INTERVAL,
|
batchInterval = DEFAULT_BATCH_INTERVAL,
|
||||||
batchSize = DEFAULT_BATCH_SIZE,
|
batchSize = DEFAULT_BATCH_SIZE,
|
||||||
batchLength = DEFAULT_BATCH_LENGTH,
|
batchLength = DEFAULT_BATCH_LENGTH,
|
||||||
|
...args
|
||||||
} = {}
|
} = {}
|
||||||
) => {
|
) => {
|
||||||
const queue = [];
|
const queue = [];
|
||||||
@@ -23,11 +24,7 @@ const BatchQueue = (
|
|||||||
let timer = null;
|
let timer = null;
|
||||||
|
|
||||||
const sendBatchRequest = async (payloads) => {
|
const sendBatchRequest = async (payloads) => {
|
||||||
const texts = payloads.map((item) => item.text);
|
return taskFn(payloads, args);
|
||||||
return taskFn({
|
|
||||||
...args,
|
|
||||||
texts,
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const processQueue = async () => {
|
const processQueue = async () => {
|
||||||
@@ -47,7 +44,7 @@ const BatchQueue = (
|
|||||||
let endIndex = 0;
|
let endIndex = 0;
|
||||||
|
|
||||||
for (const task of queue) {
|
for (const task of queue) {
|
||||||
const textLength = task.payload.text?.length || 0;
|
const textLength = task.payload?.length || 0;
|
||||||
if (
|
if (
|
||||||
endIndex >= batchSize ||
|
endIndex >= batchSize ||
|
||||||
(currentBatchLength + textLength > batchLength && endIndex > 0)
|
(currentBatchLength + textLength > batchLength && endIndex > 0)
|
||||||
@@ -135,14 +132,12 @@ const queueMap = new Map();
|
|||||||
/**
|
/**
|
||||||
* 获取批处理实例
|
* 获取批处理实例
|
||||||
*/
|
*/
|
||||||
export const getBatchQueue = (args) => {
|
export const getBatchQueue = (key, taskFn, args) => {
|
||||||
const { from, to, apiSetting } = args;
|
|
||||||
const key = `${apiSetting.apiSlug}_${from}_${to}`;
|
|
||||||
if (queueMap.has(key)) {
|
if (queueMap.has(key)) {
|
||||||
return queueMap.get(key);
|
return queueMap.get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
const queue = BatchQueue(args, apiSetting);
|
const queue = BatchQueue(taskFn, args);
|
||||||
queueMap.set(key, queue);
|
queueMap.set(key, queue);
|
||||||
return queue;
|
return queue;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ function RuleFields({ rule, rules, setShow, setKeyword }) {
|
|||||||
transTag = DEFAULT_TRANS_TAG,
|
transTag = DEFAULT_TRANS_TAG,
|
||||||
transTitle = "false",
|
transTitle = "false",
|
||||||
transSelected = "true",
|
transSelected = "true",
|
||||||
detectRemote = "false",
|
detectRemote = "true",
|
||||||
skipLangs = [],
|
skipLangs = [],
|
||||||
// fixerSelector = "",
|
// fixerSelector = "",
|
||||||
// fixerFunc = "-",
|
// fixerFunc = "-",
|
||||||
|
|||||||
Reference in New Issue
Block a user