diff --git a/src/apis/index.js b/src/apis/index.js index 9f70a3b..b860091 100644 --- a/src/apis/index.js +++ b/src/apis/index.js @@ -13,10 +13,15 @@ import { MSG_BUILTINAI_DETECT, MSG_BUILTINAI_TRANSLATE, OPT_TRANS_BUILTINAI, + URL_CACHE_SUBTITLE, } from "../config"; import { sha256, withTimeout } from "../libs/utils"; import { kissLog } from "../libs/log"; -import { handleTranslate, handleMicrosoftLangdetect } from "./trans"; +import { + handleTranslate, + handleSubtitle, + handleMicrosoftLangdetect, +} from "./trans"; import { getHttpCachePolyfill, putHttpCachePolyfill } from "../libs/cache"; import { getBatchQueue } from "../libs/batchQueue"; import { isBuiltinAIAvailable } from "../libs/browser"; @@ -495,3 +500,36 @@ export const apiTranslate = async ({ return [trText, isSame]; }; + +// 字幕处理/翻译 +export const apiSubtitle = async ({ + videoId, + fromLang = "en", + toLang, + events = [], + apiSetting, +}) => { + const cacheOpts = { + videoId, + fromLang, + toLang, + }; + const cacheInput = `${URL_CACHE_SUBTITLE}?${queryString.stringify(cacheOpts)}`; + const cache = await getHttpCachePolyfill(cacheInput); + if (cache) { + return cache; + } + + const subtitles = await handleSubtitle({ + events, + from: fromLang, + to: toLang, + apiSetting, + }); + if (subtitles?.length) { + putHttpCachePolyfill(cacheInput, null, subtitles); + return subtitles; + } + + return []; +}; diff --git a/src/apis/trans.js b/src/apis/trans.js index 5887313..d80d6aa 100644 --- a/src/apis/trans.js +++ b/src/apis/trans.js @@ -90,25 +90,44 @@ const genUserPrompt = ({ }; const parseAIRes = (raw) => { - let data; + if (!raw) { + return []; + } try { const jsonString = extractJson(raw); - data = JSON.parse(jsonString); + const data = JSON.parse(jsonString); + + if (Array.isArray(data.translations)) { + // todo: 考虑序号id可能会打乱 + return data.translations.map((item) => [ + item?.text ?? "", + item?.sourceLanguage ?? "", + ]); + } } catch (err) { kissLog("parseAIRes", err); + } + + return []; +}; + +const parseSTRes = (raw) => { + if (!raw) { return []; } - if (!Array.isArray(data.translations)) { - return []; + try { + const jsonString = extractJson(raw); + const data = JSON.parse(jsonString); + if (Array.isArray(data)) { + return data; + } + } catch (err) { + kissLog("parseAIRes: subtitle", err); } - // todo: 考虑序号id可能会打乱 - return data.translations.map((item) => [ - item?.text ?? "", - item?.sourceLanguage ?? "", - ]); + return []; }; const genGoogle = ({ texts, from, to, url, key }) => { @@ -258,7 +277,7 @@ const genOpenAI = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { const userMsg = { role: "user", @@ -295,7 +314,7 @@ const genGemini = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { url = url .replaceAll(INPUT_PLACE_MODEL, model) @@ -359,7 +378,7 @@ const genGemini2 = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { const userMsg = { role: "user", @@ -395,7 +414,7 @@ const genClaude = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { const userMsg = { role: "user", @@ -427,7 +446,7 @@ const genOpenRouter = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { const userMsg = { role: "user", @@ -464,7 +483,7 @@ const genOllama = ({ model, temperature, maxTokens, - hisMsgs, + hisMsgs = [], }) => { const userMsg = { role: "user", @@ -579,7 +598,7 @@ const genInit = ({ * @param {*} * @returns */ -export const genTransReq = async ({ reqHook, resHook, ...args }) => { +export const genTransReq = async ({ reqHook, ...args }) => { const { apiType, apiSlug, @@ -593,6 +612,7 @@ export const genTransReq = async ({ reqHook, resHook, ...args }) => { glossary, customHeader, customBody, + events, } = args; if (API_SPE_TYPES.mulkeys.has(apiType)) { @@ -605,14 +625,16 @@ export const genTransReq = async ({ reqHook, resHook, ...args }) => { if (API_SPE_TYPES.ai.has(apiType)) { args.systemPrompt = genSystemPrompt({ systemPrompt, from, to }); - args.userPrompt = genUserPrompt({ - userPrompt, - from, - to, - texts, - docInfo, - glossary, - }); + args.userPrompt = !!events + ? JSON.stringify(events) + : genUserPrompt({ + userPrompt, + from, + to, + texts, + docInfo, + glossary, + }); } const { @@ -632,7 +654,7 @@ export const genTransReq = async ({ reqHook, resHook, ...args }) => { } // 执行 request hook - if (reqHook?.trim()) { + if (reqHook?.trim() && !events) { try { interpreter.run(`exports.reqHook = ${reqHook}`); const hookResult = await interpreter.exports.reqHook(args, { @@ -864,7 +886,8 @@ export const handleTranslate = async ( httpTimeout, }); if (!res) { - throw new Error("tranlate got empty response"); + kissLog("tranlate got empty response"); + return []; } return parseTransRes(res, { @@ -908,3 +931,54 @@ export const handleMicrosoftLangdetect = async (texts = []) => { return []; }; + +/** + * 字幕翻译 + * @param {*} param0 + * @returns + */ +export const handleSubtitle = async ({ + events, + from = "en", + to, + apiSetting, +}) => { + const { apiType, fetchInterval, fetchLimit, httpTimeout } = apiSetting; + + const [input, init] = await genTransReq({ + ...apiSetting, + events, + from, + to, + systemPrompt: apiSetting.subtitlePrompt, + }); + + const res = await fetchData(input, init, { + useCache: false, + usePool: true, + fetchInterval, + fetchLimit, + httpTimeout, + }); + if (!res) { + kissLog("subtitle got empty response"); + return []; + } + + switch (apiType) { + case OPT_TRANS_OPENAI: + case OPT_TRANS_GEMINI_2: + case OPT_TRANS_OPENROUTER: + case OPT_TRANS_OLLAMA: + return parseSTRes(res?.choices?.[0]?.message?.content ?? ""); + case OPT_TRANS_GEMINI: + return parseSTRes(res?.candidates?.[0]?.content?.parts?.[0]?.text ?? ""); + case OPT_TRANS_CLAUDE: + return parseSTRes(res?.content?.[0]?.text ?? ""); + case OPT_TRANS_CUSTOMIZE: + return res; + default: + } + + return []; +}; diff --git a/src/config/api.js b/src/config/api.js index 992fc7b..a388d7f 100644 --- a/src/config/api.js +++ b/src/config/api.js @@ -354,6 +354,24 @@ Output: {"translations":[{"id":1,"text":"一个React组件","sourceLangua Fail-safe: On any error, return {"translations":[]}.`; +const defaultSubtitlePrompt = `Goal: Convert raw subtitle event JSON into a clean, sentence-based JSON array. + +Output (valid JSON array, output ONLY this array): +[{ + "text": "string", // Full sentence with correct punctuation + "translation": "string", // Translation in ${INPUT_PLACE_TO} + "start": int, // Start time (ms) + "end": int, // End time (ms) + "duration": int // end - start +}] + +Guidelines: +1. **Segmentation**: Merge sequential 'utf8' strings from 'segs' into full sentences, merging groups logically. +2. **Punctuation**: Ensure proper sentence-final punctuation (., ?, !); add if missing. +3. **Translation**: Translate 'text' into ${INPUT_PLACE_TO}, place result in 'translation'. +4. **Special Cases**: '[Music]' (and similar cues) are standalone entries. Translate appropriately (e.g., '[音乐]', '[Musique]'). +`; + const defaultRequestHook = `async (args, { url, body, headers, userMsg, method } = {}) => { console.log("request hook args:", args); // return { url, body, headers, userMsg, method }; @@ -375,6 +393,7 @@ const defaultApi = { key: "", model: "", // 模型名称 systemPrompt: defaultSystemPrompt, + subtitlePrompt: defaultSubtitlePrompt, userPrompt: "", tone: BUILTIN_STONES[0], // 翻译风格 placeholder: BUILTIN_PLACEHOLDERS[0], // 占位符 diff --git a/src/config/i18n.js b/src/config/i18n.js index 9490a30..328c67c 100644 --- a/src/config/i18n.js +++ b/src/config/i18n.js @@ -1484,9 +1484,9 @@ export const I18N = { zh_TW: `佔位標名`, }, system_prompt_helper: { - zh: `在未完全理解默认Prompt的情况下,请勿随意修改,否则可能翻译失败。`, - en: `If you do not fully understand the default prompt, please do not modify it at will, otherwise the translation may fail.`, - zh_TW: `在未完全理解預設Prompt的情況下,請勿隨意修改,否則可能翻譯失敗。`, + zh: `在未完全理解默认Prompt的情况下,请勿随意修改,否则可能无法工作。`, + en: `Do not modify the default prompt without fully understanding it, otherwise it may not work.`, + zh_TW: `在未完全理解預設Prompt的情況下,請勿隨意修改,否則可能無法運作。`, }, if_pre_init: { zh: `是否预初始化`, @@ -1568,4 +1568,19 @@ export const I18N = { en: `Translation style`, zh_TW: `譯文樣式`, }, + ai_segmentation: { + zh: `AI智能断句`, + en: `AI intelligent punctuation`, + zh_TW: `AI智慧斷句`, + }, + subtitle_helper_1: { + zh: `1、目前仅支持Youtube英文字幕双语翻译,且仅支持浏览器扩展。`, + en: `1. Currently only supports bilingual translation of Youtube English subtitles, and only supports browser extensions.`, + zh_TW: `1.目前僅支援Youtube英文字幕雙語翻譯,且僅支援瀏覽器擴充功能。`, + }, + subtitle_helper_2: { + zh: `2、插件内置基础断句逻辑,如不理想,可以启用AI智能断句,但需考虑视频长度与AI接口的处理能力,可能等待的时间会很长,甚至失败。`, + en: `2. The plug-in has built-in basic segmentation logic. If it is not ideal, you can enable AI intelligent segmentation. However, you need to consider the video length and the processing power of the AI ​​interface. The waiting time may be very long or even fail.`, + zh_TW: `2.插件內建基礎斷句邏輯,如不理想,可以啟用AI智能斷句,但需考慮視訊長度與AI介面的處理能力,可能等待的時間會很長,甚至失敗。`, + }, }; diff --git a/src/config/setting.js b/src/config/setting.js index 477700b..434556f 100644 --- a/src/config/setting.js +++ b/src/config/setting.js @@ -111,6 +111,7 @@ const SUBTITLE_TRANSLATION_STYLE = `font-size: clamp(1.5rem, 3cqw, 3rem);`; export const DEFAULT_SUBTITLE_SETTING = { enabled: true, // 是否开启 apiSlug: OPT_TRANS_MICROSOFT, + segSlug: "-", // AI智能断句 // fromLang: "en", toLang: "zh-CN", isBilingual: true, // 是否双语显示 diff --git a/src/config/url.js b/src/config/url.js index 83b8bac..b57ffee 100644 --- a/src/config/url.js +++ b/src/config/url.js @@ -1,6 +1,7 @@ import { APP_LCNAME } from "./app"; export const URL_CACHE_TRAN = `https://${APP_LCNAME}/translate`; +export const URL_CACHE_SUBTITLE = `https://${APP_LCNAME}/subtitle`; export const URL_CACHE_DELANG = `https://${APP_LCNAME}/detectlang`; export const URL_CACHE_BINGDICT = `https://${APP_LCNAME}/bingdict`; diff --git a/src/hooks/Api.js b/src/hooks/Api.js index c4d6008..fffebfc 100644 --- a/src/hooks/Api.js +++ b/src/hooks/Api.js @@ -43,6 +43,11 @@ export function useApiList() { [transApis] ); + const aiEnabledApis = useMemo( + () => enabledApis.filter((api) => API_SPE_TYPES.ai.has(api.apiSlug)), + [enabledApis] + ); + const addApi = useCallback( (apiType) => { const defaultApiOpt = @@ -76,7 +81,15 @@ export function useApiList() { [updateSetting] ); - return { transApis, userApis, builtinApis, enabledApis, addApi, deleteApi }; + return { + transApis, + userApis, + builtinApis, + enabledApis, + aiEnabledApis, + addApi, + deleteApi, + }; } export function useApiItem(apiSlug) { diff --git a/src/libs/utils.js b/src/libs/utils.js index f436e15..8cfba7b 100644 --- a/src/libs/utils.js +++ b/src/libs/utils.js @@ -298,11 +298,9 @@ export const parseJsonObj = (str) => { * @returns */ export const extractJson = (raw) => { - if (!raw) return "{}"; - - let s = raw.replace(/^\s*```(?:json)?\s*/i, "").replace(/\s*```\s*$/i, ""); - const match = s.match(/\{[\s\S]*\}/); - return match ? match[0] : "{}"; + const jsonRegex = /({.*}|\[.*\])/s; + const match = raw.match(jsonRegex); + return match ? match[0] : null; }; /** diff --git a/src/subtitle/YouTubeCaptionProvider.js b/src/subtitle/YouTubeCaptionProvider.js index f45ec91..88b5502 100644 --- a/src/subtitle/YouTubeCaptionProvider.js +++ b/src/subtitle/YouTubeCaptionProvider.js @@ -1,10 +1,11 @@ import { logger } from "../libs/log.js"; -import { apiTranslate } from "../apis/index.js"; +import { apiSubtitle, apiTranslate } from "../apis/index.js"; import { BilingualSubtitleManager } from "./BilingualSubtitleManager.js"; import { MSG_XHR_DATA_YOUTUBE, APP_NAME } from "../config"; import { truncateWords, sleep } from "../libs/utils.js"; import { createLogoSvg } from "../libs/svg.js"; import { randomBetween } from "../libs/utils.js"; +import { fetchData } from "../libs/fetch.js"; const VIDEO_SELECT = "#container video"; const CONTORLS_SELECT = ".ytp-right-controls"; @@ -114,7 +115,12 @@ class YouTubeCaptionProvider { this.#ytControls.before(kissControls); } + // todo: 优化逻辑 #findCaptionTrack(captionTracks) { + if (!captionTracks.length) { + return null; + } + let captionTrack = captionTracks.find((item) => item.vssId?.startsWith(".en") ); @@ -123,6 +129,10 @@ class YouTubeCaptionProvider { item.vssId?.startsWith("a.en") ); } + + captionTrack = captionTracks[0]; + captionTrack.baseUrl += "&tlang=en"; + return captionTrack; } @@ -144,7 +154,8 @@ class YouTubeCaptionProvider { async #getSubtitleEvents(captionTrack, potUrl, responseText) { if (potUrl.searchParams.get("lang") === captionTrack.languageCode) { try { - return JSON.parse(responseText); + const json = JSON.parse(responseText); + return json; } catch (err) { logger.error("Youtube Provider: parse responseText", err); return null; @@ -161,7 +172,7 @@ class YouTubeCaptionProvider { potUrl.searchParams.delete("kind"); } - const res = await fetch(potUrl); + const res = await fetchData(potUrl, null, { useCache: true }); if (res.ok) { const json = await res.json(); return json; @@ -181,6 +192,24 @@ class YouTubeCaptionProvider { return docUrl.searchParams.get("v"); } + async #aiSegment({ videoId, toLang, events, segApiSetting }) { + try { + const subtitles = await apiSubtitle({ + videoId, + toLang, + events, + apiSetting: segApiSetting, + }); + if (Array.isArray(subtitles)) { + return subtitles; + } + } catch (err) { + logger.info("Youtube Provider: ai segmentation", err); + } + + return []; + } + async #handleInterceptedRequest(url, responseText) { try { if (!responseText) { @@ -216,12 +245,27 @@ class YouTubeCaptionProvider { potUrl, responseText ); - if (!subtitleEvents) { + const events = subtitleEvents?.events; + if (!Array.isArray(events)) { logger.info("Youtube Provider: SubtitleEvents not got."); return; } - const subtitles = this.#formatSubtitles(subtitleEvents); + let subtitles = []; + + const { segApiSetting, toLang } = this.#setting; + if (captionTrack.kind === "asr" && segApiSetting) { + // todo: 提示用户等待中 + subtitles = await this.#aiSegment({ + videoId, + events, + toLang, + segApiSetting, + }); + } + if (subtitles.length === 0) { + subtitles = this.#formatSubtitles(events); + } if (subtitles.length === 0) { logger.info("Youtube Provider: No subtitles after format."); return; @@ -300,8 +344,7 @@ class YouTubeCaptionProvider { } } - #formatSubtitles(data) { - const events = data?.events; + #formatSubtitles(events) { if (!Array.isArray(events)) return []; const lines = []; @@ -362,7 +405,7 @@ class YouTubeCaptionProvider { const isPoor = this.#isQualityPoor(lines); if (isPoor) { - return this.#processSubtitles(data); + return this.#processSubtitles(events); } return lines.map((item) => ({ @@ -380,7 +423,7 @@ class YouTubeCaptionProvider { return longLinesCount / lines.length > percentageThreshold; } - #processSubtitles(data, { timeout = 1500, maxWords = 15 } = {}) { + #processSubtitles(events, { timeout = 1500, maxWords = 15 } = {}) { const groupedPauseWords = { 1: new Set([ "actually", @@ -483,7 +526,7 @@ class YouTubeCaptionProvider { bufferWordCount = 0; }; - data.events?.forEach((event) => { + events?.forEach((event) => { event.segs?.forEach((seg, j) => { const text = seg.utf8?.trim() || ""; if (!text) return; diff --git a/src/subtitle/subtitle.js b/src/subtitle/subtitle.js index 192ae7c..96d8a10 100644 --- a/src/subtitle/subtitle.js +++ b/src/subtitle/subtitle.js @@ -10,7 +10,7 @@ const providers = [ { pattern: "https://www.youtube.com/watch", start: YouTubeInitializer }, ]; -export function runSubtitle({ href, setting, rule }) { +export function runSubtitle({ href, setting }) { try { const subtitleSetting = setting.subtitleSetting || DEFAULT_SUBTITLE_SETTING; if (!subtitleSetting.enabled) { @@ -24,11 +24,16 @@ export function runSubtitle({ href, setting, rule }) { injectExternalJs(src, id); const apiSetting = - setting.transApis.find((api) => api.apiSlug === rule.apiSlug) || - DEFAULT_API_SETTING; + setting.transApis.find( + (api) => api.apiSlug === subtitleSetting.apiSlug + ) || DEFAULT_API_SETTING; + const segApiSetting = setting.transApis.find( + (api) => api.apiSlug === subtitleSetting.segSlug + ); provider.start({ ...subtitleSetting, apiSetting, + segApiSetting, }); } } catch (err) { diff --git a/src/views/Options/Apis.js b/src/views/Options/Apis.js index 6648c17..06d00e5 100644 --- a/src/views/Options/Apis.js +++ b/src/views/Options/Apis.js @@ -210,6 +210,7 @@ function ApiFields({ apiSlug, isUserApi, deleteApi }) { model = "", apiType, systemPrompt = "", + subtitlePrompt = "", // userPrompt = "", customHeader = "", customBody = "", @@ -344,6 +345,16 @@ function ApiFields({ apiSlug, isUserApi, deleteApi }) { maxRows={10} helperText={i18n("system_prompt_helper")} /> + {/* { e.preventDefault(); @@ -26,6 +27,7 @@ export default function SubtitleSetting() { const { enabled, apiSlug, + segSlug, toLang, isBilingual, windowStyle, @@ -36,6 +38,12 @@ export default function SubtitleSetting() { return ( + + {i18n("subtitle_helper_1")} +
+ {i18n("subtitle_helper_2")} +
+ + + + {i18n("disable")} + {aiEnabledApis.map((api) => ( + + {api.apiName} + + ))} + +