fix: subtitle
This commit is contained in:
@@ -504,7 +504,7 @@ export const apiTranslate = async ({
|
|||||||
// 字幕处理/翻译
|
// 字幕处理/翻译
|
||||||
export const apiSubtitle = async ({
|
export const apiSubtitle = async ({
|
||||||
videoId,
|
videoId,
|
||||||
fromLang = "en",
|
fromLang = "auto",
|
||||||
toLang,
|
toLang,
|
||||||
events = [],
|
events = [],
|
||||||
apiSetting,
|
apiSetting,
|
||||||
|
|||||||
@@ -937,12 +937,7 @@ export const handleMicrosoftLangdetect = async (texts = []) => {
|
|||||||
* @param {*} param0
|
* @param {*} param0
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export const handleSubtitle = async ({
|
export const handleSubtitle = async ({ events, from, to, apiSetting }) => {
|
||||||
events,
|
|
||||||
from = "en",
|
|
||||||
to,
|
|
||||||
apiSetting,
|
|
||||||
}) => {
|
|
||||||
const { apiType, fetchInterval, fetchLimit, httpTimeout } = apiSetting;
|
const { apiType, fetchInterval, fetchLimit, httpTimeout } = apiSetting;
|
||||||
|
|
||||||
const [input, init] = await genTransReq({
|
const [input, init] = await genTransReq({
|
||||||
|
|||||||
@@ -190,7 +190,7 @@ const RULES_MAP = {
|
|||||||
},
|
},
|
||||||
"www.youtube.com": {
|
"www.youtube.com": {
|
||||||
rootsSelector: `ytd-page-manager`,
|
rootsSelector: `ytd-page-manager`,
|
||||||
ignoreSelector: `aside, button, footer, form, header, pre, mark, nav, #ytp-caption-window-container`,
|
ignoreSelector: `aside, button, footer, form, header, pre, mark, nav, #player`,
|
||||||
transEndHook: `({ parentNode }) => {parentNode.parentElement.style.cssText += "-webkit-line-clamp: unset; max-height: none; height: auto;";}`,
|
transEndHook: `({ parentNode }) => {parentNode.parentElement.style.cssText += "-webkit-line-clamp: unset; max-height: none; height: auto;";}`,
|
||||||
textStyle: OPT_STYLE_DASHBOX,
|
textStyle: OPT_STYLE_DASHBOX,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ class ChromeTranslator {
|
|||||||
error.message &&
|
error.message &&
|
||||||
error.message.includes("Other generic failures occurred")
|
error.message.includes("Other generic failures occurred")
|
||||||
) {
|
) {
|
||||||
logger.error("Generic failure detected, resetting translator cache.");
|
logger.info("Generic failure detected, resetting translator cache.");
|
||||||
this.#translatorMap.clear();
|
this.#translatorMap.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -355,7 +355,8 @@ export const withTimeout = (task, timeout, timeoutMsg = "Task timed out") => {
|
|||||||
* @param {*} maxLength
|
* @param {*} maxLength
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export const truncateWords = (str, maxLength) => {
|
export const truncateWords = (str, maxLength = 200) => {
|
||||||
|
if (typeof str !== "string") return "";
|
||||||
if (str.length <= maxLength) return str;
|
if (str.length <= maxLength) return str;
|
||||||
const truncated = str.slice(0, maxLength);
|
const truncated = str.slice(0, maxLength);
|
||||||
return truncated.slice(0, truncated.lastIndexOf(" ")) + " …";
|
return truncated.slice(0, truncated.lastIndexOf(" ")) + " …";
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { logger } from "../libs/log.js";
|
import { logger } from "../libs/log.js";
|
||||||
|
import { truncateWords } from "../libs/utils.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @class BilingualSubtitleManager
|
* @class BilingualSubtitleManager
|
||||||
@@ -163,11 +164,11 @@ export class BilingualSubtitleManager {
|
|||||||
if (subtitle) {
|
if (subtitle) {
|
||||||
const p1 = document.createElement("p");
|
const p1 = document.createElement("p");
|
||||||
p1.style.cssText = this.#setting.originStyle;
|
p1.style.cssText = this.#setting.originStyle;
|
||||||
p1.textContent = subtitle.text;
|
p1.textContent = truncateWords(subtitle.text);
|
||||||
|
|
||||||
const p2 = document.createElement("p");
|
const p2 = document.createElement("p");
|
||||||
p2.style.cssText = this.#setting.originStyle;
|
p2.style.cssText = this.#setting.originStyle;
|
||||||
p2.textContent = subtitle.translation || "...";
|
p2.textContent = truncateWords(subtitle.translation) || "...";
|
||||||
|
|
||||||
if (this.#setting.isBilingual) {
|
if (this.#setting.isBilingual) {
|
||||||
this.#captionWindowEl.replaceChildren(p1, p2);
|
this.#captionWindowEl.replaceChildren(p1, p2);
|
||||||
@@ -207,16 +208,16 @@ export class BilingualSubtitleManager {
|
|||||||
async #translateAndStore(subtitle) {
|
async #translateAndStore(subtitle) {
|
||||||
subtitle.isTranslating = true;
|
subtitle.isTranslating = true;
|
||||||
try {
|
try {
|
||||||
const { toLang, apiSetting } = this.#setting;
|
const { fromLang, toLang, apiSetting } = this.#setting;
|
||||||
const [translatedText] = await this.#translationService({
|
const [translatedText] = await this.#translationService({
|
||||||
text: subtitle.text,
|
text: subtitle.text,
|
||||||
fromLang: "en",
|
fromLang,
|
||||||
toLang,
|
toLang,
|
||||||
apiSetting,
|
apiSetting,
|
||||||
});
|
});
|
||||||
subtitle.translation = translatedText;
|
subtitle.translation = translatedText;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error("Translation failed for:", subtitle.text, error);
|
logger.info("Translation failed for:", subtitle.text, error);
|
||||||
subtitle.translation = "[Translation failed]";
|
subtitle.translation = "[Translation failed]";
|
||||||
} finally {
|
} finally {
|
||||||
subtitle.isTranslating = false;
|
subtitle.isTranslating = false;
|
||||||
|
|||||||
@@ -1,11 +1,15 @@
|
|||||||
import { logger } from "../libs/log.js";
|
import { logger } from "../libs/log.js";
|
||||||
import { apiSubtitle, apiTranslate } from "../apis/index.js";
|
import { apiSubtitle, apiTranslate } from "../apis/index.js";
|
||||||
import { BilingualSubtitleManager } from "./BilingualSubtitleManager.js";
|
import { BilingualSubtitleManager } from "./BilingualSubtitleManager.js";
|
||||||
import { MSG_XHR_DATA_YOUTUBE, APP_NAME } from "../config";
|
import {
|
||||||
import { truncateWords, sleep } from "../libs/utils.js";
|
MSG_XHR_DATA_YOUTUBE,
|
||||||
|
APP_NAME,
|
||||||
|
OPT_LANGS_TO_CODE,
|
||||||
|
OPT_TRANS_MICROSOFT,
|
||||||
|
} from "../config";
|
||||||
|
import { sleep } from "../libs/utils.js";
|
||||||
import { createLogoSvg } from "../libs/svg.js";
|
import { createLogoSvg } from "../libs/svg.js";
|
||||||
import { randomBetween } from "../libs/utils.js";
|
import { randomBetween } from "../libs/utils.js";
|
||||||
import { fetchData } from "../libs/fetch.js";
|
|
||||||
|
|
||||||
const VIDEO_SELECT = "#container video";
|
const VIDEO_SELECT = "#container video";
|
||||||
const CONTORLS_SELECT = ".ytp-right-controls";
|
const CONTORLS_SELECT = ".ytp-right-controls";
|
||||||
@@ -19,6 +23,8 @@ class YouTubeCaptionProvider {
|
|||||||
#toggleButton = null;
|
#toggleButton = null;
|
||||||
#enabled = false;
|
#enabled = false;
|
||||||
#ytControls = null;
|
#ytControls = null;
|
||||||
|
#isBusy = false;
|
||||||
|
#fromLang = "auto";
|
||||||
|
|
||||||
constructor(setting = {}) {
|
constructor(setting = {}) {
|
||||||
this.#setting = setting;
|
this.#setting = setting;
|
||||||
@@ -29,7 +35,9 @@ class YouTubeCaptionProvider {
|
|||||||
if (event.source !== window) return;
|
if (event.source !== window) return;
|
||||||
if (event.data?.type === MSG_XHR_DATA_YOUTUBE) {
|
if (event.data?.type === MSG_XHR_DATA_YOUTUBE) {
|
||||||
const { url, response } = event.data;
|
const { url, response } = event.data;
|
||||||
this.#handleInterceptedRequest(url, response);
|
if (url && response) {
|
||||||
|
this.#handleInterceptedRequest(url, response);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
document.body.addEventListener("yt-navigate-finish", () => {
|
document.body.addEventListener("yt-navigate-finish", () => {
|
||||||
@@ -115,23 +123,33 @@ class YouTubeCaptionProvider {
|
|||||||
this.#ytControls.before(kissControls);
|
this.#ytControls.before(kissControls);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#isSameLang(lang1, lang2) {
|
||||||
|
return lang1.slice(0, 2) === lang2.slice(0, 2);
|
||||||
|
}
|
||||||
|
|
||||||
// todo: 优化逻辑
|
// todo: 优化逻辑
|
||||||
#findCaptionTrack(captionTracks) {
|
#findCaptionTrack(captionTracks) {
|
||||||
if (!captionTracks.length) {
|
if (!captionTracks?.length) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
let captionTrack = captionTracks.find((item) =>
|
let captionTrack = null;
|
||||||
item.vssId?.startsWith(".en")
|
|
||||||
);
|
const asrTrack = captionTracks.find((item) => item.kind === "asr");
|
||||||
if (!captionTrack) {
|
if (asrTrack) {
|
||||||
captionTrack = captionTracks.find((item) =>
|
captionTrack = captionTracks.find(
|
||||||
item.vssId?.startsWith("a.en")
|
(item) =>
|
||||||
|
item.kind !== "asr" &&
|
||||||
|
this.#isSameLang(item.languageCode, asrTrack.languageCode)
|
||||||
);
|
);
|
||||||
|
if (!captionTrack) {
|
||||||
|
captionTrack = asrTrack;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
captionTrack = captionTracks[0];
|
if (!captionTrack) {
|
||||||
captionTrack.baseUrl += "&tlang=en";
|
captionTrack = captionTracks.pop();
|
||||||
|
}
|
||||||
|
|
||||||
return captionTrack;
|
return captionTrack;
|
||||||
}
|
}
|
||||||
@@ -143,46 +161,49 @@ class YouTubeCaptionProvider {
|
|||||||
const match = html.match(/ytInitialPlayerResponse\s*=\s*(\{.*?\});/s);
|
const match = html.match(/ytInitialPlayerResponse\s*=\s*(\{.*?\});/s);
|
||||||
if (!match) return [];
|
if (!match) return [];
|
||||||
const data = JSON.parse(match[1]);
|
const data = JSON.parse(match[1]);
|
||||||
return (
|
return data.captions?.playerCaptionsTracklistRenderer?.captionTracks;
|
||||||
data.captions?.playerCaptionsTracklistRenderer?.captionTracks || []
|
|
||||||
);
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.info("Youtube Provider: get captionTracks", err);
|
logger.info("Youtube Provider: get captionTracks", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async #getSubtitleEvents(captionTrack, potUrl, responseText) {
|
async #getSubtitleEvents(capUrl, potUrl, responseText) {
|
||||||
if (potUrl.searchParams.get("lang") === captionTrack.languageCode) {
|
if (
|
||||||
|
!potUrl.searchParams.get("tlang") &&
|
||||||
|
potUrl.searchParams.get("kind") === capUrl.searchParams.get("kind") &&
|
||||||
|
this.#isSameLang(
|
||||||
|
potUrl.searchParams.get("lang"),
|
||||||
|
capUrl.searchParams.get("lang")
|
||||||
|
)
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
const json = JSON.parse(responseText);
|
const json = JSON.parse(responseText);
|
||||||
return json;
|
return json?.events;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error("Youtube Provider: parse responseText", err);
|
logger.info("Youtube Provider: parse responseText", err);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const baseUrl = new URL(captionTrack.baseUrl);
|
potUrl.searchParams.delete("tlang");
|
||||||
potUrl.searchParams.set("lang", baseUrl.searchParams.get("lang"));
|
potUrl.searchParams.set("lang", capUrl.searchParams.get("lang"));
|
||||||
potUrl.searchParams.set("fmt", "json3");
|
potUrl.searchParams.set("fmt", "json3");
|
||||||
if (baseUrl.searchParams.get("kind")) {
|
if (capUrl.searchParams.get("kind")) {
|
||||||
potUrl.searchParams.set("kind", baseUrl.searchParams.get("kind"));
|
potUrl.searchParams.set("kind", capUrl.searchParams.get("kind"));
|
||||||
} else {
|
} else {
|
||||||
potUrl.searchParams.delete("kind");
|
potUrl.searchParams.delete("kind");
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await fetchData(potUrl, null, { useCache: true });
|
const res = await fetch(potUrl.href);
|
||||||
if (res.ok) {
|
if (res?.ok) {
|
||||||
const json = await res.json();
|
const json = await res.json();
|
||||||
return json;
|
return json?.events;
|
||||||
}
|
}
|
||||||
logger.error(
|
logger.info(`Youtube Provider: Failed to fetch subtitles: ${res.status}`);
|
||||||
`Youtube Provider: Failed to fetch subtitles: ${res.status}`
|
|
||||||
);
|
|
||||||
return null;
|
return null;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error("Youtube Provider: fetching subtitles error", error);
|
logger.info("Youtube Provider: fetching subtitles error", error);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -211,19 +232,21 @@ class YouTubeCaptionProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async #handleInterceptedRequest(url, responseText) {
|
async #handleInterceptedRequest(url, responseText) {
|
||||||
try {
|
if (this.#isBusy) {
|
||||||
if (!responseText) {
|
logger.info("Youtube Provider is busy...");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
this.#isBusy = true; // todo: 提示用户等待中
|
||||||
|
|
||||||
|
try {
|
||||||
const videoId = this.#getVideoId();
|
const videoId = this.#getVideoId();
|
||||||
if (!videoId) {
|
if (!videoId) {
|
||||||
logger.info("Youtube Provider: can't get doc videoId");
|
logger.info("Youtube Provider: videoId not found.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (videoId === this.#videoId) {
|
if (videoId === this.#videoId) {
|
||||||
logger.info("Youtube Provider: skip fetched timedtext.");
|
logger.info("Youtube Provider: videoId already processed.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -240,13 +263,13 @@ class YouTubeCaptionProvider {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const subtitleEvents = await this.#getSubtitleEvents(
|
const capUrl = new URL(captionTrack.baseUrl);
|
||||||
captionTrack,
|
const events = await this.#getSubtitleEvents(
|
||||||
|
capUrl,
|
||||||
potUrl,
|
potUrl,
|
||||||
responseText
|
responseText
|
||||||
);
|
);
|
||||||
const events = subtitleEvents?.events;
|
if (!events?.length) {
|
||||||
if (!Array.isArray(events)) {
|
|
||||||
logger.info("Youtube Provider: SubtitleEvents not got.");
|
logger.info("Youtube Provider: SubtitleEvents not got.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -254,32 +277,38 @@ class YouTubeCaptionProvider {
|
|||||||
let subtitles = [];
|
let subtitles = [];
|
||||||
|
|
||||||
const { segApiSetting, toLang } = this.#setting;
|
const { segApiSetting, toLang } = this.#setting;
|
||||||
if (captionTrack.kind === "asr" && segApiSetting) {
|
const lang = potUrl.searchParams.get("lang");
|
||||||
// todo: 提示用户等待中
|
const fromLang = OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang) || lang;
|
||||||
|
if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) {
|
||||||
subtitles = await this.#aiSegment({
|
subtitles = await this.#aiSegment({
|
||||||
videoId,
|
videoId,
|
||||||
events,
|
events,
|
||||||
|
fromLang,
|
||||||
toLang,
|
toLang,
|
||||||
segApiSetting,
|
segApiSetting,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (subtitles.length === 0) {
|
|
||||||
subtitles = this.#formatSubtitles(events);
|
if (!subtitles?.length) {
|
||||||
|
subtitles = this.#formatSubtitles(events, fromLang);
|
||||||
}
|
}
|
||||||
if (subtitles.length === 0) {
|
if (!subtitles?.length) {
|
||||||
logger.info("Youtube Provider: No subtitles after format.");
|
logger.info("Youtube Provider: No subtitles after format.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.#onCaptionsReady(videoId, subtitles);
|
this.#onCaptionsReady({ videoId, subtitles, fromLang });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warn("Youtube Provider: unknow error", error);
|
logger.warn("Youtube Provider: unknow error", error);
|
||||||
|
} finally {
|
||||||
|
this.#isBusy = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#onCaptionsReady(videoId, subtitles) {
|
#onCaptionsReady({ videoId, subtitles, fromLang }) {
|
||||||
this.#subtitles = subtitles;
|
this.#subtitles = subtitles;
|
||||||
this.#videoId = videoId;
|
this.#videoId = videoId;
|
||||||
|
this.#fromLang = fromLang;
|
||||||
|
|
||||||
if (this.#toggleButton) {
|
if (this.#toggleButton) {
|
||||||
this.#toggleButton.style.opacity = subtitles.length ? "1" : "0.5";
|
this.#toggleButton.style.opacity = subtitles.length ? "1" : "0.5";
|
||||||
@@ -318,7 +347,7 @@ class YouTubeCaptionProvider {
|
|||||||
videoEl,
|
videoEl,
|
||||||
formattedSubtitles: this.#subtitles,
|
formattedSubtitles: this.#subtitles,
|
||||||
translationService: apiTranslate,
|
translationService: apiTranslate,
|
||||||
setting: this.#setting,
|
setting: { ...this.#setting, fromLang: this.#fromLang },
|
||||||
});
|
});
|
||||||
this.#managerInstance.start();
|
this.#managerInstance.start();
|
||||||
|
|
||||||
@@ -344,74 +373,41 @@ class YouTubeCaptionProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#formatSubtitles(events) {
|
#formatSubtitles(events, lang) {
|
||||||
if (!Array.isArray(events)) return [];
|
if (!events?.length) return [];
|
||||||
|
|
||||||
const lines = [];
|
const noSpaceLanguages = [
|
||||||
let currentLine = null;
|
"zh", // 中文
|
||||||
|
"ja", // 日文
|
||||||
|
"ko", // 韩文(现代用空格,但结构上仍可连写)
|
||||||
|
"th", // 泰文
|
||||||
|
"lo", // 老挝文
|
||||||
|
"km", // 高棉文
|
||||||
|
"my", // 缅文
|
||||||
|
];
|
||||||
|
|
||||||
events.forEach((event) => {
|
if (noSpaceLanguages.some((l) => lang?.startsWith(l))) {
|
||||||
(event.segs ?? []).forEach((seg, segIndex) => {
|
return events
|
||||||
const text = seg.utf8 ?? "";
|
.map(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => ({
|
||||||
const trimmedText = text.trim();
|
text: segs
|
||||||
const segmentStartTime = event.tStartMs + (seg.tOffsetMs ?? 0);
|
.map(({ utf8 = "" }) => utf8)
|
||||||
|
.join("")
|
||||||
if (currentLine) {
|
?.trim(),
|
||||||
if (currentLine.text.endsWith(",") && !text.startsWith(" ")) {
|
start: tStartMs,
|
||||||
currentLine.text += " ";
|
end: tStartMs + dDurationMs,
|
||||||
}
|
}))
|
||||||
currentLine.text += text.replaceAll("\n", " ");
|
.filter((item) => item.text);
|
||||||
} else if (trimmedText) {
|
|
||||||
if (lines.length > 0) {
|
|
||||||
const prevLine = lines[lines.length - 1];
|
|
||||||
if (!prevLine.end) {
|
|
||||||
prevLine.end = segmentStartTime;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
currentLine = {
|
|
||||||
text: text.replaceAll("\n", " "),
|
|
||||||
start: segmentStartTime,
|
|
||||||
end: 0,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const isEndOfSentence = /[.?!\]]$/.test(trimmedText);
|
|
||||||
const isEnoughLong =
|
|
||||||
(currentLine?.text.length ?? 0) > 50 && /[,]\s*$/.test(trimmedText);
|
|
||||||
if (currentLine && trimmedText && (isEndOfSentence || isEnoughLong)) {
|
|
||||||
const isLastSegmentInEvent =
|
|
||||||
segIndex === (event.segs?.length ?? 0) - 1;
|
|
||||||
if (isLastSegmentInEvent && event.dDurationMs) {
|
|
||||||
currentLine.end = event.tStartMs + event.dDurationMs;
|
|
||||||
}
|
|
||||||
lines.push(currentLine);
|
|
||||||
currentLine = null;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
if (lines.length > 0) {
|
|
||||||
const lastLine = lines[lines.length - 1];
|
|
||||||
if (!lastLine.end) {
|
|
||||||
const lastMeaningfulEvent = [...events]
|
|
||||||
.reverse()
|
|
||||||
.find((e) => e.dDurationMs);
|
|
||||||
if (lastMeaningfulEvent) {
|
|
||||||
lastLine.end =
|
|
||||||
lastMeaningfulEvent.tStartMs + lastMeaningfulEvent.dDurationMs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let lines = this.#processSubtitles({ events });
|
||||||
const isPoor = this.#isQualityPoor(lines);
|
const isPoor = this.#isQualityPoor(lines);
|
||||||
if (isPoor) {
|
if (isPoor) {
|
||||||
return this.#processSubtitles(events);
|
lines = this.#processSubtitles({ events, usePause: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
return lines.map((item) => ({
|
return lines.map((item) => ({
|
||||||
...item,
|
...item,
|
||||||
duration: Math.max(0, item.end - item.start),
|
duration: Math.max(0, item.end - item.start),
|
||||||
text: truncateWords(item.text.trim().replace(/\s+/g, " "), 250),
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -423,7 +419,12 @@ class YouTubeCaptionProvider {
|
|||||||
return longLinesCount / lines.length > percentageThreshold;
|
return longLinesCount / lines.length > percentageThreshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
#processSubtitles(events, { timeout = 1500, maxWords = 15 } = {}) {
|
#processSubtitles({
|
||||||
|
events,
|
||||||
|
usePause = false,
|
||||||
|
timeout = 1500,
|
||||||
|
maxWords = 15,
|
||||||
|
} = {}) {
|
||||||
const groupedPauseWords = {
|
const groupedPauseWords = {
|
||||||
1: new Set([
|
1: new Set([
|
||||||
"actually",
|
"actually",
|
||||||
@@ -526,44 +527,45 @@ class YouTubeCaptionProvider {
|
|||||||
bufferWordCount = 0;
|
bufferWordCount = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
events?.forEach((event) => {
|
events.forEach(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => {
|
||||||
event.segs?.forEach((seg, j) => {
|
segs.forEach(({ utf8 = "", tOffsetMs = 0 }, j) => {
|
||||||
const text = seg.utf8?.trim() || "";
|
const text = utf8?.trim().replace(/\s+/g, " ") || "";
|
||||||
if (!text) return;
|
if (!text) return;
|
||||||
|
|
||||||
const start = event.tStartMs + (seg.tOffsetMs ?? 0);
|
const start = tStartMs + tOffsetMs;
|
||||||
const lastSegment = currentBuffer[currentBuffer.length - 1];
|
const lastSegment = currentBuffer[currentBuffer.length - 1];
|
||||||
|
|
||||||
if (lastSegment) {
|
if (lastSegment) {
|
||||||
if (!lastSegment.end) {
|
if (!lastSegment.end || lastSegment.end > start) {
|
||||||
lastSegment.end = start;
|
lastSegment.end = start;
|
||||||
}
|
}
|
||||||
|
|
||||||
const isEndOfSentence = /[.?!\]]$/.test(lastSegment.text);
|
const isEndOfSentence = /[.?!…\])]$/.test(lastSegment.text);
|
||||||
|
const isPauseOfSentence = /[,]$/.test(lastSegment.text);
|
||||||
const isTimeout = start - lastSegment.end > timeout;
|
const isTimeout = start - lastSegment.end > timeout;
|
||||||
const isWordLimitExceeded = bufferWordCount >= maxWords;
|
const isWordLimitExceeded =
|
||||||
const startsWithPauseWord = groupedPauseWords["1"].has(
|
(usePause || isPauseOfSentence) && bufferWordCount >= maxWords;
|
||||||
text.toLowerCase().split(" ")[0]
|
|
||||||
);
|
|
||||||
|
|
||||||
// todo: 考虑连词开头
|
const startsWithSign = /^[[(♪]/.test(text);
|
||||||
const isNewClause =
|
const startsWithPauseWord =
|
||||||
(startsWithPauseWord && currentBuffer.length > 1) ||
|
usePause &&
|
||||||
text.startsWith("[");
|
groupedPauseWords["1"].has(text.toLowerCase().split(" ")[0]) && // todo: 考虑连词开头
|
||||||
|
currentBuffer.length > 1;
|
||||||
|
|
||||||
if (
|
if (
|
||||||
isEndOfSentence ||
|
isEndOfSentence ||
|
||||||
isTimeout ||
|
isTimeout ||
|
||||||
isWordLimitExceeded ||
|
isWordLimitExceeded ||
|
||||||
isNewClause
|
startsWithSign ||
|
||||||
|
startsWithPauseWord
|
||||||
) {
|
) {
|
||||||
flushBuffer();
|
flushBuffer();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const currentSegment = { text, start };
|
const currentSegment = { text, start };
|
||||||
if (j === event.segs.length - 1) {
|
if (j === segs.length - 1) {
|
||||||
currentSegment.end = event.tStartMs + event.dDurationMs;
|
currentSegment.end = tStartMs + dDurationMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentBuffer.push(currentSegment);
|
currentBuffer.push(currentSegment);
|
||||||
@@ -573,10 +575,7 @@ class YouTubeCaptionProvider {
|
|||||||
|
|
||||||
flushBuffer();
|
flushBuffer();
|
||||||
|
|
||||||
return sentences.map((item) => ({
|
return sentences;
|
||||||
...item,
|
|
||||||
duration: item.end - item.start,
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user