fix: subtitle

This commit is contained in:
Gabe
2025-10-10 21:17:51 +08:00
parent 3844d2eb75
commit 001f04a9ee
6 changed files with 133 additions and 23 deletions

View File

@@ -111,6 +111,11 @@ class YouTubeCaptionProvider {
kissControls.appendChild(toggleButton);
toggleButton.onclick = () => {
if (this.#isBusy) {
logger.info(`Youtube Provider: It's budy now...`);
return;
}
if (!this.#enabled) {
logger.info(`Youtube Provider: Feature toggled ON.`);
this.#startManager();
@@ -283,9 +288,10 @@ class YouTubeCaptionProvider {
OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang.slice(0, 2)) ||
"auto";
if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) {
// todo: 切分多次发送接受以适应接口处理能力
subtitles = await this.#aiSegment({
videoId,
events,
events: this.#flatEvents(events),
fromLang,
toLang,
segApiSetting,
@@ -408,10 +414,7 @@ class YouTubeCaptionProvider {
lines = this.#processSubtitles({ events, usePause: true });
}
return lines.map((item) => ({
...item,
duration: Math.max(0, item.end - item.start),
}));
return lines;
}
#isQualityPoor(lines, lengthThreshold = 250, percentageThreshold = 0.1) {
@@ -580,6 +583,39 @@ class YouTubeCaptionProvider {
return sentences;
}
#flatEvents(events = []) {
const segments = [];
let buffer = null;
events.forEach(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => {
segs.forEach(({ utf8 = "", tOffsetMs = 0 }, j) => {
const text = utf8.trim().replace(/\s+/g, " ");
const start = tStartMs + tOffsetMs;
if (buffer) {
if (!buffer.end || buffer.end > start) {
buffer.end = start;
}
segments.push(buffer);
buffer = null;
}
buffer = {
text,
start,
};
if (j === segs.length - 1) {
buffer.end = tStartMs + dDurationMs;
}
});
});
segments.push(buffer);
return segments.filter((item) => item.text);
}
}
export const YouTubeInitializer = (() => {

44
src/subtitle/vtt.js Normal file
View File

@@ -0,0 +1,44 @@
/**
 * Converts a string holding a base-10 integer millisecond count to a number.
 *
 * Surrounding whitespace is ignored and anything after the leading integer
 * is discarded by the parse (e.g. "12.9" yields 12).
 *
 * @param {string} msString Text to parse.
 * @returns {number} The parsed integer, or 0 when the text does not start
 *   with an integer.
 */
function millisecondsStringToNumber(msString) {
  const parsed = Number.parseInt(msString.trim(), 10);
  return Number.isNaN(parsed) ? 0 : parsed;
}
/**
 * Parses VTT-style text whose cues carry an original line followed by an
 * optional translation line.
 *
 * A cue is a run of lines separated from its neighbours by one or more blank
 * (or whitespace-only) lines. Inside a cue, the first line containing "-->"
 * is the timing line; the line after it is the original text and the one
 * after that, if present, the translation. Blocks without a timing line
 * (such as a "WEBVTT" header) and cues without any text line are skipped.
 *
 * Both timestamps are converted with `millisecondsStringToNumber`, i.e. they
 * are read as plain integer milliseconds rather than "hh:mm:ss.ttt".
 *
 * @param {string} vttText Raw VTT text; LF, CRLF and CR line endings and a
 *   leading BOM are accepted.
 * @returns {Array<{start: number, end: number, text: string, translation: string}>}
 *   One entry per parsed cue, in document order; empty for non-string or
 *   empty input.
 */
export function parseBilingualVtt(vttText) {
  if (typeof vttText !== "string") {
    return [];
  }

  // Normalise line endings first: with CRLF input a plain "\n\n" split would
  // never match and the whole file would collapse into a single cue.
  const cleanText = vttText
    .replace(/^\uFEFF/, "")
    .replace(/\r\n?/g, "\n")
    .trim();

  const result = [];

  for (const cue of cleanText.split(/\n(?:[ \t]*\n)+/)) {
    const lines = cue.split("\n");
    const timestampLineIndex = lines.findIndex((line) => line.includes("-->"));
    if (timestampLineIndex === -1) continue;

    // Spaces or tabs may surround the arrow on the timing line.
    const [startTimeString, endTimeString] =
      lines[timestampLineIndex].split(/[ \t]+-->[ \t]+/);
    const textLines = lines.slice(timestampLineIndex + 1);
    if (!startTimeString || !endTimeString || textLines.length === 0) continue;

    result.push({
      start: millisecondsStringToNumber(startTimeString),
      end: millisecondsStringToNumber(endTimeString),
      text: textLines[0].trim(),
      translation: (textLines[1] || "").trim(),
    });
  }

  return result;
}