fix: parse subtitle time string

This commit is contained in:
Gabe
2025-11-01 22:45:08 +08:00
parent 6c1a4e851c
commit 34370345cd

View File

@@ -1,25 +1,80 @@
function millisecondsStringToNumber(msString) { /**
const cleanString = msString.trim(); * 将多种格式的VTT时间戳字符串转换为毫秒数。
const milliseconds = parseInt(cleanString, 10); * 兼容以下格式:
* - mmm (e.g., "291040")
* - MM:SS (e.g., "00:03")
* - HH:MM:SS (e.g., "01:02:03")
* - MM:SS.mmm (e.g., "00:07.980")
* - HH:MM:SS.mmm (e.g., "01:02:03.456")
* - MM:SS:mmm (e.g., "00:07:536")
*
* @param {string} timestamp - VTT时间戳字符串.
* @returns {number} - 转换后的总毫秒数.
*/
function parseTimestampToMilliseconds(timestamp) {
const ts = timestamp.trim();
if (isNaN(milliseconds)) { if (!ts.includes(":") && !ts.includes(".")) {
return 0; return parseInt(ts, 10) || 0;
} }
return milliseconds; let timePart = ts;
let msPart = "0";
if (ts.includes(".")) {
const parts = ts.split(".");
timePart = parts[0];
msPart = parts[1];
} else {
const colonParts = ts.split(":");
if (
colonParts.length > 1 &&
colonParts[colonParts.length - 1].length === 3
) {
msPart = colonParts.pop();
timePart = colonParts.join(":");
}
} }
const timeComponents = timePart.split(":").map((p) => parseInt(p, 10) || 0);
let hours = 0,
minutes = 0,
seconds = 0;
if (timeComponents.length === 3) {
[hours, minutes, seconds] = timeComponents;
} else if (timeComponents.length === 2) {
[minutes, seconds] = timeComponents;
} else if (timeComponents.length === 1) {
[seconds] = timeComponents;
}
const milliseconds = parseInt(msPart.padEnd(3, "0"), 10) || 0;
return (hours * 3600 + minutes * 60 + seconds) * 1000 + milliseconds;
}
/**
* 解析包含双语字幕的VTT文件内容。
* @param {string} vttText - VTT文件的文本内容。
* @returns {Array<Object>} 一个包含字幕对象的数组,每个对象包含 start, end, text, 和 translation.
*/
export function parseBilingualVtt(vttText) { export function parseBilingualVtt(vttText) {
const cleanText = vttText.replace(/^\uFEFF/, "").trim(); const cleanText = vttText.replace(/^\uFEFF/, "").trim();
const cues = cleanText.split(/\n\n+/); if (!cleanText) {
return [];
}
const cues = cleanText.split(/\n\n+/);
const result = []; const result = [];
for (const cue of cues) { const startIndex = cues[0].toUpperCase().includes("WEBVTT") ? 1 : 0;
for (let i = startIndex; i < cues.length; i++) {
const cue = cues[i];
if (!cue.includes("-->")) continue; if (!cue.includes("-->")) continue;
const lines = cue.split("\n"); const lines = cue.split("\n");
const timestampLineIndex = lines.findIndex((line) => line.includes("-->")); const timestampLineIndex = lines.findIndex((line) => line.includes("-->"));
if (timestampLineIndex === -1) continue; if (timestampLineIndex === -1) continue;
@@ -28,12 +83,12 @@ export function parseBilingualVtt(vttText) {
const textLines = lines.slice(timestampLineIndex + 1); const textLines = lines.slice(timestampLineIndex + 1);
if (startTimeString && endTimeString && textLines.length > 0) { if (startTimeString && endTimeString && textLines.length > 0) {
const originalText = textLines[0].trim(); const originalText = textLines[0]?.trim() || "";
const translatedText = (textLines[1] || "").trim(); const translatedText = textLines[1]?.trim() || "";
result.push({ result.push({
start: millisecondsStringToNumber(startTimeString), start: parseTimestampToMilliseconds(startTimeString),
end: millisecondsStringToNumber(endTimeString), end: parseTimestampToMilliseconds(endTimeString),
text: originalText, text: originalText,
translation: translatedText, translation: translatedText,
}); });