feat: 支持 instagram 聊天记录导入

2026-01-24 09:23:07 +08:00 · 2026-01-14 00:35:05 +08:00
parent 176a467779
commit e925c4464f
7 changed files with 375 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ ChatLab 是一个免费、开源、本地化的，专注于分析聊天记录的

 我们拒绝将你的隐私上传云端，而是把强大的分析能力直接塞进你的电脑。

-目前已支持：微信、QQ、WhatsApp、Discord 的聊天记录分析，即将支持：iMessage、LINE。
+目前已支持：微信、QQ、WhatsApp、Discord、Instagram 的聊天记录分析，即将支持：iMessage、LINE。

 项目目前还处于早期迭代阶段，因此还有很多缺陷和未完成功能。若您遇到了任何问题，欢迎随时反馈。

--- a/README_en.md
+++ b/README_en.md
@@ -6,7 +6,7 @@ ChatLab is a free, open-source, and local-first application dedicated to analyzi

 We refuse to upload your privacy to the cloud; instead, we bring powerful analytics directly to your computer.

-Currently supported: Chat record analysis for **WeChat, QQ, WhatsApp and Discord**. Upcoming support: **iMessage, and LINE**.
+Currently supported: Chat record analysis for **WeChat, QQ, WhatsApp, Instagram and Discord**. Upcoming support: **iMessage and LINE**.

 The project is still in early iteration, so there are many bugs and unfinished features. If you encounter any issues, feel free to provide feedback.

--- a/electron/main/parser/formats/chatlab.ts
+++ b/electron/main/parser/formats/chatlab.ts
@@ -50,8 +50,8 @@ export const feature: FormatFeature = {
  extensions: ['.json'],
  signatures: {
    // 只要求 chatlab 字段在文件头（8KB），其他字段在解析时验证
-    // 这样可以正确识别格式化后的大文件（meta/messages 可能超出 8KB）
-    head: [/"chatlab"\s*:\s*\{/, /"version"\s*:\s*"/],
+    // 移除过于宽松的 version 签名，只保留 chatlab 对象签名
+    head: [/"chatlab"\s*:\s*\{/],
    requiredFields: ['chatlab'],
  },
 }
--- a/electron/main/parser/formats/index.ts
+++ b/electron/main/parser/formats/index.ts
@@ -14,6 +14,7 @@ import yccccccyEchotrace from './ycccccccy-echotrace'
 import tyrrrzDiscordExporter from './tyrrrz-discord-exporter'
 import whatsappNativeTxt from './whatsapp-native-txt'
 import qqNativeTxt from './qq-native-txt'
+import instagramNative from './instagram-native'

 /**
 * 所有支持的格式模块（按优先级排序）
@@ -25,7 +26,8 @@ export const formats: FormatModule[] = [
  shuakamiQqExporter, // 优先级 10 - shuakami/qq-chat-exporter
  yccccccyEchotrace, // 优先级 15 - ycccccccy/echotrace
  tyrrrzDiscordExporter, // 优先级 20 - Tyrrrz/DiscordChatExporter
-  whatsappNativeTxt, // 优先级 25 - WhatsApp 官方导出 TXT
+  instagramNative, // 优先级 25 - Instagram 官方导出
+  whatsappNativeTxt, // 优先级 26 - WhatsApp 官方导出 TXT
  qqNativeTxt, // 优先级 30 - QQ 官方导出 TXT
 ]

@@ -37,6 +39,7 @@ export {
  shuakamiQqExporterChunked,
  yccccccyEchotrace,
  tyrrrzDiscordExporter,
-  qqNativeTxt,
+  instagramNative,
  whatsappNativeTxt,
+  qqNativeTxt,
 }
--- a/electron/main/parser/formats/instagram-native.ts
+++ b/electron/main/parser/formats/instagram-native.ts
@@ -0,0 +1,364 @@
+/**
+ * Instagram 官方导出格式解析器
+ * 适配：Instagram 账号数据下载功能导出的 JSON 文件
+ *
+ * 文件结构：
+ * - participants: 参与者数组 [{ name: string }]
+ * - messages: 消息数组（逆序，最新在前）
+ * - title: 对话标题（群名或对方用户名）
+ * - thread_path: 线程路径，如 "inbox/xxx_123456"
+ * - joinable_mode: 仅群聊有，包含入群链接
+ *
+ * 特殊处理：
+ * - 编码问题：Instagram 将 UTF-8 字节按 Latin-1 编码后存储，需要解码
+ * - 消息逆序：需要反转为正序
+ * - 无用户 ID：使用用户名作为 platformId
+ */
+
+import * as fs from 'fs'
+import * as path from 'path'
+import { KNOWN_PLATFORMS, ChatType, MessageType } from '../../../../src/types/base'
+import type {
+  FormatFeature,
+  FormatModule,
+  Parser,
+  ParseOptions,
+  ParseEvent,
+  ParsedMeta,
+  ParsedMember,
+  ParsedMessage,
+} from '../types'
+import { getFileSize, createProgress } from '../utils'
+
+// ==================== 特征定义 ====================
+
+export const feature: FormatFeature = { // descriptor of the Instagram official-export format
+  id: 'instagram-native',
+  name: 'Instagram 官方导出',
+  platform: KNOWN_PLATFORMS.INSTAGRAM,
+  priority: 25, // NOTE(review): formats/index.ts relabels WhatsApp TXT as 26 — confirm that module's own priority was changed too
+  extensions: ['.json'],
+  signatures: {
+    // Use an Instagram-specific field as the signature (it can be matched within the file head).
+    // is_geoblocked_for_viewer is a field specific to Instagram messages.
+    requiredFields: ['participants', 'messages'],
+    head: [/"is_geoblocked_for_viewer"\s*:/],
+  },
+}
+
+// ==================== 类型定义 ====================
+
+interface InstagramParticipant { // one entry of InstagramData.participants
+  name: string // raw name from the export; decoded with decodeInstagramText and used as the member's platformId
+}
+
+interface InstagramPhoto { // one entry of InstagramMessage.photos
+  uri: string // media reference; the first photo's uri is shown in the "[图片] …" placeholder
+  creation_timestamp?: number // not read by this parser (unit not shown here — presumably seconds, confirm)
+}
+
+interface InstagramVideo { // one entry of InstagramMessage.videos
+  uri: string // media reference; the first video's uri is shown in the "[视频] …" placeholder
+  creation_timestamp?: number // not read by this parser (unit not shown here — presumably seconds, confirm)
+}
+
+interface InstagramAudio { // one entry of InstagramMessage.audio_files
+  uri: string // media reference; the first file's uri is shown in the "[语音] …" placeholder
+  creation_timestamp?: number // not read by this parser (unit not shown here — presumably seconds, confirm)
+}
+
+interface InstagramShare { // value of InstagramMessage.share
+  link?: string // shared URL; a link containing 'giphy.com' is treated as a GIF/emoji, anything else as a link
+  share_text?: string // not read by this parser
+  original_content_owner?: string // not read by this parser
+}
+
+interface InstagramReaction { // one entry of InstagramMessage.reactions; reactions are not read by this parser
+  reaction: string // presumably the reaction emoji in the same mis-encoded form as message text — confirm
+  actor: string // presumably the name of the user who reacted — confirm
+}
+
+interface InstagramMessage { // one entry of InstagramData.messages
+  sender_name: string // raw sender name; decoded and used as both senderPlatformId and senderAccountName
+  timestamp_ms: number // send time in milliseconds; the parser converts it to whole seconds
+  content?: string // raw text body; decoded with decodeInstagramText before being emitted
+  photos?: InstagramPhoto[] // non-empty => IMAGE, unless the text is classified as a system notice
+  videos?: InstagramVideo[] // non-empty => VIDEO, checked after photos
+  audio_files?: InstagramAudio[] // non-empty => VOICE, checked after videos
+  share?: InstagramShare // present => EMOJI for giphy links, otherwise LINK
+  reactions?: InstagramReaction[] // not read by this parser
+  is_geoblocked_for_viewer?: boolean // value not read while parsing; its key is the head signature used for format detection
+  is_unsent_image_by_messenger_kid_parent?: boolean // not read by this parser
+}
+
+interface InstagramData { // top-level shape of one exported conversation JSON file
+  participants: InstagramParticipant[] // conversation members; more than two means a group chat
+  messages: InstagramMessage[] // stored newest first; the parser emits them in reverse (chronological) order
+  title: string // conversation title (group name or the other user's name); used as the chat name
+  is_still_participant?: boolean // not read by this parser
+  thread_path: string // thread path such as "inbox/xxx_123456"; not read by this parser
+  magic_words?: unknown[] // not read by this parser
+  joinable_mode?: { // only present for group chats; its presence alone marks the chat as a group
+    mode: number // not read by this parser
+    link: string // group invite link; not read by this parser
+  }
+}
+
+// ==================== 辅助函数 ====================
+
+/**
+ * Repairs text from an Instagram export, which stores every UTF-8 byte as a
+ * separate Latin-1 code unit (for example "é" is written as "Ã©").
+ *
+ * The input is returned unchanged whenever it cannot be such mojibake: it
+ * contains a code unit above 0xFF, or its code units do not form valid UTF-8.
+ * Without these checks, text that is already correct would be truncated
+ * byte-wise or replaced by U+FFFD, because a non-fatal TextDecoder never throws.
+ *
+ * @param text - Raw string as read from the export JSON.
+ * @returns The decoded string, or `text` itself when decoding does not apply.
+ */
+function decodeInstagramText(text: string): string {
+  try {
+    // Collect every code unit as one byte of the original UTF-8 sequence.
+    const bytes = new Uint8Array(text.length)
+    for (let i = 0; i < text.length; i++) {
+      const code = text.charCodeAt(i)
+      // A code unit outside Latin-1 cannot be a single byte: this is real text already.
+      if (code > 0xff) return text
+      bytes[i] = code
+    }
+    // `fatal: true` makes invalid UTF-8 throw instead of producing U+FFFD.
+    return new TextDecoder('utf-8', { fatal: true }).decode(bytes)
+  } catch {
+    return text // not decodable (or not a string at all): keep the original value
+  }
+}
+
+/**
+ * Derives a fallback conversation name from a file path.
+ *
+ * @param filePath - Path of the imported file.
+ * @returns The file name without a trailing ".json" (any letter case), or
+ *          '未知对话' when nothing is left after stripping.
+ */
+function extractNameFromFilePath(filePath: string): string {
+  const fileName = path.basename(filePath)
+  const stem = fileName.replace(/\.json$/i, '')
+  if (stem) {
+    return stem
+  }
+  return '未知对话'
+}
+
+/**
+ * Tells whether a message body looks like an Instagram system notice.
+ *
+ * The check is a case-sensitive substring match against a fixed list of
+ * English phrases; it succeeds as soon as any phrase occurs anywhere in the text.
+ * NOTE(review): short phrases such as 'added' or 'removed' also match ordinary
+ * user messages — confirm whether that is acceptable.
+ *
+ * @param content - Message text to inspect.
+ * @returns true when at least one known phrase is contained in `content`.
+ */
+function isSystemMessage(content: string): boolean {
+  const phrases = [
+    'You created the group',
+    'created the group',
+    'added',
+    'to the group',
+    'left the group',
+    'removed',
+    'named the group',
+    'changed the group photo',
+    'Reacted',
+    'sent an attachment',
+    'liked a message',
+    'changed the theme',
+    'set the nickname',
+  ]
+  for (const phrase of phrases) {
+    if (content.includes(phrase)) {
+      return true
+    }
+  }
+  return false
+}
+
+/**
+ * Classifies an exported Instagram message.
+ *
+ * Precedence: system notice (by text) > photo > video > audio > share
+ * (giphy links count as emoji, everything else as link) > plain text > other.
+ *
+ * @param msg - Raw message object from the export.
+ * @returns The MessageType matching the first rule that applies.
+ */
+function detectMessageType(msg: InstagramMessage): MessageType {
+  const text = msg.content || ''
+
+  // System notices are recognised from the text alone and win over attachments.
+  if (text && isSystemMessage(text)) {
+    return MessageType.SYSTEM
+  }
+
+  // Media attachments, in fixed priority order.
+  if (msg.photos?.length) {
+    return MessageType.IMAGE
+  }
+  if (msg.videos?.length) {
+    return MessageType.VIDEO
+  }
+  if (msg.audio_files?.length) {
+    return MessageType.VOICE
+  }
+
+  // Shared content: GIFs served by giphy are treated as emoji/stickers.
+  if (msg.share) {
+    const sharedLink = msg.share.link || ''
+    return sharedLink.includes('giphy.com') ? MessageType.EMOJI : MessageType.LINK
+  }
+
+  // Plain text, otherwise an empty message (the original comment lists
+  // location shares, calls and similar deleted messages).
+  return text ? MessageType.TEXT : MessageType.OTHER
+}
+
+/**
+ * Builds the display text for an exported Instagram message.
+ *
+ * Text content wins and is decoded; otherwise the first photo, video or audio
+ * file (in that order) is rendered as a "[label] uri" placeholder, then a
+ * shared link, and finally a generic unknown-message marker.
+ *
+ * @param msg - Raw message object from the export.
+ * @returns The text to store for the message; this implementation never returns null.
+ */
+function getMessageContent(msg: InstagramMessage): string | null {
+  // Text body (stored mis-encoded in the export)
+  if (msg.content) {
+    return decodeInstagramText(msg.content)
+  }
+
+  // Media attachments: label + first file's uri, in priority order
+  const attachments: [string, { uri: string }[] | undefined][] = [
+    ['[图片]', msg.photos],
+    ['[视频]', msg.videos],
+    ['[语音]', msg.audio_files],
+  ]
+  for (const [label, files] of attachments) {
+    if (files?.length) {
+      return `${label} ${files[0].uri}`
+    }
+  }
+
+  // Shared content: giphy links are labelled as GIFs
+  if (msg.share) {
+    const sharedLink = msg.share.link || ''
+    return sharedLink.includes('giphy.com') ? `[GIF] ${sharedLink}` : `[链接] ${sharedLink}`
+  }
+
+  // Nothing recognisable
+  return '[未知消息]'
+}
+
+// ==================== 解析器实现 ====================
+
+/**
+ * Parses one Instagram official-export conversation file and streams the
+ * result as parse events.
+ *
+ * Event order: an initial `progress`, then `meta`, `members`, zero or more
+ * `messages` batches (each full batch followed by a `progress`), a final
+ * `progress` in stage 'done', and `done`. If the file cannot be read or parsed,
+ * or lacks `participants` / `messages` arrays, a single `error` event is
+ * yielded after the initial progress and the generator returns.
+ *
+ * The whole file is read into memory; messages are stored newest-first in the
+ * export and are emitted oldest-first.
+ *
+ * @param options - Parse options; `batchSize` defaults to 5000 messages per batch.
+ */
+async function* parseInstagram(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
+  const { filePath, batchSize = 5000, onProgress, onLog } = options
+
+  const totalBytes = getFileSize(filePath)
+  let messagesProcessed = 0
+
+  // Initial progress
+  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '正在解析 Instagram 聊天记录...')
+  yield { type: 'progress', data: initialProgress }
+  onProgress?.(initialProgress)
+
+  onLog?.('info', `开始解析 Instagram 聊天记录，大小: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`)
+
+  // Read and parse the JSON file
+  let data: InstagramData
+  try {
+    const content = fs.readFileSync(filePath, 'utf-8')
+    data = JSON.parse(content)
+  } catch (error) {
+    const err = new Error(`无法解析 Instagram JSON 文件: ${error}`)
+    yield { type: 'error', data: err }
+    return
+  }
+
+  // Valid JSON with the wrong shape must surface as an error event, not as a
+  // TypeError thrown out of the generator.
+  if (!data || !Array.isArray(data.participants) || !Array.isArray(data.messages)) {
+    const err = new Error('无法解析 Instagram JSON 文件: 缺少 participants 或 messages 数组')
+    yield { type: 'error', data: err }
+    return
+  }
+
+  // Names repeat on every message, so decode each distinct raw name only once.
+  const decodedNames = new Map<string, string>()
+  const decodeName = (raw: string): string => {
+    const cached = decodedNames.get(raw)
+    if (cached !== undefined) return cached
+    const decoded = decodeInstagramText(raw)
+    decodedNames.set(raw, decoded)
+    return decoded
+  }
+
+  // Chat type: more than two participants, or an invite link, means a group
+  const isGroup = data.participants.length > 2 || !!data.joinable_mode
+  const chatType = isGroup ? ChatType.GROUP : ChatType.PRIVATE
+  const title = decodeInstagramText(data.title)
+
+  // Owner detection
+  let ownerId: string | undefined
+  if (chatType === ChatType.PRIVATE) {
+    // Private chat: the title is the other party's name, so the remaining participant is the owner
+    const owner = data.participants.find((p) => decodeName(p.name) !== title)
+    ownerId = owner ? decodeName(owner.name) : undefined
+  } else {
+    // Group chat: the owner is the sender of the "You created the group." message, if any
+    const createMsg = data.messages.find((m) => m.content === 'You created the group.')
+    ownerId = createMsg ? decodeName(createMsg.sender_name) : undefined
+  }
+
+  // Meta
+  const meta: ParsedMeta = {
+    name: title || extractNameFromFilePath(filePath),
+    platform: KNOWN_PLATFORMS.INSTAGRAM,
+    type: chatType,
+    ownerId,
+  }
+  yield { type: 'meta', data: meta }
+
+  // Members: declared participants first, then any sender that is missing from
+  // `participants`. Senders are collected BEFORE the members event is emitted
+  // so the emitted list matches the memberCount reported at the end.
+  const memberMap = new Map<string, ParsedMember>()
+  const addMember = (name: string): void => {
+    if (!memberMap.has(name)) {
+      memberMap.set(name, { platformId: name, accountName: name })
+    }
+  }
+  for (const participant of data.participants) {
+    addMember(decodeName(participant.name))
+  }
+  const totalMessages = data.messages.length
+  for (let i = totalMessages - 1; i >= 0; i--) {
+    addMember(decodeName(data.messages[i].sender_name))
+  }
+  yield { type: 'members', data: Array.from(memberMap.values()) }
+
+  // Messages: walk the array backwards instead of copying and reversing it
+  const messageBatch: ParsedMessage[] = []
+  for (let i = totalMessages - 1; i >= 0; i--) {
+    const msg = data.messages[i]
+    const senderName = decodeName(msg.sender_name)
+
+    messageBatch.push({
+      senderPlatformId: senderName,
+      senderAccountName: senderName,
+      timestamp: Math.floor(msg.timestamp_ms / 1000), // milliseconds -> seconds
+      type: detectMessageType(msg),
+      content: getMessageContent(msg),
+    })
+
+    messagesProcessed++
+
+    // Flush a full batch
+    if (messageBatch.length >= batchSize) {
+      yield { type: 'messages', data: [...messageBatch] }
+      messageBatch.length = 0
+
+      // Progress is estimated from the share of messages handled so far
+      const progress = createProgress(
+        'parsing',
+        Math.floor((messagesProcessed / totalMessages) * totalBytes),
+        totalBytes,
+        messagesProcessed,
+        `已处理 ${messagesProcessed} 条消息...`
+      )
+      // Yield as well as call back, like the initial and final progress
+      yield { type: 'progress', data: progress }
+      onProgress?.(progress)
+    }
+  }
+
+  // Remaining messages
+  if (messageBatch.length > 0) {
+    yield { type: 'messages', data: messageBatch }
+  }
+
+  // Done
+  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
+  yield { type: 'progress', data: doneProgress }
+  onProgress?.(doneProgress)
+
+  onLog?.('info', `解析完成: ${messagesProcessed} 条消息, ${memberMap.size} 个成员`)
+
+  yield {
+    type: 'done',
+    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
+  }
+}
+
+// ==================== 导出 ====================
+
+export const parser_: Parser = { // parser entry: pairs the format descriptor with the parse generator
+  feature,
+  parse: parseInstagram,
+}
+
+const module_: FormatModule = { // format module; imported as `instagramNative` in formats/index.ts
+  feature,
+  parser: parser_,
+}
+
+export default module_
+
--- a/src/components/common/ChatRecord/MessageItem.vue
+++ b/src/components/common/ChatRecord/MessageItem.vue
@@ -182,7 +182,7 @@ function highlightContent(content: string): string {
              </p>
            </div>
            <p
-              class="whitespace-pre-wrap break-words text-sm text-gray-700 dark:text-gray-200"
+              class="whitespace-pre-wrap break-all text-sm text-gray-700 dark:text-gray-200"
              v-html="highlightContent(message.content || '')"
            />
          </div>
--- a/src/types/base.ts
+++ b/src/types/base.ts
@@ -95,6 +95,7 @@ export const KNOWN_PLATFORMS = {
  WECHAT: 'weixin',
  DISCORD: 'discord',
  WHATSAPP: 'whatsapp',
+  INSTAGRAM: 'instagram',
  UNKNOWN: 'unknown',
 } as const