feat: 支持 instagram 聊天记录导入

This commit is contained in:
digua
2026-01-14 00:35:05 +08:00
parent 176a467779
commit e925c4464f
7 changed files with 375 additions and 7 deletions

View File

@@ -6,7 +6,7 @@ ChatLab 是一个免费、开源、本地化的,专注于分析聊天记录的
我们拒绝将你的隐私上传云端,而是把强大的分析能力直接塞进你的电脑。
目前已支持微信、QQ、WhatsApp、Discord 的聊天记录分析即将支持iMessage、LINE。
目前已支持微信、QQ、WhatsApp、Discord、Instagram 的聊天记录分析,即将支持 iMessage、LINE。
项目目前还处于早期迭代阶段,因此还有很多缺陷和未完成功能。若您遇到了任何问题,欢迎随时反馈。

View File

@@ -6,7 +6,7 @@ ChatLab is a free, open-source, and local-first application dedicated to analyzi
We refuse to upload your private data to the cloud; instead, we bring powerful analytics directly to your computer.
Currently supported: Chat record analysis for **WeChat, QQ, WhatsApp and Discord**. Upcoming support: **iMessage, and LINE**.
Currently supported: Chat record analysis for **WeChat, QQ, WhatsApp, Instagram and Discord**. Upcoming support: **iMessage and LINE**.
The project is still in early iteration, so there are many bugs and unfinished features. If you encounter any issues, feel free to provide feedback.

View File

@@ -50,8 +50,8 @@ export const feature: FormatFeature = {
extensions: ['.json'],
signatures: {
// 只要求 chatlab 字段在文件头8KB其他字段在解析时验证
// 这样可以正确识别格式化后的大文件meta/messages 可能超出 8KB
head: [/"chatlab"\s*:\s*\{/, /"version"\s*:\s*"/],
// 移除过于宽松的 version 签名,只保留 chatlab 对象签名
head: [/"chatlab"\s*:\s*\{/],
requiredFields: ['chatlab'],
},
}

View File

@@ -14,6 +14,7 @@ import yccccccyEchotrace from './ycccccccy-echotrace'
import tyrrrzDiscordExporter from './tyrrrz-discord-exporter'
import whatsappNativeTxt from './whatsapp-native-txt'
import qqNativeTxt from './qq-native-txt'
import instagramNative from './instagram-native'
/**
* 所有支持的格式模块(按优先级排序)
@@ -25,7 +26,8 @@ export const formats: FormatModule[] = [
shuakamiQqExporter, // 优先级 10 - shuakami/qq-chat-exporter
yccccccyEchotrace, // 优先级 15 - ycccccccy/echotrace
tyrrrzDiscordExporter, // 优先级 20 - Tyrrrz/DiscordChatExporter
whatsappNativeTxt, // 优先级 25 - WhatsApp 官方导出 TXT
instagramNative, // 优先级 25 - Instagram 官方导出
whatsappNativeTxt, // 优先级 26 - WhatsApp 官方导出 TXT
qqNativeTxt, // 优先级 30 - QQ 官方导出 TXT
]
@@ -37,6 +39,7 @@ export {
shuakamiQqExporterChunked,
yccccccyEchotrace,
tyrrrzDiscordExporter,
qqNativeTxt,
instagramNative,
whatsappNativeTxt,
qqNativeTxt,
}

View File

@@ -0,0 +1,364 @@
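/**
 * Instagram official export format parser
 * Target: JSON files exported by Instagram's account data download feature
 *
 * File structure:
 * - participants: array of participants [{ name: string }]
 * - messages: array of messages (reverse order, newest first)
 * - title: conversation title (group name or the other party's username)
 * - thread_path: thread path, e.g. "inbox/xxx_123456"
 * - joinable_mode: present only for group chats; contains the group invite link
 *
 * Special handling:
 * - Encoding: Instagram stores UTF-8 bytes encoded as Latin-1, so text must be decoded
 * - Reverse order: messages must be reversed into chronological order
 * - No user ID: the username is used as platformId
 */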
import * as fs from 'fs'
import * as path from 'path'
import { KNOWN_PLATFORMS, ChatType, MessageType } from '../../../../src/types/base'
import type {
FormatFeature,
FormatModule,
Parser,
ParseOptions,
ParseEvent,
ParsedMeta,
ParsedMember,
ParsedMessage,
} from '../types'
import { getFileSize, createProgress } from '../utils'
// ==================== 特征定义 ====================
/**
 * Format descriptor for the Instagram official JSON export.
 * Declares the format id, display name, platform, priority, accepted file
 * extension and the signatures used to recognise a file as this format.
 */
export const feature: FormatFeature = {
id: 'instagram-native',
name: 'Instagram 官方导出',
platform: KNOWN_PLATFORMS.INSTAGRAM,
priority: 25,
extensions: ['.json'],
signatures: {
// Use an Instagram-specific field as the signature (it can be matched in the head of the file).
// is_geoblocked_for_viewer is a field specific to Instagram messages.
requiredFields: ['participants', 'messages'],
head: [/"is_geoblocked_for_viewer"\s*:/],
},
}
// ==================== 类型定义 ====================
/** One entry of the export's `participants` array. */
interface InstagramParticipant {
name: string
}
/** A photo attached to a message. */
interface InstagramPhoto {
uri: string
creation_timestamp?: number
}
/** A video attached to a message. */
interface InstagramVideo {
uri: string
creation_timestamp?: number
}
/** An audio file attached to a message. */
interface InstagramAudio {
uri: string
creation_timestamp?: number
}
/** Shared content attached to a message; every field is optional. */
interface InstagramShare {
link?: string
share_text?: string
original_content_owner?: string
}
/** A reaction on a message: the reaction text and the name of who reacted. */
interface InstagramReaction {
reaction: string
actor: string
}
/** One entry of the export's `messages` array. */
interface InstagramMessage {
sender_name: string
// Milliseconds; the parser divides by 1000 to obtain seconds.
timestamp_ms: number
// Text body, stored in Instagram's mis-encoded form; absent for media-only messages.
content?: string
photos?: InstagramPhoto[]
videos?: InstagramVideo[]
audio_files?: InstagramAudio[]
share?: InstagramShare
reactions?: InstagramReaction[]
is_geoblocked_for_viewer?: boolean
is_unsent_image_by_messenger_kid_parent?: boolean
}
/** Top-level shape of one exported conversation file. */
interface InstagramData {
participants: InstagramParticipant[]
// Stored in reverse order, newest first.
messages: InstagramMessage[]
// Conversation title: the group name or the other party's username.
title: string
is_still_participant?: boolean
// Thread path, e.g. "inbox/xxx_123456".
thread_path: string
magic_words?: unknown[]
// Present only for group chats; carries the group invite link.
joinable_mode?: {
mode: number
link: string
}
}
// ==================== 辅助函数 ====================
/**
 * Decodes text stored in Instagram's export encoding.
 *
 * Instagram writes the UTF-8 bytes of a string as if each byte were a Latin-1
 * character. This reverses that by treating every character code as one byte
 * and decoding the byte sequence as UTF-8.
 *
 * The input is returned unchanged when it cannot be such mis-encoded text:
 * - it contains a code unit above 0xFF (it is already real Unicode, and
 *   truncating it to a byte would corrupt it), or
 * - the resulting bytes are not valid UTF-8 (e.g. a genuine Latin-1 "é").
 *
 * @param text - Raw string taken from the export.
 * @returns The decoded string, or `text` itself when decoding is not applicable.
 */
function decodeInstagramText(text: string): string {
  // Values from unvalidated JSON may be missing; pass them through untouched.
  if (typeof text !== 'string' || text.length === 0) {
    return text
  }

  const bytes = new Uint8Array(text.length)
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i)
    // A code unit that does not fit in one byte means this is not byte-per-char text.
    if (code > 0xff) {
      return text
    }
    bytes[i] = code
  }

  try {
    // `fatal: true` makes invalid UTF-8 throw instead of yielding U+FFFD,
    // so the fallback below is actually reachable.
    return new TextDecoder('utf-8', { fatal: true }).decode(bytes)
  } catch {
    return text
  }
}
/**
 * Derives a fallback conversation name from a file path.
 *
 * @param filePath - Path of the exported JSON file.
 * @returns The file's base name with a trailing `.json` (any letter case)
 *   removed, or `'未知对话'` when nothing remains.
 */
function extractNameFromFilePath(filePath: string): string {
  const fileName = path.basename(filePath)
  const stem = fileName.replace(/\.json$/i, '')
  if (stem === '') {
    return '未知对话'
  }
  return stem
}
/**
 * Tells whether a message body looks like a system notice.
 *
 * The check is a case-sensitive substring match against a fixed phrase list.
 * NOTE(review): because it is a plain substring match, an ordinary user
 * message containing one of these phrases (e.g. "added") is also reported as
 * a system message.
 *
 * @param content - Message text to inspect.
 * @returns `true` when any known phrase occurs in `content`.
 */
function isSystemMessage(content: string): boolean {
  const markers = [
    'You created the group',
    'created the group',
    'added',
    'to the group',
    'left the group',
    'removed',
    'named the group',
    'changed the group photo',
    'Reacted',
    'sent an attachment',
    'liked a message',
    'changed the theme',
    'set the nickname',
  ]
  for (const marker of markers) {
    if (content.indexOf(marker) !== -1) {
      return true
    }
  }
  return false
}
/**
 * Classifies an Instagram message.
 *
 * Checks run in this order, first match wins: system notice (by text),
 * photo, video, audio, share (a giphy.com link counts as an emoji, any other
 * share as a link), plain text, and finally `OTHER` for messages with no
 * recognisable payload.
 *
 * @param msg - Raw message from the export.
 * @returns The message type for `msg`.
 */
function detectMessageType(msg: InstagramMessage): MessageType {
  const text = msg.content || ''

  // System notices are recognised from the text alone and take precedence over attachments.
  if (text && isSystemMessage(text)) {
    return MessageType.SYSTEM
  }

  // Attachments.
  if (msg.photos && msg.photos.length) {
    return MessageType.IMAGE
  }
  if (msg.videos && msg.videos.length) {
    return MessageType.VIDEO
  }
  if (msg.audio_files && msg.audio_files.length) {
    return MessageType.VOICE
  }

  // Shared content: GIFs hosted on giphy are treated as emoji.
  if (msg.share) {
    const sharedLink = msg.share.link || ''
    return sharedLink.includes('giphy.com') ? MessageType.EMOJI : MessageType.LINK
  }

  // Plain text, otherwise an empty message.
  return text ? MessageType.TEXT : MessageType.OTHER
}
/**
 * Builds the display text for an Instagram message.
 *
 * Text content wins and is decoded; otherwise the first photo, video or audio
 * attachment is rendered as a labelled URI, then a share as a labelled link,
 * and a message with none of these becomes a fixed placeholder.
 *
 * @param msg - Raw message from the export.
 * @returns The text to store for the message.
 */
function getMessageContent(msg: InstagramMessage): string | null {
  if (msg.content) {
    return decodeInstagramText(msg.content)
  }

  // Attachment kinds in priority order, each with its display label.
  const attachments: Array<[string, Array<{ uri: string }> | undefined]> = [
    ['[图片]', msg.photos],
    ['[视频]', msg.videos],
    ['[语音]', msg.audio_files],
  ]
  for (const [label, files] of attachments) {
    if (files?.length) {
      return `${label} ${files[0].uri}`
    }
  }

  if (msg.share) {
    const url = msg.share.link || ''
    const label = url.includes('giphy.com') ? '[GIF]' : '[链接]'
    return `${label} ${url}`
  }

  return '[未知消息]'
}
// ==================== 解析器实现 ====================
/**
 * Parses an Instagram official-export JSON file and streams the result as parse events.
 *
 * Events are yielded in this order: an initial `progress`, `meta`, `members`,
 * zero or more `messages` batches (each full batch followed by a `progress`),
 * a final `progress`, and `done`. If the file cannot be read, is not valid
 * JSON, or lacks the `participants` / `messages` arrays, a single `error`
 * event is yielded after the initial `progress` and the generator ends.
 *
 * @param options - Parse options: `filePath` is the file to read, `batchSize`
 *   (default 5000) is the number of messages per `messages` event, and
 *   `onProgress` / `onLog` are optional callbacks.
 */
async function* parseInstagram(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, batchSize = 5000, onProgress, onLog } = options

  const totalBytes = getFileSize(filePath)
  let messagesProcessed = 0

  // Report the initial progress.
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '正在解析 Instagram 聊天记录...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)
  onLog?.('info', `开始解析 Instagram 聊天记录,大小: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`)

  // Read and parse the JSON file, rejecting files without the two required arrays
  // so that later code never dereferences a missing field.
  let data: InstagramData
  try {
    const content = fs.readFileSync(filePath, 'utf-8')
    const parsed = JSON.parse(content) as Partial<InstagramData> | null
    if (!parsed || !Array.isArray(parsed.participants) || !Array.isArray(parsed.messages)) {
      throw new Error('缺少 participants 或 messages 数组')
    }
    data = parsed as InstagramData
  } catch (error) {
    const err = new Error(`无法解析 Instagram JSON 文件: ${error}`)
    yield { type: 'error', data: err }
    return
  }

  const title = decodeInstagramText(data.title)

  // More than two participants, or a join link, means a group chat.
  const isGroup = data.participants.length > 2 || !!data.joinable_mode
  const chatType = isGroup ? ChatType.GROUP : ChatType.PRIVATE

  // Determine the owner of the export.
  let ownerId: string | undefined
  if (chatType === ChatType.PRIVATE) {
    // Private chat: the title is the other party's name, so the remaining participant is the owner.
    const owner = data.participants.find((p) => decodeInstagramText(p.name) !== title)
    ownerId = owner ? decodeInstagramText(owner.name) : undefined
  } else {
    // Group chat: use the sender of the "You created the group." message.
    const createMsg = data.messages.find((m) => m.content === 'You created the group.')
    ownerId = createMsg ? decodeInstagramText(createMsg.sender_name) : undefined
  }

  const meta: ParsedMeta = {
    name: title || extractNameFromFilePath(filePath),
    platform: KNOWN_PLATFORMS.INSTAGRAM,
    type: chatType,
    ownerId,
  }
  yield { type: 'meta', data: meta }

  // Collect members from the participant list; the username doubles as the platform id.
  const memberMap = new Map<string, ParsedMember>()
  for (const participant of data.participants) {
    const name = decodeInstagramText(participant.name)
    memberMap.set(name, { platformId: name, accountName: name })
  }

  // Instagram stores messages newest first; reverse into chronological order.
  const reversedMessages = [...data.messages].reverse()

  // Scan the senders BEFORE emitting members, so that senders who are not listed
  // in `participants` are part of the `members` event and the final member count
  // matches what was emitted. The raw -> decoded cache is reused by the message
  // loop below.
  const decodedSenders = new Map<string, string>()
  for (const msg of reversedMessages) {
    if (decodedSenders.has(msg.sender_name)) continue
    const senderName = decodeInstagramText(msg.sender_name)
    decodedSenders.set(msg.sender_name, senderName)
    if (!memberMap.has(senderName)) {
      memberMap.set(senderName, { platformId: senderName, accountName: senderName })
    }
  }

  yield { type: 'members', data: Array.from(memberMap.values()) }

  const messageBatch: ParsedMessage[] = []
  for (const msg of reversedMessages) {
    const senderName = decodedSenders.get(msg.sender_name) ?? decodeInstagramText(msg.sender_name)

    messageBatch.push({
      senderPlatformId: senderName,
      senderAccountName: senderName,
      timestamp: Math.floor(msg.timestamp_ms / 1000), // milliseconds -> seconds
      type: detectMessageType(msg),
      content: getMessageContent(msg),
    })
    messagesProcessed++

    // Emit a full batch, then report progress the same way as the initial and
    // final reports (event + callback). Bytes are estimated from the share of
    // messages processed.
    if (messageBatch.length >= batchSize) {
      yield { type: 'messages', data: [...messageBatch] }
      messageBatch.length = 0

      const progress = createProgress(
        'parsing',
        Math.floor((messagesProcessed / reversedMessages.length) * totalBytes),
        totalBytes,
        messagesProcessed,
        `已处理 ${messagesProcessed} 条消息...`
      )
      yield { type: 'progress', data: progress }
      onProgress?.(progress)
    }
  }

  // Emit whatever is left in the last, partial batch.
  if (messageBatch.length > 0) {
    yield { type: 'messages', data: messageBatch }
  }

  // Finish.
  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)
  onLog?.('info', `解析完成: ${messagesProcessed} 条消息, ${memberMap.size} 个成员`)

  yield {
    type: 'done',
    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
  }
}
// ==================== 导出 ====================
/** Parser object pairing the Instagram format descriptor with its parse generator. */
export const parser_: Parser = {
feature,
parse: parseInstagram,
}
/** Format module bundling the descriptor and the parser; exported as this file's default export. */
const module_: FormatModule = {
feature,
parser: parser_,
}
export default module_

View File

@@ -182,7 +182,7 @@ function highlightContent(content: string): string {
</p>
</div>
<p
class="whitespace-pre-wrap break-words text-sm text-gray-700 dark:text-gray-200"
class="whitespace-pre-wrap break-all text-sm text-gray-700 dark:text-gray-200"
v-html="highlightContent(message.content || '')"
/>
</div>

View File

@@ -95,6 +95,7 @@ export const KNOWN_PLATFORMS = {
WECHAT: 'weixin',
DISCORD: 'discord',
WHATSAPP: 'whatsapp',
INSTAGRAM: 'instagram',
UNKNOWN: 'unknown',
} as const