From defe59ffe879901f53591a5cbe85ddeb7d420d56 Mon Sep 17 00:00:00 2001 From: Huoji's <1296564236@qq.com> Date: Sun, 9 Mar 2025 03:19:40 +0800 Subject: [PATCH] update --- ai_anti_malware/ai_anti_malware.cpp | 110 +++++++++++++++++---- ai_anti_malware/head.h | 2 + ai_anti_malware/ml.cpp | 136 ++++++++++++++++++++++++-- ai_anti_malware/ml.h | 21 ++-- ai_anti_malware/sandbox.cpp | 17 ++-- ai_anti_malware/sandbox_api_emu.cpp | 108 +++++++++++++++++--- ai_anti_malware/sandbox_callbacks.cpp | 2 +- 7 files changed, 337 insertions(+), 59 deletions(-) diff --git a/ai_anti_malware/ai_anti_malware.cpp b/ai_anti_malware/ai_anti_malware.cpp index 85bf1f1..f98b0e2 100644 --- a/ai_anti_malware/ai_anti_malware.cpp +++ b/ai_anti_malware/ai_anti_malware.cpp @@ -29,28 +29,100 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr { sampleInfo->RecImageBase + (sampleInfo->isX64 ? sampleInfo->ntHead64->OptionalHeader.SizeOfImage : sampleInfo->ntHead32->OptionalHeader.SizeOfImage); + printf("Debug - Memory mapping parameters:\n"); + printf("RecImageBase: 0x%llx\n", sampleInfo->RecImageBase); + printf("peSize: 0x%llx\n", sampleInfo->peSize); + printf("Page aligned base: 0x%llx\n", sampleInfo->RecImageBase & ~0xFFF); + printf("Page aligned size: 0x%llx\n", + (sampleInfo->peSize + 0xFFF) & ~0xFFF); + sampleInfo->RecImageBase = sampleInfo->RecImageBase & ~0xFFF; + sampleInfo->peSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF; return sampleInfo; } -int main() { - auto sampleInfo = getPeInfo("z:\\Console_Test.exe"); - // auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe"); - printf("input new file %s \n", sampleInfo->inputFilePath); - printf("is x64: %d\n", sampleInfo->isX64); - printf("is relocated: %d\n", sampleInfo->isRelocated); - printf("RecImageBase: %llx\n", sampleInfo->RecImageBase); - auto sandbox = std::make_shared(); - sandbox->InitEnv(sampleInfo); - sandbox->Run(); - auto [peBuffer, peSize] = sandbox->DumpPE(); - - if (peBuffer) { - printf("peBuffer: %p\n", peBuffer.get()); - printf("peSize: %d\n", peSize); - // peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize); - MachineLearning ml; - ml.ExtractFeatures(peBuffer.get(), peSize, "z:\\features.txt"); +int doMl(int argc, char* argv[]) { + // 检查命令行参数 + if (argc < 3) { + std::cout << "用法: " << argv[0] << " <样本目录路径> <输出CSV路径>" + << std::endl; + std::cout << "或者: " << argv[0] + << " -single <单个文件路径> <输出CSV路径>" << std::endl; + return 1; } - peBuffer.release(); + MachineLearning ml; + + if (std::string(argv[1]) == "-single") { + // 处理单个文件 + if (argc < 4) { + std::cout << "处理单个文件时需要提供文件路径和输出CSV路径" + << std::endl; + return 1; + } + + std::string filePath = argv[2]; + std::string csvPath = argv[3]; + + // 读取文件 + std::vector buffer = ml.ReadFileToBuffer(filePath); + if (buffer.empty()) { + std::cerr << "无法读取文件: " << filePath << std::endl; + return 1; + } + + // 提取特征 + std::vector features = + ml.ExtractFeatures(buffer.data(), buffer.size()); + if (features.empty()) { + std::cerr << "无法从文件提取特征: " << filePath << std::endl; + return 1; + } + + // 导出到CSV + if (!ml.ExportToCSV(features, csvPath)) { + std::cerr << "无法导出到CSV文件: " << csvPath << std::endl; + return 1; + } + + std::cout << "成功处理文件并导出特征到: " << csvPath << std::endl; + } else { + // 处理目录 + std::string dirPath = argv[1]; + std::string csvPath = argv[2]; + + std::cout << "开始处理目录: " << dirPath << std::endl; + std::cout << "特征将导出到: " << csvPath << std::endl; + + if (!ml.ProcessDirectory(dirPath, csvPath)) { + std::cerr << "处理目录时发生错误" << std::endl; + return 1; + } + } + return 0; +}; +int main(int argc, char* argv[]) { + doMl(argc, argv); + /* + auto sampleInfo = getPeInfo( + "E:\\对战平台\\CrowAntiCheat\\CrowAntiCheat\\client\\Console_" + "Test\\Release\\Console_Test.exe"); + // auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe"); + printf("input new file %s \n", sampleInfo->inputFilePath); + printf("is x64: %d\n", sampleInfo->isX64); + printf("is relocated: %d\n", sampleInfo->isRelocated); + printf("RecImageBase: %llx\n", sampleInfo->RecImageBase); + auto sandbox = std::make_shared(); + sandbox->InitEnv(sampleInfo); + sandbox->Run(); + auto [peBuffer, peSize] = sandbox->DumpPE(); + + if (peBuffer) { + printf("peBuffer: %p\n", peBuffer.get()); + printf("peSize: %d\n", peSize); + // peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize); + MachineLearning ml; + ml.ExtractFeatures(peBuffer.get(), peSize); + } + peBuffer.release(); + */ system("pause"); return 0; } diff --git a/ai_anti_malware/head.h b/ai_anti_malware/head.h index dda12c1..ab4c354 100644 --- a/ai_anti_malware/head.h +++ b/ai_anti_malware/head.h @@ -1,4 +1,6 @@ #pragma once +#define LOG_LEVEL 0 + #define _CRT_SECURE_NO_WARNINGS #include #include diff --git a/ai_anti_malware/ml.cpp b/ai_anti_malware/ml.cpp index 3ac220b..aa2f004 100644 --- a/ai_anti_malware/ml.cpp +++ b/ai_anti_malware/ml.cpp @@ -1,4 +1,5 @@ #include "ml.h" +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include // 确保std命名空间中的函数可用 using std::max; @@ -177,15 +179,14 @@ MachineLearning::~MachineLearning() { // 析构函数,清理资源(如有必要) } -bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize, - const std::string& outputPath) { +std::vector MachineLearning::ExtractFeatures(const uint8_t* buffer, + size_t bufferSize) { // 使用libpeconv解析PE文件 size_t v_size = 0; BYTE* peBuffer = peconv::load_pe_module(const_cast(buffer), bufferSize, v_size, false, false); if (!peBuffer) { - std::cerr << "无法加载PE文件" << std::endl; - return false; + return std::vector(); } // 解析PE信息 @@ -202,7 +203,7 @@ bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize, (PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer); if (!ntHeaders) { peconv::free_pe_buffer(peBuffer); - return false; + return std::vector(); } // 从NT头部获取信息 @@ -392,13 +393,10 @@ bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize, // 7. 节区数量 allFeatures.push_back(static_cast(sections.size())); - // 导出特征到CSV - bool result = ExportToCSV(allFeatures, outputPath); - // 清理资源 peconv::free_pe_buffer(peBuffer); - return result; + return allFeatures; } std::vector MachineLearning::EncodeProperties( @@ -588,4 +586,124 @@ MachineLearning::GetOpcodeStatistics(const uint8_t* data, size_t dataSize, bool isX64, const PeInfo& peInfo) { // 此函数未使用,但保留实现接口 return std::make_tuple(std::vector(), std::vector()); +} + +std::vector MachineLearning::ReadFileToBuffer( + const std::string& filePath) { + std::ifstream fileStream(filePath, std::ios::binary | std::ios::ate); + if (!fileStream.is_open()) { + std::cerr << "无法打开文件: " << filePath << std::endl; + return std::vector(); + } + + // 获取文件大小 + std::streamsize fileSize = fileStream.tellg(); + fileStream.seekg(0, std::ios::beg); + + // 分配缓冲区并读取文件 + std::vector buffer(fileSize); + if (!fileStream.read(reinterpret_cast(buffer.data()), fileSize)) { + std::cerr << "读取文件失败: " << filePath << std::endl; + return std::vector(); + } + + return buffer; +} + +bool MachineLearning::ProcessDirectory(const std::string& directoryPath, + const std::string& outputCsvPath) { + // 打开CSV文件用于写入 + std::ofstream csvFile(outputCsvPath); + if (!csvFile.is_open()) { + std::cerr << "无法创建CSV文件: " << outputCsvPath << std::endl; + return false; + } + /* + // 写入CSV标题行 + csvFile << "文件路径"; + for (size_t i = 0; i < _properties.size(); i++) { + csvFile << ",属性_" << i; + } + for (size_t i = 0; i < _libraries.size(); i++) { + csvFile << ",库_" << i; + } + csvFile << ",文件熵"; + for (size_t i = 0; i < 64; i++) { // 前64个字节特征 + csvFile << ",EP_" << i; + } + csvFile << ",节区数"; + csvFile << ",平均熵"; + csvFile << ",最大熵"; + csvFile << ",归一化平均熵"; + csvFile << ",节区大小比率"; + csvFile << ",代码比率"; + csvFile << ",节区计数"; + csvFile << std::endl; + */ + // 递归遍历目录 + WIN32_FIND_DATAA findData; + std::string searchPath = directoryPath + "\\*"; + HANDLE hFind = FindFirstFileA(searchPath.c_str(), &findData); + + if (hFind == INVALID_HANDLE_VALUE) { + std::cerr << "无法访问目录: " << directoryPath << std::endl; + csvFile.close(); + return false; + } + + int processedCount = 0; + int failedCount = 0; + + do { + // 跳过 "." 和 ".." 目录 + if (strcmp(findData.cFileName, ".") == 0 || + strcmp(findData.cFileName, "..") == 0) { + continue; + } + + std::string currentPath = directoryPath + "\\" + findData.cFileName; + + if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + // 递归处理子目录 + ProcessDirectory(currentPath, outputCsvPath); + } else { + // 处理文件 + std::vector fileBuffer = ReadFileToBuffer(currentPath); + if (fileBuffer.empty()) { + std::cerr << "跳过文件: " << currentPath << " (读取失败)" + << std::endl; + failedCount++; + continue; + } + + // 提取特征 + std::vector features = + ExtractFeatures(fileBuffer.data(), fileBuffer.size()); + if (features.empty()) { + std::cerr << "跳过文件: " << currentPath << " (特征提取失败)" + << std::endl; + failedCount++; + continue; + } + + // 写入CSV + csvFile << currentPath; + for (const auto& feature : features) { + csvFile << "," << std::fixed << std::setprecision(6) << feature; + } + csvFile << std::endl; + + processedCount++; + if (processedCount % 100 == 0) { + std::cout << "已处理 " << processedCount << " 个文件..." + << std::endl; + } + } + } while (FindNextFileA(hFind, &findData)); + + FindClose(hFind); + csvFile.close(); + printf("ML Process Result, success count: %d fail count: %d \n", + processedCount, failedCount); + return true; } \ No newline at end of file diff --git a/ai_anti_malware/ml.h b/ai_anti_malware/ml.h index 4dbed93..cd6646d 100644 --- a/ai_anti_malware/ml.h +++ b/ai_anti_malware/ml.h @@ -62,9 +62,20 @@ class MachineLearning { MachineLearning(); ~MachineLearning(); - // 主函数:提取特征并导出到CSV - bool ExtractFeatures(const uint8_t* buffer, size_t bufferSize, - const std::string& outputPath); + // 提取特征并返回特征向量 + std::vector ExtractFeatures(const uint8_t* buffer, + size_t bufferSize); + + // 将特征导出到CSV + bool ExportToCSV(const std::vector& features, + const std::string& outputPath); + + // 批量处理目录中的样本并生成CSV + bool ProcessDirectory(const std::string& directoryPath, + const std::string& outputCsvPath); + + // 读取文件到内存 + std::vector ReadFileToBuffer(const std::string& filePath); private: // 特征提取辅助函数 @@ -81,10 +92,6 @@ class MachineLearning { int GetOpcodeType(const void* code, bool isX64); double CalculateEntropy(const uint8_t* data, size_t size); - // 将特征导出到CSV - bool ExportToCSV(const std::vector& features, - const std::string& outputPath); - // 常量定义 std::vector _properties; std::vector _libraries; diff --git a/ai_anti_malware/sandbox.cpp b/ai_anti_malware/sandbox.cpp index 0fa8ebc..9844494 100644 --- a/ai_anti_malware/sandbox.cpp +++ b/ai_anti_malware/sandbox.cpp @@ -155,7 +155,7 @@ class cFixImprot : public peconv::t_function_resolver { } } } - __debugbreak(); + //__debugbreak(); return nullptr; } @@ -191,12 +191,6 @@ Sandbox::~Sandbox() { } m_heapSegments.clear(); - // 4. 清理栈内存 - if (m_stackBuffer) { - free(m_stackBuffer); - m_stackBuffer = nullptr; - } - // 5. 最后清理底层资源 if (m_csHandle) { cs_close(&m_csHandle); @@ -349,8 +343,9 @@ auto Sandbox::ResolveImportExports() -> void { } const auto exports = ResolveExport(module->real_base); for (const auto item : exports) { - printf("import export: [%s] %s => %llx\n", module->name, item->name, - item->function_address); + if (LOG_LEVEL > 0) { + printf("import export: [%s] %s => %llx\n", module->name, item->name, item->function_address); + } module->export_function.push_back(item); } } @@ -359,7 +354,9 @@ auto Sandbox::ResolveImportExports() -> void { auto Sandbox::processImportModule(const moudle_import* importModule) -> void { for (auto module : m_moduleList) { if (strcmp(module->name, importModule->dll_name) == 0) { - printf("skip module name: %s (already loaded)\n", module->name); + if (LOG_LEVEL > 0) { + printf("skip module name: %s (already loaded)\n", module->name); + } return; } } diff --git a/ai_anti_malware/sandbox_api_emu.cpp b/ai_anti_malware/sandbox_api_emu.cpp index c6db5cf..3c1f73c 100644 --- a/ai_anti_malware/sandbox_api_emu.cpp +++ b/ai_anti_malware/sandbox_api_emu.cpp @@ -2173,6 +2173,83 @@ auto Api_VirtualProtect(void* sandbox, uc_engine* uc, uint64_t address) &result); } +auto Api___set_app_type(void* sandbox, uc_engine* uc, uint64_t address) + -> void { + auto context = static_cast(sandbox); + int32_t appType = 0; + + // 获取参数 + if (context->GetPeInfo()->isX64) { + // x64: rcx = appType + uint64_t temp_type; + uc_reg_read(uc, UC_X86_REG_RCX, &temp_type); + appType = static_cast(temp_type); + } else { + // x86: 从栈上读取参数 + uint32_t esp_address = 0; + uc_reg_read(uc, UC_X86_REG_ESP, &esp_address); + esp_address += 0x4; // 跳过返回地址 + uc_mem_read(uc, esp_address, &appType, sizeof(int32_t)); + } + + // 简单地返回0表示成功 + int32_t result = 0; + printf("[*] __set_app_type: AppType=%d\n", appType); + + uc_reg_write(uc, + context->GetPeInfo()->isX64 ? UC_X86_REG_RAX : UC_X86_REG_EAX, + &result); +} + +auto Api___p__fmode(void* sandbox, uc_engine* uc, uint64_t address) -> void { + auto sb = static_cast(sandbox); + + // 检查是否已经创建了 _fmode 变量 + static uint64_t fmode_address = 0; + static int32_t fmode_value = 0; // 默认为文本模式 (_O_TEXT) + + if (fmode_address == 0) { + // 为 _fmode 变量分配内存 + // 使用特定堆地址,与其他 API 一致 + uint64_t heap_handle = + sb->GetPeInfo()->isX64 ? HEAP_ADDRESS_64 : HEAP_ADDRESS_32; + + // 在堆上分配空间 + HeapSegment* segment = nullptr; + auto it = sb->m_heapSegments.find(heap_handle); + if (it != sb->m_heapSegments.end()) { + segment = it->second; + } else { + // 创建新的堆段 + segment = sb->CreateHeapSegment(heap_handle, 0x10000); + sb->m_heapSegments[heap_handle] = segment; + } + + if (segment) { + fmode_address = sb->AllocateFromSegment(segment, sizeof(int32_t)); + if (fmode_address) { + // 初始化 _fmode 为文本模式 + uc_mem_write(uc, fmode_address, &fmode_value, sizeof(int32_t)); + printf( + "[*] __p__fmode: Allocated _fmode at 0x%llx with value " + "%d\n", + fmode_address, fmode_value); + } + } + } + + // 返回 _fmode 变量的地址 + printf("[*] __p__fmode: Returning address 0x%llx\n", fmode_address); + + // 设置返回值 + if (sb->GetPeInfo()->isX64) { + uc_reg_write(uc, UC_X86_REG_RAX, &fmode_address); + } else { + uint32_t eax = static_cast(fmode_address); + uc_reg_write(uc, UC_X86_REG_EAX, &eax); + } +} + auto Sandbox::InitApiHooks() -> void { auto FakeApi_GetSystemTimeAsFileTime = _fakeApi{.func = Api_GetSystemTimeAsFileTime, .paramCount = 1}; @@ -2241,6 +2318,9 @@ auto Sandbox::InitApiHooks() -> void { _fakeApi{.func = Api_SetUnhandledExceptionFilter, .paramCount = 1}; auto FakeApi_VirtualProtect = _fakeApi{.func = Api_VirtualProtect, .paramCount = 4}; + auto FakeApi___set_app_type = + _fakeApi{.func = Api___set_app_type, .paramCount = 1}; + auto FakeApi___p__fmode = _fakeApi{.func = Api___p__fmode, .paramCount = 0}; api_map = { {"GetSystemTimeAsFileTime", @@ -2300,6 +2380,8 @@ auto Sandbox::InitApiHooks() -> void { {"SetUnhandledExceptionFilter", std::make_shared<_fakeApi>(FakeApi_SetUnhandledExceptionFilter)}, {"VirtualProtect", std::make_shared<_fakeApi>(FakeApi_VirtualProtect)}, + {"__set_app_type", std::make_shared<_fakeApi>(FakeApi___set_app_type)}, + {"__p__fmode", std::make_shared<_fakeApi>(FakeApi___p__fmode)}, }; } auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip, @@ -2310,16 +2392,13 @@ auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip, // 获取参数数量 int paramCount = it->second->paramCount; - - // 获取当前的栈指针 + uint32_t esp; uint64_t rsp; - uc_reg_read(uc, - this->GetPeInfo()->isX64 ? UC_X86_REG_RSP : UC_X86_REG_ESP, - &rsp); // 从栈上读取返回地址 uint64_t return_address; if (this->GetPeInfo()->isX64) { // 64位系统 + uc_reg_read(uc, UC_X86_REG_RSP, &rsp); // 读取8字节的返回地址 uc_mem_read(uc, rsp, &return_address, 8); @@ -2332,21 +2411,24 @@ auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip, uc_reg_write(uc, UC_X86_REG_RIP, &return_address); } else { // 32位系统 // 读取4字节的返回地址 - uint32_t return_address_32; - uc_mem_read(uc, rsp, &return_address_32, 4); + uc_reg_read(uc, UC_X86_REG_ESP, &esp); + uc_mem_read(uc, esp, &return_address, 4); + uint32_t return_address_32; + uc_mem_read(uc, esp, &return_address_32, 4); + printf("return_address_32: %x\n", return_address_32); // x86下,所有参数都通过栈传递 // 调整栈指针:每个参数4字节 + 返回地址4字节 - rsp += (paramCount * 4) + 4; - + esp += (paramCount * 4) + 4; // 设置EIP为返回地址 uc_reg_write(uc, UC_X86_REG_EIP, &return_address_32); } + if (this->GetPeInfo()->isX64) { + uc_reg_write(uc, UC_X86_REG_RSP, &rsp); + } else { + uc_reg_write(uc, UC_X86_REG_ESP, &esp); + } - // 更新栈指针,使用正确的寄存器 - uc_reg_write(uc, - this->GetPeInfo()->isX64 ? UC_X86_REG_RSP : UC_X86_REG_ESP, - &rsp); return; } printf("ApiName: %s not found\n", ApiName.c_str()); diff --git a/ai_anti_malware/sandbox_callbacks.cpp b/ai_anti_malware/sandbox_callbacks.cpp index c7e47e5..43ba07f 100644 --- a/ai_anti_malware/sandbox_callbacks.cpp +++ b/ai_anti_malware/sandbox_callbacks.cpp @@ -1,5 +1,4 @@ #include "sandbox_callbacks.h" -#define LOG_LEVEL 0 namespace sandboxCallbacks { void handleCodeRun(uc_engine* uc, uint64_t address, uint32_t size, void* userData) { @@ -236,6 +235,7 @@ void handleMemoryUnmapRead(uc_engine* uc, uc_mem_type type, uint64_t address, printf("[handleMemoryUnmapRead] Address: %p Size: %p Value: %p\n", address, size, value); dumpVmenv(uc, userData); + __debugbreak(); } void handleMemoryWrite(uc_engine* uc, uc_mem_type type, uint64_t address,