This commit is contained in:
Huoji's
2025-03-09 03:19:40 +08:00
parent 1cea516cf7
commit defe59ffe8
7 changed files with 337 additions and 59 deletions

View File

@@ -29,28 +29,100 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
sampleInfo->RecImageBase + sampleInfo->RecImageBase +
(sampleInfo->isX64 ? sampleInfo->ntHead64->OptionalHeader.SizeOfImage (sampleInfo->isX64 ? sampleInfo->ntHead64->OptionalHeader.SizeOfImage
: sampleInfo->ntHead32->OptionalHeader.SizeOfImage); : sampleInfo->ntHead32->OptionalHeader.SizeOfImage);
printf("Debug - Memory mapping parameters:\n");
printf("RecImageBase: 0x%llx\n", sampleInfo->RecImageBase);
printf("peSize: 0x%llx\n", sampleInfo->peSize);
printf("Page aligned base: 0x%llx\n", sampleInfo->RecImageBase & ~0xFFF);
printf("Page aligned size: 0x%llx\n",
(sampleInfo->peSize + 0xFFF) & ~0xFFF);
sampleInfo->RecImageBase = sampleInfo->RecImageBase & ~0xFFF;
sampleInfo->peSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF;
return sampleInfo; return sampleInfo;
} }
int main() { int doMl(int argc, char* argv[]) {
auto sampleInfo = getPeInfo("z:\\Console_Test.exe"); // 检查命令行参数
// auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe"); if (argc < 3) {
printf("input new file %s \n", sampleInfo->inputFilePath); std::cout << "用法: " << argv[0] << " <样本目录路径> <输出CSV路径>"
printf("is x64: %d\n", sampleInfo->isX64); << std::endl;
printf("is relocated: %d\n", sampleInfo->isRelocated); std::cout << "或者: " << argv[0]
printf("RecImageBase: %llx\n", sampleInfo->RecImageBase); << " -single <单个文件路径> <输出CSV路径>" << std::endl;
auto sandbox = std::make_shared<Sandbox>(); return 1;
sandbox->InitEnv(sampleInfo);
sandbox->Run();
auto [peBuffer, peSize] = sandbox->DumpPE();
if (peBuffer) {
printf("peBuffer: %p\n", peBuffer.get());
printf("peSize: %d\n", peSize);
// peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize);
MachineLearning ml;
ml.ExtractFeatures(peBuffer.get(), peSize, "z:\\features.txt");
} }
peBuffer.release(); MachineLearning ml;
if (std::string(argv[1]) == "-single") {
// 处理单个文件
if (argc < 4) {
std::cout << "处理单个文件时需要提供文件路径和输出CSV路径"
<< std::endl;
return 1;
}
std::string filePath = argv[2];
std::string csvPath = argv[3];
// 读取文件
std::vector<uint8_t> buffer = ml.ReadFileToBuffer(filePath);
if (buffer.empty()) {
std::cerr << "无法读取文件: " << filePath << std::endl;
return 1;
}
// 提取特征
std::vector<double> features =
ml.ExtractFeatures(buffer.data(), buffer.size());
if (features.empty()) {
std::cerr << "无法从文件提取特征: " << filePath << std::endl;
return 1;
}
// 导出到CSV
if (!ml.ExportToCSV(features, csvPath)) {
std::cerr << "无法导出到CSV文件: " << csvPath << std::endl;
return 1;
}
std::cout << "成功处理文件并导出特征到: " << csvPath << std::endl;
} else {
// 处理目录
std::string dirPath = argv[1];
std::string csvPath = argv[2];
std::cout << "开始处理目录: " << dirPath << std::endl;
std::cout << "特征将导出到: " << csvPath << std::endl;
if (!ml.ProcessDirectory(dirPath, csvPath)) {
std::cerr << "处理目录时发生错误" << std::endl;
return 1;
}
}
return 0;
};
int main(int argc, char* argv[]) {
doMl(argc, argv);
/*
auto sampleInfo = getPeInfo(
"E:\\对战平台\\CrowAntiCheat\\CrowAntiCheat\\client\\Console_"
"Test\\Release\\Console_Test.exe");
// auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe");
printf("input new file %s \n", sampleInfo->inputFilePath);
printf("is x64: %d\n", sampleInfo->isX64);
printf("is relocated: %d\n", sampleInfo->isRelocated);
printf("RecImageBase: %llx\n", sampleInfo->RecImageBase);
auto sandbox = std::make_shared<Sandbox>();
sandbox->InitEnv(sampleInfo);
sandbox->Run();
auto [peBuffer, peSize] = sandbox->DumpPE();
if (peBuffer) {
printf("peBuffer: %p\n", peBuffer.get());
printf("peSize: %d\n", peSize);
// peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize);
MachineLearning ml;
ml.ExtractFeatures(peBuffer.get(), peSize);
}
peBuffer.release();
*/
system("pause"); system("pause");
return 0; return 0;
} }

View File

@@ -1,4 +1,6 @@
#pragma once #pragma once
#define LOG_LEVEL 0
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <iostream> #include <iostream>
#include <iostream> #include <iostream>

View File

@@ -1,4 +1,5 @@
#include "ml.h" #include "ml.h"
#include <Windows.h>
#include <array> #include <array>
#include <limits> #include <limits>
#include <algorithm> #include <algorithm>
@@ -7,6 +8,7 @@
#include <iomanip> #include <iomanip>
#include <sstream> #include <sstream>
#include <cfloat> #include <cfloat>
#include <filesystem>
// 确保std命名空间中的函数可用 // 确保std命名空间中的函数可用
using std::max; using std::max;
@@ -177,15 +179,14 @@ MachineLearning::~MachineLearning() {
// 析构函数,清理资源(如有必要) // 析构函数,清理资源(如有必要)
} }
bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize, std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
const std::string& outputPath) { size_t bufferSize) {
// 使用libpeconv解析PE文件 // 使用libpeconv解析PE文件
size_t v_size = 0; size_t v_size = 0;
BYTE* peBuffer = peconv::load_pe_module(const_cast<BYTE*>(buffer), BYTE* peBuffer = peconv::load_pe_module(const_cast<BYTE*>(buffer),
bufferSize, v_size, false, false); bufferSize, v_size, false, false);
if (!peBuffer) { if (!peBuffer) {
std::cerr << "无法加载PE文件" << std::endl; return std::vector<double>();
return false;
} }
// 解析PE信息 // 解析PE信息
@@ -202,7 +203,7 @@ bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize,
(PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer); (PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer);
if (!ntHeaders) { if (!ntHeaders) {
peconv::free_pe_buffer(peBuffer); peconv::free_pe_buffer(peBuffer);
return false; return std::vector<double>();
} }
// 从NT头部获取信息 // 从NT头部获取信息
@@ -392,13 +393,10 @@ bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize,
// 7. 节区数量 // 7. 节区数量
allFeatures.push_back(static_cast<double>(sections.size())); allFeatures.push_back(static_cast<double>(sections.size()));
// 导出特征到CSV
bool result = ExportToCSV(allFeatures, outputPath);
// 清理资源 // 清理资源
peconv::free_pe_buffer(peBuffer); peconv::free_pe_buffer(peBuffer);
return result; return allFeatures;
} }
std::vector<double> MachineLearning::EncodeProperties( std::vector<double> MachineLearning::EncodeProperties(
@@ -588,4 +586,124 @@ MachineLearning::GetOpcodeStatistics(const uint8_t* data, size_t dataSize,
bool isX64, const PeInfo& peInfo) { bool isX64, const PeInfo& peInfo) {
// 此函数未使用,但保留实现接口 // 此函数未使用,但保留实现接口
return std::make_tuple(std::vector<double>(), std::vector<int>()); return std::make_tuple(std::vector<double>(), std::vector<int>());
}
/// Reads an entire file into memory as raw bytes.
///
/// @param filePath Path of the file to load.
/// @return The file contents. An empty vector is returned when the file
///         cannot be opened, its size cannot be determined, the read fails,
///         or the file itself is empty.
std::vector<uint8_t> MachineLearning::ReadFileToBuffer(
    const std::string& filePath) {
    // Open positioned at the end so tellg() reports the file size.
    std::ifstream fileStream(filePath, std::ios::binary | std::ios::ate);
    if (!fileStream.is_open()) {
        std::cerr << "无法打开文件: " << filePath << std::endl;
        return std::vector<uint8_t>();
    }

    // tellg() yields -1 on failure. Using that value as a vector size would
    // convert it to an enormous unsigned count, so reject it explicitly.
    const std::streamsize fileSize = fileStream.tellg();
    if (fileSize < 0) {
        std::cerr << "读取文件失败: " << filePath << std::endl;
        return std::vector<uint8_t>();
    }
    fileStream.seekg(0, std::ios::beg);

    // Allocate the buffer and read the whole file in one call.
    std::vector<uint8_t> buffer(static_cast<size_t>(fileSize));
    if (!fileStream.read(reinterpret_cast<char*>(buffer.data()), fileSize)) {
        std::cerr << "读取文件失败: " << filePath << std::endl;
        return std::vector<uint8_t>();
    }
    return buffer;
}
bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
const std::string& outputCsvPath) {
// 打开CSV文件用于写入
std::ofstream csvFile(outputCsvPath);
if (!csvFile.is_open()) {
std::cerr << "无法创建CSV文件: " << outputCsvPath << std::endl;
return false;
}
/*
// 写入CSV标题行
csvFile << "文件路径";
for (size_t i = 0; i < _properties.size(); i++) {
csvFile << ",属性_" << i;
}
for (size_t i = 0; i < _libraries.size(); i++) {
csvFile << ",库_" << i;
}
csvFile << ",文件熵";
for (size_t i = 0; i < 64; i++) { // 前64个字节特征
csvFile << ",EP_" << i;
}
csvFile << ",节区数";
csvFile << ",平均熵";
csvFile << ",最大熵";
csvFile << ",归一化平均熵";
csvFile << ",节区大小比率";
csvFile << ",代码比率";
csvFile << ",节区计数";
csvFile << std::endl;
*/
// 递归遍历目录
WIN32_FIND_DATAA findData;
std::string searchPath = directoryPath + "\\*";
HANDLE hFind = FindFirstFileA(searchPath.c_str(), &findData);
if (hFind == INVALID_HANDLE_VALUE) {
std::cerr << "无法访问目录: " << directoryPath << std::endl;
csvFile.close();
return false;
}
int processedCount = 0;
int failedCount = 0;
do {
// 跳过 "." 和 ".." 目录
if (strcmp(findData.cFileName, ".") == 0 ||
strcmp(findData.cFileName, "..") == 0) {
continue;
}
std::string currentPath = directoryPath + "\\" + findData.cFileName;
if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
// 递归处理子目录
ProcessDirectory(currentPath, outputCsvPath);
} else {
// 处理文件
std::vector<uint8_t> fileBuffer = ReadFileToBuffer(currentPath);
if (fileBuffer.empty()) {
std::cerr << "跳过文件: " << currentPath << " (读取失败)"
<< std::endl;
failedCount++;
continue;
}
// 提取特征
std::vector<double> features =
ExtractFeatures(fileBuffer.data(), fileBuffer.size());
if (features.empty()) {
std::cerr << "跳过文件: " << currentPath << " (特征提取失败)"
<< std::endl;
failedCount++;
continue;
}
// 写入CSV
csvFile << currentPath;
for (const auto& feature : features) {
csvFile << "," << std::fixed << std::setprecision(6) << feature;
}
csvFile << std::endl;
processedCount++;
if (processedCount % 100 == 0) {
std::cout << "已处理 " << processedCount << " 个文件..."
<< std::endl;
}
}
} while (FindNextFileA(hFind, &findData));
FindClose(hFind);
csvFile.close();
printf("ML Process Result, success count: %d fail count: %d \n",
processedCount, failedCount);
return true;
} }

View File

@@ -62,9 +62,20 @@ class MachineLearning {
MachineLearning(); MachineLearning();
~MachineLearning(); ~MachineLearning();
// 主函数:提取特征并导出到CSV // 提取特征并返回特征向量
bool ExtractFeatures(const uint8_t* buffer, size_t bufferSize, std::vector<double> ExtractFeatures(const uint8_t* buffer,
const std::string& outputPath); size_t bufferSize);
// 将特征导出到CSV
bool ExportToCSV(const std::vector<double>& features,
const std::string& outputPath);
// 批量处理目录中的样本并生成CSV
bool ProcessDirectory(const std::string& directoryPath,
const std::string& outputCsvPath);
// 读取文件到内存
std::vector<uint8_t> ReadFileToBuffer(const std::string& filePath);
private: private:
// 特征提取辅助函数 // 特征提取辅助函数
@@ -81,10 +92,6 @@ class MachineLearning {
int GetOpcodeType(const void* code, bool isX64); int GetOpcodeType(const void* code, bool isX64);
double CalculateEntropy(const uint8_t* data, size_t size); double CalculateEntropy(const uint8_t* data, size_t size);
// 将特征导出到CSV
bool ExportToCSV(const std::vector<double>& features,
const std::string& outputPath);
// 常量定义 // 常量定义
std::vector<std::string> _properties; std::vector<std::string> _properties;
std::vector<std::string> _libraries; std::vector<std::string> _libraries;

View File

@@ -155,7 +155,7 @@ class cFixImprot : public peconv::t_function_resolver {
} }
} }
} }
__debugbreak(); //__debugbreak();
return nullptr; return nullptr;
} }
@@ -191,12 +191,6 @@ Sandbox::~Sandbox() {
} }
m_heapSegments.clear(); m_heapSegments.clear();
// 4. 清理栈内存
if (m_stackBuffer) {
free(m_stackBuffer);
m_stackBuffer = nullptr;
}
// 5. 最后清理底层资源 // 5. 最后清理底层资源
if (m_csHandle) { if (m_csHandle) {
cs_close(&m_csHandle); cs_close(&m_csHandle);
@@ -349,8 +343,9 @@ auto Sandbox::ResolveImportExports() -> void {
} }
const auto exports = ResolveExport(module->real_base); const auto exports = ResolveExport(module->real_base);
for (const auto item : exports) { for (const auto item : exports) {
printf("import export: [%s] %s => %llx\n", module->name, item->name, if (LOG_LEVEL > 0) {
item->function_address); printf("import export: [%s] %s => %llx\n", module->name, item->name, item->function_address);
}
module->export_function.push_back(item); module->export_function.push_back(item);
} }
} }
@@ -359,7 +354,9 @@ auto Sandbox::ResolveImportExports() -> void {
auto Sandbox::processImportModule(const moudle_import* importModule) -> void { auto Sandbox::processImportModule(const moudle_import* importModule) -> void {
for (auto module : m_moduleList) { for (auto module : m_moduleList) {
if (strcmp(module->name, importModule->dll_name) == 0) { if (strcmp(module->name, importModule->dll_name) == 0) {
printf("skip module name: %s (already loaded)\n", module->name); if (LOG_LEVEL > 0) {
printf("skip module name: %s (already loaded)\n", module->name);
}
return; return;
} }
} }

View File

@@ -2173,6 +2173,83 @@ auto Api_VirtualProtect(void* sandbox, uc_engine* uc, uint64_t address)
&result); &result);
} }
// Emulated handler for the `__set_app_type` import.
//
// Reads the single integer argument (RCX on x64, first stack slot after the
// return address on x86), logs it, and reports success by writing 0 to the
// guest's return register.
//
// sandbox: opaque pointer to the owning Sandbox instance.
// uc:      emulator handle whose registers and memory are read and written.
// address: not used by this handler.
auto Api___set_app_type(void* sandbox, uc_engine* uc, uint64_t address)
    -> void {
    auto context = static_cast<Sandbox*>(sandbox);
    const bool isX64 = context->GetPeInfo()->isX64;
    int32_t appType = 0;

    // Fetch the argument.
    if (isX64) {
        // x64: rcx = appType. Zero-initialised so a failed register read
        // does not leave an indeterminate value.
        uint64_t temp_type = 0;
        uc_reg_read(uc, UC_X86_REG_RCX, &temp_type);
        appType = static_cast<int32_t>(temp_type);
    } else {
        // x86: the argument lives on the stack.
        uint32_t esp_address = 0;
        uc_reg_read(uc, UC_X86_REG_ESP, &esp_address);
        esp_address += 0x4;  // skip the return address
        uc_mem_read(uc, esp_address, &appType, sizeof(int32_t));
    }

    printf("[*] __set_app_type: AppType=%d\n", appType);

    // Return 0 to indicate success. The value must be 64 bits wide because
    // writing RAX copies eight bytes from the supplied pointer; a 4-byte
    // variable would be over-read and leave garbage in the upper half.
    uint64_t result = 0;
    uc_reg_write(uc, isX64 ? UC_X86_REG_RAX : UC_X86_REG_EAX, &result);
}
auto Api___p__fmode(void* sandbox, uc_engine* uc, uint64_t address) -> void {
auto sb = static_cast<Sandbox*>(sandbox);
// 检查是否已经创建了 _fmode 变量
static uint64_t fmode_address = 0;
static int32_t fmode_value = 0; // 默认为文本模式 (_O_TEXT)
if (fmode_address == 0) {
// 为 _fmode 变量分配内存
// 使用特定堆地址,与其他 API 一致
uint64_t heap_handle =
sb->GetPeInfo()->isX64 ? HEAP_ADDRESS_64 : HEAP_ADDRESS_32;
// 在堆上分配空间
HeapSegment* segment = nullptr;
auto it = sb->m_heapSegments.find(heap_handle);
if (it != sb->m_heapSegments.end()) {
segment = it->second;
} else {
// 创建新的堆段
segment = sb->CreateHeapSegment(heap_handle, 0x10000);
sb->m_heapSegments[heap_handle] = segment;
}
if (segment) {
fmode_address = sb->AllocateFromSegment(segment, sizeof(int32_t));
if (fmode_address) {
// 初始化 _fmode 为文本模式
uc_mem_write(uc, fmode_address, &fmode_value, sizeof(int32_t));
printf(
"[*] __p__fmode: Allocated _fmode at 0x%llx with value "
"%d\n",
fmode_address, fmode_value);
}
}
}
// 返回 _fmode 变量的地址
printf("[*] __p__fmode: Returning address 0x%llx\n", fmode_address);
// 设置返回值
if (sb->GetPeInfo()->isX64) {
uc_reg_write(uc, UC_X86_REG_RAX, &fmode_address);
} else {
uint32_t eax = static_cast<uint32_t>(fmode_address);
uc_reg_write(uc, UC_X86_REG_EAX, &eax);
}
}
auto Sandbox::InitApiHooks() -> void { auto Sandbox::InitApiHooks() -> void {
auto FakeApi_GetSystemTimeAsFileTime = auto FakeApi_GetSystemTimeAsFileTime =
_fakeApi{.func = Api_GetSystemTimeAsFileTime, .paramCount = 1}; _fakeApi{.func = Api_GetSystemTimeAsFileTime, .paramCount = 1};
@@ -2241,6 +2318,9 @@ auto Sandbox::InitApiHooks() -> void {
_fakeApi{.func = Api_SetUnhandledExceptionFilter, .paramCount = 1}; _fakeApi{.func = Api_SetUnhandledExceptionFilter, .paramCount = 1};
auto FakeApi_VirtualProtect = auto FakeApi_VirtualProtect =
_fakeApi{.func = Api_VirtualProtect, .paramCount = 4}; _fakeApi{.func = Api_VirtualProtect, .paramCount = 4};
auto FakeApi___set_app_type =
_fakeApi{.func = Api___set_app_type, .paramCount = 1};
auto FakeApi___p__fmode = _fakeApi{.func = Api___p__fmode, .paramCount = 0};
api_map = { api_map = {
{"GetSystemTimeAsFileTime", {"GetSystemTimeAsFileTime",
@@ -2300,6 +2380,8 @@ auto Sandbox::InitApiHooks() -> void {
{"SetUnhandledExceptionFilter", {"SetUnhandledExceptionFilter",
std::make_shared<_fakeApi>(FakeApi_SetUnhandledExceptionFilter)}, std::make_shared<_fakeApi>(FakeApi_SetUnhandledExceptionFilter)},
{"VirtualProtect", std::make_shared<_fakeApi>(FakeApi_VirtualProtect)}, {"VirtualProtect", std::make_shared<_fakeApi>(FakeApi_VirtualProtect)},
{"__set_app_type", std::make_shared<_fakeApi>(FakeApi___set_app_type)},
{"__p__fmode", std::make_shared<_fakeApi>(FakeApi___p__fmode)},
}; };
} }
auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip, auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip,
@@ -2310,16 +2392,13 @@ auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip,
// 获取参数数量 // 获取参数数量
int paramCount = it->second->paramCount; int paramCount = it->second->paramCount;
uint32_t esp;
// 获取当前的栈指针
uint64_t rsp; uint64_t rsp;
uc_reg_read(uc,
this->GetPeInfo()->isX64 ? UC_X86_REG_RSP : UC_X86_REG_ESP,
&rsp);
// 从栈上读取返回地址 // 从栈上读取返回地址
uint64_t return_address; uint64_t return_address;
if (this->GetPeInfo()->isX64) { // 64位系统 if (this->GetPeInfo()->isX64) { // 64位系统
uc_reg_read(uc, UC_X86_REG_RSP, &rsp);
// 读取8字节的返回地址 // 读取8字节的返回地址
uc_mem_read(uc, rsp, &return_address, 8); uc_mem_read(uc, rsp, &return_address, 8);
@@ -2332,21 +2411,24 @@ auto Sandbox::EmulateApi(uc_engine* uc, uint64_t address, uint64_t rip,
uc_reg_write(uc, UC_X86_REG_RIP, &return_address); uc_reg_write(uc, UC_X86_REG_RIP, &return_address);
} else { // 32位系统 } else { // 32位系统
// 读取4字节的返回地址 // 读取4字节的返回地址
uint32_t return_address_32; uc_reg_read(uc, UC_X86_REG_ESP, &esp);
uc_mem_read(uc, rsp, &return_address_32, 4); uc_mem_read(uc, esp, &return_address, 4);
uint32_t return_address_32;
uc_mem_read(uc, esp, &return_address_32, 4);
printf("return_address_32: %x\n", return_address_32);
// x86下所有参数都通过栈传递 // x86下所有参数都通过栈传递
// 调整栈指针每个参数4字节 + 返回地址4字节 // 调整栈指针每个参数4字节 + 返回地址4字节
rsp += (paramCount * 4) + 4; esp += (paramCount * 4) + 4;
// 设置EIP为返回地址 // 设置EIP为返回地址
uc_reg_write(uc, UC_X86_REG_EIP, &return_address_32); uc_reg_write(uc, UC_X86_REG_EIP, &return_address_32);
} }
if (this->GetPeInfo()->isX64) {
uc_reg_write(uc, UC_X86_REG_RSP, &rsp);
} else {
uc_reg_write(uc, UC_X86_REG_ESP, &esp);
}
// 更新栈指针,使用正确的寄存器
uc_reg_write(uc,
this->GetPeInfo()->isX64 ? UC_X86_REG_RSP : UC_X86_REG_ESP,
&rsp);
return; return;
} }
printf("ApiName: %s not found\n", ApiName.c_str()); printf("ApiName: %s not found\n", ApiName.c_str());

View File

@@ -1,5 +1,4 @@
#include "sandbox_callbacks.h" #include "sandbox_callbacks.h"
#define LOG_LEVEL 0
namespace sandboxCallbacks { namespace sandboxCallbacks {
void handleCodeRun(uc_engine* uc, uint64_t address, uint32_t size, void handleCodeRun(uc_engine* uc, uint64_t address, uint32_t size,
void* userData) { void* userData) {
@@ -236,6 +235,7 @@ void handleMemoryUnmapRead(uc_engine* uc, uc_mem_type type, uint64_t address,
printf("[handleMemoryUnmapRead] Address: %p Size: %p Value: %p\n", address, printf("[handleMemoryUnmapRead] Address: %p Size: %p Value: %p\n", address,
size, value); size, value);
dumpVmenv(uc, userData); dumpVmenv(uc, userData);
__debugbreak();
} }
void handleMemoryWrite(uc_engine* uc, uc_mem_type type, uint64_t address, void handleMemoryWrite(uc_engine* uc, uc_mem_type type, uint64_t address,