Add machine learning feature extraction for PE files

- Implemented MachineLearning class with ExtractFeatures method
- Updated project files to include new machine learning source and header files
- Modified main executable to call feature extraction
- Updated VSCode settings to include additional C++ headers
- Commented out previous file dumping code in main function
This commit is contained in:
Huoji's
2025-03-09 02:05:07 +08:00
parent d2ed7936df
commit 1cea516cf7
9 changed files with 790 additions and 33 deletions

View File

@@ -164,7 +164,44 @@ class cFixImprot : public peconv::t_function_resolver {
};
Sandbox::Sandbox() {}
Sandbox::~Sandbox() {}
/// Destructor: releases everything the sandbox holds, working from the
/// high-level bookkeeping containers down to the low-level handles.
/// The teardown order below is deliberate and must be kept as-is.
Sandbox::~Sandbox() {
    // Step 1: empty the high-level containers first.
    m_crossSectionExecution.clear();
    envStrings.clear();
    api_map.clear();
    m_moduleList.clear();
    m_impFuncDict.clear();
    m_exportFuncDict.clear();

    // Step 2: close the emulator engine via uc_close (the original note
    // describes this step as cleaning up the memory mappings).
    if (m_ucEngine != nullptr) {
        uc_close(m_ucEngine);
        m_ucEngine = nullptr;
    }

    // Step 3: free heap bookkeeping. Each segment carries a singly linked
    // list of HeapBlock nodes; delete every node, then the segment itself.
    for (auto& [segmentBase, heapSegment] : m_heapSegments) {
        for (HeapBlock* node = heapSegment->blocks; node != nullptr;) {
            // Grab the successor before the node is destroyed.
            HeapBlock* const successor = node->next;
            delete node;
            node = successor;
        }
        delete heapSegment;
    }
    m_heapSegments.clear();

    // Step 4: release the stack buffer.
    if (m_stackBuffer != nullptr) {
        free(m_stackBuffer);
        m_stackBuffer = nullptr;
    }

    // Step 5: finally close the low-level handle via cs_close.
    if (m_csHandle != 0) {
        cs_close(&m_csHandle);
    }
}
auto Sandbox::PushModuleToVM(const char* dllName, uint64_t moduleBase) -> void {
for (auto module : m_moduleList) {
@@ -401,9 +438,9 @@ auto Sandbox::SetupVirtualMachine() -> void {
/*
映射 m_KSharedUserDataBase
*/
uint64_t m_KSharedUserDataBase = 0x7FFE0000;
m_KSharedUserDataBase = 0x7FFE0000;
uint64_t m_KSharedUserDataEnd = 0x7FFE0FFF; // 0x7FFE2000
uint64_t m_KSharedUserDataSize = AlignToSectionAlignment(
m_KSharedUserDataSize = AlignToSectionAlignment(
m_KSharedUserDataEnd - m_KSharedUserDataBase, PAGE_SIZE);
uc_mem_map(m_ucEngine, m_KSharedUserDataBase, m_KSharedUserDataSize,
@@ -663,29 +700,9 @@ auto Sandbox::Run() -> void {
InitApiHooks();
std::cout << "Starting execution at " << std::hex << entryPoint
<< std::endl;
err = uc_emu_start(m_ucEngine, entryPoint, m_peInfo->imageEnd, 0, 0);
if (err != UC_ERR_OK) {
std::cerr << "Emulation error: " << uc_strerror(err) << std::endl;
// 32位环境下的错误处理
if (!m_peInfo->isX64) {
uint32_t eip;
uc_reg_read(m_ucEngine, UC_X86_REG_EIP, &eip);
std::cerr << "Error occurred at EIP: 0x" << std::hex << eip
<< std::endl;
// 尝试读取当前指令
uint8_t instruction[16];
if (uc_mem_read(m_ucEngine, eip, instruction,
sizeof(instruction)) == UC_ERR_OK) {
std::cerr << "Instruction bytes: ";
for (int i = 0; i < 16; i++) {
printf("%02X ", instruction[i]);
}
std::cerr << std::endl;
}
}
}
uint64_t timeout = 60 * 1000;
err = uc_emu_start(m_ucEngine, entryPoint, m_peInfo->imageEnd, timeout, 0);
std::cerr << "Emulation error: " << uc_strerror(err) << std::endl;
}
auto Sandbox::GetEnvString() -> std::vector<wchar_t> {
@@ -909,11 +926,11 @@ auto Sandbox::DumpPE() -> std::pair<std::unique_ptr<BYTE[]>, size_t> {
reinterpret_cast<HMODULE>(moduleBuffer.get()),
module->base);
}
//这里有一个严重的问题,就懒得处理了:
//壳里面吐出来的代码的导入表和壳的导入表不是同样一个.
//这个修的是壳的 导入表,所以导入表 修 不 全
//有个很简单的办法,需要搜索IAT结构,然后修改脱壳后的IAT的字段到壳的字段里面,然后再执行一次fix_imports
//懒得写了,家庭作业.自己完成
// 这里有一个严重的问题,就懒得处理了:
// 壳里面吐出来的代码的导入表和壳的导入表不是同样一个.
// 这个修的是壳的 导入表,所以导入表 修 不 全
// 有个很简单的办法,需要搜索IAT结构,然后修改脱壳后的IAT的字段到壳的字段里面,然后再执行一次fix_imports
// 懒得写了,家庭作业.自己完成
bool importsFixed = peconv::fix_imports(
resultBuffer.get(), virtualMemorySize, exportsMap, nullptr);
if (importsFixed) {