135 lines
3.9 KiB
C++
135 lines
3.9 KiB
C++
#pragma once
|
|
#include "head.h"
|
|
#include <vector>
|
|
#include <string>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <cmath>
|
|
#include <fstream>
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <functional>
|
|
#include <unordered_map>
|
|
|
|
// 前向声明
|
|
struct PeInfo;
|
|
struct SectionInfo;
|
|
class BasicPeInfo;
|
|
|
|
// RVA转换为内存中的指针的辅助函数
|
|
inline BYTE* RvaToPtr(DWORD rva, BYTE* peBuffer) {
|
|
if (!peBuffer || rva == 0) return nullptr;
|
|
|
|
PIMAGE_NT_HEADERS ntHeaders =
|
|
(PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer);
|
|
if (!ntHeaders) return nullptr;
|
|
|
|
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(ntHeaders);
|
|
WORD numSections = ntHeaders->FileHeader.NumberOfSections;
|
|
|
|
for (WORD i = 0; i < numSections; i++, section++) {
|
|
// 检查RVA是否在这个节区范围内
|
|
if (rva >= section->VirtualAddress &&
|
|
rva < section->VirtualAddress + section->Misc.VirtualSize) {
|
|
// 计算文件偏移
|
|
DWORD offset =
|
|
rva - section->VirtualAddress + section->PointerToRawData;
|
|
return peBuffer + offset;
|
|
}
|
|
}
|
|
|
|
// 如果RVA在PE头部内
|
|
DWORD sizeOfHeaders = 0;
|
|
bool isX64 = peconv::is64bit(peBuffer);
|
|
|
|
if (isX64) {
|
|
PIMAGE_NT_HEADERS64 ntHeaders64 = (PIMAGE_NT_HEADERS64)ntHeaders;
|
|
sizeOfHeaders = ntHeaders64->OptionalHeader.SizeOfHeaders;
|
|
} else {
|
|
PIMAGE_NT_HEADERS32 ntHeaders32 = (PIMAGE_NT_HEADERS32)ntHeaders;
|
|
sizeOfHeaders = ntHeaders32->OptionalHeader.SizeOfHeaders;
|
|
}
|
|
|
|
if (rva < sizeOfHeaders) {
|
|
return peBuffer + rva;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
class MachineLearning {
|
|
public:
|
|
MachineLearning();
|
|
~MachineLearning();
|
|
|
|
// 提取特征并返回特征向量
|
|
std::vector<double> ExtractFeatures(const uint8_t* buffer,
|
|
size_t bufferSize);
|
|
|
|
// 将特征导出到CSV
|
|
bool ExportToCSV(const std::vector<double>& features,
|
|
const std::string& outputPath);
|
|
|
|
// 批量处理目录中的样本并生成CSV
|
|
bool ProcessDirectory(const std::string& directoryPath,
|
|
const std::string& outputCsvPath);
|
|
|
|
// 读取文件到内存
|
|
std::vector<uint8_t> ReadFileToBuffer(const std::string& filePath);
|
|
|
|
private:
|
|
// 特征提取辅助函数
|
|
std::vector<double> EncodeProperties(
|
|
const PeInfo& peInfo, const std::vector<std::string>& dllTables);
|
|
std::vector<double> EncodeEntrypoint(const std::vector<uint8_t>& epBytes);
|
|
std::vector<double> EncodeHistogram(const uint8_t* data, size_t size);
|
|
std::vector<double> EncodeLibraries(
|
|
const std::vector<std::string>& dllTable);
|
|
std::vector<double> EncodeSections(const std::vector<SectionInfo>& sections,
|
|
bool isX64);
|
|
std::tuple<std::vector<double>, std::vector<int>> GetOpcodeStatistics(
|
|
const uint8_t* data, size_t dataSize, bool isX64, const PeInfo& peInfo);
|
|
int GetOpcodeType(const void* code, bool isX64);
|
|
double CalculateEntropy(const uint8_t* data, size_t size);
|
|
|
|
// 常量定义
|
|
std::vector<std::string> _properties;
|
|
std::vector<std::string> _libraries;
|
|
std::unordered_map<std::string, int> _opcodeTypeDict;
|
|
};
|
|
|
|
// PE文件信息结构
|
|
struct PeInfo {
|
|
uint32_t addressOfEntryPoint;
|
|
uint32_t baseOfCode;
|
|
uint32_t sizeOfCode;
|
|
uint32_t sizeOfImage;
|
|
uint32_t sizeOfHeaders;
|
|
uint32_t characteristics;
|
|
uint32_t dllCharacteristics;
|
|
bool isX64;
|
|
|
|
// PE目录标志
|
|
bool hasConfiguration;
|
|
bool hasDebug;
|
|
bool hasExceptions;
|
|
bool hasExports;
|
|
bool hasImports;
|
|
bool hasNx; // NX兼容标志
|
|
bool hasRelocations;
|
|
bool hasResources;
|
|
bool hasSignatures;
|
|
bool hasTls;
|
|
bool hasDelayImports;
|
|
bool hasImageBase;
|
|
bool hasEntryIat;
|
|
bool hasRich;
|
|
};
|
|
|
|
// 节区信息结构
|
|
struct SectionInfo {
|
|
uint32_t characteristics;
|
|
double entropy;
|
|
uint32_t sizeOfRawData;
|
|
uint32_t virtualSize;
|
|
}; |