Files
awesome_anti_virus_engine/ai_anti_malware/ai_anti_malware.cpp
Huoji's 3a6e331f31 update
2025-03-20 02:18:00 +08:00

337 lines
11 KiB
C++

// ai_anti_malware.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include "head.h"
/// Identifies which detection stage, if any, flagged a sample.
///
/// The numeric values are written out explicitly because the scan driver
/// prints the enumerator as an integer; they match the implicit numbering.
enum class DetectEngineType {
    kNone = 0,             ///< No stage flagged the sample.
    kMachineLearning = 1,  ///< Flagged by the machine-learning score.
    kSandbox = 2,          ///< Flagged by the sandbox run.
    kPeStruct = 3,         ///< Flagged by static PE structure checks.
    kYaraScan = 4          ///< Reserved for a YARA stage.
};
/// Loads a PE file through libpeconv and gathers the header-derived facts the
/// later analysis stages read from BasicPeInfo.
///
/// @param inputFilePath Path of the PE file to load.
/// @return A populated BasicPeInfo, or nullptr when peconv::load_pe_module
///         returns no buffer.
///
/// Fields written: inputFilePath, peBuffer, peSize, ntHead64, ntHead32, isX64,
/// RecImageBase, isRelocated, entryPoint, imageEnd and isDll. On return,
/// RecImageBase has been rounded down and peSize rounded up to a 4 KiB
/// boundary; imageEnd is computed from the unrounded image base.
auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
    auto sampleInfo = std::make_shared<BasicPeInfo>();
    sampleInfo->inputFilePath = inputFilePath;
    sampleInfo->peBuffer = peconv::load_pe_module(
        inputFilePath.c_str(), sampleInfo->peSize, false, false);
    if (sampleInfo->peBuffer == nullptr) {
        return nullptr;
    }

    // C-style cast kept on purpose: the declared type of peBuffer is not
    // visible from this function.
    BYTE* const image = (BYTE*)sampleInfo->peBuffer;

    sampleInfo->ntHead64 = peconv::get_nt_hdrs64(image);
    sampleInfo->ntHead32 = peconv::get_nt_hdrs32(image);
    sampleInfo->isX64 = peconv::is64bit(image);

    // From here on, isX64 decides which of the two header views is read.
    sampleInfo->RecImageBase =
        sampleInfo->isX64
            ? (DWORD64)sampleInfo->ntHead64->OptionalHeader.ImageBase
            : (DWORD)sampleInfo->ntHead32->OptionalHeader.ImageBase;
    sampleInfo->isRelocated = peconv::relocate_module(
        image, sampleInfo->peSize, sampleInfo->RecImageBase);
    sampleInfo->entryPoint =
        sampleInfo->isX64
            ? sampleInfo->ntHead64->OptionalHeader.AddressOfEntryPoint
            : sampleInfo->ntHead32->OptionalHeader.AddressOfEntryPoint;
    sampleInfo->imageEnd =
        sampleInfo->RecImageBase +
        (sampleInfo->isX64 ? sampleInfo->ntHead64->OptionalHeader.SizeOfImage
                           : sampleInfo->ntHead32->OptionalHeader.SizeOfImage);
    sampleInfo->isDll = peconv::is_module_dll(image);

    // Page-aligned (4 KiB) values that replace the raw ones below.
    const auto alignedBase = sampleInfo->RecImageBase & ~0xFFF;
    const auto alignedSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF;

    // "%llx" requires exactly unsigned long long. The fields' own widths may
    // differ by build target, so every argument is converted explicitly.
    printf("Debug - Memory mapping parameters:\n");
    printf("RecImageBase: 0x%llx\n",
           static_cast<unsigned long long>(sampleInfo->RecImageBase));
    printf("peSize: 0x%llx\n",
           static_cast<unsigned long long>(sampleInfo->peSize));
    printf("Page aligned base: 0x%llx\n",
           static_cast<unsigned long long>(alignedBase));
    printf("Page aligned size: 0x%llx\n",
           static_cast<unsigned long long>(alignedSize));

    sampleInfo->RecImageBase = alignedBase;
    sampleInfo->peSize = alignedSize;
    return sampleInfo;
}
// Collects malware features and writes them to a CSV file.
//
// Two command-line shapes are accepted:
//   <program> <sample directory> <output CSV>
//   <program> -single <file> <output CSV>
//
// Returns 0 on success and 1 on a usage error or any failed step.
int doMl(int argc, char* argv[]) {
    // Both modes need at least two arguments after the program name.
    if (argc < 3) {
        std::cout << "用法: " << argv[0] << " <样本目录路径> <输出CSV路径>"
                  << std::endl;
        std::cout << "或者: " << argv[0]
                  << " -single <单个文件路径> <输出CSV路径>" << std::endl;
        return 1;
    }

    MachineLearning ml;
    const bool singleFileMode = (std::string(argv[1]) == "-single");

    if (!singleFileMode) {
        // Directory mode: every sample under the directory goes into one CSV.
        std::string sampleDir = argv[1];
        std::string outputCsv = argv[2];
        std::cout << "开始处理目录: " << sampleDir << std::endl;
        std::cout << "特征将导出到: " << outputCsv << std::endl;
        if (!ml.ProcessDirectory(sampleDir, outputCsv)) {
            std::cerr << "处理目录时发生错误" << std::endl;
            return 1;
        }
        return 0;
    }

    // Single-file mode needs one more argument than directory mode.
    if (argc < 4) {
        std::cout << "处理单个文件时需要提供文件路径和输出CSV路径"
                  << std::endl;
        return 1;
    }
    std::string samplePath = argv[2];
    std::string outputCsv = argv[3];

    // Step 1: read the whole file into memory.
    std::vector<uint8_t> fileBytes = ml.ReadFileToBuffer(samplePath);
    if (fileBytes.empty()) {
        std::cerr << "无法读取文件: " << samplePath << std::endl;
        return 1;
    }

    // Step 2: extract the feature vector.
    std::vector<double> featureVector =
        ml.ExtractFeatures(fileBytes.data(), fileBytes.size());
    if (featureVector.empty()) {
        std::cerr << "无法从文件提取特征: " << samplePath << std::endl;
        return 1;
    }

    // Step 3: export the features to the CSV file.
    if (!ml.ExportToCSV(featureVector, outputCsv)) {
        std::cerr << "无法导出到CSV文件: " << outputCsv << std::endl;
        return 1;
    }

    std::cout << "成功处理文件并导出特征到: " << outputCsv << std::endl;
    return 0;
}
// Scores a single file with the machine-learning model and prints a verdict.
//
// Usage: <program> <file path>
//
// Returns 0 when a score was produced, 1 on a usage error or when the file
// could not be analysed. Every path now returns a value; previously the
// function ran off its end, which is undefined behaviour for a non-void
// function.
int doPredict(int argc, char* argv[]) {
    if (argc < 2) {
        std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
        return 1;
    }
    std::string filePath = argv[1];
    MachineLearning ml;
    const double rawPrediction = ml.PredictMalwareFromFile(filePath);

    // Validate the raw value before inverting it. Testing "1 - raw >= 0"
    // turns a negative failure value into a score above 1, which would then
    // be reported as malware.
    // NOTE(review): assumes PredictMalwareFromFile yields a probability in
    // [0, 1] and signals failure with an out-of-range value - confirm against
    // the MachineLearning implementation.
    const bool predictionValid =
        rawPrediction >= 0.0 && rawPrediction <= 1.0;
    if (!predictionValid) {
        std::cout << "无法分析文件。" << std::endl;
        return 1;
    }

    // The reported score is the complement of the raw prediction.
    const double score = 1 - rawPrediction;
    std::cout << "文件 " << filePath << " 的恶意软件得分: " << score
              << std::endl;
    if (score > 0.5) {
        std::cout << "警告: 这个文件可能是恶意软件!" << std::endl;
    } else {
        std::cout << "这个文件可能是安全的。" << std::endl;
    }
    return 0;
}
class PeStructAnalyzer {
public:
PeStructAnalyzer() = default;
~PeStructAnalyzer() = default;
bool AnalyzePe(const std::shared_ptr<BasicPeInfo>& peInfo) {
if (!peInfo || !peInfo->peBuffer) {
return false;
}
bool isSuspicious = false;
// 检查导入表
if (HasNoImports(peInfo)) {
std::cout << "警告: 未发现导入表,这是一个可疑特征" << std::endl;
isSuspicious = true;
}
// 检查节表异常
auto [hasSuspiciousSections, suspiciousReason] =
AnalyzeSections(peInfo);
if (hasSuspiciousSections) {
std::cout << "警告: " << suspiciousReason << std::endl;
isSuspicious = true;
}
return isSuspicious;
}
private:
static constexpr DWORD MAX_REASONABLE_SECTION_COUNT = 20; // 最大合理区段数
static constexpr DWORD MAX_EXECUTABLE_SECTIONS = 3; // 最大可执行区段数
static constexpr DWORD MAX_SECTION_SIZE = 0x10000000; // 256MB
static constexpr DWORD SECTION_ALIGNMENT = 0x1000; // 4KB对齐
static constexpr DWORD SUSPICIOUS_ENTROPY_THRESHOLD = 7; // 熵值阈值
bool HasNoImports(const std::shared_ptr<BasicPeInfo>& peInfo) {
PIMAGE_DATA_DIRECTORY importDir = nullptr;
if (peInfo->isX64) {
importDir = &peInfo->ntHead64->OptionalHeader
.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
} else {
importDir = &peInfo->ntHead32->OptionalHeader
.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
}
return (importDir->VirtualAddress == 0 || importDir->Size == 0);
}
std::pair<bool, std::string> AnalyzeSections(
const std::shared_ptr<BasicPeInfo>& peInfo) {
PIMAGE_SECTION_HEADER firstSection = nullptr;
WORD numberOfSections = 0;
if (peInfo->isX64) {
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead64);
numberOfSections = peInfo->ntHead64->FileHeader.NumberOfSections;
} else {
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead32);
numberOfSections = peInfo->ntHead32->FileHeader.NumberOfSections;
}
// 检查区段数量是否异常
if (numberOfSections > MAX_REASONABLE_SECTION_COUNT) {
return {true, "区段数量异常: " + std::to_string(numberOfSections) +
" > " +
std::to_string(MAX_REASONABLE_SECTION_COUNT)};
}
// 统计可执行区段数量
int executableSections = 0;
bool hasWritableExecutableSection = false;
bool hasZeroSizedSection = false;
bool hasOversizedSection = false;
bool hasMisalignedSection = false;
for (WORD i = 0; i < numberOfSections; i++) {
const auto& section = firstSection[i];
// 检查区段属性
if (section.Characteristics & IMAGE_SCN_MEM_EXECUTE) {
executableSections++;
// 检查是否同时具有可写和可执行属性
if (section.Characteristics & IMAGE_SCN_MEM_WRITE) {
hasWritableExecutableSection = true;
}
}
// 检查区段大小
if (section.SizeOfRawData == 0 && section.Misc.VirtualSize > 0) {
hasZeroSizedSection = true;
}
if (section.SizeOfRawData > MAX_SECTION_SIZE) {
hasOversizedSection = true;
}
// 检查对齐
if (section.VirtualAddress % SECTION_ALIGNMENT != 0) {
hasMisalignedSection = true;
}
}
// 返回检测结果
if (executableSections > MAX_EXECUTABLE_SECTIONS) {
return {true, "可执行区段数量过多: " +
std::to_string(executableSections)};
}
if (hasWritableExecutableSection) {
return {true, "发现同时具有可写和可执行属性的区段"};
}
if (hasZeroSizedSection) {
return {true, "发现大小异常的区段"};
}
if (hasOversizedSection) {
return {true, "发现过大的区段"};
}
if (hasMisalignedSection) {
return {true, "发现未正确对齐的区段"};
}
return {false, ""};
}
};
/// Entry point for scanning one file with the available detection stages.
///
/// The class holds no state, so construction and destruction are defaulted
/// in-class rather than defined out of line with empty bodies.
class DetectEngine {
   public:
    DetectEngine() = default;
    ~DetectEngine() = default;

    /// Scans the file at @p filePath and returns the stage that flagged it,
    /// or DetectEngineType::kNone.
    DetectEngineType DetectMalware(std::string filePath);
};
/// Runs the detection stages in order and returns the first that flags the
/// file: PE structure analysis, then the machine-learning score, then the
/// sandbox.
///
/// @param filePath Path of the file to scan.
/// @return The stage that flagged the sample, or DetectEngineType::kNone when
///         the PE cannot be loaded or no stage flags it.
DetectEngineType DetectEngine::DetectMalware(std::string filePath) {
    auto peInfo = getPeInfo(filePath);
    if (peInfo == nullptr) {
        return DetectEngineType::kNone;
    }

    // Stage 1: static PE structure analysis.
    PeStructAnalyzer peAnalyzer;
    if (peAnalyzer.AnalyzePe(peInfo)) {
        return DetectEngineType::kPeStruct;
    }

    // Stage 2: machine-learning engine.
    MachineLearning ml;
    const double rawPrediction = ml.PredictMalwareFromFile(filePath);
    // Validate the raw value before inverting it. Testing "1 - raw >= 0"
    // turns a negative failure value into a score above 1 and would flag the
    // sample.
    // NOTE(review): assumes PredictMalwareFromFile yields a probability in
    // [0, 1] and signals failure with an out-of-range value - confirm against
    // the MachineLearning implementation.
    if (rawPrediction >= 0.0 && rawPrediction <= 1.0) {
        const double score = 1 - rawPrediction;
        printf("machine learning score: %f\n", score);
        if (score > 0.5) {
            return DetectEngineType::kMachineLearning;
        }
    }

    // Stage 3: sandbox engine.
    Sandbox se;
    se.InitEnv(peInfo);
    se.Run();
    const auto sandboxVerdict = se.GetMalwareAnalysisType();
    if (sandboxVerdict == MalwareAnalysisType::kSuspicious ||
        sandboxVerdict == MalwareAnalysisType::kMalware) {
        return DetectEngineType::kSandbox;
    }
    return DetectEngineType::kNone;
}
// Scans the file named by argv[1] with DetectEngine and prints the numeric
// value of the resulting DetectEngineType.
//
// Usage: <program> <file path>
auto doMalwareScan(int argc, char* argv[]) -> void {
    DetectEngine scanner;
    if (argc < 2) {
        std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
        return;
    }
    std::string filePath = argv[1];
    auto sampleType = scanner.DetectMalware(filePath);
    // A scoped enum is not promoted to int when passed through "...", so it
    // must be converted explicitly to match "%d".
    printf("sample type: %d \n", static_cast<int>(sampleType));
}
int doSandbox(int argc, char* argv[]) {
std::string filePath = "Z:\\opengl32.dll";
auto peInfo = getPeInfo(filePath);
if (peInfo == nullptr) {
std::cout << "无法加载PE文件: " << filePath << std::endl;
return 1;
}
Sandbox se;
se.InitEnv(peInfo);
se.Run(0x10002F20);
return 0;
}
// Program entry point. Exactly one driver is enabled at a time; the others
// are kept commented out for quick switching during development.
int main(int argc, char* argv[]) {
    // doMl(argc, argv);
    // doPredict(argc, argv);
    // doMalwareScan(argc, argv);

    // Propagate the driver's status so a failed load yields a non-zero exit
    // code instead of always reporting success.
    return doSandbox(argc, argv);
}