// ai_anti_malware.cpp : This file contains the "main" function. Program
// execution begins and ends there.
//
|
|
|
|
#include "head.h"
|
|
// Identifies which detection stage, if any, flagged a sample.
// The numeric values are spelled out explicitly; they are the same values the
// enumerators receive implicitly from their declaration order.
enum class DetectEngineType {
    kNone = 0,             // no stage flagged the sample
    kMachineLearning = 1,  // flagged by the machine-learning score
    kSandbox = 2,          // flagged by the sandbox run
    kPeStruct = 3,         // flagged by the PE structure heuristics
    kYaraScan = 4          // not referenced elsewhere in the visible code
};
|
|
|
|
// Loads a PE file through peconv and collects the header data the rest of the
// pipeline reads from BasicPeInfo.
//
// Parameters:
//   inputFilePath - path of the PE file to load.
//
// Returns:
//   A populated BasicPeInfo, or nullptr when peconv::load_pe_module yields no
//   buffer or the NT headers matching the detected bitness are missing.
//
// On return RecImageBase has been rounded down and peSize rounded up to a
// 0x1000 boundary; entryPoint and imageEnd are computed before that rounding.
auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
    auto sampleInfo = std::make_shared<BasicPeInfo>();
    sampleInfo->inputFilePath = inputFilePath;

    // NOTE(review): the two trailing `false` arguments are presumably
    // peconv's "executable" and "relocate" flags - confirm against libpeconv.
    sampleInfo->peBuffer = peconv::load_pe_module(
        sampleInfo->inputFilePath.c_str(), sampleInfo->peSize, false, false);
    if (sampleInfo->peBuffer == nullptr) {
        return nullptr;
    }

    // Every peconv helper below wants the image as a BYTE*; convert once.
    BYTE* const image = reinterpret_cast<BYTE*>(sampleInfo->peBuffer);

    sampleInfo->ntHead64 = peconv::get_nt_hdrs64(image);
    sampleInfo->ntHead32 = peconv::get_nt_hdrs32(image);
    sampleInfo->isX64 = peconv::is64bit(image);

    // Only the header matching the detected bitness is dereferenced below;
    // refuse to continue if that one is missing instead of crashing.
    const bool headerAvailable = sampleInfo->isX64
                                     ? sampleInfo->ntHead64 != nullptr
                                     : sampleInfo->ntHead32 != nullptr;
    if (!headerAvailable) {
        return nullptr;
    }

    sampleInfo->RecImageBase =
        sampleInfo->isX64
            ? (DWORD64)sampleInfo->ntHead64->OptionalHeader.ImageBase
            : (DWORD)sampleInfo->ntHead32->OptionalHeader.ImageBase;
    sampleInfo->isRelocated = peconv::relocate_module(
        image, sampleInfo->peSize, sampleInfo->RecImageBase);

    sampleInfo->entryPoint =
        sampleInfo->isX64
            ? sampleInfo->ntHead64->OptionalHeader.AddressOfEntryPoint
            : sampleInfo->ntHead32->OptionalHeader.AddressOfEntryPoint;
    sampleInfo->imageEnd =
        sampleInfo->RecImageBase +
        (sampleInfo->isX64 ? sampleInfo->ntHead64->OptionalHeader.SizeOfImage
                           : sampleInfo->ntHead32->OptionalHeader.SizeOfImage);
    sampleInfo->isDll = peconv::is_module_dll(image);

    // Page-aligned (0x1000) variants: base rounded down, size rounded up.
    const auto alignedBase = sampleInfo->RecImageBase & ~0xFFF;
    const auto alignedSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF;

    // %llx requires an unsigned long long argument; cast explicitly so the
    // output stays correct whatever the width of the underlying fields is.
    printf("Debug - Memory mapping parameters:\n");
    printf("RecImageBase: 0x%llx\n",
           static_cast<unsigned long long>(sampleInfo->RecImageBase));
    printf("peSize: 0x%llx\n",
           static_cast<unsigned long long>(sampleInfo->peSize));
    printf("Page aligned base: 0x%llx\n",
           static_cast<unsigned long long>(alignedBase));
    printf("Page aligned size: 0x%llx\n",
           static_cast<unsigned long long>(alignedSize));

    sampleInfo->RecImageBase = alignedBase;
    sampleInfo->peSize = alignedSize;
    return sampleInfo;
}
|
|
// 搜集恶意软件特征的.
|
|
int doMl(int argc, char* argv[]) {
|
|
// 检查命令行参数
|
|
if (argc < 3) {
|
|
std::cout << "用法: " << argv[0] << " <样本目录路径> <输出CSV路径>"
|
|
<< std::endl;
|
|
std::cout << "或者: " << argv[0]
|
|
<< " -single <单个文件路径> <输出CSV路径>" << std::endl;
|
|
return 1;
|
|
}
|
|
MachineLearning ml;
|
|
|
|
if (std::string(argv[1]) == "-single") {
|
|
// 处理单个文件
|
|
if (argc < 4) {
|
|
std::cout << "处理单个文件时需要提供文件路径和输出CSV路径"
|
|
<< std::endl;
|
|
return 1;
|
|
}
|
|
|
|
std::string filePath = argv[2];
|
|
std::string csvPath = argv[3];
|
|
|
|
// 读取文件
|
|
std::vector<uint8_t> buffer = ml.ReadFileToBuffer(filePath);
|
|
if (buffer.empty()) {
|
|
std::cerr << "无法读取文件: " << filePath << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
// 提取特征
|
|
std::vector<double> features =
|
|
ml.ExtractFeatures(buffer.data(), buffer.size());
|
|
if (features.empty()) {
|
|
std::cerr << "无法从文件提取特征: " << filePath << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
// 导出到CSV
|
|
if (!ml.ExportToCSV(features, csvPath)) {
|
|
std::cerr << "无法导出到CSV文件: " << csvPath << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
std::cout << "成功处理文件并导出特征到: " << csvPath << std::endl;
|
|
} else {
|
|
// 处理目录
|
|
std::string dirPath = argv[1];
|
|
std::string csvPath = argv[2];
|
|
|
|
std::cout << "开始处理目录: " << dirPath << std::endl;
|
|
std::cout << "特征将导出到: " << csvPath << std::endl;
|
|
|
|
if (!ml.ProcessDirectory(dirPath, csvPath)) {
|
|
std::cerr << "处理目录时发生错误" << std::endl;
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
};
|
|
int doPredict(int argc, char* argv[]) {
|
|
if (argc < 2) {
|
|
std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
|
|
return 1;
|
|
}
|
|
std::string filePath = argv[1];
|
|
MachineLearning ml;
|
|
double score = 1 - ml.PredictMalwareFromFile(filePath);
|
|
if (score >= 0) {
|
|
std::cout << "文件 " << filePath << " 的恶意软件得分: " << score
|
|
<< std::endl;
|
|
if (score > 0.5) {
|
|
std::cout << "警告: 这个文件可能是恶意软件!" << std::endl;
|
|
} else {
|
|
std::cout << "这个文件可能是安全的。" << std::endl;
|
|
}
|
|
} else {
|
|
std::cout << "无法分析文件。" << std::endl;
|
|
}
|
|
}
|
|
class PeStructAnalyzer {
|
|
public:
|
|
PeStructAnalyzer() = default;
|
|
~PeStructAnalyzer() = default;
|
|
|
|
bool AnalyzePe(const std::shared_ptr<BasicPeInfo>& peInfo) {
|
|
if (!peInfo || !peInfo->peBuffer) {
|
|
return false;
|
|
}
|
|
|
|
bool isSuspicious = false;
|
|
|
|
// 检查导入表
|
|
if (HasNoImports(peInfo)) {
|
|
std::cout << "警告: 未发现导入表,这是一个可疑特征" << std::endl;
|
|
isSuspicious = true;
|
|
}
|
|
|
|
// 检查节表异常
|
|
auto [hasSuspiciousSections, suspiciousReason] =
|
|
AnalyzeSections(peInfo);
|
|
if (hasSuspiciousSections) {
|
|
std::cout << "警告: " << suspiciousReason << std::endl;
|
|
isSuspicious = true;
|
|
}
|
|
|
|
return isSuspicious;
|
|
}
|
|
|
|
private:
|
|
static constexpr DWORD MAX_REASONABLE_SECTION_COUNT = 20; // 最大合理区段数
|
|
static constexpr DWORD MAX_EXECUTABLE_SECTIONS = 3; // 最大可执行区段数
|
|
static constexpr DWORD MAX_SECTION_SIZE = 0x10000000; // 256MB
|
|
static constexpr DWORD SECTION_ALIGNMENT = 0x1000; // 4KB对齐
|
|
static constexpr DWORD SUSPICIOUS_ENTROPY_THRESHOLD = 7; // 熵值阈值
|
|
|
|
bool HasNoImports(const std::shared_ptr<BasicPeInfo>& peInfo) {
|
|
PIMAGE_DATA_DIRECTORY importDir = nullptr;
|
|
if (peInfo->isX64) {
|
|
importDir = &peInfo->ntHead64->OptionalHeader
|
|
.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
|
|
} else {
|
|
importDir = &peInfo->ntHead32->OptionalHeader
|
|
.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
|
|
}
|
|
|
|
return (importDir->VirtualAddress == 0 || importDir->Size == 0);
|
|
}
|
|
|
|
std::pair<bool, std::string> AnalyzeSections(
|
|
const std::shared_ptr<BasicPeInfo>& peInfo) {
|
|
PIMAGE_SECTION_HEADER firstSection = nullptr;
|
|
WORD numberOfSections = 0;
|
|
|
|
if (peInfo->isX64) {
|
|
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead64);
|
|
numberOfSections = peInfo->ntHead64->FileHeader.NumberOfSections;
|
|
} else {
|
|
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead32);
|
|
numberOfSections = peInfo->ntHead32->FileHeader.NumberOfSections;
|
|
}
|
|
|
|
// 检查区段数量是否异常
|
|
if (numberOfSections > MAX_REASONABLE_SECTION_COUNT) {
|
|
return {true, "区段数量异常: " + std::to_string(numberOfSections) +
|
|
" > " +
|
|
std::to_string(MAX_REASONABLE_SECTION_COUNT)};
|
|
}
|
|
|
|
// 统计可执行区段数量
|
|
int executableSections = 0;
|
|
bool hasWritableExecutableSection = false;
|
|
bool hasZeroSizedSection = false;
|
|
bool hasOversizedSection = false;
|
|
bool hasMisalignedSection = false;
|
|
|
|
for (WORD i = 0; i < numberOfSections; i++) {
|
|
const auto& section = firstSection[i];
|
|
|
|
// 检查区段属性
|
|
if (section.Characteristics & IMAGE_SCN_MEM_EXECUTE) {
|
|
executableSections++;
|
|
|
|
// 检查是否同时具有可写和可执行属性
|
|
if (section.Characteristics & IMAGE_SCN_MEM_WRITE) {
|
|
hasWritableExecutableSection = true;
|
|
}
|
|
}
|
|
|
|
// 检查区段大小
|
|
if (section.SizeOfRawData == 0 && section.Misc.VirtualSize > 0) {
|
|
hasZeroSizedSection = true;
|
|
}
|
|
|
|
if (section.SizeOfRawData > MAX_SECTION_SIZE) {
|
|
hasOversizedSection = true;
|
|
}
|
|
|
|
// 检查对齐
|
|
if (section.VirtualAddress % SECTION_ALIGNMENT != 0) {
|
|
hasMisalignedSection = true;
|
|
}
|
|
}
|
|
|
|
// 返回检测结果
|
|
if (executableSections > MAX_EXECUTABLE_SECTIONS) {
|
|
return {true, "可执行区段数量过多: " +
|
|
std::to_string(executableSections)};
|
|
}
|
|
|
|
if (hasWritableExecutableSection) {
|
|
return {true, "发现同时具有可写和可执行属性的区段"};
|
|
}
|
|
|
|
if (hasZeroSizedSection) {
|
|
return {true, "发现大小异常的区段"};
|
|
}
|
|
|
|
if (hasOversizedSection) {
|
|
return {true, "发现过大的区段"};
|
|
}
|
|
|
|
if (hasMisalignedSection) {
|
|
return {true, "发现未正确对齐的区段"};
|
|
}
|
|
|
|
return {false, ""};
|
|
}
|
|
};
|
|
|
|
class DetectEngine {
|
|
public:
|
|
DetectEngine();
|
|
~DetectEngine();
|
|
DetectEngineType DetectMalware(std::string filePath);
|
|
};
|
|
DetectEngine::DetectEngine() {}
|
|
DetectEngine::~DetectEngine() {}
|
|
// Runs the detection stages in order - PE structure heuristics, machine
// learning score, sandbox - and stops at the first stage that flags the file.
//
// Parameters:
//   filePath - path of the file to scan.
//
// Returns:
//   kPeStruct, kMachineLearning or kSandbox for the stage that flagged the
//   file; kNone when the file cannot be loaded as a PE or no stage flags it.
DetectEngineType DetectEngine::DetectMalware(std::string filePath) {
    auto peInfo = getPeInfo(filePath);
    if (peInfo == nullptr) {
        return DetectEngineType::kNone;
    }

    // Stage 1: PE structure analysis.
    PeStructAnalyzer peAnalyzer;
    if (peAnalyzer.AnalyzePe(peInfo)) {
        return DetectEngineType::kPeStruct;
    }

    // Stage 2: machine-learning engine.
    MachineLearning ml;
    const double rawResult = ml.PredictMalwareFromFile(filePath);
    // Validate the model output BEFORE inverting it. Testing `1 - raw >= 0`
    // afterwards would accept any negative raw value, turn it into a score
    // above 1 and report the file as malware.
    // NOTE(review): assumes a raw value outside [0, 1] (presumably including
    // a negative failure value from PredictMalwareFromFile) means "no result"
    // - confirm against the MachineLearning implementation.
    if (rawResult >= 0.0 && rawResult <= 1.0) {
        const double score = 1 - rawResult;
        printf("machine learning score: %f\n", score);
        if (score > 0.5) {
            return DetectEngineType::kMachineLearning;
        }
    }

    // Stage 3: sandbox engine.
    Sandbox se;
    se.InitEnv(peInfo);
    se.Run();
    const auto verdict = se.GetMalwareAnalysisType();
    if (verdict == MalwareAnalysisType::kSuspicious ||
        verdict == MalwareAnalysisType::kMalware) {
        return DetectEngineType::kSandbox;
    }

    return DetectEngineType::kNone;
}
|
|
auto doMalwareScan(int argc, char* argv[]) -> void {
|
|
DetectEngine scanner;
|
|
if (argc < 2) {
|
|
std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
|
|
return;
|
|
}
|
|
std::string filePath = argv[1];
|
|
auto sampleType = scanner.DetectMalware(filePath);
|
|
printf("sample type: %d \n", sampleType);
|
|
}
|
|
|
|
int doSandbox(int argc, char* argv[]) {
|
|
|
|
//if (argc < 3) {
|
|
// std::cout << "用法: " << argv[0] << " <文件路径> <地址>" << std::endl;
|
|
// return;
|
|
//}
|
|
//std::string filePath = argv[1];
|
|
|
|
std::string filePath = "Z:\\mso.dll";
|
|
|
|
auto peInfo = getPeInfo(filePath);
|
|
if (peInfo == nullptr) {
|
|
return 0;
|
|
}
|
|
Sandbox se;
|
|
se.InitEnv(peInfo);
|
|
se.Run(0x180003980);
|
|
return 0;
|
|
}
|
|
|
|
// Program entry point. Exactly one of the do* drivers is active at a time;
// the others are kept commented out for quick switching.
//
// Returns:
//   The status reported by the active driver, so that a failure becomes the
//   process exit code instead of being discarded.
int main(int argc, char* argv[]) {
    // doMl(argc, argv);
    // doPredict(argc, argv);
    // doMalwareScan(argc, argv);
    return doSandbox(argc, argv);
}
|