Update project documentation and enhance malware detection engine
- Completely rewrite README.md with a comprehensive project overview and technical details
- Add a detailed explanation of the antivirus engine architecture and detection strategies
- Implement multi-stage malware detection with machine learning, sandbox, and PE structure analysis
- Update project configuration and add new source files for enhanced detection capabilities
- Integrate the XGBoost machine learning model with C++ export functionality
- Improve the sandbox environment with advanced module and LDR data table handling
- Remove legacy Python prediction and training scripts in favor of the C++ implementation
This commit is contained in:
@@ -2,6 +2,13 @@
|
||||
//
|
||||
|
||||
#include "head.h"
|
||||
// Names the detection stage that flagged a sample; kNone means that no stage
// reported anything.
enum class DetectEngineType : int {
  kNone = 0,             // nothing was flagged
  kMachineLearning = 1,  // flagged by the machine-learning score
  kSandbox = 2,          // flagged by the sandbox run
  kPeStruct = 3,         // flagged by the PE structure heuristics
  kYaraScan = 4          // YARA scan stage
};
|
||||
|
||||
auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
|
||||
auto sampleInfo = std::make_shared<BasicPeInfo>();
|
||||
@@ -10,6 +17,9 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
|
||||
sampleInfo->peBuffer =
|
||||
peconv::load_pe_module((const char*)sampleInfo->inputFilePath.c_str(),
|
||||
sampleInfo->peSize, false, false);
|
||||
if (sampleInfo->peBuffer == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
sampleInfo->ntHead64 = peconv::get_nt_hdrs64((BYTE*)sampleInfo->peBuffer);
|
||||
sampleInfo->ntHead32 = peconv::get_nt_hdrs32((BYTE*)sampleInfo->peBuffer);
|
||||
sampleInfo->isX64 = peconv::is64bit((BYTE*)sampleInfo->peBuffer);
|
||||
@@ -39,6 +49,7 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
|
||||
sampleInfo->peSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF;
|
||||
return sampleInfo;
|
||||
}
|
||||
// 搜集恶意软件特征的.
|
||||
int doMl(int argc, char* argv[]) {
|
||||
// 检查命令行参数
|
||||
if (argc < 3) {
|
||||
@@ -98,31 +109,210 @@ int doMl(int argc, char* argv[]) {
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
int main(int argc, char* argv[]) {
|
||||
doMl(argc, argv);
|
||||
/*
|
||||
auto sampleInfo = getPeInfo(
|
||||
"E:\\对战平台\\CrowAntiCheat\\CrowAntiCheat\\client\\Console_"
|
||||
"Test\\Release\\Console_Test.exe");
|
||||
// auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe");
|
||||
printf("input new file %s \n", sampleInfo->inputFilePath);
|
||||
printf("is x64: %d\n", sampleInfo->isX64);
|
||||
printf("is relocated: %d\n", sampleInfo->isRelocated);
|
||||
printf("RecImageBase: %llx\n", sampleInfo->RecImageBase);
|
||||
auto sandbox = std::make_shared<Sandbox>();
|
||||
sandbox->InitEnv(sampleInfo);
|
||||
sandbox->Run();
|
||||
auto [peBuffer, peSize] = sandbox->DumpPE();
|
||||
int doPredict(int argc, char* argv[]) {
|
||||
if (argc < 2) {
|
||||
std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::string filePath = argv[1];
|
||||
MachineLearning ml;
|
||||
double score = 1 - ml.PredictMalwareFromFile(filePath);
|
||||
if (score >= 0) {
|
||||
std::cout << "文件 " << filePath << " 的恶意软件得分: " << score
|
||||
<< std::endl;
|
||||
if (score > 0.5) {
|
||||
std::cout << "警告: 这个文件可能是恶意软件!" << std::endl;
|
||||
} else {
|
||||
std::cout << "这个文件可能是安全的。" << std::endl;
|
||||
}
|
||||
} else {
|
||||
std::cout << "无法分析文件。" << std::endl;
|
||||
}
|
||||
}
|
||||
class PeStructAnalyzer {
|
||||
public:
|
||||
PeStructAnalyzer() = default;
|
||||
~PeStructAnalyzer() = default;
|
||||
|
||||
if (peBuffer) {
|
||||
printf("peBuffer: %p\n", peBuffer.get());
|
||||
printf("peSize: %d\n", peSize);
|
||||
// peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize);
|
||||
MachineLearning ml;
|
||||
ml.ExtractFeatures(peBuffer.get(), peSize);
|
||||
}
|
||||
peBuffer.release();
|
||||
*/
|
||||
system("pause");
|
||||
bool AnalyzePe(const std::shared_ptr<BasicPeInfo>& peInfo) {
|
||||
if (!peInfo || !peInfo->peBuffer) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isSuspicious = false;
|
||||
|
||||
// 检查导入表
|
||||
if (HasNoImports(peInfo)) {
|
||||
std::cout << "警告: 未发现导入表,这是一个可疑特征" << std::endl;
|
||||
isSuspicious = true;
|
||||
}
|
||||
|
||||
// 检查节表异常
|
||||
auto [hasSuspiciousSections, suspiciousReason] =
|
||||
AnalyzeSections(peInfo);
|
||||
if (hasSuspiciousSections) {
|
||||
std::cout << "警告: " << suspiciousReason << std::endl;
|
||||
isSuspicious = true;
|
||||
}
|
||||
|
||||
return isSuspicious;
|
||||
}
|
||||
|
||||
private:
|
||||
// Thresholds used by the section heuristics.
// Largest section count still considered reasonable.
static constexpr DWORD MAX_REASONABLE_SECTION_COUNT{20};
// Largest number of executable sections still considered reasonable.
static constexpr DWORD MAX_EXECUTABLE_SECTIONS{3};
// Largest raw section size still considered reasonable (256 MB).
static constexpr DWORD MAX_SECTION_SIZE{256u * 1024u * 1024u};
// Alignment that section virtual addresses are expected to honour (4 KB).
static constexpr DWORD SECTION_ALIGNMENT{4u * 1024u};
// Entropy threshold; no method visible in this class references it yet.
static constexpr DWORD SUSPICIOUS_ENTROPY_THRESHOLD{7};
|
||||
|
||||
// Reports whether the import data directory of the sample is empty.
//
// The entry is read from ntHead64 when isX64 is set and from ntHead32
// otherwise; the selected header pointer is dereferenced without a null check.
// Returns true when the entry's VirtualAddress or Size is zero.
bool HasNoImports(const std::shared_ptr<BasicPeInfo>& peInfo) {
  const auto& importEntry =
      peInfo->isX64
          ? peInfo->ntHead64->OptionalHeader
                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
          : peInfo->ntHead32->OptionalHeader
                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];

  const bool populated =
      importEntry.VirtualAddress != 0 && importEntry.Size != 0;
  return !populated;
}
|
||||
|
||||
std::pair<bool, std::string> AnalyzeSections(
|
||||
const std::shared_ptr<BasicPeInfo>& peInfo) {
|
||||
PIMAGE_SECTION_HEADER firstSection = nullptr;
|
||||
WORD numberOfSections = 0;
|
||||
|
||||
if (peInfo->isX64) {
|
||||
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead64);
|
||||
numberOfSections = peInfo->ntHead64->FileHeader.NumberOfSections;
|
||||
} else {
|
||||
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead32);
|
||||
numberOfSections = peInfo->ntHead32->FileHeader.NumberOfSections;
|
||||
}
|
||||
|
||||
// 检查区段数量是否异常
|
||||
if (numberOfSections > MAX_REASONABLE_SECTION_COUNT) {
|
||||
return {true, "区段数量异常: " + std::to_string(numberOfSections) +
|
||||
" > " +
|
||||
std::to_string(MAX_REASONABLE_SECTION_COUNT)};
|
||||
}
|
||||
|
||||
// 统计可执行区段数量
|
||||
int executableSections = 0;
|
||||
bool hasWritableExecutableSection = false;
|
||||
bool hasZeroSizedSection = false;
|
||||
bool hasOversizedSection = false;
|
||||
bool hasMisalignedSection = false;
|
||||
|
||||
for (WORD i = 0; i < numberOfSections; i++) {
|
||||
const auto& section = firstSection[i];
|
||||
|
||||
// 检查区段属性
|
||||
if (section.Characteristics & IMAGE_SCN_MEM_EXECUTE) {
|
||||
executableSections++;
|
||||
|
||||
// 检查是否同时具有可写和可执行属性
|
||||
if (section.Characteristics & IMAGE_SCN_MEM_WRITE) {
|
||||
hasWritableExecutableSection = true;
|
||||
}
|
||||
}
|
||||
|
||||
// 检查区段大小
|
||||
if (section.SizeOfRawData == 0 && section.Misc.VirtualSize > 0) {
|
||||
hasZeroSizedSection = true;
|
||||
}
|
||||
|
||||
if (section.SizeOfRawData > MAX_SECTION_SIZE) {
|
||||
hasOversizedSection = true;
|
||||
}
|
||||
|
||||
// 检查对齐
|
||||
if (section.VirtualAddress % SECTION_ALIGNMENT != 0) {
|
||||
hasMisalignedSection = true;
|
||||
}
|
||||
}
|
||||
|
||||
// 返回检测结果
|
||||
if (executableSections > MAX_EXECUTABLE_SECTIONS) {
|
||||
return {true, "可执行区段数量过多: " +
|
||||
std::to_string(executableSections)};
|
||||
}
|
||||
|
||||
if (hasWritableExecutableSection) {
|
||||
return {true, "发现同时具有可写和可执行属性的区段"};
|
||||
}
|
||||
|
||||
if (hasZeroSizedSection) {
|
||||
return {true, "发现大小异常的区段"};
|
||||
}
|
||||
|
||||
if (hasOversizedSection) {
|
||||
return {true, "发现过大的区段"};
|
||||
}
|
||||
|
||||
if (hasMisalignedSection) {
|
||||
return {true, "发现未正确对齐的区段"};
|
||||
}
|
||||
|
||||
return {false, ""};
|
||||
}
|
||||
};
|
||||
|
||||
class DetectEngine {
|
||||
public:
|
||||
DetectEngine();
|
||||
~DetectEngine();
|
||||
DetectEngineType DetectMalware(std::string filePath);
|
||||
};
|
||||
DetectEngine::DetectEngine() {}
|
||||
DetectEngine::~DetectEngine() {}
|
||||
// Runs the detection stages in order and stops at the first one that flags
// the sample: PE structure heuristics, then the machine-learning score, then
// the sandbox.
//
// filePath: path of the file to scan.
// Returns the stage that flagged the file, or DetectEngineType::kNone when
// getPeInfo() returns null or no stage flags it.
DetectEngineType DetectEngine::DetectMalware(std::string filePath) {
  auto peInfo = getPeInfo(filePath);
  if (!peInfo) {
    return DetectEngineType::kNone;
  }

  // Stage 1: static PE structure heuristics.
  PeStructAnalyzer structAnalyzer;
  const bool structFlagged = structAnalyzer.AnalyzePe(peInfo);
  if (structFlagged) {
    return DetectEngineType::kPeStruct;
  }

  // Stage 2: machine learning. The score is the complement of the model
  // output; it is printed only when non-negative, and anything above 0.5 is
  // treated as a detection.
  MachineLearning model;
  double maliciousScore = 1 - model.PredictMalwareFromFile(filePath);
  if (maliciousScore >= 0) {
    printf("machine learning score: %f\n", maliciousScore);
  }
  if (maliciousScore > 0.5) {
    return DetectEngineType::kMachineLearning;
  }

  // Stage 3: sandbox run; both "suspicious" and "malware" count as a hit.
  Sandbox sandbox;
  sandbox.InitEnv(peInfo);
  sandbox.Run();
  const bool sandboxFlagged =
      sandbox.GetMalwareAnalysisType() == MalwareAnalysisType::kSuspicious ||
      sandbox.GetMalwareAnalysisType() == MalwareAnalysisType::kMalware;

  return sandboxFlagged ? DetectEngineType::kSandbox : DetectEngineType::kNone;
}
|
||||
auto doMalwareScan(int argc, char* argv[]) -> void {
|
||||
DetectEngine scanner;
|
||||
if (argc < 2) {
|
||||
std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
|
||||
return;
|
||||
}
|
||||
std::string filePath = argv[1];
|
||||
auto sampleType = scanner.DetectMalware(filePath);
|
||||
printf("sample type: %d \n", sampleType);
|
||||
}
|
||||
// Program entry point: hands the command line to the malware scan and always
// exits with status 0.
int main(int argc, char** argv) {
  // Disabled alternative entry points:
  //   doMl(argc, argv);
  //   doPredict(argc, argv);
  doMalwareScan(argc, argv);

  return 0;
}
|
||||
|
||||
Reference in New Issue
Block a user