Update project documentation and enhance malware detection engine

- Completely rewrite README.md with comprehensive project overview and technical details
- Add detailed explanation of antivirus engine architecture and detection strategies
- Implement multi-stage malware detection with machine learning, sandbox, and PE structure analysis
- Update project configuration and add new source files for enhanced detection capabilities
- Integrate XGBoost machine learning model with C++ export functionality
- Improve sandbox environment with advanced module and LDR data table handling
- Remove legacy Python prediction and training scripts in favor of C++ implementation
This commit is contained in:
Huoji's
2025-03-09 21:59:22 +08:00
parent 51f929abfa
commit 60c4ef5f58
23 changed files with 46102 additions and 1717 deletions

View File

@@ -9,7 +9,6 @@
#include <sstream>
#include <cfloat>
#include <filesystem>
// Make sure the std-namespace functions are available
using std::max;
using std::min;
@@ -855,4 +854,30 @@ bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
printf("ML Process Result, success count: %d fail count: %d \n",
processedCount, failedCount);
return true;
}
/// @brief Scores an in-memory file image with the exported model.
///
/// Runs ExtractFeatures() over the buffer and hands the resulting feature
/// array to score().
///
/// @param buffer      Pointer to the raw bytes to analyse.
/// @param bufferSize  Number of bytes available at @p buffer.
/// @return The value produced by score(), or -1.0 when ExtractFeatures()
///         yields an empty vector (i.e. no prediction could be made).
/// NOTE(review): the number of features score() expects is not visible
/// here — confirm it matches what ExtractFeatures() produces.
double MachineLearning::PredictMalware(const uint8_t* buffer,
                                       size_t bufferSize) {
    // Build the feature vector for this sample.
    std::vector<double> featureVector = ExtractFeatures(buffer, bufferSize);

    // An empty vector signals extraction failure; otherwise feed the
    // features to the model (XGBoost, per the original author's note).
    return featureVector.empty() ? -1.0 : score(featureVector.data());
}
// The returned value is the probability that the file is benign (a "white" file)
/// @brief Loads a file from disk and scores its contents.
///
/// Reads the file through ReadFileToBuffer() and forwards the bytes to
/// PredictMalware().
///
/// @param filePath  Path of the file to analyse.
/// @return Whatever PredictMalware() returns for the file's bytes, or -1.0
///         (after logging to std::cerr) when ReadFileToBuffer() comes back
///         empty.
double MachineLearning::PredictMalwareFromFile(const std::string& filePath) {
    std::vector<uint8_t> contents = ReadFileToBuffer(filePath);

    if (!contents.empty()) {
        // Bytes are available: run the buffer-based prediction on them.
        return PredictMalware(contents.data(), contents.size());
    }

    // Nothing was read: report the problem and signal "no prediction".
    std::cerr << "无法读取文件: " << filePath << std::endl;
    return -1.0;
}