Update project documentation and enhance malware detection engine

- Completely rewrite README.md with comprehensive project overview and technical details
- Add detailed explanation of antivirus engine architecture and detection strategies
- Implement multi-stage malware detection with machine learning, sandbox, and PE structure analysis
- Update project configuration and add new source files for enhanced detection capabilities
- Integrate XGBoost machine learning model with C++ export functionality
- Improve sandbox environment with advanced module and LDR data table handling
- Remove legacy Python prediction and training scripts in favor of C++ implementation
2025-03-09 21:59:22 +08:00
parent 51f929abfa
commit 60c4ef5f58
23 changed files with 46102 additions and 1717 deletions
--- a/ai_anti_malware/ml.cpp
+++ b/ai_anti_malware/ml.cpp
@@ -9,7 +9,6 @@
 #include <sstream>
 #include <cfloat>
 #include <filesystem>
-
 // Make sure the std-namespace max/min functions are usable unqualified in this file.
 using std::max;
 using std::min;
@@ -855,4 +854,30 @@ bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
    printf("ML Process Result, success count: %d fail count: %d \n",
           processedCount, failedCount);
    return true;
+}
+
+double MachineLearning::PredictMalware(const uint8_t* buffer,
+                                       size_t bufferSize) {
+    // Score a raw byte buffer: first extract its feature vector.
+    std::vector<double> features = ExtractFeatures(buffer, bufferSize);
+
+    // An empty feature vector means extraction failed; return -1.0 to signal that no prediction is possible.
+    if (features.empty()) {
+        return -1.0;
+    }
+
+    // Hand the feature vector to the XGBoost model and return its score.
+    return score(features.data());
+}
+// Returns the probability that the file is benign (a "white" file), or -1.0 when the buffer read from filePath is empty. NOTE(review): "benign" comes from the original author's note and contrasts with the function name — confirm.
+double MachineLearning::PredictMalwareFromFile(const std::string& filePath) {
+    // Read the file into a byte buffer.
+    std::vector<uint8_t> fileBuffer = ReadFileToBuffer(filePath);
+    if (fileBuffer.empty()) {
+        std::cerr << "无法读取文件: " << filePath << std::endl;
+        return -1.0;
+    }
+
+    // Run the prediction on the in-memory buffer.
+    return PredictMalware(fileBuffer.data(), fileBuffer.size());
 }