Enhance entropy calculation with safety improvements in CalculateEntropy method

- Add size validation to prevent potential DoS attacks
- Reject inputs whose size exceeds 2GB by returning an entropy of 0.0
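- Add structured exception handling (__try/__except) so that an access violation while reading the buffer is caught and logged instead of crashing the process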
- Improve robustness of byte frequency calculation
- Add basic input validation for data and size parameters
This commit is contained in:
Huoji's
2025-03-09 04:09:24 +08:00
parent f00b0b2037
commit f80ba5d748

View File

@@ -614,15 +614,28 @@ std::vector<double> MachineLearning::EncodeSections(
}
double MachineLearning::CalculateEntropy(const uint8_t* data, size_t size) {
// 基本参数检查
if (!data || size == 0) {
return 0.0;
}
std::array<double, 256> frequencies = {};
// 添加合理性检查防止过大的size造成计算问题或DoS攻击
// 通常PE文件不应超过一定大小这里设置上限为2GB
constexpr size_t MAX_SAFE_SIZE = 2ULL * 1024 * 1024 * 1024; // 2GB
if (size > MAX_SAFE_SIZE) {
return 0.0;
}
// 统计每个字节的频率
for (size_t i = 0; i < size; i++) {
frequencies[data[i]] += 1.0;
std::array<double, 256> frequencies = {};
__try {
// 懒得JB处理了,累了.这里是不安全的
// 统计每个字节的频率
for (size_t i = 0; i < size; i++) {
uint8_t byteValue = data[i];
frequencies[byteValue] += 1.0;
}
} __except (EXCEPTION_EXECUTE_HANDLER) {
printf("skip file: (access violation)\n");
}
// 计算香农熵