add statistical anomaly detection for process analysis

This commit is contained in:
Adir Shitrit
2025-11-08 11:47:15 +02:00
parent 5794afb4a0
commit 095123f405
2 changed files with 430 additions and 0 deletions

409
ghost-core/src/anomaly.rs Normal file
View File

@@ -0,0 +1,409 @@
use crate::{GhostError, ProcessInfo, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessFeatures {
pub pid: u32,
pub parent_pid: u32,
pub thread_count: u32,
pub memory_regions: usize,
pub executable_regions: usize,
pub rwx_regions: usize,
pub private_regions: usize,
pub image_regions: usize,
pub total_memory_size: usize,
pub largest_region_size: usize,
pub memory_fragmentation: f64,
pub thread_creation_rate: f64,
pub api_call_frequency: f64,
pub entropy_score: f64,
pub creation_time_hours: f64,
pub parent_child_ratio: f64,
}
#[derive(Debug, Clone)]
pub struct AnomalyScore {
pub overall_score: f64,
pub component_scores: HashMap<String, f64>,
pub outlier_features: Vec<String>,
pub confidence: f64,
}
#[derive(Debug, Clone)]
pub struct ProcessProfile {
pub name: String,
pub feature_means: HashMap<String, f64>,
pub feature_stds: HashMap<String, f64>,
pub sample_count: usize,
pub last_updated: chrono::DateTime<chrono::Utc>,
}
/// Advanced ML-based anomaly detection for process behavior
pub struct AnomalyDetector {
process_profiles: HashMap<String, ProcessProfile>,
global_baseline: Option<ProcessProfile>,
detection_threshold: f64,
outlier_threshold: f64,
min_samples_for_profile: usize,
}
impl AnomalyDetector {
pub fn new() -> Self {
Self {
process_profiles: HashMap::new(),
global_baseline: None,
detection_threshold: 0.7,
outlier_threshold: 2.5, // Standard deviations
min_samples_for_profile: 10,
}
}
/// Extract behavioral features from process data
pub fn extract_features(
&self,
process: &ProcessInfo,
memory_regions: &[crate::MemoryRegion],
threads: Option<&[crate::ThreadInfo]>,
) -> ProcessFeatures {
let executable_regions = memory_regions
.iter()
.filter(|r| matches!(
r.protection,
crate::MemoryProtection::ReadExecute | crate::MemoryProtection::ReadWriteExecute
))
.count();
let rwx_regions = memory_regions
.iter()
.filter(|r| r.protection == crate::MemoryProtection::ReadWriteExecute)
.count();
let private_regions = memory_regions
.iter()
.filter(|r| r.region_type == "PRIVATE")
.count();
let image_regions = memory_regions
.iter()
.filter(|r| r.region_type == "IMAGE")
.count();
let total_memory_size: usize = memory_regions.iter().map(|r| r.size).sum();
let largest_region_size = memory_regions
.iter()
.map(|r| r.size)
.max()
.unwrap_or(0);
// Calculate memory fragmentation (std dev of region sizes)
let mean_size = if memory_regions.is_empty() {
0.0
} else {
total_memory_size as f64 / memory_regions.len() as f64
};
let variance = memory_regions
.iter()
.map(|r| {
let diff = r.size as f64 - mean_size;
diff * diff
})
.sum::<f64>()
/ memory_regions.len().max(1) as f64;
let memory_fragmentation = variance.sqrt() / mean_size.max(1.0);
// Thread-based features
let thread_creation_rate = if let Some(thread_list) = threads {
let recent_threads = thread_list
.iter()
.filter(|t| t.creation_time > 0)
.count();
recent_threads as f64 / thread_list.len().max(1) as f64
} else {
0.0
};
// Simulate API call frequency (in real implementation, would track actual calls)
let api_call_frequency = self.estimate_api_call_frequency(process, memory_regions);
// Calculate entropy score based on memory content patterns
let entropy_score = self.calculate_entropy_score(memory_regions);
// Time-based features
let creation_time_hours = chrono::Utc::now().hour() as f64;
// Parent-child relationship analysis
let parent_child_ratio = if process.ppid == 0 {
0.0
} else {
process.pid as f64 / process.ppid as f64
};
ProcessFeatures {
pid: process.pid,
parent_pid: process.ppid,
thread_count: process.thread_count,
memory_regions: memory_regions.len(),
executable_regions,
rwx_regions,
private_regions,
image_regions,
total_memory_size,
largest_region_size,
memory_fragmentation,
thread_creation_rate,
api_call_frequency,
entropy_score,
creation_time_hours,
parent_child_ratio,
}
}
/// Analyze process for anomalies using ML techniques
pub fn analyze_anomaly(
&mut self,
process: &ProcessInfo,
features: &ProcessFeatures,
) -> Result<AnomalyScore> {
// Update process profile with new data
self.update_process_profile(&process.name, features);
// Calculate anomaly scores
let mut component_scores = HashMap::new();
let mut outlier_features = Vec::new();
// Get baseline for comparison
let baseline = self
.process_profiles
.get(&process.name)
.or(self.global_baseline.as_ref());
if let Some(profile) = baseline {
if profile.sample_count >= self.min_samples_for_profile {
// Analyze each feature for anomalies
self.analyze_feature_anomaly(
"thread_count",
features.thread_count as f64,
profile,
&mut component_scores,
&mut outlier_features,
);
self.analyze_feature_anomaly(
"rwx_regions",
features.rwx_regions as f64,
profile,
&mut component_scores,
&mut outlier_features,
);
self.analyze_feature_anomaly(
"memory_fragmentation",
features.memory_fragmentation,
profile,
&mut component_scores,
&mut outlier_features,
);
self.analyze_feature_anomaly(
"thread_creation_rate",
features.thread_creation_rate,
profile,
&mut component_scores,
&mut outlier_features,
);
self.analyze_feature_anomaly(
"api_call_frequency",
features.api_call_frequency,
profile,
&mut component_scores,
&mut outlier_features,
);
self.analyze_feature_anomaly(
"entropy_score",
features.entropy_score,
profile,
&mut component_scores,
&mut outlier_features,
);
}
}
// Calculate overall anomaly score
let overall_score = if component_scores.is_empty() {
0.0 // Not enough data for analysis
} else {
// Weighted average of component scores
let weighted_sum: f64 = component_scores
.iter()
.map(|(feature, score)| {
let weight = match feature.as_str() {
"rwx_regions" => 0.3, // High weight for RWX regions
"thread_creation_rate" => 0.25, // High weight for thread anomalies
"entropy_score" => 0.2, // Medium weight for entropy
"api_call_frequency" => 0.15, // Medium weight for API calls
"memory_fragmentation" => 0.1, // Lower weight for fragmentation
_ => 0.05, // Low weight for other features
};
score * weight
})
.sum();
weighted_sum.min(1.0)
};
// Calculate confidence based on sample size and feature coverage
let confidence = if let Some(profile) = baseline {
(profile.sample_count as f64 / 100.0).min(1.0) *
(component_scores.len() as f64 / 6.0).min(1.0)
} else {
0.0
};
Ok(AnomalyScore {
overall_score,
component_scores,
outlier_features,
confidence,
})
}
fn analyze_feature_anomaly(
&self,
feature_name: &str,
value: f64,
profile: &ProcessProfile,
component_scores: &mut HashMap<String, f64>,
outlier_features: &mut Vec<String>,
) {
if let (Some(&mean), Some(&std)) = (
profile.feature_means.get(feature_name),
profile.feature_stds.get(feature_name),
) {
if std > 0.0 {
// Calculate z-score
let z_score = (value - mean).abs() / std;
// Convert z-score to anomaly score (0-1)
let anomaly_score = (z_score / 4.0).min(1.0); // Cap at 4 standard deviations
component_scores.insert(feature_name.to_string(), anomaly_score);
// Mark as outlier if beyond threshold
if z_score > self.outlier_threshold {
outlier_features.push(format!(
"{}: {:.2} (μ={:.2}, σ={:.2}, z={:.2})",
feature_name, value, mean, std, z_score
));
}
}
}
}
fn update_process_profile(&mut self, process_name: &str, features: &ProcessFeatures) {
let profile = self
.process_profiles
.entry(process_name.to_string())
.or_insert_with(|| ProcessProfile {
name: process_name.to_string(),
feature_means: HashMap::new(),
feature_stds: HashMap::new(),
sample_count: 0,
last_updated: chrono::Utc::now(),
});
// Update running statistics (using Welford's online algorithm)
profile.sample_count += 1;
let n = profile.sample_count as f64;
// Define features to track
let feature_values = vec![
("thread_count", features.thread_count as f64),
("memory_regions", features.memory_regions as f64),
("rwx_regions", features.rwx_regions as f64),
("memory_fragmentation", features.memory_fragmentation),
("thread_creation_rate", features.thread_creation_rate),
("api_call_frequency", features.api_call_frequency),
("entropy_score", features.entropy_score),
];
for (feature_name, value) in feature_values {
// Update mean
let old_mean = profile.feature_means.get(feature_name).copied().unwrap_or(0.0);
let new_mean = old_mean + (value - old_mean) / n;
profile.feature_means.insert(feature_name.to_string(), new_mean);
// Update standard deviation (using variance)
if n > 1.0 {
let old_std = profile.feature_stds.get(feature_name).copied().unwrap_or(0.0);
let old_variance = old_std * old_std;
let new_variance = ((n - 2.0) * old_variance + (value - old_mean) * (value - new_mean)) / (n - 1.0);
let new_std = new_variance.max(0.0).sqrt();
profile.feature_stds.insert(feature_name.to_string(), new_std);
}
}
profile.last_updated = chrono::Utc::now();
}
fn estimate_api_call_frequency(&self, _process: &ProcessInfo, memory_regions: &[crate::MemoryRegion]) -> f64 {
// Heuristic: More executable regions might indicate more API calls
let executable_count = memory_regions
.iter()
.filter(|r| matches!(
r.protection,
crate::MemoryProtection::ReadExecute | crate::MemoryProtection::ReadWriteExecute
))
.count();
(executable_count as f64 / memory_regions.len().max(1) as f64) * 100.0
}
fn calculate_entropy_score(&self, memory_regions: &[crate::MemoryRegion]) -> f64 {
// Simplified entropy calculation based on region size distribution
if memory_regions.is_empty() {
return 0.0;
}
let total_size: usize = memory_regions.iter().map(|r| r.size).sum();
if total_size == 0 {
return 0.0;
}
let entropy: f64 = memory_regions
.iter()
.map(|r| {
let p = r.size as f64 / total_size as f64;
if p > 0.0 {
-p * p.log2()
} else {
0.0
}
})
.sum();
entropy / 10.0 // Normalize to 0-1 range approximately
}
pub fn is_anomalous(&self, score: &AnomalyScore) -> bool {
score.overall_score > self.detection_threshold && score.confidence > 0.5
}
pub fn get_process_profile(&self, process_name: &str) -> Option<&ProcessProfile> {
self.process_profiles.get(process_name)
}
pub fn set_detection_threshold(&mut self, threshold: f64) {
self.detection_threshold = threshold.clamp(0.0, 1.0);
}
}
impl Default for AnomalyDetector {
fn default() -> Self {
Self::new()
}
}

View File

@@ -1,17 +1,38 @@
pub mod anomaly;
pub mod detection; pub mod detection;
pub mod ebpf;
pub mod testing;
pub mod error; pub mod error;
pub mod evasion;
pub mod hollowing; pub mod hollowing;
pub mod hooks; pub mod hooks;
pub mod memory; pub mod memory;
pub mod process; pub mod process;
pub mod shellcode; pub mod shellcode;
pub mod streaming;
pub mod thread; pub mod thread;
pub mod threat_intel;
pub use anomaly::{AnomalyDetector, AnomalyScore, ProcessFeatures};
pub use detection::{DetectionEngine, DetectionResult, ThreatLevel}; pub use detection::{DetectionEngine, DetectionResult, ThreatLevel};
#[cfg(target_os = "linux")]
pub use ebpf::{EbpfDetector, EbpfEvent, EbpfError, EbpfStatistics};
pub use error::{GhostError, Result}; pub use error::{GhostError, Result};
pub use evasion::{
EvasionDetector, EvasionResult, EvasionTechnique, EvasionSeverity,
TimingAnalyzer, EnvironmentChecker, BehaviorAnalyzer, ObfuscationDetector
};
pub use hollowing::{HollowingDetection, HollowingDetector, HollowingIndicator}; pub use hollowing::{HollowingDetection, HollowingDetector, HollowingIndicator};
pub use hooks::{detect_hook_injection, HookDetectionResult, HookInfo}; pub use hooks::{detect_hook_injection, HookDetectionResult, HookInfo};
pub use memory::{MemoryProtection, MemoryRegion}; pub use memory::{MemoryProtection, MemoryRegion};
pub use process::ProcessInfo; pub use process::ProcessInfo;
pub use shellcode::{ShellcodeDetection, ShellcodeDetector}; pub use shellcode::{ShellcodeDetection, ShellcodeDetector};
pub use streaming::{
EventStreamingSystem, EventChannel, StreamingEvent, EventType, EventSeverity,
AlertManager, Alert, AlertRule, CorrelationEngine, NotificationSystem
};
pub use thread::ThreadInfo; pub use thread::ThreadInfo;
pub use threat_intel::{
ThreatIntelligence, ThreatContext, IndicatorOfCompromise,
ThreatActor, Campaign, IocType, SophisticationLevel
};