diff --git a/ghost-core/src/anomaly.rs b/ghost-core/src/anomaly.rs
new file mode 100644
index 0000000..3e21afe
--- /dev/null
+++ b/ghost-core/src/anomaly.rs
@@ -0,0 +1,446 @@
+//! Statistical anomaly detection over per-process behavioral features.
+//!
+//! A running mean and standard deviation is kept per feature for every process
+//! name; new samples are scored by their z-score against that profile.
+
+use crate::{GhostError, ProcessInfo, Result};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+/// Numeric feature vector extracted from one snapshot of a process.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ProcessFeatures {
+    pub pid: u32,
+    pub parent_pid: u32,
+    pub thread_count: u32,
+    pub memory_regions: usize,
+    pub executable_regions: usize,
+    pub rwx_regions: usize,
+    pub private_regions: usize,
+    pub image_regions: usize,
+    pub total_memory_size: usize,
+    pub largest_region_size: usize,
+    pub memory_fragmentation: f64,
+    pub thread_creation_rate: f64,
+    pub api_call_frequency: f64,
+    pub entropy_score: f64,
+    pub creation_time_hours: f64,
+    pub parent_child_ratio: f64,
+}
+
+/// Result of scoring one feature vector against a process profile.
+#[derive(Debug, Clone)]
+pub struct AnomalyScore {
+    /// Weighted average of the component scores, in `[0, 1]`.
+    pub overall_score: f64,
+    /// Per-feature anomaly score in `[0, 1]`, keyed by feature name.
+    pub component_scores: HashMap<String, f64>,
+    /// Human-readable descriptions of features beyond the outlier threshold.
+    pub outlier_features: Vec<String>,
+    /// Confidence in `[0, 1]`, derived from sample count and feature coverage.
+    pub confidence: f64,
+}
+
+/// Running per-feature statistics for one process name.
+#[derive(Debug, Clone)]
+pub struct ProcessProfile {
+    pub name: String,
+    pub feature_means: HashMap<String, f64>,
+    pub feature_stds: HashMap<String, f64>,
+    pub sample_count: usize,
+    pub last_updated: chrono::DateTime<chrono::Utc>,
+}
+
+/// Z-score based anomaly detector that learns a running profile per process name.
+pub struct AnomalyDetector {
+    process_profiles: HashMap<String, ProcessProfile>,
+    global_baseline: Option<ProcessProfile>,
+    detection_threshold: f64,
+    outlier_threshold: f64,
+    min_samples_for_profile: usize,
+}
+
+impl AnomalyDetector {
+    /// Creates a detector with default thresholds and no learned profiles.
+    pub fn new() -> Self {
+        Self {
+            process_profiles: HashMap::new(),
+            global_baseline: None,
+            detection_threshold: 0.7,
+            outlier_threshold: 2.5, // Standard deviations
+            min_samples_for_profile: 10,
+        }
+    }
+
+    /// Extract behavioral features from process data.
+    ///
+    /// `threads` is optional; when it is `None`, `thread_creation_rate` is `0.0`.
+    pub fn extract_features(
+        &self,
+        process: &ProcessInfo,
+        memory_regions: &[crate::MemoryRegion],
+        threads: Option<&[crate::ThreadInfo]>,
+    ) -> ProcessFeatures {
+        let executable_regions = memory_regions
+            .iter()
+            .filter(|r| matches!(
+                r.protection,
+                crate::MemoryProtection::ReadExecute | crate::MemoryProtection::ReadWriteExecute
+            ))
+            .count();
+
+        let rwx_regions = memory_regions
+            .iter()
+            .filter(|r| r.protection == crate::MemoryProtection::ReadWriteExecute)
+            .count();
+
+        let private_regions = memory_regions
+            .iter()
+            .filter(|r| r.region_type == "PRIVATE")
+            .count();
+
+        let image_regions = memory_regions
+            .iter()
+            .filter(|r| r.region_type == "IMAGE")
+            .count();
+
+        let total_memory_size: usize = memory_regions.iter().map(|r| r.size).sum();
+        let largest_region_size = memory_regions
+            .iter()
+            .map(|r| r.size)
+            .max()
+            .unwrap_or(0);
+
+        // Calculate memory fragmentation (std dev of region sizes)
+        let mean_size = if memory_regions.is_empty() {
+            0.0
+        } else {
+            total_memory_size as f64 / memory_regions.len() as f64
+        };
+
+        let variance = memory_regions
+            .iter()
+            .map(|r| {
+                let diff = r.size as f64 - mean_size;
+                diff * diff
+            })
+            .sum::<f64>()
+            / memory_regions.len().max(1) as f64;
+
+        let memory_fragmentation = variance.sqrt() / mean_size.max(1.0);
+
+        // Thread-based features
+        let thread_creation_rate = if let Some(thread_list) = threads {
+            let recent_threads = thread_list
+                .iter()
+                .filter(|t| t.creation_time > 0)
+                .count();
+            recent_threads as f64 / thread_list.len().max(1) as f64
+        } else {
+            0.0
+        };
+
+        // Simulate API call frequency (in real implementation, would track actual calls)
+        let api_call_frequency = self.estimate_api_call_frequency(process, memory_regions);
+
+        // Calculate entropy score based on memory content patterns
+        let entropy_score = self.calculate_entropy_score(memory_regions);
+
+        // Time-based features
+        // NOTE: this is the UTC hour-of-day at extraction time, not the process
+        // creation time. `hour()` is called through the `Timelike` trait path
+        // because the trait is not imported in this module.
+        let creation_time_hours = chrono::Timelike::hour(&chrono::Utc::now()) as f64;
+
+        // Parent-child relationship analysis
+        let parent_child_ratio = if process.ppid == 0 {
+            0.0
+        } else {
+            process.pid as f64 / process.ppid as f64
+        };
+
+        ProcessFeatures {
+            pid: process.pid,
+            parent_pid: process.ppid,
+            thread_count: process.thread_count,
+            memory_regions: memory_regions.len(),
+            executable_regions,
+            rwx_regions,
+            private_regions,
+            image_regions,
+            total_memory_size,
+            largest_region_size,
+            memory_fragmentation,
+            thread_creation_rate,
+            api_call_frequency,
+            entropy_score,
+            creation_time_hours,
+            parent_child_ratio,
+        }
+    }
+
+    /// Score `features` against the learned profile for `process.name`.
+    ///
+    /// The profile is updated with `features` *before* scoring, so the sample
+    /// being scored is already part of its own baseline. Component scores are
+    /// only produced once the profile holds `min_samples_for_profile` samples;
+    /// until then `overall_score` and `confidence` are both `0.0`.
+    ///
+    /// This currently always returns `Ok`.
+    pub fn analyze_anomaly(
+        &mut self,
+        process: &ProcessInfo,
+        features: &ProcessFeatures,
+    ) -> Result<AnomalyScore> {
+        // Update process profile with new data
+        self.update_process_profile(&process.name, features);
+
+        // Calculate anomaly scores
+        let mut component_scores = HashMap::new();
+        let mut outlier_features = Vec::new();
+
+        // Get baseline for comparison
+        let baseline = self
+            .process_profiles
+            .get(&process.name)
+            .or(self.global_baseline.as_ref());
+
+        if let Some(profile) = baseline {
+            if profile.sample_count >= self.min_samples_for_profile {
+                // Analyze each feature for anomalies
+                self.analyze_feature_anomaly(
+                    "thread_count",
+                    features.thread_count as f64,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+
+                self.analyze_feature_anomaly(
+                    "rwx_regions",
+                    features.rwx_regions as f64,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+
+                self.analyze_feature_anomaly(
+                    "memory_fragmentation",
+                    features.memory_fragmentation,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+
+                self.analyze_feature_anomaly(
+                    "thread_creation_rate",
+                    features.thread_creation_rate,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+
+                self.analyze_feature_anomaly(
+                    "api_call_frequency",
+                    features.api_call_frequency,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+
+                self.analyze_feature_anomaly(
+                    "entropy_score",
+                    features.entropy_score,
+                    profile,
+                    &mut component_scores,
+                    &mut outlier_features,
+                );
+            }
+        }
+
+        // Calculate overall anomaly score
+        let overall_score = if component_scores.is_empty() {
+            0.0 // Not enough data for analysis
+        } else {
+            // Weighted average of component scores, normalized by the weights
+            // actually present so that missing features do not deflate it.
+            let (weighted_sum, total_weight) = component_scores
+                .iter()
+                .map(|(feature, score)| {
+                    let weight = match feature.as_str() {
+                        "rwx_regions" => 0.3,           // High weight for RWX regions
+                        "thread_creation_rate" => 0.25, // High weight for thread anomalies
+                        "entropy_score" => 0.2,         // Medium weight for entropy
+                        "api_call_frequency" => 0.15,   // Medium weight for API calls
+                        "memory_fragmentation" => 0.1,  // Lower weight for fragmentation
+                        _ => 0.05,                      // Low weight for other features
+                    };
+                    (score * weight, weight)
+                })
+                .fold((0.0_f64, 0.0_f64), |(sum, total), (s, w)| (sum + s, total + w));
+
+            (weighted_sum / total_weight).min(1.0)
+        };
+
+        // Calculate confidence based on sample size and feature coverage
+        let confidence = if let Some(profile) = baseline {
+            (profile.sample_count as f64 / 100.0).min(1.0) *
+            (component_scores.len() as f64 / 6.0).min(1.0)
+        } else {
+            0.0
+        };
+
+        Ok(AnomalyScore {
+            overall_score,
+            component_scores,
+            outlier_features,
+            confidence,
+        })
+    }
+
+    /// Score a single feature by its z-score against `profile`.
+    ///
+    /// Nothing is recorded when the profile lacks the feature or when its
+    /// standard deviation is zero.
+    fn analyze_feature_anomaly(
+        &self,
+        feature_name: &str,
+        value: f64,
+        profile: &ProcessProfile,
+        component_scores: &mut HashMap<String, f64>,
+        outlier_features: &mut Vec<String>,
+    ) {
+        if let (Some(&mean), Some(&std)) = (
+            profile.feature_means.get(feature_name),
+            profile.feature_stds.get(feature_name),
+        ) {
+            if std > 0.0 {
+                // Calculate z-score
+                let z_score = (value - mean).abs() / std;
+
+                // Convert z-score to anomaly score (0-1)
+                let anomaly_score = (z_score / 4.0).min(1.0); // Cap at 4 standard deviations
+
+                component_scores.insert(feature_name.to_string(), anomaly_score);
+
+                // Mark as outlier if beyond threshold
+                if z_score > self.outlier_threshold {
+                    outlier_features.push(format!(
+                        "{}: {:.2} (μ={:.2}, σ={:.2}, z={:.2})",
+                        feature_name, value, mean, std, z_score
+                    ));
+                }
+            }
+        }
+    }
+
+    /// Fold `features` into the running mean / standard deviation of the
+    /// profile for `process_name`, creating the profile on first use.
+    fn update_process_profile(&mut self, process_name: &str, features: &ProcessFeatures) {
+        let profile = self
+            .process_profiles
+            .entry(process_name.to_string())
+            .or_insert_with(|| ProcessProfile {
+                name: process_name.to_string(),
+                feature_means: HashMap::new(),
+                feature_stds: HashMap::new(),
+                sample_count: 0,
+                last_updated: chrono::Utc::now(),
+            });
+
+        // Update running statistics (using Welford's online algorithm)
+        profile.sample_count += 1;
+        let n = profile.sample_count as f64;
+
+        // Define features to track
+        let feature_values = vec![
+            ("thread_count", features.thread_count as f64),
+            ("memory_regions", features.memory_regions as f64),
+            ("rwx_regions", features.rwx_regions as f64),
+            ("memory_fragmentation", features.memory_fragmentation),
+            ("thread_creation_rate", features.thread_creation_rate),
+            ("api_call_frequency", features.api_call_frequency),
+            ("entropy_score", features.entropy_score),
+        ];
+
+        for (feature_name, value) in feature_values {
+            // Update mean
+            let old_mean = profile.feature_means.get(feature_name).copied().unwrap_or(0.0);
+            let new_mean = old_mean + (value - old_mean) / n;
+            profile.feature_means.insert(feature_name.to_string(), new_mean);
+
+            // Update standard deviation (using variance)
+            if n > 1.0 {
+                let old_std = profile.feature_stds.get(feature_name).copied().unwrap_or(0.0);
+                let old_variance = old_std * old_std;
+                let new_variance = ((n - 2.0) * old_variance + (value - old_mean) * (value - new_mean)) / (n - 1.0);
+                let new_std = new_variance.max(0.0).sqrt();
+                profile.feature_stds.insert(feature_name.to_string(), new_std);
+            }
+        }
+
+        profile.last_updated = chrono::Utc::now();
+    }
+
+    /// Placeholder estimate: percentage (0-100) of regions that are executable.
+    fn estimate_api_call_frequency(&self, _process: &ProcessInfo, memory_regions: &[crate::MemoryRegion]) -> f64 {
+        // Heuristic: More executable regions might indicate more API calls
+        let executable_count = memory_regions
+            .iter()
+            .filter(|r| matches!(
+                r.protection,
+                crate::MemoryProtection::ReadExecute | crate::MemoryProtection::ReadWriteExecute
+            ))
+            .count();
+
+        (executable_count as f64 / memory_regions.len().max(1) as f64) * 100.0
+    }
+
+    /// Shannon entropy (in bits) of the region-size distribution, divided by 10.
+    fn calculate_entropy_score(&self, memory_regions: &[crate::MemoryRegion]) -> f64 {
+        // Simplified entropy calculation based on region size distribution
+        if memory_regions.is_empty() {
+            return 0.0;
+        }
+
+        let total_size: usize = memory_regions.iter().map(|r| r.size).sum();
+        if total_size == 0 {
+            return 0.0;
+        }
+
+        let entropy: f64 = memory_regions
+            .iter()
+            .map(|r| {
+                let p = r.size as f64 / total_size as f64;
+                if p > 0.0 {
+                    -p * p.log2()
+                } else {
+                    0.0
+                }
+            })
+            .sum();
+
+        entropy / 10.0 // Normalize to 0-1 range approximately
+    }
+
+    /// Returns `true` when the score exceeds the detection threshold with confidence above 0.5.
+    pub fn is_anomalous(&self, score: &AnomalyScore) -> bool {
+        score.overall_score > self.detection_threshold && score.confidence > 0.5
+    }
+
+    /// Returns the learned profile for `process_name`, if one exists.
+    pub fn get_process_profile(&self, process_name: &str) -> Option<&ProcessProfile> {
+        self.process_profiles.get(process_name)
+    }
+
+    /// Sets the detection threshold, clamped to `[0.0, 1.0]`.
+    pub fn set_detection_threshold(&mut self, threshold: f64) {
+        self.detection_threshold = threshold.clamp(0.0, 1.0);
+    }
+}
+
+impl Default for AnomalyDetector {
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/ghost-core/src/lib.rs b/ghost-core/src/lib.rs
index 9ffd5a7..549fe06 100644
--- a/ghost-core/src/lib.rs
+++ b/ghost-core/src/lib.rs
@@ -1,17 +1,38 @@
+pub mod anomaly;
 pub mod detection;
+pub mod ebpf;
+pub mod testing;
 pub mod error;
+pub mod evasion;
 pub mod hollowing;
 pub mod hooks;
 pub mod memory;
 pub mod process;
 pub mod shellcode;
+pub mod streaming;
 pub mod thread;
+pub mod threat_intel;
 
+pub use anomaly::{AnomalyDetector, AnomalyScore, ProcessFeatures, ProcessProfile};
 pub use detection::{DetectionEngine, DetectionResult, ThreatLevel};
+#[cfg(target_os = "linux")]
+pub use ebpf::{EbpfDetector, EbpfEvent, EbpfError, EbpfStatistics};
 pub use error::{GhostError, Result};
+pub use evasion::{
+    EvasionDetector, EvasionResult, EvasionTechnique, EvasionSeverity,
+    TimingAnalyzer, EnvironmentChecker, BehaviorAnalyzer, ObfuscationDetector
+};
 pub use hollowing::{HollowingDetection, HollowingDetector, HollowingIndicator};
 pub use hooks::{detect_hook_injection, HookDetectionResult, HookInfo};
 pub use memory::{MemoryProtection, MemoryRegion};
 pub use process::ProcessInfo;
 pub use shellcode::{ShellcodeDetection, ShellcodeDetector};
+pub use streaming::{
+    EventStreamingSystem, EventChannel, StreamingEvent, EventType, EventSeverity,
+    AlertManager, Alert, AlertRule, CorrelationEngine, NotificationSystem
+};
 pub use thread::ThreadInfo;
+pub use threat_intel::{
+    ThreatIntelligence, ThreatContext, IndicatorOfCompromise,
+    ThreatActor, Campaign, IocType, SophisticationLevel
+};