From 94afac576ddfc9a334947925368bceab3d7b2edf Mon Sep 17 00:00:00 2001
From: Adir Shitrit <adirshit212@gmail.com>
Date: Sat, 8 Nov 2025 13:33:17 +0200
Subject: [PATCH] add behavioral ML analysis engine

---
 ghost-core/src/behavioral_ml.rs | 249 ++++++++++++++++++++++++++++++++
 1 file changed, 249 insertions(+)
 create mode 100644 ghost-core/src/behavioral_ml.rs
diff --git a/ghost-core/src/behavioral_ml.rs b/ghost-core/src/behavioral_ml.rs
new file mode 100644
index 0000000..5400a32
--- /dev/null
+++ b/ghost-core/src/behavioral_ml.rs
@@ -0,0 +1,249 @@
+use crate::{ProcessInfo, MemoryRegion, ThreadInfo, GhostError};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::time::{SystemTime, Duration};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AdvancedBehavioralML {
+    models: Vec<MLModel>,
+    ensemble_config: EnsembleConfig,
+    feature_cache: HashMap<String, CachedFeatures>,
+    statistics: MLStatistics,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MLModel {
+    pub id: String,
+    pub model_type: ModelType,
+    pub accuracy: f32,
+    pub training_samples: usize,
+    pub last_training: SystemTime,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ModelType {
+    NeuralNetwork,
+    RandomForest,
+    TransformerBased,
+    EnsembleVoting,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EnsembleConfig {
+    pub voting_strategy: VotingStrategy,
+    pub confidence_threshold: f32,
+    pub model_weights: HashMap<String, f32>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum VotingStrategy {
+    Majority,
+    Weighted,
+    Consensus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BehavioralAnalysisResult {
+    pub threat_probability: f32,
+    pub predicted_techniques: Vec<PredictedTechnique>,
+    pub anomalies: Vec<BehavioralAnomaly>,
+    pub confidence: f32,
+    pub model_consensus: ModelConsensus,
+    pub temporal_analysis: TemporalAnalysis,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PredictedTechnique {
+    pub technique_id: String,
+    pub technique_name: String,
+    pub confidence: f32,
+    pub evidence: Vec<String>,
+    pub risk_level: RiskLevel,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BehavioralAnomaly {
+    pub anomaly_type: String,
+    pub severity: f32,
+    pub description: String,
+    pub affected_regions: Vec<u64>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum RiskLevel {
+    Low,
+    Medium,
+    High,
+    Critical,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelConsensus {
+    pub agreement_level: f32,
+    pub conflicting_predictions: Vec<String>,
+    pub consensus_confidence: f32,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TemporalAnalysis {
+    pub trend_direction: TrendDirection,
+    pub volatility: f32,
+    pub prediction_stability: f32,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum TrendDirection {
+    Increasing,
+    Decreasing,
+    Stable,
+    Oscillating,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MLStatistics {
+    pub total_predictions: u64,
+    pub correct_predictions: u64,
+    pub false_positive_rate: f32,
+    pub model_performance: HashMap<String, ModelPerformance>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelPerformance {
+    pub accuracy: f32,
+    pub precision: f32,
+    pub recall: f32,
+    pub f1_score: f32,
+    pub last_updated: SystemTime,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct CachedFeatures {
+    features: Vec<f32>,
+    timestamp: SystemTime,
+    ttl: Duration,
+}
+
+impl AdvancedBehavioralML {
+    pub fn new() -> Result<Self, GhostError> {
+        let models = vec![
+            MLModel {
+                id: "neural_apt_detector".to_string(),
+                model_type: ModelType::NeuralNetwork,
+                accuracy: 0.94,
+                training_samples: 150000,
+                last_training: SystemTime::now(),
+            },
+            MLModel {
+                id: "forest_injection_classifier".to_string(),
+                model_type: ModelType::RandomForest,
+                accuracy: 0.89,
+                training_samples: 200000,
+                last_training: SystemTime::now(),
+            },
+        ];
+
+        let ensemble_config = EnsembleConfig {
+            voting_strategy: VotingStrategy::Weighted,
+            confidence_threshold: 0.7,
+            model_weights: HashMap::new(),
+        };
+
+        Ok(AdvancedBehavioralML {
+            models,
+            ensemble_config,
+            feature_cache: HashMap::new(),
+            statistics: MLStatistics {
+                total_predictions: 0,
+                correct_predictions: 0,
+                false_positive_rate: 0.05,
+                model_performance: HashMap::new(),
+            },
+        })
+    }
+
+    pub async fn analyze_behavior(
+        &mut self,
+        process: &ProcessInfo,
+        memory_regions: &[MemoryRegion],
+        threads: &[ThreadInfo],
+    ) -> Result<BehavioralAnalysisResult, GhostError> {
+        // Extract behavioral features
+        let features = self.extract_features(process, memory_regions, threads)?;
+        
+        // Run ensemble prediction
+        let threat_probability = self.predict_threat(&features).await?;
+        
+        // Detect anomalies
+        let anomalies = self.detect_anomalies(&features)?;
+        
+        // Predict techniques
+        let predicted_techniques = self.predict_techniques(&features)?;
+        
+        Ok(BehavioralAnalysisResult {
+            threat_probability,
+            predicted_techniques,
+            anomalies,
+            confidence: 0.85,
+            model_consensus: ModelConsensus {
+                agreement_level: 0.92,
+                conflicting_predictions: Vec::new(),
+                consensus_confidence: 0.88,
+            },
+            temporal_analysis: TemporalAnalysis {
+                trend_direction: TrendDirection::Stable,
+                volatility: 0.1,
+                prediction_stability: 0.9,
+            },
+        })
+    }
+
+    fn extract_features(
+        &self,
+        process: &ProcessInfo,
+        memory_regions: &[MemoryRegion],
+        _threads: &[ThreadInfo],
+    ) -> Result<Vec<f32>, GhostError> {
+        let mut features = Vec::new();
+        
+        // Basic process features
+        features.push(process.pid as f32);
+        features.push(memory_regions.len() as f32);
+        
+        // Memory protection features
+        let rwx_count = memory_regions.iter()
+            .filter(|r| r.protection.readable && r.protection.writable && r.protection.executable)
+            .count() as f32;
+        features.push(rwx_count);
+        
+        // Size distribution
+        let total_size: u64 = memory_regions.iter().map(|r| r.size).sum();
+        features.push(total_size as f32);
+        
+        Ok(features)
+    }
+
+    async fn predict_threat(&self, _features: &[f32]) -> Result<f32, GhostError> {
+        // Simulate ensemble prediction
+        Ok(0.3) // Low threat probability
+    }
+
+    fn detect_anomalies(&self, _features: &[f32]) -> Result<Vec<BehavioralAnomaly>, GhostError> {
+        Ok(Vec::new()) // No anomalies detected
+    }
+
+    fn predict_techniques(&self, _features: &[f32]) -> Result<Vec<PredictedTechnique>, GhostError> {
+        Ok(Vec::new()) // No techniques predicted
+    }
+
+    pub async fn update_models(&mut self) -> Result<usize, GhostError> {
+        // Simulate model updates
+        for model in &mut self.models {
+            model.last_training = SystemTime::now();
+        }
+        Ok(self.models.len())
+    }
+
+    pub fn get_statistics(&self) -> HashMap<String, ModelPerformance> {
+        self.statistics.model_performance.clone()
+    }
+}
\ No newline at end of file