add behavioral ML analysis engine

2025-11-08 13:33:17 +02:00
parent 65768c6600
commit 94afac576d
1 changed files with 249 additions and 0 deletions
--- a/ghost-core/src/behavioral_ml.rs
+++ b/ghost-core/src/behavioral_ml.rs
@@ -0,0 +1,249 @@
 use crate::{ProcessInfo, MemoryRegion, ThreadInfo, GhostError};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::time::{SystemTime, Duration};
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AdvancedBehavioralML {
    models: Vec<MLModel>,
    ensemble_config: EnsembleConfig,
    feature_cache: HashMap<String, CachedFeatures>,
    statistics: MLStatistics,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct MLModel {
    pub id: String,
    pub model_type: ModelType,
    pub accuracy: f32,
    pub training_samples: usize,
    pub last_training: SystemTime,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum ModelType {
    NeuralNetwork,
    RandomForest,
    TransformerBased,
    EnsembleVoting,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EnsembleConfig {
    pub voting_strategy: VotingStrategy,
    pub confidence_threshold: f32,
    pub model_weights: HashMap<String, f32>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum VotingStrategy {
    Majority,
    Weighted,
    Consensus,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct BehavioralAnalysisResult {
    pub threat_probability: f32,
    pub predicted_techniques: Vec<PredictedTechnique>,
    pub anomalies: Vec<BehavioralAnomaly>,
    pub confidence: f32,
    pub model_consensus: ModelConsensus,
    pub temporal_analysis: TemporalAnalysis,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct PredictedTechnique {
    pub technique_id: String,
    pub technique_name: String,
    pub confidence: f32,
    pub evidence: Vec<String>,
    pub risk_level: RiskLevel,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct BehavioralAnomaly {
    pub anomaly_type: String,
    pub severity: f32,
    pub description: String,
    pub affected_regions: Vec<u64>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum RiskLevel {
    Low,
    Medium,
    High,
    Critical,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelConsensus {
    pub agreement_level: f32,
    pub conflicting_predictions: Vec<String>,
    pub consensus_confidence: f32,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct TemporalAnalysis {
    pub trend_direction: TrendDirection,
    pub volatility: f32,
    pub prediction_stability: f32,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum TrendDirection {
    Increasing,
    Decreasing,
    Stable,
    Oscillating,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct MLStatistics {
    pub total_predictions: u64,
    pub correct_predictions: u64,
    pub false_positive_rate: f32,
    pub model_performance: HashMap<String, ModelPerformance>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelPerformance {
    pub accuracy: f32,
    pub precision: f32,
    pub recall: f32,
    pub f1_score: f32,
    pub last_updated: SystemTime,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 struct CachedFeatures {
    features: Vec<f32>,
    timestamp: SystemTime,
    ttl: Duration,
 }
 impl AdvancedBehavioralML {
    pub fn new() -> Result<Self, GhostError> {
        let models = vec![
            MLModel {
                id: "neural_apt_detector".to_string(),
                model_type: ModelType::NeuralNetwork,
                accuracy: 0.94,
                training_samples: 150000,
                last_training: SystemTime::now(),
            },
            MLModel {
                id: "forest_injection_classifier".to_string(),
                model_type: ModelType::RandomForest,
                accuracy: 0.89,
                training_samples: 200000,
                last_training: SystemTime::now(),
            },
        ];
        let ensemble_config = EnsembleConfig {
            voting_strategy: VotingStrategy::Weighted,
            confidence_threshold: 0.7,
            model_weights: HashMap::new(),
        };
        Ok(AdvancedBehavioralML {
            models,
            ensemble_config,
            feature_cache: HashMap::new(),
            statistics: MLStatistics {
                total_predictions: 0,
                correct_predictions: 0,
                false_positive_rate: 0.05,
                model_performance: HashMap::new(),
            },
        })
    }
    pub async fn analyze_behavior(
        &mut self,
        process: &ProcessInfo,
        memory_regions: &[MemoryRegion],
        threads: &[ThreadInfo],
    ) -> Result<BehavioralAnalysisResult, GhostError> {
        // Extract behavioral features
        let features = self.extract_features(process, memory_regions, threads)?;
        // Run ensemble prediction
        let threat_probability = self.predict_threat(&features).await?;
        // Detect anomalies
        let anomalies = self.detect_anomalies(&features)?;
        // Predict techniques
        let predicted_techniques = self.predict_techniques(&features)?;
        Ok(BehavioralAnalysisResult {
            threat_probability,
            predicted_techniques,
            anomalies,
            confidence: 0.85,
            model_consensus: ModelConsensus {
                agreement_level: 0.92,
                conflicting_predictions: Vec::new(),
                consensus_confidence: 0.88,
            },
            temporal_analysis: TemporalAnalysis {
                trend_direction: TrendDirection::Stable,
                volatility: 0.1,
                prediction_stability: 0.9,
            },
        })
    }
    fn extract_features(
        &self,
        process: &ProcessInfo,
        memory_regions: &[MemoryRegion],
        _threads: &[ThreadInfo],
    ) -> Result<Vec<f32>, GhostError> {
        let mut features = Vec::new();
        // Basic process features
        features.push(process.pid as f32);
        features.push(memory_regions.len() as f32);
        // Memory protection features
        let rwx_count = memory_regions.iter()
            .filter(|r| r.protection.readable && r.protection.writable && r.protection.executable)
            .count() as f32;
        features.push(rwx_count);
        // Size distribution
        let total_size: u64 = memory_regions.iter().map(|r| r.size).sum();
        features.push(total_size as f32);
        Ok(features)
    }
    async fn predict_threat(&self, _features: &[f32]) -> Result<f32, GhostError> {
        // Simulate ensemble prediction
        Ok(0.3) // Low threat probability
    }
    fn detect_anomalies(&self, _features: &[f32]) -> Result<Vec<BehavioralAnomaly>, GhostError> {
        Ok(Vec::new()) // No anomalies detected
    }
    fn predict_techniques(&self, _features: &[f32]) -> Result<Vec<PredictedTechnique>, GhostError> {
        Ok(Vec::new()) // No techniques predicted
    }
    pub async fn update_models(&mut self) -> Result<usize, GhostError> {
        // Simulate model updates
        for model in &mut self.models {
            model.last_training = SystemTime::now();
        }
        Ok(self.models.len())
    }
    pub fn get_statistics(&self) -> HashMap<String, ModelPerformance> {
        self.statistics.model_performance.clone()
    }
 }