package security import ( "context" "encoding/json" "fmt" "runtime" "sort" "sync" "time" "github.com/fraktal/mev-beta/internal/logger" ) // PerformanceProfiler provides comprehensive performance monitoring for security operations type PerformanceProfiler struct { logger *logger.Logger config *ProfilerConfig metrics map[string]*PerformanceMetric operations map[string]*OperationProfile mutex sync.RWMutex // Runtime metrics memStats runtime.MemStats goroutineInfo *GoroutineInfo // Performance tracking operationTimings map[string][]time.Duration resourceUsage *ResourceUsage // Alerts and thresholds alerts []PerformanceAlert thresholds map[string]PerformanceThreshold // Profiling control ctx context.Context cancel context.CancelFunc // Report generation reports []*PerformanceReport } // ProfilerConfig configures the performance profiler type ProfilerConfig struct { // Monitoring settings SamplingInterval time.Duration `json:"sampling_interval"` RetentionPeriod time.Duration `json:"retention_period"` MaxOperations int `json:"max_operations"` // Alert thresholds MaxMemoryUsage uint64 `json:"max_memory_usage"` MaxGoroutines int `json:"max_goroutines"` MaxResponseTime time.Duration `json:"max_response_time"` MinThroughput float64 `json:"min_throughput"` // Performance optimization EnableGCMetrics bool `json:"enable_gc_metrics"` EnableCPUProfiling bool `json:"enable_cpu_profiling"` EnableMemProfiling bool `json:"enable_mem_profiling"` // Reporting ReportInterval time.Duration `json:"report_interval"` AutoOptimize bool `json:"auto_optimize"` } // PerformanceMetric represents a specific performance measurement type PerformanceMetric struct { Name string `json:"name"` Type string `json:"type"` // "counter", "gauge", "histogram", "timer" Value float64 `json:"value"` Unit string `json:"unit"` Timestamp time.Time `json:"timestamp"` Tags map[string]string `json:"tags"` // Statistical data Min float64 `json:"min"` Max float64 `json:"max"` Mean float64 `json:"mean"` StdDev float64 `json:"std_dev"` Percentiles map[string]float64 `json:"percentiles"` // Trend analysis Trend string `json:"trend"` // "increasing", "decreasing", "stable" TrendScore float64 `json:"trend_score"` } // OperationProfile tracks performance of specific security operations type OperationProfile struct { Operation string `json:"operation"` TotalCalls int64 `json:"total_calls"` TotalDuration time.Duration `json:"total_duration"` AverageTime time.Duration `json:"average_time"` MinTime time.Duration `json:"min_time"` MaxTime time.Duration `json:"max_time"` // Throughput metrics CallsPerSecond float64 `json:"calls_per_second"` Throughput float64 `json:"throughput"` // Error tracking ErrorCount int64 `json:"error_count"` ErrorRate float64 `json:"error_rate"` LastError string `json:"last_error"` LastErrorTime time.Time `json:"last_error_time"` // Resource usage MemoryUsed uint64 `json:"memory_used"` CPUTime time.Duration `json:"cpu_time"` GoroutinesUsed int `json:"goroutines_used"` // Performance classification PerformanceClass string `json:"performance_class"` // "excellent", "good", "average", "poor", "critical" Bottlenecks []string `json:"bottlenecks"` Recommendations []string `json:"recommendations"` } // GoroutineInfo tracks goroutine usage and health type GoroutineInfo struct { Total int `json:"total"` Running int `json:"running"` Waiting int `json:"waiting"` Blocked int `json:"blocked"` Details []GoroutineDetail `json:"details"` LeakSuspects []GoroutineDetail `json:"leak_suspects"` } // GoroutineDetail provides detailed goroutine information type GoroutineDetail struct { ID int `json:"id"` State string `json:"state"` Function string `json:"function"` Duration time.Duration `json:"duration"` StackTrace string `json:"stack_trace"` } // ResourceUsage tracks system resource consumption type ResourceUsage struct { // Memory metrics HeapUsed uint64 `json:"heap_used"` HeapAllocated uint64 `json:"heap_allocated"` HeapIdle uint64 `json:"heap_idle"` HeapReleased uint64 `json:"heap_released"` StackUsed uint64 `json:"stack_used"` // GC metrics GCCycles uint32 `json:"gc_cycles"` GCPauseTotal time.Duration `json:"gc_pause_total"` GCPauseAvg time.Duration `json:"gc_pause_avg"` GCPauseMax time.Duration `json:"gc_pause_max"` // CPU metrics CPUUsage float64 `json:"cpu_usage"` CPUTime time.Duration `json:"cpu_time"` // Timing Timestamp time.Time `json:"timestamp"` UptimeSeconds int64 `json:"uptime_seconds"` } // PerformanceAlert represents a performance-related alert type PerformanceAlert struct { ID string `json:"id"` Type string `json:"type"` // "memory", "cpu", "response_time", "throughput", "error_rate" Severity string `json:"severity"` // "low", "medium", "high", "critical" Message string `json:"message"` Metric string `json:"metric"` Value float64 `json:"value"` Threshold float64 `json:"threshold"` Timestamp time.Time `json:"timestamp"` Operation string `json:"operation"` Context map[string]interface{} `json:"context"` // Resolution tracking Resolved bool `json:"resolved"` ResolvedAt time.Time `json:"resolved_at"` ResolutionNote string `json:"resolution_note"` // Impact assessment ImpactLevel string `json:"impact_level"` AffectedOps []string `json:"affected_operations"` Recommendations []string `json:"recommendations"` } // PerformanceThreshold defines performance alert thresholds type PerformanceThreshold struct { Metric string `json:"metric"` Warning float64 `json:"warning"` Critical float64 `json:"critical"` Operator string `json:"operator"` // "gt", "lt", "eq" WindowSize time.Duration `json:"window_size"` Consecutive int `json:"consecutive"` // consecutive violations before alert } // PerformanceReport represents a comprehensive performance analysis report type PerformanceReport struct { ID string `json:"id"` Timestamp time.Time `json:"timestamp"` Period time.Duration `json:"period"` // Overall health OverallHealth string `json:"overall_health"` // "excellent", "good", "fair", "poor", "critical" HealthScore float64 `json:"health_score"` // 0-100 // Performance summary TopOperations []*OperationProfile `json:"top_operations"` Bottlenecks []BottleneckAnalysis `json:"bottlenecks"` Improvements []ImprovementSuggestion `json:"improvements"` // Resource analysis ResourceSummary *ResourceSummary `json:"resource_summary"` TrendAnalysis *PerformanceTrends `json:"trend_analysis"` // Alerts and issues ActiveAlerts []PerformanceAlert `json:"active_alerts"` ResolvedAlerts []PerformanceAlert `json:"resolved_alerts"` // Comparative analysis PreviousPeriod *PerformanceComparison `json:"previous_period"` Baseline *PerformanceBaseline `json:"baseline"` // Recommendations Recommendations []PerformanceRecommendation `json:"recommendations"` OptimizationPlan *OptimizationPlan `json:"optimization_plan"` } // Additional supporting types for comprehensive reporting type BottleneckAnalysis struct { Operation string `json:"operation"` Type string `json:"type"` // "cpu", "memory", "io", "lock", "gc" Severity string `json:"severity"` Impact float64 `json:"impact"` // impact score 0-100 Description string `json:"description"` Solution string `json:"solution"` } type ImprovementSuggestion struct { Area string `json:"area"` Current float64 `json:"current"` Target float64 `json:"target"` Improvement float64 `json:"improvement"` // percentage improvement Effort string `json:"effort"` // "low", "medium", "high" Priority string `json:"priority"` Description string `json:"description"` } type ResourceSummary struct { MemoryEfficiency float64 `json:"memory_efficiency"` // 0-100 CPUEfficiency float64 `json:"cpu_efficiency"` // 0-100 GCEfficiency float64 `json:"gc_efficiency"` // 0-100 ThroughputScore float64 `json:"throughput_score"` // 0-100 } type PerformanceTrends struct { MemoryTrend string `json:"memory_trend"` CPUTrend string `json:"cpu_trend"` ThroughputTrend string `json:"throughput_trend"` ErrorRateTrend string `json:"error_rate_trend"` PredictedIssues []string `json:"predicted_issues"` } type PerformanceComparison struct { MemoryChange float64 `json:"memory_change"` // percentage change CPUChange float64 `json:"cpu_change"` // percentage change ThroughputChange float64 `json:"throughput_change"` // percentage change ErrorRateChange float64 `json:"error_rate_change"` // percentage change } type PerformanceBaseline struct { EstablishedAt time.Time `json:"established_at"` MemoryBaseline uint64 `json:"memory_baseline"` CPUBaseline float64 `json:"cpu_baseline"` ThroughputBaseline float64 `json:"throughput_baseline"` ResponseTimeBaseline time.Duration `json:"response_time_baseline"` } type PerformanceRecommendation struct { Type string `json:"type"` // "immediate", "short_term", "long_term" Priority string `json:"priority"` Category string `json:"category"` // "memory", "cpu", "architecture", "algorithm" Title string `json:"title"` Description string `json:"description"` Implementation string `json:"implementation"` ExpectedGain float64 `json:"expected_gain"` // percentage improvement Effort string `json:"effort"` } type OptimizationPlan struct { Phase1 []PerformanceRecommendation `json:"phase1"` // immediate fixes Phase2 []PerformanceRecommendation `json:"phase2"` // short-term improvements Phase3 []PerformanceRecommendation `json:"phase3"` // long-term optimizations TotalGain float64 `json:"total_gain"` // expected total improvement Timeline time.Duration `json:"timeline"` } // NewPerformanceProfiler creates a new performance profiler instance func NewPerformanceProfiler(logger *logger.Logger, config *ProfilerConfig) *PerformanceProfiler { cfg := defaultProfilerConfig() if config != nil { if config.SamplingInterval > 0 { cfg.SamplingInterval = config.SamplingInterval } if config.RetentionPeriod > 0 { cfg.RetentionPeriod = config.RetentionPeriod } if config.MaxOperations > 0 { cfg.MaxOperations = config.MaxOperations } if config.MaxMemoryUsage > 0 { cfg.MaxMemoryUsage = config.MaxMemoryUsage } if config.MaxGoroutines > 0 { cfg.MaxGoroutines = config.MaxGoroutines } if config.MaxResponseTime > 0 { cfg.MaxResponseTime = config.MaxResponseTime } if config.MinThroughput > 0 { cfg.MinThroughput = config.MinThroughput } if config.ReportInterval > 0 { cfg.ReportInterval = config.ReportInterval } cfg.EnableGCMetrics = config.EnableGCMetrics cfg.EnableCPUProfiling = config.EnableCPUProfiling cfg.EnableMemProfiling = config.EnableMemProfiling cfg.AutoOptimize = config.AutoOptimize } ctx, cancel := context.WithCancel(context.Background()) profiler := &PerformanceProfiler{ logger: logger, config: cfg, metrics: make(map[string]*PerformanceMetric), operations: make(map[string]*OperationProfile), operationTimings: make(map[string][]time.Duration), resourceUsage: &ResourceUsage{}, alerts: make([]PerformanceAlert, 0), thresholds: make(map[string]PerformanceThreshold), ctx: ctx, cancel: cancel, reports: make([]*PerformanceReport, 0), } // Initialize default thresholds profiler.initializeDefaultThresholds() profiler.collectSystemMetrics() // Start background monitoring go profiler.startMonitoring() return profiler } func defaultProfilerConfig() *ProfilerConfig { return &ProfilerConfig{ SamplingInterval: time.Second, RetentionPeriod: 24 * time.Hour, MaxOperations: 1000, MaxMemoryUsage: 1024 * 1024 * 1024, // 1GB MaxGoroutines: 1000, MaxResponseTime: time.Second, MinThroughput: 100, EnableGCMetrics: true, EnableCPUProfiling: true, EnableMemProfiling: true, ReportInterval: time.Hour, AutoOptimize: false, } } // initializeDefaultThresholds sets up default performance thresholds func (pp *PerformanceProfiler) initializeDefaultThresholds() { maxMemory := pp.config.MaxMemoryUsage if maxMemory == 0 { maxMemory = 1024 * 1024 * 1024 } warningMemory := float64(maxMemory) * 0.8 pp.thresholds["memory_usage"] = PerformanceThreshold{ Metric: "memory_usage", Warning: warningMemory, Critical: float64(maxMemory), Operator: "gt", WindowSize: time.Minute, Consecutive: 3, } maxGoroutines := pp.config.MaxGoroutines if maxGoroutines == 0 { maxGoroutines = 1000 } warningGoroutines := float64(maxGoroutines) * 0.8 pp.thresholds["goroutine_count"] = PerformanceThreshold{ Metric: "goroutine_count", Warning: warningGoroutines, Critical: float64(maxGoroutines), Operator: "gt", WindowSize: time.Minute, Consecutive: 2, } responseWarning := float64(pp.config.MaxResponseTime.Milliseconds()) if responseWarning <= 0 { responseWarning = 500 } responseCritical := responseWarning * 2 pp.thresholds["response_time"] = PerformanceThreshold{ Metric: "response_time", Warning: responseWarning, Critical: responseCritical, Operator: "gt", WindowSize: time.Minute, Consecutive: 1, } pp.thresholds["error_rate"] = PerformanceThreshold{ Metric: "error_rate", Warning: 5.0, // 5% Critical: 10.0, // 10% Operator: "gt", WindowSize: 5 * time.Minute, Consecutive: 3, } } // StartOperation begins performance tracking for a specific operation func (pp *PerformanceProfiler) StartOperation(operation string) *OperationTracker { return &OperationTracker{ profiler: pp, operation: operation, startTime: time.Now(), startMem: pp.getCurrentMemory(), } } // OperationTracker tracks individual operation performance type OperationTracker struct { profiler *PerformanceProfiler operation string startTime time.Time startMem uint64 } // End completes operation tracking and records metrics func (ot *OperationTracker) End() { duration := time.Since(ot.startTime) endMem := ot.profiler.getCurrentMemory() memoryUsed := endMem - ot.startMem ot.profiler.recordOperation(ot.operation, duration, memoryUsed, nil) } // EndWithError completes operation tracking with error information func (ot *OperationTracker) EndWithError(err error) { duration := time.Since(ot.startTime) endMem := ot.profiler.getCurrentMemory() memoryUsed := endMem - ot.startMem ot.profiler.recordOperation(ot.operation, duration, memoryUsed, err) } // recordOperation records performance data for an operation func (pp *PerformanceProfiler) recordOperation(operation string, duration time.Duration, memoryUsed uint64, err error) { pp.mutex.Lock() defer pp.mutex.Unlock() // Get or create operation profile profile, exists := pp.operations[operation] if !exists { profile = &OperationProfile{ Operation: operation, MinTime: duration, MaxTime: duration, PerformanceClass: "unknown", Bottlenecks: make([]string, 0), Recommendations: make([]string, 0), } pp.operations[operation] = profile } // Update profile metrics profile.TotalCalls++ profile.TotalDuration += duration profile.AverageTime = time.Duration(int64(profile.TotalDuration) / profile.TotalCalls) profile.MemoryUsed += memoryUsed // Update min/max times if duration < profile.MinTime { profile.MinTime = duration } if duration > profile.MaxTime { profile.MaxTime = duration } // Handle errors if err != nil { profile.ErrorCount++ profile.LastError = err.Error() profile.LastErrorTime = time.Now() } // Calculate error rate profile.ErrorRate = float64(profile.ErrorCount) / float64(profile.TotalCalls) * 100 // Store timing for statistical analysis timings := pp.operationTimings[operation] timings = append(timings, duration) // Keep only recent timings (last 1000) if len(timings) > 1000 { timings = timings[len(timings)-1000:] } pp.operationTimings[operation] = timings // Update performance classification pp.updatePerformanceClassification(profile) // Check for performance alerts pp.checkPerformanceAlerts(operation, profile) } // updatePerformanceClassification categorizes operation performance func (pp *PerformanceProfiler) updatePerformanceClassification(profile *OperationProfile) { avgMs := float64(profile.AverageTime.Nanoseconds()) / 1000000 // Convert to milliseconds switch { case avgMs < 10: profile.PerformanceClass = "excellent" case avgMs < 50: profile.PerformanceClass = "good" case avgMs < 200: profile.PerformanceClass = "average" case avgMs < 1000: profile.PerformanceClass = "poor" default: profile.PerformanceClass = "critical" } // Clear and rebuild recommendations profile.Bottlenecks = make([]string, 0) profile.Recommendations = make([]string, 0) // Identify bottlenecks and recommendations if profile.ErrorRate > 5.0 { profile.Bottlenecks = append(profile.Bottlenecks, "High error rate") profile.Recommendations = append(profile.Recommendations, "Investigate error causes and improve error handling") } if avgMs > 100 { profile.Bottlenecks = append(profile.Bottlenecks, "Slow response time") profile.Recommendations = append(profile.Recommendations, "Optimize algorithm or add caching") } if profile.MemoryUsed > 10*1024*1024 { // > 10MB per operation profile.Bottlenecks = append(profile.Bottlenecks, "High memory usage") profile.Recommendations = append(profile.Recommendations, "Optimize memory allocation and add object pooling") } } // checkPerformanceAlerts checks for performance threshold violations func (pp *PerformanceProfiler) checkPerformanceAlerts(operation string, profile *OperationProfile) { now := time.Now() // Check response time threshold if threshold, exists := pp.thresholds["response_time"]; exists { avgMs := float64(profile.AverageTime.Nanoseconds()) / 1000000 if avgMs > threshold.Warning { severity := "warning" if avgMs > threshold.Critical { severity = "critical" } alert := PerformanceAlert{ ID: fmt.Sprintf("%s_%s_%d", operation, "response_time", now.Unix()), Type: "response_time", Severity: severity, Message: fmt.Sprintf("Operation %s has high response time: %.2fms", operation, avgMs), Metric: "response_time", Value: avgMs, Threshold: threshold.Warning, Timestamp: now, Operation: operation, Context: map[string]interface{}{ "average_time": profile.AverageTime.String(), "total_calls": profile.TotalCalls, "error_rate": profile.ErrorRate, }, ImpactLevel: pp.calculateImpactLevel(avgMs, threshold.Critical), AffectedOps: []string{operation}, Recommendations: []string{ "Analyze operation for optimization opportunities", "Consider adding caching or async processing", "Review algorithm complexity", }, } pp.alerts = append(pp.alerts, alert) } } // Check error rate threshold if threshold, exists := pp.thresholds["error_rate"]; exists { if profile.ErrorRate > threshold.Warning { severity := "warning" if profile.ErrorRate > threshold.Critical { severity = "critical" } alert := PerformanceAlert{ ID: fmt.Sprintf("%s_%s_%d", operation, "error_rate", now.Unix()), Type: "error_rate", Severity: severity, Message: fmt.Sprintf("Operation %s has high error rate: %.2f%%", operation, profile.ErrorRate), Metric: "error_rate", Value: profile.ErrorRate, Threshold: threshold.Warning, Timestamp: now, Operation: operation, Context: map[string]interface{}{ "error_count": profile.ErrorCount, "total_calls": profile.TotalCalls, "last_error": profile.LastError, }, ImpactLevel: pp.calculateImpactLevel(profile.ErrorRate, threshold.Critical), AffectedOps: []string{operation}, Recommendations: []string{ "Investigate root cause of errors", "Improve error handling and recovery", "Add input validation and sanitization", }, } pp.alerts = append(pp.alerts, alert) } } } // calculateImpactLevel determines the impact level of a performance issue func (pp *PerformanceProfiler) calculateImpactLevel(value, criticalThreshold float64) string { ratio := value / criticalThreshold switch { case ratio < 0.5: return "low" case ratio < 0.8: return "medium" case ratio < 1.2: return "high" default: return "critical" } } // getCurrentMemory returns current memory usage func (pp *PerformanceProfiler) getCurrentMemory() uint64 { var m runtime.MemStats runtime.ReadMemStats(&m) return m.Alloc } // startMonitoring begins background performance monitoring func (pp *PerformanceProfiler) startMonitoring() { ticker := time.NewTicker(pp.config.SamplingInterval) defer ticker.Stop() for { select { case <-pp.ctx.Done(): return case <-ticker.C: pp.collectSystemMetrics() pp.cleanupOldData() } } } // collectSystemMetrics gathers system-level performance metrics func (pp *PerformanceProfiler) collectSystemMetrics() { pp.mutex.Lock() defer pp.mutex.Unlock() var m runtime.MemStats runtime.ReadMemStats(&m) now := time.Now() // Update memory metrics pp.metrics["heap_alloc"] = &PerformanceMetric{ Name: "heap_alloc", Type: "gauge", Value: float64(m.Alloc), Unit: "bytes", Timestamp: now, } pp.metrics["heap_sys"] = &PerformanceMetric{ Name: "heap_sys", Type: "gauge", Value: float64(m.HeapSys), Unit: "bytes", Timestamp: now, } pp.metrics["goroutines"] = &PerformanceMetric{ Name: "goroutines", Type: "gauge", Value: float64(runtime.NumGoroutine()), Unit: "count", Timestamp: now, } pp.metrics["gc_cycles"] = &PerformanceMetric{ Name: "gc_cycles", Type: "counter", Value: float64(m.NumGC), Unit: "count", Timestamp: now, } // Update resource usage pp.resourceUsage = &ResourceUsage{ HeapUsed: m.Alloc, HeapAllocated: m.TotalAlloc, HeapIdle: m.HeapIdle, HeapReleased: m.HeapReleased, StackUsed: m.StackInuse, GCCycles: m.NumGC, Timestamp: now, } // Check system-level alerts pp.checkSystemAlerts() } // checkSystemAlerts monitors system-level performance thresholds func (pp *PerformanceProfiler) checkSystemAlerts() { now := time.Now() // Check memory usage if threshold, exists := pp.thresholds["memory_usage"]; exists { currentMem := float64(pp.resourceUsage.HeapUsed) if currentMem > threshold.Warning { severity := "warning" if currentMem > threshold.Critical { severity = "critical" } alert := PerformanceAlert{ ID: fmt.Sprintf("system_memory_%d", now.Unix()), Type: "memory", Severity: severity, Message: fmt.Sprintf("High system memory usage: %.2f MB", currentMem/1024/1024), Metric: "memory_usage", Value: currentMem, Threshold: threshold.Warning, Timestamp: now, Operation: "system", Context: map[string]interface{}{ "heap_alloc": pp.resourceUsage.HeapUsed, "heap_sys": pp.resourceUsage.HeapAllocated, "gc_cycles": pp.resourceUsage.GCCycles, }, ImpactLevel: pp.calculateImpactLevel(currentMem, threshold.Critical), AffectedOps: []string{"all"}, Recommendations: []string{ "Force garbage collection", "Review memory allocation patterns", "Implement object pooling", "Check for memory leaks", }, } pp.alerts = append(pp.alerts, alert) } } // Check goroutine count if threshold, exists := pp.thresholds["goroutine_count"]; exists { goroutineCount := float64(runtime.NumGoroutine()) if goroutineCount > threshold.Warning { severity := "warning" if goroutineCount > threshold.Critical { severity = "critical" } alert := PerformanceAlert{ ID: fmt.Sprintf("system_goroutines_%d", now.Unix()), Type: "goroutines", Severity: severity, Message: fmt.Sprintf("High goroutine count: %.0f", goroutineCount), Metric: "goroutine_count", Value: goroutineCount, Threshold: threshold.Warning, Timestamp: now, Operation: "system", Context: map[string]interface{}{ "goroutine_count": int(goroutineCount), }, ImpactLevel: pp.calculateImpactLevel(goroutineCount, threshold.Critical), AffectedOps: []string{"all"}, Recommendations: []string{ "Investigate goroutine leaks", "Review concurrent operations", "Implement goroutine pools", "Add proper cleanup in defer statements", }, } pp.alerts = append(pp.alerts, alert) } } } // cleanupOldData removes expired performance data func (pp *PerformanceProfiler) cleanupOldData() { pp.mutex.Lock() defer pp.mutex.Unlock() cutoff := time.Now().Add(-pp.config.RetentionPeriod) // Clean up old alerts activeAlerts := make([]PerformanceAlert, 0) for _, alert := range pp.alerts { if alert.Timestamp.After(cutoff) { activeAlerts = append(activeAlerts, alert) } } pp.alerts = activeAlerts // Clean up old operation timings for operation, timings := range pp.operationTimings { if len(timings) > 100 { // Keep last 100 timings pp.operationTimings[operation] = timings[len(timings)-100:] } } } // GenerateReport creates a comprehensive performance report func (pp *PerformanceProfiler) GenerateReport() (*PerformanceReport, error) { pp.mutex.RLock() defer pp.mutex.RUnlock() now := time.Now() report := &PerformanceReport{ ID: fmt.Sprintf("perf_report_%d", now.Unix()), Timestamp: now, Period: pp.config.ReportInterval, } // Calculate overall health report.OverallHealth, report.HealthScore = pp.calculateOverallHealth() // Get top operations by various metrics report.TopOperations = pp.getTopOperations(10) // Analyze bottlenecks report.Bottlenecks = pp.analyzeBottlenecks() // Generate improvement suggestions report.Improvements = pp.generateImprovementSuggestions() // Resource summary report.ResourceSummary = pp.generateResourceSummary() // Trend analysis report.TrendAnalysis = pp.performTrendAnalysis() // Current alerts report.ActiveAlerts = pp.getActiveAlerts() report.ResolvedAlerts = pp.getResolvedAlerts() // Generate recommendations report.Recommendations = pp.generateRecommendations() report.OptimizationPlan = pp.createOptimizationPlan(report.Recommendations) // Store report pp.reports = append(pp.reports, report) return report, nil } // calculateOverallHealth determines system health and score func (pp *PerformanceProfiler) calculateOverallHealth() (string, float64) { score := 100.0 // Deduct points for performance issues for _, alert := range pp.alerts { switch alert.Severity { case "warning": score -= 5 case "critical": score -= 15 } } // Deduct points for poor performing operations for _, op := range pp.operations { switch op.PerformanceClass { case "poor": score -= 2 case "critical": score -= 5 } } // Ensure score doesn't go below 0 if score < 0 { score = 0 } // Determine health level var health string switch { case score >= 90: health = "excellent" case score >= 80: health = "good" case score >= 60: health = "fair" case score >= 40: health = "poor" default: health = "critical" } return health, score } // getTopOperations returns operations sorted by various performance metrics func (pp *PerformanceProfiler) getTopOperations(limit int) []*OperationProfile { operations := make([]*OperationProfile, 0, len(pp.operations)) for _, op := range pp.operations { operations = append(operations, op) } // Sort by total duration (highest first) sort.Slice(operations, func(i, j int) bool { return operations[i].TotalDuration > operations[j].TotalDuration }) if len(operations) > limit { operations = operations[:limit] } return operations } // analyzeBottlenecks identifies system bottlenecks func (pp *PerformanceProfiler) analyzeBottlenecks() []BottleneckAnalysis { bottlenecks := make([]BottleneckAnalysis, 0) // Check for memory bottlenecks if pp.resourceUsage.HeapUsed > 512*1024*1024 { // > 512MB bottlenecks = append(bottlenecks, BottleneckAnalysis{ Operation: "system", Type: "memory", Severity: "high", Impact: 80.0, Description: "High memory usage detected", Solution: "Implement memory optimization and garbage collection tuning", }) } // Check for goroutine bottlenecks goroutineCount := runtime.NumGoroutine() if goroutineCount > 500 { bottlenecks = append(bottlenecks, BottleneckAnalysis{ Operation: "system", Type: "goroutines", Severity: "medium", Impact: 60.0, Description: fmt.Sprintf("High goroutine count: %d", goroutineCount), Solution: "Implement goroutine pooling and proper lifecycle management", }) } // Check operation-specific bottlenecks for _, op := range pp.operations { if op.PerformanceClass == "critical" || op.PerformanceClass == "poor" { severity := "medium" impact := 50.0 if op.PerformanceClass == "critical" { severity = "high" impact = 75.0 } bottlenecks = append(bottlenecks, BottleneckAnalysis{ Operation: op.Operation, Type: "performance", Severity: severity, Impact: impact, Description: fmt.Sprintf("Operation %s has %s performance", op.Operation, op.PerformanceClass), Solution: "Optimize algorithm and implementation", }) } } return bottlenecks } // generateImprovementSuggestions creates actionable improvement suggestions func (pp *PerformanceProfiler) generateImprovementSuggestions() []ImprovementSuggestion { suggestions := make([]ImprovementSuggestion, 0) // Memory optimization suggestions memUsage := float64(pp.resourceUsage.HeapUsed) / (1024 * 1024) // MB if memUsage > 256 { suggestions = append(suggestions, ImprovementSuggestion{ Area: "memory", Current: memUsage, Target: memUsage * 0.7, Improvement: 30.0, Effort: "medium", Priority: "high", Description: "Reduce memory usage through optimization", }) } // Performance optimization for slow operations for _, op := range pp.operations { if op.PerformanceClass == "poor" || op.PerformanceClass == "critical" { avgMs := float64(op.AverageTime.Nanoseconds()) / 1000000 target := avgMs * 0.5 // 50% improvement suggestions = append(suggestions, ImprovementSuggestion{ Area: fmt.Sprintf("operation_%s", op.Operation), Current: avgMs, Target: target, Improvement: 50.0, Effort: "high", Priority: "high", Description: fmt.Sprintf("Optimize %s operation performance", op.Operation), }) } } return suggestions } // generateResourceSummary creates resource efficiency summary func (pp *PerformanceProfiler) generateResourceSummary() *ResourceSummary { // Calculate efficiency scores (0-100) memEfficiency := pp.calculateMemoryEfficiency() cpuEfficiency := pp.calculateCPUEfficiency() gcEfficiency := pp.calculateGCEfficiency() throughputScore := pp.calculateThroughputScore() return &ResourceSummary{ MemoryEfficiency: memEfficiency, CPUEfficiency: cpuEfficiency, GCEfficiency: gcEfficiency, ThroughputScore: throughputScore, } } // calculateMemoryEfficiency determines memory usage efficiency func (pp *PerformanceProfiler) calculateMemoryEfficiency() float64 { // Simple heuristic: lower memory usage relative to system capacity = higher efficiency maxReasonable := float64(512 * 1024 * 1024) // 512MB current := float64(pp.resourceUsage.HeapUsed) if current > maxReasonable { return 100.0 - ((current-maxReasonable)/maxReasonable)*100.0 } return 100.0 - (current/maxReasonable)*30.0 // Use up to 30% penalty for reasonable usage } // calculateCPUEfficiency determines CPU usage efficiency func (pp *PerformanceProfiler) calculateCPUEfficiency() float64 { // Simplified calculation based on operation performance totalOps := len(pp.operations) if totalOps == 0 { return 100.0 } goodOps := 0 for _, op := range pp.operations { if op.PerformanceClass == "excellent" || op.PerformanceClass == "good" { goodOps++ } } return float64(goodOps) / float64(totalOps) * 100.0 } // calculateGCEfficiency determines garbage collection efficiency func (pp *PerformanceProfiler) calculateGCEfficiency() float64 { // High GC cycles relative to allocation might indicate inefficiency // This is a simplified heuristic if pp.resourceUsage.GCCycles == 0 { return 100.0 } // Lower GC frequency for higher allocations = better efficiency allocations := float64(pp.resourceUsage.HeapAllocated) gcCycles := float64(pp.resourceUsage.GCCycles) ratio := allocations / (gcCycles * 1024 * 1024) // MB per GC cycle switch { case ratio > 100: return 100.0 case ratio > 50: return 90.0 case ratio > 20: return 75.0 case ratio > 10: return 60.0 default: return 40.0 } } // calculateThroughputScore determines overall throughput score func (pp *PerformanceProfiler) calculateThroughputScore() float64 { if len(pp.operations) == 0 { return 100.0 } totalScore := 0.0 for _, op := range pp.operations { switch op.PerformanceClass { case "excellent": totalScore += 100.0 case "good": totalScore += 80.0 case "average": totalScore += 60.0 case "poor": totalScore += 40.0 case "critical": totalScore += 20.0 } } return totalScore / float64(len(pp.operations)) } // performTrendAnalysis analyzes performance trends func (pp *PerformanceProfiler) performTrendAnalysis() *PerformanceTrends { // Simplified trend analysis - in production, this would analyze historical data trends := &PerformanceTrends{ MemoryTrend: "stable", CPUTrend: "stable", ThroughputTrend: "stable", ErrorRateTrend: "stable", PredictedIssues: make([]string, 0), } // Check for concerning patterns activeAlertCount := len(pp.getActiveAlerts()) if activeAlertCount > 5 { trends.PredictedIssues = append(trends.PredictedIssues, "High alert volume may indicate system stress") } // Check memory growth trend if pp.resourceUsage.HeapUsed > 256*1024*1024 { trends.MemoryTrend = "increasing" trends.PredictedIssues = append(trends.PredictedIssues, "Memory usage trending upward") } return trends } // getActiveAlerts returns currently active alerts func (pp *PerformanceProfiler) getActiveAlerts() []PerformanceAlert { active := make([]PerformanceAlert, 0) for _, alert := range pp.alerts { if !alert.Resolved { active = append(active, alert) } } return active } // getResolvedAlerts returns recently resolved alerts func (pp *PerformanceProfiler) getResolvedAlerts() []PerformanceAlert { resolved := make([]PerformanceAlert, 0) for _, alert := range pp.alerts { if alert.Resolved { resolved = append(resolved, alert) } } return resolved } // generateRecommendations creates performance recommendations func (pp *PerformanceProfiler) generateRecommendations() []PerformanceRecommendation { recommendations := make([]PerformanceRecommendation, 0) // Memory recommendations if pp.resourceUsage.HeapUsed > 256*1024*1024 { recommendations = append(recommendations, PerformanceRecommendation{ Type: "immediate", Priority: "high", Category: "memory", Title: "Optimize Memory Usage", Description: "High memory usage detected. Consider implementing object pooling and optimizing data structures.", Implementation: "Add object pools for frequently allocated objects, review string concatenation, optimize slice allocations", ExpectedGain: 25.0, Effort: "medium", }) } // Performance recommendations for slow operations for _, op := range pp.operations { if op.PerformanceClass == "poor" || op.PerformanceClass == "critical" { recommendations = append(recommendations, PerformanceRecommendation{ Type: "short_term", Priority: "high", Category: "algorithm", Title: fmt.Sprintf("Optimize %s Operation", op.Operation), Description: fmt.Sprintf("Operation %s has %s performance with average time %v", op.Operation, op.PerformanceClass, op.AverageTime), Implementation: "Review algorithm complexity, add caching, implement parallel processing where appropriate", ExpectedGain: 40.0, Effort: "high", }) } } // Goroutine recommendations if runtime.NumGoroutine() > 500 { recommendations = append(recommendations, PerformanceRecommendation{ Type: "immediate", Priority: "medium", Category: "architecture", Title: "Implement Goroutine Pooling", Description: "High goroutine count detected. Implement pooling to reduce overhead.", Implementation: "Create worker pools for concurrent operations, add proper goroutine lifecycle management", ExpectedGain: 15.0, Effort: "medium", }) } return recommendations } // createOptimizationPlan creates a phased optimization plan func (pp *PerformanceProfiler) createOptimizationPlan(recommendations []PerformanceRecommendation) *OptimizationPlan { plan := &OptimizationPlan{ Phase1: make([]PerformanceRecommendation, 0), Phase2: make([]PerformanceRecommendation, 0), Phase3: make([]PerformanceRecommendation, 0), TotalGain: 0.0, Timeline: 3 * time.Hour, // 3 hours for all phases } // Categorize recommendations by type for _, rec := range recommendations { plan.TotalGain += rec.ExpectedGain switch rec.Type { case "immediate": plan.Phase1 = append(plan.Phase1, rec) case "short_term": plan.Phase2 = append(plan.Phase2, rec) case "long_term": plan.Phase3 = append(plan.Phase3, rec) } } return plan } // ExportMetrics exports current metrics in various formats func (pp *PerformanceProfiler) ExportMetrics(format string) ([]byte, error) { pp.mutex.RLock() defer pp.mutex.RUnlock() switch format { case "json": return json.MarshalIndent(pp.metrics, "", " ") case "prometheus": return pp.exportPrometheusMetrics(), nil default: return nil, fmt.Errorf("unsupported export format: %s", format) } } // exportPrometheusMetrics exports metrics in Prometheus format func (pp *PerformanceProfiler) exportPrometheusMetrics() []byte { output := []string{ "# HELP mev_bot_performance_metrics Performance metrics for MEV bot", "# TYPE mev_bot_performance_metrics gauge", } for _, metric := range pp.metrics { line := fmt.Sprintf("mev_bot_%s{type=\"%s\",unit=\"%s\"} %f %d", metric.Name, metric.Type, metric.Unit, metric.Value, metric.Timestamp.Unix()) output = append(output, line) } return []byte(fmt.Sprintf("%s\n", output)) } // Stop gracefully shuts down the performance profiler func (pp *PerformanceProfiler) Stop() error { pp.cancel() // Generate final report finalReport, err := pp.GenerateReport() if err != nil { pp.logger.Error("Failed to generate final performance report", "error", err) return err } pp.logger.Info("Performance profiler stopped", "final_health", finalReport.OverallHealth, "health_score", finalReport.HealthScore, "total_operations", len(pp.operations), "active_alerts", len(pp.getActiveAlerts())) return nil }