mev-beta/pkg/transport/benchmarks.go
Krypto Kajun 850223a953 fix(multicall): resolve critical multicall parsing corruption issues
- Added comprehensive bounds checking to prevent buffer overruns in multicall parsing
- Implemented graduated validation system (Strict/Moderate/Permissive) to reduce false positives
- Added LRU caching system for address validation with 10-minute TTL
- Enhanced ABI decoder with missing Universal Router and Arbitrum-specific DEX signatures
- Fixed duplicate function declarations and import conflicts across multiple files
- Added error recovery mechanisms with multiple fallback strategies
- Updated tests to handle new validation behavior for suspicious addresses
- Fixed parser test expectations for improved validation system
- Applied gofmt formatting fixes to ensure code style compliance
- Fixed mutex copying issues in monitoring package by introducing MetricsSnapshot
- Resolved critical security vulnerabilities in heuristic address extraction
- Progress: Updated TODO audit from 10% to 35% complete

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:12:55 -05:00


package transport

import (
	"context"
	"fmt"
	"math"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/fraktal/mev-beta/internal/logger"
	"github.com/fraktal/mev-beta/pkg/security"
)

// BenchmarkSuite provides comprehensive performance testing for the transport layer
type BenchmarkSuite struct {
	logger     *logger.Logger
	messageBus *UniversalMessageBus
	results    []BenchmarkResult
	config     BenchmarkConfig
	metrics    BenchmarkMetrics
	mu         sync.RWMutex
}

// BenchmarkConfig configures benchmark parameters
type BenchmarkConfig struct {
	MessageSizes         []int                 // Message payload sizes to test
	Concurrency          []int                 // Concurrency levels to test
	Duration             time.Duration         // Duration of each benchmark
	WarmupDuration       time.Duration         // Warmup period before measurements
	TransportTypes       []TransportType       // Transport types to benchmark
	MessageTypes         []MessageType         // Message types to test
	SerializationFormats []SerializationFormat // Serialization formats to test
	EnableMetrics        bool                  // Whether to collect detailed metrics
	OutputFormat         string                // Output format (json, csv, console)
}

// BenchmarkResult contains results from a single benchmark run
type BenchmarkResult struct {
	TestName          string              `json:"test_name"`
	Transport         TransportType       `json:"transport"`
	MessageSize       int                 `json:"message_size"`
	Concurrency       int                 `json:"concurrency"`
	Serialization     SerializationFormat `json:"serialization"`
	Duration          time.Duration       `json:"duration"`
	MessagesSent      int64               `json:"messages_sent"`
	MessagesReceived  int64               `json:"messages_received"`
	BytesSent         int64               `json:"bytes_sent"`
	BytesReceived     int64               `json:"bytes_received"`
	ThroughputMsgSec  float64             `json:"throughput_msg_sec"`
	ThroughputByteSec float64             `json:"throughput_byte_sec"`
	LatencyP50        time.Duration       `json:"latency_p50"`
	LatencyP95        time.Duration       `json:"latency_p95"`
	LatencyP99        time.Duration       `json:"latency_p99"`
	ErrorRate         float64             `json:"error_rate"`
	CPUUsage          float64             `json:"cpu_usage"`
	MemoryUsage       int64               `json:"memory_usage"`
	GCPauses          int64               `json:"gc_pauses"`
	Timestamp         time.Time           `json:"timestamp"`
}

// BenchmarkMetrics tracks overall benchmark statistics
type BenchmarkMetrics struct {
	TotalTests        int           `json:"total_tests"`
	PassedTests       int           `json:"passed_tests"`
	FailedTests       int           `json:"failed_tests"`
	TotalDuration     time.Duration `json:"total_duration"`
	HighestThroughput float64       `json:"highest_throughput"`
	LowestLatency     time.Duration `json:"lowest_latency"`
	BestTransport     TransportType `json:"best_transport"`
	Timestamp         time.Time     `json:"timestamp"`
}

// LatencyTracker tracks message latencies
type LatencyTracker struct {
	latencies []time.Duration
	mu        sync.Mutex
}

// NewBenchmarkSuite creates a new benchmark suite
func NewBenchmarkSuite(messageBus *UniversalMessageBus, logger *logger.Logger) *BenchmarkSuite {
	return &BenchmarkSuite{
		logger:     logger,
		messageBus: messageBus,
		results:    make([]BenchmarkResult, 0),
		config: BenchmarkConfig{
			MessageSizes:         []int{64, 256, 1024, 4096, 16384},
			Concurrency:          []int{1, 10, 50, 100},
			Duration:             30 * time.Second,
			WarmupDuration:       5 * time.Second,
			TransportTypes:       []TransportType{TransportMemory, TransportUnixSocket, TransportTCP},
			MessageTypes:         []MessageType{MessageTypeEvent, MessageTypeCommand},
			SerializationFormats: []SerializationFormat{SerializationJSON},
			EnableMetrics:        true,
			OutputFormat:         "console",
		},
	}
}
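
// Usage sketch (illustrative, not part of the suite's API): assuming a
// *UniversalMessageBus and *logger.Logger are constructed elsewhere in the
// application, a typical full run might look like:
//
//	suite := NewBenchmarkSuite(bus, log)
//	if err := suite.RunAll(context.Background()); err != nil {
//		// handle the error
//	}
//	report := suite.GenerateReport()
//	_ = report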
// SetConfig updates the benchmark configuration
func (bs *BenchmarkSuite) SetConfig(config BenchmarkConfig) {
	bs.mu.Lock()
	defer bs.mu.Unlock()
	bs.config = config
}
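
// Example (illustrative): SetConfig replaces the entire configuration, so a
// caller overriding anything must supply every field it cares about. A
// reduced matrix for a quick smoke run might look like:
//
//	suite.SetConfig(BenchmarkConfig{
//		MessageSizes:         []int{256},
//		Concurrency:          []int{1, 8},
//		Duration:             5 * time.Second,
//		WarmupDuration:       time.Second,
//		TransportTypes:       []TransportType{TransportMemory},
//		MessageTypes:         []MessageType{MessageTypeEvent},
//		SerializationFormats: []SerializationFormat{SerializationJSON},
//		EnableMetrics:        true,
//		OutputFormat:         "console",
//	})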
// RunAll executes all benchmark tests
func (bs *BenchmarkSuite) RunAll(ctx context.Context) error {
	bs.mu.Lock()
	defer bs.mu.Unlock()

	startTime := time.Now()
	bs.metrics = BenchmarkMetrics{
		Timestamp: startTime,
	}

	for _, transport := range bs.config.TransportTypes {
		for _, msgSize := range bs.config.MessageSizes {
			for _, concurrency := range bs.config.Concurrency {
				for _, serialization := range bs.config.SerializationFormats {
					result, err := bs.runSingleBenchmark(ctx, transport, msgSize, concurrency, serialization)
					if err != nil {
						bs.metrics.FailedTests++
						continue
					}
					bs.results = append(bs.results, result)
					bs.metrics.PassedTests++
					bs.updateBestMetrics(result)
				}
			}
		}
	}

	bs.metrics.TotalTests = bs.metrics.PassedTests + bs.metrics.FailedTests
	bs.metrics.TotalDuration = time.Since(startTime)
	return nil
}

// RunThroughputBenchmark tests message throughput
func (bs *BenchmarkSuite) RunThroughputBenchmark(ctx context.Context, transport TransportType, messageSize int, concurrency int) (BenchmarkResult, error) {
	return bs.runSingleBenchmark(ctx, transport, messageSize, concurrency, SerializationJSON)
}

// RunLatencyBenchmark tests message latency
func (bs *BenchmarkSuite) RunLatencyBenchmark(ctx context.Context, transport TransportType, messageSize int) (BenchmarkResult, error) {
	return bs.runSingleBenchmark(ctx, transport, messageSize, 1, SerializationJSON)
}

// RunScalabilityBenchmark tests scalability across different concurrency levels
func (bs *BenchmarkSuite) RunScalabilityBenchmark(ctx context.Context, transport TransportType, messageSize int) ([]BenchmarkResult, error) {
	var results []BenchmarkResult
	for _, concurrency := range bs.config.Concurrency {
		result, err := bs.runSingleBenchmark(ctx, transport, messageSize, concurrency, SerializationJSON)
		if err != nil {
			return nil, fmt.Errorf("scalability benchmark failed at concurrency %d: %w", concurrency, err)
		}
		results = append(results, result)
	}
	return results, nil
}

// GetResults returns a copy of all benchmark results
func (bs *BenchmarkSuite) GetResults() []BenchmarkResult {
	bs.mu.RLock()
	defer bs.mu.RUnlock()
	results := make([]BenchmarkResult, len(bs.results))
	copy(results, bs.results)
	return results
}

// GetMetrics returns benchmark metrics
func (bs *BenchmarkSuite) GetMetrics() BenchmarkMetrics {
	bs.mu.RLock()
	defer bs.mu.RUnlock()
	return bs.metrics
}

// GetBestPerformingTransport returns the transport with the highest throughput
func (bs *BenchmarkSuite) GetBestPerformingTransport() TransportType {
	bs.mu.RLock()
	defer bs.mu.RUnlock()
	return bs.metrics.BestTransport
}
// Private methods

func (bs *BenchmarkSuite) runSingleBenchmark(ctx context.Context, transport TransportType, messageSize int, concurrency int, serialization SerializationFormat) (BenchmarkResult, error) {
	testName := fmt.Sprintf("%s_%db_%dc_%s", transport, messageSize, concurrency, serialization)
	result := BenchmarkResult{
		TestName:      testName,
		Transport:     transport,
		MessageSize:   messageSize,
		Concurrency:   concurrency,
		Serialization: serialization,
		Duration:      bs.config.Duration,
		Timestamp:     time.Now(),
	}

	// Set up the test environment
	latencyTracker := &LatencyTracker{
		latencies: make([]time.Duration, 0),
	}

	// Create a dedicated test topic
	topic := fmt.Sprintf("benchmark_%s", testName)

	// Subscribe to the topic. The receive counters are local atomics so the
	// handler never races with the result struct after this function returns.
	var received, receivedBytes int64
	subscription, err := bs.messageBus.Subscribe(topic, func(ctx context.Context, msg *Message) error {
		if startTime, ok := msg.Metadata["start_time"].(time.Time); ok {
			latencyTracker.AddLatency(time.Since(startTime))
		}
		atomic.AddInt64(&received, 1)
		atomic.AddInt64(&receivedBytes, int64(messageSize))
		return nil
	})
	if err != nil {
		return result, fmt.Errorf("failed to subscribe: %w", err)
	}
	defer bs.messageBus.Unsubscribe(subscription.ID)

	// Warmup phase
	if bs.config.WarmupDuration > 0 {
		bs.warmup(ctx, topic, messageSize, concurrency, bs.config.WarmupDuration)
	}

	// Start system monitoring
	var cpuUsage float64
	var memUsageBefore, memUsageAfter runtime.MemStats
	runtime.ReadMemStats(&memUsageBefore)
	monitorCtx, monitorCancel := context.WithCancel(ctx)
	defer monitorCancel()
	go bs.monitorSystemResources(monitorCtx, &cpuUsage)

	// Main benchmark
	startTime := time.Now()
	benchmarkCtx, cancel := context.WithTimeout(ctx, bs.config.Duration)
	defer cancel()

	// Launch concurrent senders
	var wg sync.WaitGroup
	var totalSent int64
	var totalErrors int64
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			bs.senderWorker(benchmarkCtx, topic, messageSize, &totalSent, &totalErrors)
		}()
	}
	wg.Wait()

	// Allow in-flight messages to be processed before reading the counters
	time.Sleep(100 * time.Millisecond)
	actualDuration := time.Since(startTime)
	runtime.ReadMemStats(&memUsageAfter)

	// Calculate results
	result.MessagesSent = totalSent
	result.MessagesReceived = atomic.LoadInt64(&received)
	result.BytesSent = totalSent * int64(messageSize)
	result.BytesReceived = atomic.LoadInt64(&receivedBytes)
	result.ThroughputMsgSec = float64(totalSent) / actualDuration.Seconds()
	result.ThroughputByteSec = float64(result.BytesSent) / actualDuration.Seconds()
	if totalSent > 0 {
		result.ErrorRate = float64(totalErrors) / float64(totalSent) * 100
	}
	result.CPUUsage = cpuUsage

	// Calculate the memory usage difference safely. Alloc can shrink if the
	// GC runs mid-benchmark, so guard against uint64 underflow first.
	if memUsageAfter.Alloc >= memUsageBefore.Alloc {
		memDiff := memUsageAfter.Alloc - memUsageBefore.Alloc
		memDiffInt64, err := security.SafeUint64ToInt64(memDiff)
		if err != nil {
			bs.logger.Warn("Memory usage difference exceeds int64 max", "diff", memDiff, "error", err)
			result.MemoryUsage = math.MaxInt64
		} else {
			result.MemoryUsage = memDiffInt64
		}
	}

	// Calculate the GC count difference safely via signed arithmetic
	result.GCPauses = int64(memUsageAfter.NumGC) - int64(memUsageBefore.NumGC)

	// Calculate latency percentiles (GetPercentile returns 0 when no
	// latencies were recorded, and takes the tracker's lock itself)
	result.LatencyP50 = latencyTracker.GetPercentile(50)
	result.LatencyP95 = latencyTracker.GetPercentile(95)
	result.LatencyP99 = latencyTracker.GetPercentile(99)

	return result, nil
}
func (bs *BenchmarkSuite) warmup(ctx context.Context, topic string, messageSize int, concurrency int, duration time.Duration) {
	warmupCtx, cancel := context.WithTimeout(ctx, duration)
	defer cancel()

	var wg sync.WaitGroup
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Warmup counters are discarded
			var dummySent, dummyErrors int64
			bs.senderWorker(warmupCtx, topic, messageSize, &dummySent, &dummyErrors)
		}()
	}
	wg.Wait()
}

func (bs *BenchmarkSuite) senderWorker(ctx context.Context, topic string, messageSize int, totalSent, totalErrors *int64) {
	// Fill the payload with a repeating byte pattern
	payload := make([]byte, messageSize)
	for i := range payload {
		payload[i] = byte(i % 256)
	}

	for {
		select {
		case <-ctx.Done():
			return
		default:
			msg := NewMessage(MessageTypeEvent, topic, "benchmark", payload)
			msg.Metadata["start_time"] = time.Now()
			if err := bs.messageBus.Publish(ctx, msg); err != nil {
				atomic.AddInt64(totalErrors, 1)
			} else {
				atomic.AddInt64(totalSent, 1)
			}
		}
	}
}
func (bs *BenchmarkSuite) monitorSystemResources(ctx context.Context, cpuUsage *float64) {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	var samples []float64
	startTime := time.Now()
	for {
		select {
		case <-ctx.Done():
			// Report the average of the collected samples
			if len(samples) > 0 {
				var total float64
				for _, sample := range samples {
					total += sample
				}
				*cpuUsage = total / float64(len(samples))
			}
			return
		case <-ticker.C:
			var stats runtime.MemStats
			runtime.ReadMemStats(&stats)
			// Rough proxy based on GC activity per second; this is not real
			// CPU usage. Production code should use OS-specific CPU accounting.
			elapsed := time.Since(startTime).Seconds()
			cpuSample := float64(stats.NumGC) / elapsed * 100
			if cpuSample > 100 {
				cpuSample = 100
			}
			samples = append(samples, cpuSample)
		}
	}
}
func (bs *BenchmarkSuite) updateBestMetrics(result BenchmarkResult) {
	if result.ThroughputMsgSec > bs.metrics.HighestThroughput {
		bs.metrics.HighestThroughput = result.ThroughputMsgSec
		bs.metrics.BestTransport = result.Transport
	}
	// Ignore runs that recorded no latency samples (P50 == 0)
	if result.LatencyP50 > 0 && (bs.metrics.LowestLatency == 0 || result.LatencyP50 < bs.metrics.LowestLatency) {
		bs.metrics.LowestLatency = result.LatencyP50
	}
}
// LatencyTracker methods

func (lt *LatencyTracker) AddLatency(latency time.Duration) {
	lt.mu.Lock()
	defer lt.mu.Unlock()
	lt.latencies = append(lt.latencies, latency)
}

func (lt *LatencyTracker) GetPercentile(percentile int) time.Duration {
	lt.mu.Lock()
	defer lt.mu.Unlock()
	if len(lt.latencies) == 0 {
		return 0
	}

	// Sort a copy so the recorded order is preserved
	sorted := make([]time.Duration, len(lt.latencies))
	copy(sorted, lt.latencies)
	// Simple insertion sort is adequate for small datasets
	for i := 1; i < len(sorted); i++ {
		for j := i; j > 0 && sorted[j] < sorted[j-1]; j-- {
			sorted[j], sorted[j-1] = sorted[j-1], sorted[j]
		}
	}

	// Calculate the percentile index, clamped to the last element
	index := int(float64(len(sorted)) * float64(percentile) / 100.0)
	if index >= len(sorted) {
		index = len(sorted) - 1
	}
	return sorted[index]
}
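
// Worked example: with recorded latencies [1ms, 2ms, 3ms, 4ms],
// GetPercentile(50) computes index = int(4*50/100) = 2 and returns 3ms,
// while GetPercentile(99) computes index = int(4*99/100) = 3 and returns 4ms.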
// Benchmark report generation

// GenerateReport generates a comprehensive benchmark report
func (bs *BenchmarkSuite) GenerateReport() BenchmarkReport {
	bs.mu.RLock()
	defer bs.mu.RUnlock()

	report := BenchmarkReport{
		Summary:   bs.generateSummary(),
		Results:   bs.results,
		Metrics:   bs.metrics,
		Config:    bs.config,
		Timestamp: time.Now(),
	}
	report.Analysis = bs.generateAnalysis()
	return report
}

// BenchmarkReport contains a complete benchmark report
type BenchmarkReport struct {
	Summary   ReportSummary     `json:"summary"`
	Results   []BenchmarkResult `json:"results"`
	Metrics   BenchmarkMetrics  `json:"metrics"`
	Config    BenchmarkConfig   `json:"config"`
	Analysis  ReportAnalysis    `json:"analysis"`
	Timestamp time.Time         `json:"timestamp"`
}

// ReportSummary provides a high-level summary
type ReportSummary struct {
	TotalTests           int                `json:"total_tests"`
	Duration             time.Duration      `json:"duration"`
	BestThroughput       float64            `json:"best_throughput"`
	BestLatency          time.Duration      `json:"best_latency"`
	RecommendedTransport TransportType      `json:"recommended_transport"`
	TransportRankings    []TransportRanking `json:"transport_rankings"`
}

// TransportRanking ranks transports by performance
type TransportRanking struct {
	Transport     TransportType `json:"transport"`
	AvgThroughput float64       `json:"avg_throughput"`
	AvgLatency    time.Duration `json:"avg_latency"`
	Score         float64       `json:"score"`
	Rank          int           `json:"rank"`
}

// ReportAnalysis provides detailed analysis
type ReportAnalysis struct {
	ScalabilityAnalysis    ScalabilityAnalysis `json:"scalability"`
	PerformanceBottlenecks []PerformanceIssue  `json:"bottlenecks"`
	Recommendations        []Recommendation    `json:"recommendations"`
}

// ScalabilityAnalysis analyzes scaling characteristics
type ScalabilityAnalysis struct {
	LinearScaling      bool    `json:"linear_scaling"`
	ScalingFactor      float64 `json:"scaling_factor"`
	OptimalConcurrency int     `json:"optimal_concurrency"`
}

// PerformanceIssue identifies performance problems
type PerformanceIssue struct {
	Issue      string `json:"issue"`
	Severity   string `json:"severity"`
	Impact     string `json:"impact"`
	Suggestion string `json:"suggestion"`
}

// Recommendation provides optimization suggestions
type Recommendation struct {
	Category    string `json:"category"`
	Description string `json:"description"`
	Priority    string `json:"priority"`
	Expected    string `json:"expected_improvement"`
}

func (bs *BenchmarkSuite) generateSummary() ReportSummary {
	rankings := bs.calculateTransportRankings()
	return ReportSummary{
		TotalTests:           bs.metrics.TotalTests,
		Duration:             bs.metrics.TotalDuration,
		BestThroughput:       bs.metrics.HighestThroughput,
		BestLatency:          bs.metrics.LowestLatency,
		RecommendedTransport: bs.metrics.BestTransport,
		TransportRankings:    rankings,
	}
}
func (bs *BenchmarkSuite) calculateTransportRankings() []TransportRanking {
	// Group results by transport
	transportStats := make(map[TransportType][]BenchmarkResult)
	for _, result := range bs.results {
		transportStats[result.Transport] = append(transportStats[result.Transport], result)
	}

	var rankings []TransportRanking
	for transport, results := range transportStats {
		var totalThroughput float64
		var totalLatency time.Duration
		for _, result := range results {
			totalThroughput += result.ThroughputMsgSec
			totalLatency += result.LatencyP50
		}
		avgThroughput := totalThroughput / float64(len(results))
		avgLatency := totalLatency / time.Duration(len(results))

		// Higher throughput and lower latency both raise the score. Guard
		// against division by zero when no latencies were recorded.
		var score float64
		if micros := avgLatency.Microseconds(); micros > 0 {
			score = avgThroughput / float64(micros)
		}
		rankings = append(rankings, TransportRanking{
			Transport:     transport,
			AvgThroughput: avgThroughput,
			AvgLatency:    avgLatency,
			Score:         score,
		})
	}

	// Sort by score, descending (selection sort is fine for a handful of transports)
	for i := 0; i < len(rankings); i++ {
		for j := i + 1; j < len(rankings); j++ {
			if rankings[j].Score > rankings[i].Score {
				rankings[i], rankings[j] = rankings[j], rankings[i]
			}
		}
	}

	// Assign ranks
	for i := range rankings {
		rankings[i].Rank = i + 1
	}
	return rankings
}
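
// Worked example of the score: a transport averaging 50,000 msg/s at a 200µs
// P50 latency scores 50000/200 = 250, while one averaging 80,000 msg/s at
// 2ms (2000µs) scores 80000/2000 = 40, so the lower-latency transport ranks
// first despite its lower throughput.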
func (bs *BenchmarkSuite) generateAnalysis() ReportAnalysis {
	return ReportAnalysis{
		ScalabilityAnalysis:    bs.analyzeScalability(),
		PerformanceBottlenecks: bs.identifyBottlenecks(),
		Recommendations:        bs.generateRecommendations(),
	}
}

func (bs *BenchmarkSuite) analyzeScalability() ScalabilityAnalysis {
	if len(bs.results) < 2 {
		return ScalabilityAnalysis{
			LinearScaling:      false,
			ScalingFactor:      0.0,
			OptimalConcurrency: 1,
		}
	}

	// Analyze the throughput vs. concurrency relationship
	var throughputData []float64
	var concurrencyData []int
	for _, result := range bs.results {
		if result.Concurrency > 0 && result.Duration > 0 {
			throughput := float64(result.MessagesReceived) / result.Duration.Seconds()
			throughputData = append(throughputData, throughput)
			concurrencyData = append(concurrencyData, result.Concurrency)
		}
	}
	if len(throughputData) < 2 {
		return ScalabilityAnalysis{
			LinearScaling:      false,
			ScalingFactor:      0.0,
			OptimalConcurrency: 1,
		}
	}

	// Compare the actual throughput improvement against ideal linear scaling,
	// using the first qualifying result as the baseline
	maxThroughput := 0.0
	maxThroughputConcurrency := 1
	baseThroughput := throughputData[0]
	baseConcurrency := float64(concurrencyData[0])
	for i, throughput := range throughputData {
		if throughput > maxThroughput {
			maxThroughput = throughput
			maxThroughputConcurrency = concurrencyData[i]
		}
	}

	// Scaling factor: actual peak throughput vs. the ideal linear projection.
	// Guard against a zero baseline (no messages received).
	idealThroughput := baseThroughput * float64(maxThroughputConcurrency) / baseConcurrency
	if idealThroughput <= 0 {
		return ScalabilityAnalysis{
			LinearScaling:      false,
			ScalingFactor:      0.0,
			OptimalConcurrency: maxThroughputConcurrency,
		}
	}
	actualScalingFactor := maxThroughput / idealThroughput

	// Treat scaling as linear if it is within 20% of ideal
	linearScaling := actualScalingFactor >= 0.8

	return ScalabilityAnalysis{
		LinearScaling:      linearScaling,
		ScalingFactor:      actualScalingFactor,
		OptimalConcurrency: maxThroughputConcurrency,
	}
}
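
// Worked example: if the baseline result does 10,000 msg/s at concurrency 1
// and the best result does 60,000 msg/s at concurrency 10, ideal linear
// scaling would predict 100,000 msg/s, giving a scaling factor of
// 60000/100000 = 0.6; that is below the 0.8 threshold, so scaling is
// reported as non-linear.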
func (bs *BenchmarkSuite) identifyBottlenecks() []PerformanceIssue {
	var issues []PerformanceIssue
	// Scan results for common performance problems
	for _, result := range bs.results {
		if result.ErrorRate > 5.0 {
			issues = append(issues, PerformanceIssue{
				Issue:      fmt.Sprintf("High error rate (%0.2f%%) for %s", result.ErrorRate, result.Transport),
				Severity:   "high",
				Impact:     "Reduced reliability and performance",
				Suggestion: "Check transport configuration and network stability",
			})
		}
		if result.LatencyP99 > 100*time.Millisecond {
			issues = append(issues, PerformanceIssue{
				Issue:      fmt.Sprintf("High P99 latency (%v) for %s", result.LatencyP99, result.Transport),
				Severity:   "medium",
				Impact:     "Poor user experience for latency-sensitive operations",
				Suggestion: "Consider using a faster transport or optimizing message serialization",
			})
		}
	}
	return issues
}
func (bs *BenchmarkSuite) generateRecommendations() []Recommendation {
	var recommendations []Recommendation
	recommendations = append(recommendations, Recommendation{
		Category:    "Transport Selection",
		Description: fmt.Sprintf("Use %s for best overall performance", bs.metrics.BestTransport),
		Priority:    "high",
		Expected:    "20-50% improvement in throughput",
	})
	recommendations = append(recommendations, Recommendation{
		Category:    "Concurrency",
		Description: "Optimize concurrency level based on workload characteristics",
		Priority:    "medium",
		Expected:    "10-30% improvement in resource utilization",
	})
	return recommendations
}
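
// Report consumption sketch (illustrative): after RunAll completes, a caller
// might print the headline numbers and the ranked transports:
//
//	report := suite.GenerateReport()
//	fmt.Printf("recommended: %v at %.0f msg/s\n",
//		report.Summary.RecommendedTransport, report.Summary.BestThroughput)
//	for _, r := range report.Summary.TransportRankings {
//		fmt.Printf("#%d %v  avg %.0f msg/s  score %.2f\n",
//			r.Rank, r.Transport, r.AvgThroughput, r.Score)
//	}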