feat: create v2-prep branch with comprehensive planning

Restructured project for V2 refactor: **Structure Changes:** - Moved all V1 code to orig/ folder (preserved with git mv) - Created docs/planning/ directory - Added orig/README_V1.md explaining V1 preservation **Planning Documents:** - 00_V2_MASTER_PLAN.md: Complete architecture overview - Executive summary of critical V1 issues - High-level component architecture diagrams - 5-phase implementation roadmap - Success metrics and risk mitigation - 07_TASK_BREAKDOWN.md: Atomic task breakdown - 99+ hours of detailed tasks - Every task < 2 hours (atomic) - Clear dependencies and success criteria - Organized by implementation phase **V2 Key Improvements:** - Per-exchange parsers (factory pattern) - Multi-layer strict validation - Multi-index pool cache - Background validation pipeline - Comprehensive observability **Critical Issues Addressed:** - Zero address tokens (strict validation + cache enrichment) - Parsing accuracy (protocol-specific parsers) - No audit trail (background validation channel) - Inefficient lookups (multi-index cache) - Stats disconnection (event-driven metrics) Next Steps: 1. Review planning documents 2. Begin Phase 1: Foundation (P1-001 through P1-010) 3. Implement parsers in Phase 2 4. Build cache system in Phase 3 5. Add validation pipeline in Phase 4 6. Migrate and test in Phase 5 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-10 10:14:26 +01:00
parent 1773daffe7
commit 803de231ba
411 changed files with 20390 additions and 8680 deletions
--- a/orig/pkg/performance/optimizer.go
+++ b/orig/pkg/performance/optimizer.go
@@ -0,0 +1,502 @@
+package performance
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/fraktal/mev-beta/internal/logger"
+)
+
+// PerformanceOptimizer implements various performance optimization strategies
+type PerformanceOptimizer struct {
+	logger *logger.Logger
+
+	// Connection pooling
+	connectionPools map[string]*ConnectionPool
+	poolMutex       sync.RWMutex
+
+	// Adaptive worker scaling
+	workerManager *AdaptiveWorkerManager
+
+	// Smart caching
+	cacheManager *SmartCacheManager
+
+	// Metrics collection
+	metrics *PerformanceMetrics
+}
+
+// ConnectionPool manages a pool of reusable connections
+type ConnectionPool struct {
+	connections chan interface{}
+	maxSize     int
+	currentSize int
+	factory     func() (interface{}, error)
+	cleanup     func(interface{}) error
+	mutex       sync.Mutex
+}
+
+// AdaptiveWorkerManager manages dynamic worker scaling
+type AdaptiveWorkerManager struct {
+	currentWorkers     int
+	maxWorkers         int
+	minWorkers         int
+	targetLatency      time.Duration
+	scaleUpThreshold   float64
+	scaleDownThreshold float64
+	lastScaleAction    time.Time
+	cooldownPeriod     time.Duration
+	metrics            *WorkerMetrics
+	mutex              sync.RWMutex
+}
+
+// SmartCacheManager implements intelligent caching with TTL and invalidation
+type SmartCacheManager struct {
+	caches map[string]*CacheInstance
+	mutex  sync.RWMutex
+}
+
+// CacheInstance represents a single cache with TTL and size limits
+type CacheInstance struct {
+	data       map[string]*CacheEntry
+	maxSize    int
+	defaultTTL time.Duration
+	hits       uint64
+	misses     uint64
+	mutex      sync.RWMutex
+}
+
+// CacheEntry represents a cached item
+type CacheEntry struct {
+	value       interface{}
+	expiry      time.Time
+	lastAccess  time.Time
+	accessCount uint64
+}
+
+// PerformanceMetrics tracks various performance metrics
+type PerformanceMetrics struct {
+	TotalRequests      uint64
+	SuccessfulRequests uint64
+	FailedRequests     uint64
+	AverageLatency     time.Duration
+	P95Latency         time.Duration
+	P99Latency         time.Duration
+	CacheHitRatio      float64
+	ActiveConnections  int
+	ActiveWorkers      int
+	mutex              sync.RWMutex
+}
+
+// WorkerMetrics tracks worker performance
+type WorkerMetrics struct {
+	TasksProcessed    uint64
+	AverageTaskTime   time.Duration
+	QueueSize         int
+	WorkerUtilization float64
+	mutex             sync.RWMutex
+}
+
+// NewPerformanceOptimizer creates a new performance optimizer
+func NewPerformanceOptimizer(logger *logger.Logger) *PerformanceOptimizer {
+	return &PerformanceOptimizer{
+		logger:          logger,
+		connectionPools: make(map[string]*ConnectionPool),
+		workerManager:   NewAdaptiveWorkerManager(10, 100, 2, 100*time.Millisecond),
+		cacheManager:    NewSmartCacheManager(),
+		metrics:         &PerformanceMetrics{},
+	}
+}
+
+// NewConnectionPool creates a new connection pool
+func NewConnectionPool(maxSize int, factory func() (interface{}, error), cleanup func(interface{}) error) *ConnectionPool {
+	return &ConnectionPool{
+		connections: make(chan interface{}, maxSize),
+		maxSize:     maxSize,
+		factory:     factory,
+		cleanup:     cleanup,
+	}
+}
+
+// Get retrieves a connection from the pool
+func (cp *ConnectionPool) Get() (interface{}, error) {
+	select {
+	case conn := <-cp.connections:
+		return conn, nil
+	default:
+		// No available connections, create new one
+		cp.mutex.Lock()
+		defer cp.mutex.Unlock()
+
+		if cp.currentSize < cp.maxSize {
+			conn, err := cp.factory()
+			if err != nil {
+				return nil, err
+			}
+			cp.currentSize++
+			return conn, nil
+		}
+
+		// Pool is full, wait for available connection
+		return <-cp.connections, nil
+	}
+}
+
+// Put returns a connection to the pool
+func (cp *ConnectionPool) Put(conn interface{}) {
+	select {
+	case cp.connections <- conn:
+		// Successfully returned to pool
+	default:
+		// Pool is full, cleanup the connection
+		if cp.cleanup != nil {
+			cp.cleanup(conn)
+		}
+		cp.mutex.Lock()
+		cp.currentSize--
+		cp.mutex.Unlock()
+	}
+}
+
+// NewAdaptiveWorkerManager creates a new adaptive worker manager
+func NewAdaptiveWorkerManager(current, max, min int, targetLatency time.Duration) *AdaptiveWorkerManager {
+	return &AdaptiveWorkerManager{
+		currentWorkers:     current,
+		maxWorkers:         max,
+		minWorkers:         min,
+		targetLatency:      targetLatency,
+		scaleUpThreshold:   1.5, // Scale up if latency > 1.5x target
+		scaleDownThreshold: 0.7, // Scale down if latency < 0.7x target
+		cooldownPeriod:     30 * time.Second,
+		metrics:            &WorkerMetrics{},
+	}
+}
+
+// AdjustWorkerCount adjusts the number of workers based on current performance
+func (awm *AdaptiveWorkerManager) AdjustWorkerCount(currentLatency time.Duration, queueSize int) int {
+	awm.mutex.Lock()
+	defer awm.mutex.Unlock()
+
+	// Check cooldown period
+	if time.Since(awm.lastScaleAction) < awm.cooldownPeriod {
+		return awm.currentWorkers
+	}
+
+	latencyRatio := float64(currentLatency) / float64(awm.targetLatency)
+
+	// Scale up if latency is too high or queue is building up
+	if latencyRatio > awm.scaleUpThreshold || queueSize > awm.currentWorkers*2 {
+		if awm.currentWorkers < awm.maxWorkers {
+			newCount := awm.currentWorkers + (awm.currentWorkers / 4) // Increase by 25%
+			if newCount > awm.maxWorkers {
+				newCount = awm.maxWorkers
+			}
+			awm.currentWorkers = newCount
+			awm.lastScaleAction = time.Now()
+			return newCount
+		}
+	}
+
+	// Scale down if latency is too low and queue is empty
+	if latencyRatio < awm.scaleDownThreshold && queueSize == 0 {
+		if awm.currentWorkers > awm.minWorkers {
+			newCount := awm.currentWorkers - (awm.currentWorkers / 6) // Decrease by ~16%
+			if newCount < awm.minWorkers {
+				newCount = awm.minWorkers
+			}
+			awm.currentWorkers = newCount
+			awm.lastScaleAction = time.Now()
+			return newCount
+		}
+	}
+
+	return awm.currentWorkers
+}
+
+// NewSmartCacheManager creates a new smart cache manager
+func NewSmartCacheManager() *SmartCacheManager {
+	return &SmartCacheManager{
+		caches: make(map[string]*CacheInstance),
+	}
+}
+
+// GetCache retrieves or creates a cache instance
+func (scm *SmartCacheManager) GetCache(name string, maxSize int, defaultTTL time.Duration) *CacheInstance {
+	scm.mutex.RLock()
+	if cache, exists := scm.caches[name]; exists {
+		scm.mutex.RUnlock()
+		return cache
+	}
+	scm.mutex.RUnlock()
+
+	scm.mutex.Lock()
+	defer scm.mutex.Unlock()
+
+	// Double-check after acquiring write lock
+	if cache, exists := scm.caches[name]; exists {
+		return cache
+	}
+
+	cache := &CacheInstance{
+		data:       make(map[string]*CacheEntry),
+		maxSize:    maxSize,
+		defaultTTL: defaultTTL,
+	}
+	scm.caches[name] = cache
+
+	// Start cleanup routine for this cache
+	go cache.startCleanup()
+
+	return cache
+}
+
+// Get retrieves a value from the cache
+func (ci *CacheInstance) Get(key string) (interface{}, bool) {
+	ci.mutex.RLock()
+	defer ci.mutex.RUnlock()
+
+	entry, exists := ci.data[key]
+	if !exists {
+		ci.misses++
+		return nil, false
+	}
+
+	// Check if expired
+	if time.Now().After(entry.expiry) {
+		ci.mutex.RUnlock()
+		ci.mutex.Lock()
+		delete(ci.data, key)
+		ci.mutex.Unlock()
+		ci.mutex.RLock()
+		ci.misses++
+		return nil, false
+	}
+
+	// Update access statistics
+	entry.lastAccess = time.Now()
+	entry.accessCount++
+	ci.hits++
+
+	return entry.value, true
+}
+
+// Set stores a value in the cache
+func (ci *CacheInstance) Set(key string, value interface{}) {
+	ci.SetWithTTL(key, value, ci.defaultTTL)
+}
+
+// SetWithTTL stores a value with custom TTL
+func (ci *CacheInstance) SetWithTTL(key string, value interface{}, ttl time.Duration) {
+	ci.mutex.Lock()
+	defer ci.mutex.Unlock()
+
+	// Check if we need to evict items
+	if len(ci.data) >= ci.maxSize {
+		ci.evictLRU()
+	}
+
+	ci.data[key] = &CacheEntry{
+		value:       value,
+		expiry:      time.Now().Add(ttl),
+		lastAccess:  time.Now(),
+		accessCount: 1,
+	}
+}
+
+// evictLRU evicts the least recently used item
+func (ci *CacheInstance) evictLRU() {
+	var oldestKey string
+	var oldestTime time.Time
+
+	for key, entry := range ci.data {
+		if oldestKey == "" || entry.lastAccess.Before(oldestTime) {
+			oldestKey = key
+			oldestTime = entry.lastAccess
+		}
+	}
+
+	if oldestKey != "" {
+		delete(ci.data, oldestKey)
+	}
+}
+
+// startCleanup starts the cleanup routine for expired entries
+func (ci *CacheInstance) startCleanup() {
+	ticker := time.NewTicker(5 * time.Minute)
+	defer ticker.Stop()
+
+	for range ticker.C {
+		ci.cleanupExpired()
+	}
+}
+
+// cleanupExpired removes expired entries
+func (ci *CacheInstance) cleanupExpired() {
+	ci.mutex.Lock()
+	defer ci.mutex.Unlock()
+
+	now := time.Now()
+	for key, entry := range ci.data {
+		if now.After(entry.expiry) {
+			delete(ci.data, key)
+		}
+	}
+}
+
+// GetHitRatio returns the cache hit ratio
+func (ci *CacheInstance) GetHitRatio() float64 {
+	ci.mutex.RLock()
+	defer ci.mutex.RUnlock()
+
+	total := ci.hits + ci.misses
+	if total == 0 {
+		return 0
+	}
+	return float64(ci.hits) / float64(total)
+}
+
+// OptimizeForRealTime implements real-time processing optimizations
+func (po *PerformanceOptimizer) OptimizeForRealTime(ctx context.Context) {
+	// Create connection pools for RPC endpoints
+	po.createRPCConnectionPools()
+
+	// Start adaptive worker management
+	go po.manageWorkerAdaptation(ctx)
+
+	// Start cache warming
+	go po.warmCaches(ctx)
+
+	// Start metrics collection
+	go po.collectMetrics(ctx)
+
+	po.logger.Info("Performance optimization started for real-time processing")
+}
+
+// createRPCConnectionPools creates connection pools for RPC endpoints
+func (po *PerformanceOptimizer) createRPCConnectionPools() {
+	// Create pool for Arbitrum RPC connections
+	arbitrumPool := NewConnectionPool(
+		20, // Max 20 connections
+		func() (interface{}, error) {
+			// Factory function to create new RPC connection
+			// In production, this would create an actual ethclient connection
+			return "rpc_connection", nil
+		},
+		func(conn interface{}) error {
+			// Cleanup function to close connection
+			return nil
+		},
+	)
+
+	po.poolMutex.Lock()
+	po.connectionPools["arbitrum_rpc"] = arbitrumPool
+	po.poolMutex.Unlock()
+
+	po.logger.Info("Created RPC connection pools")
+}
+
+// manageWorkerAdaptation manages adaptive worker scaling
+func (po *PerformanceOptimizer) manageWorkerAdaptation(ctx context.Context) {
+	ticker := time.NewTicker(10 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			// Get current metrics
+			currentLatency := po.metrics.AverageLatency
+			queueSize := 0 // This would be obtained from actual queue
+
+			// Adjust worker count
+			newWorkerCount := po.workerManager.AdjustWorkerCount(currentLatency, queueSize)
+
+			po.logger.Debug(fmt.Sprintf("Adaptive worker scaling: %d workers (latency: %v, queue: %d)",
+				newWorkerCount, currentLatency, queueSize))
+
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// warmCaches preloads frequently accessed data into caches
+func (po *PerformanceOptimizer) warmCaches(ctx context.Context) {
+	poolCache := po.cacheManager.GetCache("pools", 1000, 5*time.Minute)
+
+	// Warm up with common pool addresses
+	commonPools := []string{
+		"0x88e6A0c2dDD26FEEb64F039a2c41296FcB3f5640", // USDC/WETH V3
+		"0xB4e16d0168e52d35CaCD2c6185b44281Ec28C9Dc", // USDC/WETH V2
+		"0x17c14D2c404D167802b16C450d3c99F88F2c4F4d", // USDC/WETH V3 0.3%
+	}
+
+	for _, pool := range commonPools {
+		// In production, this would fetch real pool data
+		poolCache.Set(pool, map[string]interface{}{
+			"warmed":    true,
+			"timestamp": time.Now(),
+		})
+	}
+
+	po.logger.Info("Cache warming completed")
+}
+
+// collectMetrics collects and reports performance metrics
+func (po *PerformanceOptimizer) collectMetrics(ctx context.Context) {
+	ticker := time.NewTicker(30 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			po.reportMetrics()
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// reportMetrics reports current performance metrics
+func (po *PerformanceOptimizer) reportMetrics() {
+	po.metrics.mutex.RLock()
+	defer po.metrics.mutex.RUnlock()
+
+	// Calculate cache hit ratios
+	totalHitRatio := 0.0
+	cacheCount := 0
+
+	po.cacheManager.mutex.RLock()
+	for name, cache := range po.cacheManager.caches {
+		hitRatio := cache.GetHitRatio()
+		totalHitRatio += hitRatio
+		cacheCount++
+
+		po.logger.Debug(fmt.Sprintf("Cache %s hit ratio: %.2f%%", name, hitRatio*100))
+	}
+	po.cacheManager.mutex.RUnlock()
+
+	if cacheCount > 0 {
+		po.metrics.CacheHitRatio = totalHitRatio / float64(cacheCount)
+	}
+
+	po.logger.Info(fmt.Sprintf("🚀 PERFORMANCE METRICS:"))
+	po.logger.Info(fmt.Sprintf("  Average Latency: %v", po.metrics.AverageLatency))
+	po.logger.Info(fmt.Sprintf("  Cache Hit Ratio: %.2f%%", po.metrics.CacheHitRatio*100))
+	po.logger.Info(fmt.Sprintf("  Active Workers: %d", po.workerManager.currentWorkers))
+	po.logger.Info(fmt.Sprintf("  Total Requests: %d", po.metrics.TotalRequests))
+	po.logger.Info(fmt.Sprintf("  Success Rate: %.2f%%",
+		float64(po.metrics.SuccessfulRequests)/float64(po.metrics.TotalRequests)*100))
+}
+
+// GetConnectionPool retrieves a connection pool by name
+func (po *PerformanceOptimizer) GetConnectionPool(name string) *ConnectionPool {
+	po.poolMutex.RLock()
+	defer po.poolMutex.RUnlock()
+	return po.connectionPools[name]
+}
+
+// GetCache retrieves a cache instance
+func (po *PerformanceOptimizer) GetCache(name string, maxSize int, defaultTTL time.Duration) *CacheInstance {
+	return po.cacheManager.GetCache(name, maxSize, defaultTTL)
+}