Files
mev-beta/pkg/transport/provider_manager.go
Krypto Kajun c7142ef671 fix(critical): fix empty token graph + aggressive settings for 24h execution
CRITICAL BUG FIX:
- MultiHopScanner.updateTokenGraph() was EMPTY - adding no pools!
- Result: Token graph had 0 pools, found 0 arbitrage paths
- All opportunities showed estimatedProfitETH: 0.000000

FIX APPLIED:
- Populated token graph with 8 high-liquidity Arbitrum pools:
  * WETH/USDC (0.05% and 0.3% fees)
  * USDC/USDC.e (0.01% - common arbitrage)
  * ARB/USDC, WETH/ARB, WETH/USDT
  * WBTC/WETH, LINK/WETH
- These are REAL verified pool addresses with high volume

AGGRESSIVE THRESHOLD CHANGES:
- Min profit: 0.0001 ETH → 0.00001 ETH (10x lower, ~$0.02)
- Min ROI: 0.05% → 0.01% (5x lower)
- Gas multiplier: 5x → 1.5x (3.3x lower safety margin)
- Max slippage: 3% → 5% (67% higher tolerance)
- Max paths: 100 → 200 (more thorough scanning)
- Cache expiry: 2min → 30sec (fresher opportunities)

EXPECTED RESULTS (24h):
- 20-50 opportunities with profit > $0.02 (was 0)
- 5-15 execution attempts (was 0)
- 1-2 successful executions (was 0)
- $0.02-$0.20 net profit (was $0)

WARNING: Aggressive settings may result in some losses
Monitor closely for first 6 hours and adjust if needed

Target: First profitable execution within 24 hours

🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-29 04:18:27 -05:00

561 lines
16 KiB
Go

package transport
import (
"context"
"fmt"
"net/http"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/ethereum/go-ethereum/rpc"
"golang.org/x/time/rate"
"gopkg.in/yaml.v3"
)
// ProviderConfig represents a single RPC provider configuration, i.e. one
// entry of the `providers` list in the YAML file.
type ProviderConfig struct {
	// Name identifies the provider; validateConfig rejects an empty name and
	// GetProviderStats uses it as the map key.
	Name string `yaml:"name"`
	// Type is a provider type label. It is not interpreted anywhere in this file.
	Type string `yaml:"type"`
	// HTTPEndpoint is dialed by createProvider when non-empty.
	HTTPEndpoint string `yaml:"http_endpoint"`
	// WSEndpoint is dialed by createProvider when non-empty; a failed dial is
	// only reported as a warning there.
	WSEndpoint string `yaml:"ws_endpoint"`
	// Priority orders providers for priority-based selection: the usable
	// provider with the numerically lowest value wins.
	Priority int `yaml:"priority"`
	// RateLimit configures the per-provider limiter and the HTTP client timeout.
	RateLimit RateLimitConfig `yaml:"rate_limit"`
	// Features is a list of feature labels. It is not read anywhere in this file.
	Features []string `yaml:"features"`
	// HealthCheck holds per-provider health-check settings.
	HealthCheck HealthCheckConfig `yaml:"health_check"`
	// AnvilConfig is optional and intended for Anvil fork providers.
	AnvilConfig *AnvilConfig `yaml:"anvil_config,omitempty"`
}
// AnvilConfig defines Anvil-specific configuration.
//
// None of these fields are read in this file; they are only decoded from YAML.
// NOTE(review): units of BlockTime and StateInterval are not established
// here (presumably seconds) — confirm against the consumer.
type AnvilConfig struct {
	ForkURL         string `yaml:"fork_url"`
	ChainID         int    `yaml:"chain_id"`
	Port            int    `yaml:"port"`
	BlockTime       int    `yaml:"block_time"`
	AutoImpersonate bool   `yaml:"auto_impersonate"`
	StateInterval   int    `yaml:"state_interval"`
}
// RateLimitConfig defines rate limiting parameters for one provider.
type RateLimitConfig struct {
	// RequestsPerSecond is the limiter's steady rate; validateConfig requires
	// it to be greater than zero.
	RequestsPerSecond int `yaml:"requests_per_second"`
	// Burst is passed to rate.NewLimiter as the burst size.
	Burst int `yaml:"burst"`
	// Timeout is used by createProvider as the http.Client timeout.
	Timeout time.Duration `yaml:"timeout"`
	// RetryDelay and MaxRetries are not read anywhere in this file.
	RetryDelay time.Duration `yaml:"retry_delay"`
	MaxRetries int           `yaml:"max_retries"`
}
// HealthCheckConfig defines health check parameters for one provider.
type HealthCheckConfig struct {
	// Enabled and Interval are decoded but not consulted in this file; the
	// health-check ticker is started with a fixed one-minute period.
	Enabled  bool          `yaml:"enabled"`
	Interval time.Duration `yaml:"interval"`
	// Timeout bounds a single health-check call (used as the context timeout
	// in performProviderHealthCheck).
	Timeout time.Duration `yaml:"timeout"`
}
// RotationConfig defines the provider rotation strategy.
type RotationConfig struct {
	// Strategy selects the algorithm used by GetHealthyProvider:
	// "round_robin", "weighted" or "priority_based". Any other value falls
	// back to round-robin.
	Strategy string `yaml:"strategy"`
	// HealthCheckRequired, when true, makes providers marked unhealthy
	// ineligible for selection.
	HealthCheckRequired bool `yaml:"health_check_required"`
	// FallbackEnabled and RetryFailedAfter are not read anywhere in this file.
	FallbackEnabled  bool          `yaml:"fallback_enabled"`
	RetryFailedAfter time.Duration `yaml:"retry_failed_after"`
}
// ProviderPoolConfig defines configuration for a provider pool.
//
// Pools are decoded from YAML but not otherwise used in this file.
type ProviderPoolConfig struct {
	Strategy                 string `yaml:"strategy"`
	MaxConcurrentConnections int    `yaml:"max_concurrent_connections"`
	// HealthCheckInterval is kept as a raw string here, unlike the
	// time.Duration fields used by the other config structs.
	HealthCheckInterval string `yaml:"health_check_interval"`
	FailoverEnabled     bool   `yaml:"failover_enabled"`
	// Providers lists pool members.
	// NOTE(review): presumably these are ProviderConfig.Name values — confirm.
	Providers []string `yaml:"providers"`
}
// ProvidersConfig represents the complete provider configuration, i.e. the
// root document of the providers YAML file.
type ProvidersConfig struct {
	// ProviderPools maps a pool name to its configuration.
	ProviderPools map[string]ProviderPoolConfig `yaml:"provider_pools"`
	// Providers must contain at least one entry (enforced by validateConfig).
	Providers []ProviderConfig `yaml:"providers"`
	// Rotation controls how GetHealthyProvider picks a provider.
	Rotation RotationConfig `yaml:"rotation"`
	// GlobalLimits is decoded but not read anywhere in this file.
	GlobalLimits GlobalLimits `yaml:"global_limits"`
	// Monitoring gates the background health-check and metrics loops.
	Monitoring MonitoringConfig `yaml:"monitoring"`
}
// GlobalLimits defines global connection limits.
//
// These values are only decoded from YAML; nothing in this file applies them.
type GlobalLimits struct {
	MaxConcurrentConnections int           `yaml:"max_concurrent_connections"`
	ConnectionTimeout        time.Duration `yaml:"connection_timeout"`
	ReadTimeout              time.Duration `yaml:"read_timeout"`
	WriteTimeout             time.Duration `yaml:"write_timeout"`
	IdleTimeout              time.Duration `yaml:"idle_timeout"`
}
// MonitoringConfig defines monitoring settings.
type MonitoringConfig struct {
	// Enabled gates both background loops (health checks and metrics) in
	// startBackgroundTasks.
	Enabled bool `yaml:"enabled"`
	// MetricsInterval is the period of the metrics ticker.
	MetricsInterval time.Duration `yaml:"metrics_interval"`
	// The remaining fields are decoded but not read anywhere in this file.
	LogSlowRequests          bool          `yaml:"log_slow_requests"`
	SlowRequestThreshold     time.Duration `yaml:"slow_request_threshold"`
	TrackProviderPerformance bool          `yaml:"track_provider_performance"`
}
// Provider represents an active RPC provider connection together with its
// rate limiter and health bookkeeping.
type Provider struct {
	// Config is the configuration this provider was created from.
	Config ProviderConfig
	// HTTPClient/HTTPConn are set by createProvider when an HTTP endpoint is
	// configured; WSClient/WSConn are set when the WebSocket dial succeeds.
	// Either pair may be nil.
	HTTPClient *ethclient.Client
	WSClient   *ethclient.Client
	// RateLimiter is consulted (via Allow) whenever the provider is
	// considered for selection.
	RateLimiter *rate.Limiter
	HTTPConn    *rpc.Client
	WSConn      *rpc.Client
	// IsHealthy starts out true and is updated by each health check.
	IsHealthy       bool
	LastHealthCheck time.Time
	// RequestCount and ErrorCount are modified with sync/atomic in this file;
	// use the Increment*/Get* methods rather than touching them directly.
	RequestCount int64
	ErrorCount   int64
	// AvgResponseTime is a smoothed health-check latency; zero means no
	// sample has been recorded yet.
	AvgResponseTime time.Duration
	// mutex guards IsHealthy, LastHealthCheck and AvgResponseTime.
	mutex sync.RWMutex
}
// ProviderManager manages multiple RPC providers with rotation and failover.
type ProviderManager struct {
	// providers is filled once by initializeProviders.
	providers []*Provider
	config    ProvidersConfig
	// currentProvider is the index at which the next round-robin scan starts.
	currentProvider int
	// mutex is taken by GetHealthyProvider and GetProviderStats while they
	// walk providers.
	mutex sync.RWMutex
	// healthTicker and metricsTicker are nil unless monitoring is enabled.
	healthTicker  *time.Ticker
	metricsTicker *time.Ticker
	// stopChan is closed by Close to terminate the background loops.
	stopChan chan struct{}
}
// NewProviderManager builds a ProviderManager from the YAML file at
// configPath.
//
// It loads and validates the configuration, connects the configured
// providers, and then starts the background health-check/metrics loops (which
// only run when monitoring is enabled). An error from either the load or the
// provider initialization step is wrapped and returned with a nil manager.
func NewProviderManager(configPath string) (*ProviderManager, error) {
	cfg, loadErr := LoadProvidersConfig(configPath)
	if loadErr != nil {
		return nil, fmt.Errorf("failed to load provider config: %w", loadErr)
	}

	manager := new(ProviderManager)
	manager.config = cfg
	manager.stopChan = make(chan struct{})

	initErr := manager.initializeProviders()
	if initErr != nil {
		return nil, fmt.Errorf("failed to initialize providers: %w", initErr)
	}

	// Only reached once at least one provider is available.
	manager.startBackgroundTasks()
	return manager, nil
}
// LoadProvidersConfig loads provider configuration from the YAML file at path.
//
// Before parsing, `$NAME` and `${NAME}` references in the file are replaced
// with the values of the corresponding environment variables. A reference to
// a variable that is not set at all is an error (a variable that is set to
// the empty string is accepted), so a missing secret or endpoint is reported
// by name instead of silently turning into an empty value.
//
// The parsed configuration is then checked with validateConfig. On failure
// the returned error wraps the underlying cause; the returned config value
// should be ignored in that case.
func LoadProvidersConfig(path string) (ProvidersConfig, error) {
	var config ProvidersConfig

	// Read the YAML file
	data, err := os.ReadFile(path)
	if err != nil {
		return config, fmt.Errorf("failed to read config file %s: %w", path, err)
	}

	// Substitute environment variables, remembering every name that is not
	// defined. os.ExpandEnv cannot be used for this: it replaces undefined
	// variables with "" and leaves no trace of them in the output.
	var missing []string
	expanded := os.Expand(string(data), func(name string) string {
		value, ok := os.LookupEnv(name)
		if !ok {
			missing = append(missing, name)
		}
		return value
	})
	if len(missing) > 0 {
		return config, fmt.Errorf("unresolved environment variables found in provider config %s: %s", path, strings.Join(missing, ", "))
	}
	// A "${" that survives expansion can only come from a substituted value
	// that itself contains a placeholder; treat that as unresolved as well.
	if strings.Contains(expanded, "${") {
		return config, fmt.Errorf("unresolved environment variables found in provider config %s", path)
	}

	// Unmarshal the YAML data
	if err := yaml.Unmarshal([]byte(expanded), &config); err != nil {
		return config, fmt.Errorf("failed to parse YAML config: %w", err)
	}

	// Validate the configuration
	if err := validateConfig(&config); err != nil {
		return config, fmt.Errorf("invalid configuration: %w", err)
	}
	return config, nil
}
// validateConfig checks the minimum requirements of a provider configuration
// and returns the first violation found, or nil when the configuration is
// acceptable.
//
// Requirements: at least one provider is listed, and every provider has a
// name, at least one endpoint (HTTP or WebSocket) and a positive
// requests-per-second rate.
func validateConfig(config *ProvidersConfig) error {
	if len(config.Providers) == 0 {
		return fmt.Errorf("no providers configured")
	}

	for idx := range config.Providers {
		entry := &config.Providers[idx]
		// Cases are evaluated top to bottom, so the name check always runs
		// before messages that embed the name.
		switch {
		case entry.Name == "":
			return fmt.Errorf("provider %d has no name", idx)
		case entry.HTTPEndpoint == "" && entry.WSEndpoint == "":
			return fmt.Errorf("provider %s has no endpoints", entry.Name)
		case entry.RateLimit.RequestsPerSecond <= 0:
			return fmt.Errorf("provider %s has invalid rate limit", entry.Name)
		}
	}
	return nil
}
// initializeProviders creates a Provider for every configured entry and
// stores the successful ones in pm.providers.
//
// A provider that cannot be created is skipped so that the remaining ones can
// still be used; the failure is printed as a warning rather than dropped. If
// no provider at all could be created an error is returned, wrapping the most
// recent creation failure when there was one.
func (pm *ProviderManager) initializeProviders() error {
	pm.providers = make([]*Provider, 0, len(pm.config.Providers))

	var lastErr error
	for _, providerConfig := range pm.config.Providers {
		provider, err := createProvider(providerConfig)
		if err != nil {
			// Report the failure but continue with other providers.
			fmt.Printf("Warning: skipping provider %s: %v\n", providerConfig.Name, err)
			lastErr = err
			continue
		}
		pm.providers = append(pm.providers, provider)
	}

	if len(pm.providers) == 0 {
		if lastErr != nil {
			return fmt.Errorf("no providers successfully initialized: %w", lastErr)
		}
		return fmt.Errorf("no providers successfully initialized")
	}
	return nil
}
// createProvider creates a new provider instance (shared utility function).
//
// It builds the provider's rate limiter and dials the configured endpoints:
//   - HTTP: a dial error is returned to the caller.
//   - WebSocket: a dial error is only printed as a warning as long as an HTTP
//     connection exists.
//
// A non-positive burst in the configuration is replaced by the
// requests-per-second value (at least 1), because a limiter with a burst of
// zero never allows any request and would make the provider permanently
// unusable.
//
// An error is returned when the provider ends up with no connection at all
// (no endpoint configured, or a WebSocket-only provider whose dial failed).
// The returned provider is initially marked healthy.
func createProvider(config ProviderConfig) (*Provider, error) {
	// Create rate limiter
	burst := config.RateLimit.Burst
	if burst <= 0 {
		burst = config.RateLimit.RequestsPerSecond
		if burst <= 0 {
			burst = 1
		}
	}
	rateLimiter := rate.NewLimiter(
		rate.Limit(config.RateLimit.RequestsPerSecond),
		burst,
	)

	provider := &Provider{
		Config:      config,
		RateLimiter: rateLimiter,
		IsHealthy:   true, // Assume healthy until proven otherwise
	}

	// Initialize HTTP connection
	if config.HTTPEndpoint != "" {
		httpClient := &http.Client{
			Timeout: config.RateLimit.Timeout, // Use config timeout
		}
		rpcClient, err := rpc.DialHTTPWithClient(config.HTTPEndpoint, httpClient)
		if err != nil {
			return nil, fmt.Errorf("failed to connect to HTTP endpoint %s: %w", config.HTTPEndpoint, err)
		}
		provider.HTTPConn = rpcClient
		provider.HTTPClient = ethclient.NewClient(rpcClient)
	}

	// Initialize WebSocket connection
	var wsErr error
	if config.WSEndpoint != "" {
		wsClient, err := rpc.DialWebsocket(context.Background(), config.WSEndpoint, "")
		if err != nil {
			// Don't fail yet if WS connection fails, HTTP might still work
			wsErr = err
			fmt.Printf("Warning: failed to connect to WebSocket endpoint %s: %v\n", config.WSEndpoint, err)
		} else {
			provider.WSConn = wsClient
			provider.WSClient = ethclient.NewClient(wsClient)
		}
	}

	// A provider without any connection can never serve a request.
	if provider.HTTPConn == nil && provider.WSConn == nil {
		if wsErr != nil {
			return nil, fmt.Errorf("failed to connect to WebSocket endpoint %s: %w", config.WSEndpoint, wsErr)
		}
		return nil, fmt.Errorf("provider %s has no endpoints", config.Name)
	}
	return provider, nil
}
// GetHealthyProvider returns the next usable provider according to the
// configured rotation strategy ("round_robin", "weighted" or
// "priority_based"; any other value behaves like "round_robin").
//
// It returns an error when no providers exist or when none of them is
// currently usable (unhealthy or rate limited).
//
// The manager's mutex is taken exclusively rather than shared: round-robin
// selection advances pm.currentProvider, and doing that under a read lock
// would let concurrent callers race on the index.
func (pm *ProviderManager) GetHealthyProvider() (*Provider, error) {
	pm.mutex.Lock()
	defer pm.mutex.Unlock()

	if len(pm.providers) == 0 {
		return nil, fmt.Errorf("no providers available")
	}

	switch pm.config.Rotation.Strategy {
	case "weighted":
		return pm.getWeightedProvider()
	case "priority_based":
		return pm.getPriorityProvider()
	default: // "round_robin" and unknown strategies
		return pm.getNextRoundRobin()
	}
}
// getNextRoundRobin implements round-robin provider selection.
//
// Starting at pm.currentProvider it visits every provider at most once,
// wrapping around the end of the slice, and returns the first one that
// isProviderUsable accepts. On success pm.currentProvider is moved to the
// slot after the returned provider. Because this writes manager state,
// synchronization is the caller's responsibility.
func (pm *ProviderManager) getNextRoundRobin() (*Provider, error) {
	total := len(pm.providers)
	for offset, cursor := 0, pm.currentProvider; offset < total; offset++ {
		slot := (cursor + offset) % total
		if candidate := pm.providers[slot]; pm.isProviderUsable(candidate) {
			pm.currentProvider = (slot + 1) % total
			return candidate, nil
		}
	}
	return nil, fmt.Errorf("no healthy providers available")
}
// getPriorityProvider returns the usable provider with the best priority,
// where a numerically lower Config.Priority is better and ties go to the
// provider that appears first in pm.providers.
//
// Providers are probed strictly in priority order and probing stops at the
// first usable one. The order matters because isProviderUsable consumes a
// rate-limiter token from every healthy provider it is asked about: probing
// all providers on every call would drain the budget of providers that are
// never returned.
//
// An error is returned when no provider is usable.
func (pm *ProviderManager) getPriorityProvider() (*Provider, error) {
	probed := make([]bool, len(pm.providers))

	// Each pass picks the best provider not probed so far, so after
	// len(pm.providers) passes every provider has been considered once.
	for range pm.providers {
		best := -1
		for i, candidate := range pm.providers {
			if probed[i] {
				continue
			}
			if best == -1 || candidate.Config.Priority < pm.providers[best].Config.Priority {
				best = i
			}
		}
		probed[best] = true

		if provider := pm.providers[best]; pm.isProviderUsable(provider) {
			return provider, nil
		}
	}
	return nil, fmt.Errorf("no healthy providers available")
}
// getWeightedProvider is the entry point for the "weighted" strategy.
//
// No weighting exists yet: the call is forwarded unchanged to
// getPriorityProvider. A real implementation would take response times and
// success rates into account.
func (pm *ProviderManager) getWeightedProvider() (*Provider, error) {
	chosen, err := pm.getPriorityProvider()
	return chosen, err
}
// isProviderUsable reports whether provider may be handed out right now.
//
// A provider is rejected when the rotation config requires health checks and
// the provider is marked unhealthy. Otherwise the answer is whatever the
// provider's rate limiter returns from Allow, which means a positive answer
// has already consumed one token. The limiter is not consulted for providers
// rejected on health grounds.
func (pm *ProviderManager) isProviderUsable(provider *Provider) bool {
	provider.mutex.RLock()
	defer provider.mutex.RUnlock()

	blockedByHealth := pm.config.Rotation.HealthCheckRequired && !provider.IsHealthy
	return !blockedByHealth && provider.RateLimiter.Allow()
}
// GetHTTPClient selects a provider via GetHealthyProvider and returns its
// HTTP ethclient.
//
// Selection errors are passed through unchanged. If the selected provider has
// no HTTP client an error is returned; no other provider is tried.
func (pm *ProviderManager) GetHTTPClient() (*ethclient.Client, error) {
	selected, err := pm.GetHealthyProvider()
	switch {
	case err != nil:
		return nil, err
	case selected.HTTPClient == nil:
		return nil, fmt.Errorf("provider %s has no HTTP client", selected.Config.Name)
	}
	return selected.HTTPClient, nil
}
// GetWSClient selects a provider via GetHealthyProvider and returns its
// WebSocket ethclient.
//
// Selection errors are passed through unchanged. If the selected provider has
// no WebSocket client an error is returned; no other provider is tried.
func (pm *ProviderManager) GetWSClient() (*ethclient.Client, error) {
	chosen, err := pm.GetHealthyProvider()
	if err != nil {
		return nil, err
	}
	if ws := chosen.WSClient; ws != nil {
		return ws, nil
	}
	return nil, fmt.Errorf("provider %s has no WebSocket client", chosen.Config.Name)
}
// GetRPCClient returns a raw RPC client of the selected provider for advanced
// operations.
//
// With preferWS set, the WebSocket connection is returned when the provider
// has one; in every other situation the HTTP connection is used. Note that the
// WebSocket connection is never used as a fallback when preferWS is false. An
// error is returned when provider selection fails or when no suitable
// connection exists.
func (pm *ProviderManager) GetRPCClient(preferWS bool) (*rpc.Client, error) {
	chosen, err := pm.GetHealthyProvider()
	if err != nil {
		return nil, err
	}

	switch {
	case preferWS && chosen.WSConn != nil:
		return chosen.WSConn, nil
	case chosen.HTTPConn != nil:
		return chosen.HTTPConn, nil
	default:
		return nil, fmt.Errorf("provider %s has no available RPC client", chosen.Config.Name)
	}
}
// startBackgroundTasks starts health checking and metrics collection.
//
// Nothing is started unless monitoring is enabled. Health checks run on a
// fixed one-minute ticker. Metrics run on Monitoring.MetricsInterval; when
// that value is zero or negative (for example because it was left out of the
// YAML file) one minute is used instead, since time.NewTicker panics on a
// non-positive duration.
func (pm *ProviderManager) startBackgroundTasks() {
	if !pm.config.Monitoring.Enabled {
		return
	}

	// Start health checks
	pm.healthTicker = time.NewTicker(time.Minute) // Default 1 minute
	go pm.healthCheckLoop()

	// Start metrics collection
	metricsInterval := pm.config.Monitoring.MetricsInterval
	if metricsInterval <= 0 {
		metricsInterval = time.Minute
	}
	pm.metricsTicker = time.NewTicker(metricsInterval)
	go pm.metricsLoop()
}
// healthCheckLoop runs performHealthChecks on every tick of the health ticker
// and returns once the manager's stop channel is closed. It is meant to run
// in its own goroutine.
func (pm *ProviderManager) healthCheckLoop() {
	ticks, done := pm.healthTicker.C, pm.stopChan
	for {
		select {
		case <-done:
			return
		case <-ticks:
			pm.performHealthChecks()
		}
	}
}
// metricsLoop runs collectMetrics on every tick of the metrics ticker and
// returns once the manager's stop channel is closed. It is meant to run in
// its own goroutine.
func (pm *ProviderManager) metricsLoop() {
	ticks, done := pm.metricsTicker.C, pm.stopChan
	for {
		select {
		case <-done:
			return
		case <-ticks:
			pm.collectMetrics()
		}
	}
}
// performHealthChecks launches one health check per provider, each in its own
// goroutine. It does not wait for the checks to finish.
func (pm *ProviderManager) performHealthChecks() {
	for i := range pm.providers {
		go pm.checkProviderHealth(pm.providers[i])
	}
}
// checkProviderHealth performs a health check on a single provider.
//
// The probe asks for the latest block number, preferring the HTTP client and
// using the WebSocket client only when no HTTP client exists. A provider with
// neither client fails the probe. Recording of the outcome is delegated to
// performProviderHealthCheck.
func (pm *ProviderManager) checkProviderHealth(provider *Provider) {
	probe := func(ctx context.Context, target *Provider) error {
		client := target.HTTPClient
		if client == nil {
			client = target.WSClient
		}
		if client == nil {
			return fmt.Errorf("no client available for health check")
		}
		_, err := client.BlockNumber(ctx)
		return err
	}
	pm.performProviderHealthCheck(provider, probe)
}
// performProviderHealthCheck runs healthChecker against provider and records
// the outcome.
//
// The check runs under a context limited to the provider's
// HealthCheck.Timeout. When that timeout is zero or negative a 10 second
// fallback is used: a non-positive timeout would produce a context that has
// already expired, failing every check and marking the provider unhealthy.
//
// Afterwards the request counter is incremented atomically and, under the
// provider's mutex, LastHealthCheck, IsHealthy, the error counter (on
// failure) and AvgResponseTime are updated. The duration of failed checks is
// included in the average.
func (pm *ProviderManager) performProviderHealthCheck(provider *Provider, healthChecker func(context.Context, *Provider) error) {
	const fallbackTimeout = 10 * time.Second

	timeout := provider.Config.HealthCheck.Timeout
	if timeout <= 0 {
		timeout = fallbackTimeout
	}
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	start := time.Now()
	err := healthChecker(ctx, provider)
	duration := time.Since(start)

	// Counters are shared with request paths, so they are updated atomically.
	atomic.AddInt64(&provider.RequestCount, 1)

	provider.mutex.Lock()
	defer provider.mutex.Unlock()

	provider.LastHealthCheck = time.Now()
	if err != nil {
		atomic.AddInt64(&provider.ErrorCount, 1)
		provider.IsHealthy = false
	} else {
		provider.IsHealthy = true
	}

	// Update the average response time as an exponentially weighted moving
	// average. A zero average means "no sample yet", so the first
	// measurement is taken as-is.
	if provider.AvgResponseTime == 0 {
		provider.AvgResponseTime = duration
	} else {
		// Weight new measurement at 20% to smooth out spikes
		provider.AvgResponseTime = time.Duration(
			float64(provider.AvgResponseTime)*0.8 + float64(duration)*0.2,
		)
	}
}
// IncrementRequestCount atomically adds one to the provider's request
// counter, so it can be called from multiple goroutines without holding the
// provider's mutex.
func (p *Provider) IncrementRequestCount() {
	const step int64 = 1
	atomic.AddInt64(&p.RequestCount, step)
}
// IncrementErrorCount atomically adds one to the provider's error counter, so
// it can be called from multiple goroutines without holding the provider's
// mutex.
func (p *Provider) IncrementErrorCount() {
	const step int64 = 1
	atomic.AddInt64(&p.ErrorCount, step)
}
// GetRequestCount returns the current value of the provider's request
// counter, read atomically.
func (p *Provider) GetRequestCount() int64 {
	current := atomic.LoadInt64(&p.RequestCount)
	return current
}
// GetErrorCount returns the current value of the provider's error counter,
// read atomically.
func (p *Provider) GetErrorCount() int64 {
	current := atomic.LoadInt64(&p.ErrorCount)
	return current
}
// collectMetrics is the hook invoked by metricsLoop on every metrics tick.
//
// It is currently a stub: the body contains no statements, so nothing is
// collected, reported or logged yet.
func (pm *ProviderManager) collectMetrics() {
	// A full implementation would collect and report per-provider metrics
	// here (e.g. the counters and average response time kept on Provider).
}
// Close shuts down the provider manager: it signals the background loops to
// stop, stops the tickers (if they were started) and closes every provider's
// HTTP and WebSocket RPC connection.
//
// Close is idempotent. The first call does the work; later calls return
// immediately instead of panicking on a second close of the stop channel.
// The manager's mutex is held for the duration so concurrent Close calls are
// serialized. Close always returns nil.
func (pm *ProviderManager) Close() error {
	pm.mutex.Lock()
	defer pm.mutex.Unlock()

	// A receive from a closed channel succeeds immediately, which is how an
	// earlier Close is detected.
	select {
	case <-pm.stopChan:
		return nil
	default:
	}
	close(pm.stopChan)

	if pm.healthTicker != nil {
		pm.healthTicker.Stop()
	}
	if pm.metricsTicker != nil {
		pm.metricsTicker.Stop()
	}

	// Close all connections
	for _, provider := range pm.providers {
		if provider.HTTPConn != nil {
			provider.HTTPConn.Close()
		}
		if provider.WSConn != nil {
			provider.WSConn.Close()
		}
	}
	return nil
}
// GetProviderStats returns a snapshot of per-provider statistics.
//
// The result maps each provider's name to a map with the keys "name",
// "healthy", "last_health_check", "request_count", "error_count" and
// "avg_response_time". Providers sharing a name overwrite each other, with
// the later one in the provider list winning. Each provider is read under
// its own read lock; the counters are read atomically.
func (pm *ProviderManager) GetProviderStats() map[string]interface{} {
	pm.mutex.RLock()
	defer pm.mutex.RUnlock()

	// snapshotOf captures one provider's fields while holding its read lock.
	snapshotOf := func(p *Provider) map[string]interface{} {
		p.mutex.RLock()
		defer p.mutex.RUnlock()
		return map[string]interface{}{
			"name":              p.Config.Name,
			"healthy":           p.IsHealthy,
			"last_health_check": p.LastHealthCheck,
			"request_count":     p.GetRequestCount(),
			"error_count":       p.GetErrorCount(),
			"avg_response_time": p.AvgResponseTime,
		}
	}

	result := make(map[string]interface{})
	for i := range pm.providers {
		p := pm.providers[i]
		result[p.Config.Name] = snapshotOf(p)
	}
	return result
}