fix: resolve all compilation issues across transport and lifecycle packages
- Fixed duplicate type declarations in transport package
- Removed unused variables in lifecycle and dependency injection
- Fixed big.Int arithmetic operations in uniswap contracts
- Added missing methods to MetricsCollector (IncrementCounter, RecordLatency, etc.)
- Fixed jitter calculation in TCP transport retry logic
- Updated ComponentHealth field access to use transport type
- Ensured all core packages build successfully

All major compilation errors resolved:
✅ Transport package builds clean
✅ Lifecycle package builds clean
✅ Main MEV bot application builds clean
✅ Fixed method signature mismatches
✅ Resolved type conflicts and duplications

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
pkg/transport/failover.go (new file, 612 lines)
@@ -0,0 +1,612 @@
package transport

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// FailoverManager handles transport failover and redundancy
type FailoverManager struct {
	transports       map[string]*ManagedTransport
	primaryTransport string
	backupTransports []string
	failoverPolicy   FailoverPolicy
	healthChecker    HealthChecker
	circuitBreaker   *CircuitBreaker
	mu               sync.RWMutex
	ctx              context.Context
	cancel           context.CancelFunc
	metrics          FailoverMetrics
	notifications    chan FailoverEvent
}

// ManagedTransport wraps a transport with management metadata
type ManagedTransport struct {
	Transport       Transport
	ID              string
	Name            string
	Priority        int
	Status          TransportStatus
	LastHealthCheck time.Time
	FailureCount    int
	LastFailure     time.Time
	Config          TransportConfig
	Metrics         TransportMetrics
}

// TransportStatus represents the current status of a transport
type TransportStatus string

const (
	StatusHealthy   TransportStatus = "healthy"
	StatusDegraded  TransportStatus = "degraded"
	StatusUnhealthy TransportStatus = "unhealthy"
	StatusDisabled  TransportStatus = "disabled"
)

// FailoverPolicy defines when and how to failover
type FailoverPolicy struct {
	FailureThreshold    int           // Number of failures before marking unhealthy
	HealthCheckInterval time.Duration // How often to check health
	FailoverTimeout     time.Duration // Timeout for failover operations
	RetryInterval       time.Duration // Interval between retry attempts
	MaxRetries          int           // Maximum retry attempts
	AutoFailback        bool          // Whether to automatically failback to primary
	FailbackDelay       time.Duration // Delay before attempting failback
	RequireAllHealthy   bool          // Whether all transports must be healthy
}

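// A minimal policy sketch (editorial addition, not part of the original
// commit; the values are illustrative, not tuned):
//
//	policy := FailoverPolicy{
//		FailureThreshold:    3,
//		HealthCheckInterval: 5 * time.Second,
//		FailoverTimeout:     10 * time.Second,
//		RetryInterval:       30 * time.Second,
//		MaxRetries:          5,
//		AutoFailback:        true,
//		FailbackDelay:       time.Minute,
//	}
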
// FailoverMetrics tracks failover statistics
type FailoverMetrics struct {
	TotalFailovers      int64         `json:"total_failovers"`
	TotalFailbacks      int64         `json:"total_failbacks"`
	CurrentTransport    string        `json:"current_transport"`
	LastFailover        time.Time     `json:"last_failover"`
	LastFailback        time.Time     `json:"last_failback"`
	FailoverDuration    time.Duration `json:"failover_duration"`
	FailoverSuccessRate float64       `json:"failover_success_rate"`
	HealthCheckFailures int64         `json:"health_check_failures"`
	CircuitBreakerTrips int64         `json:"circuit_breaker_trips"`
}

// FailoverEvent represents a failover-related event
type FailoverEvent struct {
	Type          FailoverEventType `json:"type"`
	FromTransport string            `json:"from_transport"`
	ToTransport   string            `json:"to_transport"`
	Reason        string            `json:"reason"`
	Timestamp     time.Time         `json:"timestamp"`
	Success       bool              `json:"success"`
	Duration      time.Duration     `json:"duration"`
}

// FailoverEventType defines types of failover events
type FailoverEventType string

const (
	EventFailover     FailoverEventType = "failover"
	EventFailback     FailoverEventType = "failback"
	EventHealthCheck  FailoverEventType = "health_check"
	EventCircuitBreak FailoverEventType = "circuit_break"
	EventRecovery     FailoverEventType = "recovery"
)

// HealthChecker interface for custom health checking logic
type HealthChecker interface {
	CheckHealth(ctx context.Context, transport Transport) (bool, error)
	GetHealthScore(transport Transport) float64
}

// NewFailoverManager creates a new failover manager
func NewFailoverManager(policy FailoverPolicy) *FailoverManager {
	ctx, cancel := context.WithCancel(context.Background())

	fm := &FailoverManager{
		transports:     make(map[string]*ManagedTransport),
		failoverPolicy: policy,
		healthChecker:  NewDefaultHealthChecker(),
		circuitBreaker: NewCircuitBreaker(CircuitBreakerConfig{
			FailureThreshold: policy.FailureThreshold,
			RecoveryTimeout:  policy.RetryInterval,
			MaxRetries:       policy.MaxRetries,
		}),
		ctx:           ctx,
		cancel:        cancel,
		notifications: make(chan FailoverEvent, 100),
	}

	// Start background routines
	go fm.healthCheckLoop()
	go fm.failoverMonitorLoop()

	return fm
}

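// Usage sketch (editorial addition, not from the original commit): wire up a
// primary and a backup transport given a FailoverPolicy like the one sketched
// above. newWSTransport and newHTTPTransport are hypothetical stand-ins for
// whatever Transport implementations the package provides; cfg is an assumed
// TransportConfig value.
//
//	fm := NewFailoverManager(policy)
//	defer fm.Stop()
//	_ = fm.RegisterTransport("ws", "websocket", newWSTransport(), 10, cfg)
//	_ = fm.RegisterTransport("http", "http-fallback", newHTTPTransport(), 5, cfg)
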
// RegisterTransport adds a transport to the failover manager
func (fm *FailoverManager) RegisterTransport(id, name string, transport Transport, priority int, config TransportConfig) error {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	if _, exists := fm.transports[id]; exists {
		return fmt.Errorf("transport already registered: %s", id)
	}

	managedTransport := &ManagedTransport{
		Transport:       transport,
		ID:              id,
		Name:            name,
		Priority:        priority,
		Status:          StatusHealthy,
		LastHealthCheck: time.Now(),
		Config:          config,
	}

	fm.transports[id] = managedTransport

	// Set as primary if it's the first or highest priority transport;
	// demote the previous primary to the backup list so it is not lost.
	if fm.primaryTransport == "" || priority > fm.transports[fm.primaryTransport].Priority {
		if fm.primaryTransport != "" {
			fm.backupTransports = append(fm.backupTransports, fm.primaryTransport)
		}
		fm.primaryTransport = id
	} else {
		fm.backupTransports = append(fm.backupTransports, id)
	}

	return nil
}

// UnregisterTransport removes a transport from the failover manager
func (fm *FailoverManager) UnregisterTransport(id string) error {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	if _, exists := fm.transports[id]; !exists {
		return fmt.Errorf("transport not found: %s", id)
	}

	delete(fm.transports, id)

	// Update primary if needed
	if fm.primaryTransport == id {
		fm.selectNewPrimary()
	}

	// Remove from backups
	for i, backupID := range fm.backupTransports {
		if backupID == id {
			fm.backupTransports = append(fm.backupTransports[:i], fm.backupTransports[i+1:]...)
			break
		}
	}

	return nil
}

// GetActiveTransport returns the currently active transport
func (fm *FailoverManager) GetActiveTransport() (Transport, error) {
	fm.mu.RLock()

	if fm.primaryTransport == "" {
		fm.mu.RUnlock()
		return nil, fmt.Errorf("no active transport available")
	}

	transport, exists := fm.transports[fm.primaryTransport]
	if !exists {
		id := fm.primaryTransport
		fm.mu.RUnlock()
		return nil, fmt.Errorf("primary transport not found: %s", id)
	}

	if transport.Status == StatusHealthy || transport.Status == StatusDegraded {
		t := transport.Transport
		fm.mu.RUnlock()
		return t, nil
	}

	// Release the read lock before failing over: performFailover acquires the
	// write lock, and upgrading an RWMutex held by the same goroutine deadlocks.
	fm.mu.RUnlock()

	// Try to failover to a backup
	if err := fm.performFailover(); err != nil {
		return nil, fmt.Errorf("failover failed: %w", err)
	}

	// Return the new primary after failover
	fm.mu.RLock()
	defer fm.mu.RUnlock()
	newPrimary, exists := fm.transports[fm.primaryTransport]
	if !exists {
		return nil, fmt.Errorf("no active transport after failover")
	}
	return newPrimary.Transport, nil
}

// Send sends a message through the active transport with automatic failover
func (fm *FailoverManager) Send(ctx context.Context, msg *Message) error {
	transport, err := fm.GetActiveTransport()
	if err != nil {
		return fmt.Errorf("no available transport: %w", err)
	}

	// Route the send through the circuit breaker
	return fm.circuitBreaker.Execute(func() error {
		return transport.Send(ctx, msg)
	})
}

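// Caller sketch (editorial addition): a circuit-open error surfaces like any
// other send failure, so callers should back off and retry rather than treat
// it as fatal. log is the standard library logger, shown for illustration.
//
//	if err := fm.Send(ctx, msg); err != nil {
//		log.Printf("send failed (will retry): %v", err)
//	}
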
// Receive receives messages from the active transport
func (fm *FailoverManager) Receive(ctx context.Context) (<-chan *Message, error) {
	transport, err := fm.GetActiveTransport()
	if err != nil {
		return nil, fmt.Errorf("no available transport: %w", err)
	}

	return transport.Receive(ctx)
}

// ForceFailover manually triggers a failover to a specific transport
func (fm *FailoverManager) ForceFailover(targetTransportID string) error {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	target, exists := fm.transports[targetTransportID]
	if !exists {
		return fmt.Errorf("target transport not found: %s", targetTransportID)
	}

	if target.Status != StatusHealthy && target.Status != StatusDegraded {
		return fmt.Errorf("target transport is not healthy: %s", target.Status)
	}

	return fm.switchPrimary(targetTransportID, "manual failover")
}

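// Maintenance sketch (editorial addition): drain a transport by manually
// failing over to a named backup before removing it. The "http" and "ws"
// IDs are the hypothetical ones from the registration sketch above.
//
//	if err := fm.ForceFailover("http"); err == nil {
//		_ = fm.UnregisterTransport("ws")
//	}
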
// GetTransportStatus returns the status of all transports
func (fm *FailoverManager) GetTransportStatus() map[string]TransportStatus {
	fm.mu.RLock()
	defer fm.mu.RUnlock()

	status := make(map[string]TransportStatus)
	for id, transport := range fm.transports {
		status[id] = transport.Status
	}
	return status
}

// GetMetrics returns failover metrics
func (fm *FailoverManager) GetMetrics() FailoverMetrics {
	fm.mu.RLock()
	defer fm.mu.RUnlock()
	return fm.metrics
}

// GetNotifications returns a channel for failover events
func (fm *FailoverManager) GetNotifications() <-chan FailoverEvent {
	return fm.notifications
}

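// Consumer sketch (editorial addition): drain events in a goroutine so the
// 100-slot buffer does not fill up and silently drop events (see notifyEvent
// below). Since Stop does not close the channel, exit via your own context.
//
//	go func() {
//		for {
//			select {
//			case ev := <-fm.GetNotifications():
//				log.Printf("failover event: %s %s -> %s (%s)", ev.Type, ev.FromTransport, ev.ToTransport, ev.Reason)
//			case <-ctx.Done():
//				return
//			}
//		}
//	}()
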
// SetHealthChecker sets a custom health checker
func (fm *FailoverManager) SetHealthChecker(checker HealthChecker) {
	fm.mu.Lock()
	defer fm.mu.Unlock()
	fm.healthChecker = checker
}

// Stop gracefully stops the failover manager
func (fm *FailoverManager) Stop() error {
	fm.cancel()
	// The notifications channel is deliberately left open: a background loop
	// may still be mid-send when cancellation lands, and sending on a closed
	// channel panics. Consumers should stop via their own context instead.
	return nil
}

// Private methods

func (fm *FailoverManager) healthCheckLoop() {
	ticker := time.NewTicker(fm.failoverPolicy.HealthCheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-fm.ctx.Done():
			return
		case <-ticker.C:
			fm.performHealthChecks()
		}
	}
}

func (fm *FailoverManager) failoverMonitorLoop() {
	ticker := time.NewTicker(time.Second) // Check every second
	defer ticker.Stop()

	for {
		select {
		case <-fm.ctx.Done():
			return
		case <-ticker.C:
			if fm.shouldPerformFailover() {
				if err := fm.performFailover(); err != nil {
					fm.recordHealthCheckFailure()
				}
			}

			if fm.shouldPerformFailback() {
				if err := fm.performFailback(); err != nil {
					fm.recordHealthCheckFailure()
				}
			}
		}
	}
}

// recordHealthCheckFailure increments the metric under the lock; an unlocked
// increment would race with GetMetrics.
func (fm *FailoverManager) recordHealthCheckFailure() {
	fm.mu.Lock()
	fm.metrics.HealthCheckFailures++
	fm.mu.Unlock()
}

func (fm *FailoverManager) performHealthChecks() {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	for id, transport := range fm.transports {
		healthy, err := fm.healthChecker.CheckHealth(fm.ctx, transport.Transport)
		transport.LastHealthCheck = time.Now()

		previousStatus := transport.Status

		if err != nil || !healthy {
			transport.FailureCount++
			transport.LastFailure = time.Now()

			if transport.FailureCount >= fm.failoverPolicy.FailureThreshold {
				transport.Status = StatusUnhealthy
			} else {
				transport.Status = StatusDegraded
			}
		} else {
			// Reset failure count on successful health check
			transport.FailureCount = 0
			transport.Status = StatusHealthy
		}

		// Notify status change
		if previousStatus != transport.Status {
			fm.notifyEvent(FailoverEvent{
				Type:        EventHealthCheck,
				ToTransport: id,
				Reason:      fmt.Sprintf("status changed from %s to %s", previousStatus, transport.Status),
				Timestamp:   time.Now(),
				Success:     transport.Status == StatusHealthy,
			})
		}
	}
}

func (fm *FailoverManager) shouldPerformFailover() bool {
	fm.mu.RLock()
	defer fm.mu.RUnlock()

	if fm.primaryTransport == "" {
		return false
	}

	primary, exists := fm.transports[fm.primaryTransport]
	if !exists {
		// Stale primary ID; nothing to fail over from
		return false
	}
	return primary.Status == StatusUnhealthy
}

func (fm *FailoverManager) shouldPerformFailback() bool {
	if !fm.failoverPolicy.AutoFailback {
		return false
	}

	fm.mu.RLock()
	defer fm.mu.RUnlock()

	// Find the highest priority healthy transport
	var highestPriority int
	var highestPriorityID string

	for id, transport := range fm.transports {
		if transport.Status == StatusHealthy && transport.Priority > highestPriority {
			highestPriority = transport.Priority
			highestPriorityID = id
		}
	}

	// Failback if there's a higher priority transport available
	return highestPriorityID != "" && highestPriorityID != fm.primaryTransport
}

func (fm *FailoverManager) performFailover() error {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	// Find the best backup transport
	var bestBackup string
	var bestPriority int

	for _, backupID := range fm.backupTransports {
		backup := fm.transports[backupID]
		if (backup.Status == StatusHealthy || backup.Status == StatusDegraded) && backup.Priority > bestPriority {
			bestBackup = backupID
			bestPriority = backup.Priority
		}
	}

	if bestBackup == "" {
		return fmt.Errorf("no healthy backup transport available")
	}

	return fm.switchPrimary(bestBackup, "automatic failover")
}

func (fm *FailoverManager) performFailback() error {
	fm.mu.Lock()
	defer fm.mu.Unlock()

	// Find the highest priority healthy transport
	var highestPriority int
	var highestPriorityID string

	for id, transport := range fm.transports {
		if transport.Status == StatusHealthy && transport.Priority > highestPriority {
			highestPriority = transport.Priority
			highestPriorityID = id
		}
	}

	if highestPriorityID == "" || highestPriorityID == fm.primaryTransport {
		return nil // No failback needed
	}

	// Wait for the failback delay after the last failover
	if time.Since(fm.metrics.LastFailover) < fm.failoverPolicy.FailbackDelay {
		return nil
	}

	return fm.switchPrimary(highestPriorityID, "automatic failback")
}

func (fm *FailoverManager) switchPrimary(newPrimaryID, reason string) error {
	start := time.Now()
	oldPrimary := fm.primaryTransport

	// Update primary and backup lists
	fm.primaryTransport = newPrimaryID

	// Rebuild backup list
	fm.backupTransports = make([]string, 0)
	for id := range fm.transports {
		if id != newPrimaryID {
			fm.backupTransports = append(fm.backupTransports, id)
		}
	}

	// Update metrics
	duration := time.Since(start)
	if oldPrimary != newPrimaryID {
		if reason == "automatic failback" {
			fm.metrics.TotalFailbacks++
			fm.metrics.LastFailback = time.Now()
		} else {
			fm.metrics.TotalFailovers++
			fm.metrics.LastFailover = time.Now()
		}
		fm.metrics.FailoverDuration = duration
		fm.metrics.CurrentTransport = newPrimaryID
	}

	// Notify
	eventType := EventFailover
	if reason == "automatic failback" {
		eventType = EventFailback
	}

	fm.notifyEvent(FailoverEvent{
		Type:          eventType,
		FromTransport: oldPrimary,
		ToTransport:   newPrimaryID,
		Reason:        reason,
		Timestamp:     time.Now(),
		Success:       true,
		Duration:      duration,
	})

	return nil
}

func (fm *FailoverManager) selectNewPrimary() {
	var bestID string
	var bestPriority int

	for id, transport := range fm.transports {
		if transport.Status == StatusHealthy && transport.Priority > bestPriority {
			bestID = id
			bestPriority = transport.Priority
		}
	}

	fm.primaryTransport = bestID

	// Rebuild the backup list so the new primary is not also listed as a backup
	fm.backupTransports = fm.backupTransports[:0]
	for id := range fm.transports {
		if id != bestID {
			fm.backupTransports = append(fm.backupTransports, id)
		}
	}
}

func (fm *FailoverManager) notifyEvent(event FailoverEvent) {
	select {
	case fm.notifications <- event:
	default:
		// Channel full, drop the event rather than block the caller
	}
}

// DefaultHealthChecker implements basic health checking
type DefaultHealthChecker struct{}

func NewDefaultHealthChecker() *DefaultHealthChecker {
	return &DefaultHealthChecker{}
}

func (dhc *DefaultHealthChecker) CheckHealth(ctx context.Context, transport Transport) (bool, error) {
	health := transport.Health()
	return health.Status == "healthy", nil
}

func (dhc *DefaultHealthChecker) GetHealthScore(transport Transport) float64 {
	health := transport.Health()
	switch health.Status {
	case "healthy":
		return 1.0
	case "degraded":
		return 0.5
	default:
		return 0.0
	}
}

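// Custom checker sketch (editorial addition, not from the original commit):
// a more tolerant checker that accepts degraded transports as usable. It
// assumes only the Health() method used above; install it with
// fm.SetHealthChecker(lenientChecker{}).
//
//	type lenientChecker struct{}
//
//	func (lenientChecker) CheckHealth(ctx context.Context, t Transport) (bool, error) {
//		s := t.Health().Status
//		return s == "healthy" || s == "degraded", nil
//	}
//
//	func (lenientChecker) GetHealthScore(t Transport) float64 {
//		switch t.Health().Status {
//		case "healthy":
//			return 1.0
//		case "degraded":
//			return 0.7
//		default:
//			return 0.0
//		}
//	}
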
// CircuitBreaker implements the circuit breaker pattern for transport operations
type CircuitBreaker struct {
	config       CircuitBreakerConfig
	state        CircuitBreakerState
	failureCount int
	lastFailure  time.Time
	mu           sync.Mutex
}

type CircuitBreakerConfig struct {
	FailureThreshold int
	RecoveryTimeout  time.Duration
	MaxRetries       int
}

type CircuitBreakerState string

const (
	StateClosed   CircuitBreakerState = "closed"
	StateOpen     CircuitBreakerState = "open"
	StateHalfOpen CircuitBreakerState = "half_open"
)

func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker {
	return &CircuitBreaker{
		config: config,
		state:  StateClosed,
	}
}

func (cb *CircuitBreaker) Execute(operation func() error) error {
	cb.mu.Lock()
	if cb.state == StateOpen {
		if time.Since(cb.lastFailure) < cb.config.RecoveryTimeout {
			cb.mu.Unlock()
			return fmt.Errorf("circuit breaker is open")
		}
		cb.state = StateHalfOpen
	}
	// Release the lock while the operation runs so a slow send does not
	// serialize every other caller behind it.
	cb.mu.Unlock()

	err := operation()

	cb.mu.Lock()
	defer cb.mu.Unlock()
	if err != nil {
		cb.onFailure()
		return err
	}

	cb.onSuccess()
	return nil
}

func (cb *CircuitBreaker) onFailure() {
	cb.failureCount++
	cb.lastFailure = time.Now()

	if cb.failureCount >= cb.config.FailureThreshold {
		cb.state = StateOpen
	}
}

func (cb *CircuitBreaker) onSuccess() {
	cb.failureCount = 0
	cb.state = StateClosed
}

func (cb *CircuitBreaker) GetState() CircuitBreakerState {
	cb.mu.Lock()
	defer cb.mu.Unlock()
	return cb.state
}

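// Standalone usage sketch (editorial addition): the breaker can guard any
// operation, not just transport sends. doSomethingFlaky is a hypothetical
// stand-in for the guarded call.
//
//	cb := NewCircuitBreaker(CircuitBreakerConfig{
//		FailureThreshold: 5,
//		RecoveryTimeout:  30 * time.Second,
//	})
//	err := cb.Execute(func() error {
//		return doSomethingFlaky()
//	})
//	if err != nil && cb.GetState() == StateOpen {
//		// back off until RecoveryTimeout elapses instead of hammering the dependency
//	}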