package recovery

import (
	"context"
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/fraktal/mev-beta/internal/logger"
)

// TestRetryHandler_ExecuteWithRetry_Success checks that an operation which
// fails once and then succeeds is reported as successful after two attempts
// with no last error.
func TestRetryHandler_ExecuteWithRetry_Success(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		if attempts == 2 {
			return nil // Success on second attempt
		}
		return errors.New("temporary failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.True(t, result.Success)
	assert.Equal(t, 2, result.Attempts)
	assert.Nil(t, result.LastError)
	assert.Equal(t, 2, attempts)
}

// TestRetryHandler_ExecuteWithRetry_MaxAttemptsReached checks that an
// operation which always fails is attempted three times under the default
// configuration and that the final error is surfaced in the result.
func TestRetryHandler_ExecuteWithRetry_MaxAttemptsReached(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		return errors.New("persistent failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.False(t, result.Success)
	assert.Equal(t, 3, result.Attempts) // Default max attempts
	// require (not assert) so the Error() call below cannot run on a nil
	// error and panic instead of reporting a clean test failure.
	require.NotNil(t, result.LastError)
	assert.Equal(t, "persistent failure", result.LastError.Error())
	assert.Equal(t, 3, attempts)
}

// TestRetryHandler_ExecuteWithRetry_ContextCanceled cancels the context
// from inside the second attempt and checks that the run is reported as
// failed, with at most three attempts and a non-nil last error.
func TestRetryHandler_ExecuteWithRetry_ContextCanceled(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	ctx, cancel := context.WithCancel(context.Background())
	// Release the context even if the second attempt (which cancels it) is
	// never reached; calling cancel more than once is harmless.
	defer cancel()

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		if attempts == 2 {
			cancel() // Cancel context on second attempt
		}
		return errors.New("failure")
	}

	result := handler.ExecuteWithRetry(ctx, "test_operation", operation)

	assert.False(t, result.Success)
	assert.LessOrEqual(t, result.Attempts, 3)
	assert.NotNil(t, result.LastError)
}

// TestRetryHandler_ExecuteWithRetry_CustomConfig registers a per-operation
// configuration via SetConfig and checks that the custom attempt limit is
// honoured and that the total run time is at least the sum of the expected
// backoff delays.
func TestRetryHandler_ExecuteWithRetry_CustomConfig(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Set custom configuration
	customConfig := RetryConfig{
		MaxAttempts:       5,
		InitialDelay:      10 * time.Millisecond,
		MaxDelay:          100 * time.Millisecond,
		BackoffFactor:     2.0,
		JitterEnabled:     false,
		TimeoutPerAttempt: 1 * time.Second,
	}
	handler.SetConfig("custom_operation", customConfig)

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		return errors.New("persistent failure")
	}

	start := time.Now()
	result := handler.ExecuteWithRetry(context.Background(), "custom_operation", operation)
	duration := time.Since(start)

	assert.False(t, result.Success)
	assert.Equal(t, 5, result.Attempts) // Custom max attempts
	assert.Equal(t, 5, attempts)

	// Should have taken some time due to delays (at least 150ms for delays)
	expectedMinDuration := 10*time.Millisecond + 20*time.Millisecond + 40*time.Millisecond + 80*time.Millisecond
	assert.GreaterOrEqual(t, duration, expectedMinDuration)
}

// TestRetryHandler_ExecuteWithRetry_Disabled checks that after Disable is
// called a failing operation is attempted exactly once.
func TestRetryHandler_ExecuteWithRetry_Disabled(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)
	handler.Disable()

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		return errors.New("failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.False(t, result.Success)
	assert.Equal(t, 1, result.Attempts) // Only one attempt when disabled
	assert.Equal(t, 1, attempts)
}

// TestRetryHandler_CalculateDelay checks calculateDelay without jitter:
// the delay doubles per attempt starting from InitialDelay and is capped at
// MaxDelay.
func TestRetryHandler_CalculateDelay(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	config := RetryConfig{
		InitialDelay:  100 * time.Millisecond,
		MaxDelay:      1 * time.Second,
		BackoffFactor: 2.0,
		JitterEnabled: false,
	}

	tests := []struct {
		attempt     int
		expectedMin time.Duration
		expectedMax time.Duration
	}{
		{1, 100 * time.Millisecond, 100 * time.Millisecond},
		{2, 200 * time.Millisecond, 200 * time.Millisecond},
		{3, 400 * time.Millisecond, 400 * time.Millisecond},
		{4, 800 * time.Millisecond, 800 * time.Millisecond},
		{5, 1 * time.Second, 1 * time.Second}, // Should be capped at MaxDelay
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
			delay := handler.calculateDelay(config, tt.attempt)
			assert.GreaterOrEqual(t, delay, tt.expectedMin)
			assert.LessOrEqual(t, delay, tt.expectedMax)
		})
	}
}

// TestRetryHandler_CalculateDelay_WithJitter samples calculateDelay ten
// times with jitter enabled and checks that the samples are not all equal
// and that each lies within 10% of the 200ms base delay.
func TestRetryHandler_CalculateDelay_WithJitter(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	config := RetryConfig{
		InitialDelay:  100 * time.Millisecond,
		MaxDelay:      1 * time.Second,
		BackoffFactor: 2.0,
		JitterEnabled: true,
	}

	// Test jitter variation
	delays := make([]time.Duration, 10)
	for i := 0; i < 10; i++ {
		delays[i] = handler.calculateDelay(config, 2) // 200ms base
	}

	// Should have some variation due to jitter
	allSame := true
	for i := 1; i < len(delays); i++ {
		if delays[i] != delays[0] {
			allSame = false
			break
		}
	}
	assert.False(t, allSame, "Jitter should cause variation in delays")

	// All delays should be reasonable (within 10% of base)
	baseDelay := 200 * time.Millisecond
	for _, delay := range delays {
		assert.GreaterOrEqual(t, delay, baseDelay*9/10) // 10% below
		assert.LessOrEqual(t, delay, baseDelay*11/10)   // 10% above
	}
}

// TestRetryHandler_GetStats runs two successful operations and one failing
// operation under distinct operation types and checks the per-type counters
// returned by GetStats.
func TestRetryHandler_GetStats(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Execute some operations
	successOp := func(ctx context.Context, attempt int) error {
		return nil
	}
	failOp := func(ctx context.Context, attempt int) error {
		return errors.New("failure")
	}

	handler.ExecuteWithRetry(context.Background(), "test_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "test_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "test_fail", failOp)

	stats := handler.GetStats()

	// Check success stats
	successStats := stats["test_success"]
	require.NotNil(t, successStats)
	assert.Equal(t, 2, successStats.TotalAttempts)
	assert.Equal(t, 2, successStats.SuccessfulRetries)
	assert.Equal(t, 0, successStats.FailedRetries)

	// Check failure stats
	failStats := stats["test_fail"]
	require.NotNil(t, failStats)
	assert.Equal(t, 3, failStats.TotalAttempts) // Default max attempts
	assert.Equal(t, 0, failStats.SuccessfulRetries)
	assert.Equal(t, 1, failStats.FailedRetries)
}

// TestRetryHandler_GetHealthSummary runs operations under two operation
// types (one succeeding immediately, one succeeding on the second attempt)
// and checks the aggregate and per-operation fields of GetHealthSummary.
func TestRetryHandler_GetHealthSummary(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Execute some operations to generate stats
	successOp := func(ctx context.Context, attempt int) error {
		return nil
	}
	partialFailOp := func(ctx context.Context, attempt int) error {
		if attempt < 2 {
			return errors.New("temporary failure")
		}
		return nil
	}

	// 2 immediate successes
	handler.ExecuteWithRetry(context.Background(), "immediate_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "immediate_success", successOp)

	// 1 success after retry
	handler.ExecuteWithRetry(context.Background(), "retry_success", partialFailOp)

	summary := handler.GetHealthSummary()

	assert.True(t, summary["enabled"].(bool))
	assert.Equal(t, 2, summary["total_operations"].(int))
	assert.Equal(t, 2, summary["healthy_operations"].(int))
	assert.Equal(t, 0, summary["unhealthy_operations"].(int))

	// Check operation details
	details := summary["operation_details"].(map[string]interface{})

	immediateDetails := details["immediate_success"].(map[string]interface{})
	assert.Equal(t, 1.0, immediateDetails["success_rate"].(float64))
	assert.Equal(t, 1.0, immediateDetails["average_attempts"].(float64))
	assert.True(t, immediateDetails["is_healthy"].(bool))

	retryDetails := details["retry_success"].(map[string]interface{})
	assert.Equal(t, 1.0, retryDetails["success_rate"].(float64))
	assert.Equal(t, 2.0, retryDetails["average_attempts"].(float64))
	assert.True(t, retryDetails["is_healthy"].(bool)) // Still healthy despite retries
}

// TestRetryHandler_ConcurrentExecution drives the handler from 50
// goroutines, each running 20 operations under its own operation type, and
// checks that at least 80% of the operations succeed and that stats were
// recorded.
func TestRetryHandler_ConcurrentExecution(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	const numGoroutines = 50
	const operationsPerGoroutine = 20

	// Both channels are buffered to numGoroutines so no worker blocks on
	// send, even if the collector below has already given up.
	done := make(chan bool, numGoroutines)
	successCount := make(chan int, numGoroutines)

	operation := func(ctx context.Context, attempt int) error {
		// 80% success rate
		if attempt <= 1 && time.Now().UnixNano()%5 != 0 {
			return nil
		}
		if attempt == 2 {
			return nil // Always succeed on second attempt
		}
		return errors.New("failure")
	}

	// Launch concurrent retry operations
	for i := 0; i < numGoroutines; i++ {
		go func(id int) {
			// The deferred send runs after the successCount send below, so
			// a value on done implies a matching value on successCount.
			defer func() { done <- true }()

			successes := 0
			for j := 0; j < operationsPerGoroutine; j++ {
				result := handler.ExecuteWithRetry(context.Background(),
					fmt.Sprintf("concurrent_op_%d", id), operation)
				if result.Success {
					successes++
				}
			}
			successCount <- successes
		}(i)
	}

	// Collect results
	totalSuccesses := 0
	for i := 0; i < numGoroutines; i++ {
		select {
		case <-done:
			totalSuccesses += <-successCount
		case <-time.After(30 * time.Second):
			t.Fatal("Concurrent retry test timed out")
		}
	}

	totalOperations := numGoroutines * operationsPerGoroutine
	successRate := float64(totalSuccesses) / float64(totalOperations)

	t.Logf("Concurrent execution: %d/%d operations succeeded (%.2f%%)",
		totalSuccesses, totalOperations, successRate*100)

	// Should have high success rate due to retries
	assert.GreaterOrEqual(t, successRate, 0.8, "Success rate should be at least 80%")

	// Verify stats are consistent
	stats := handler.GetStats()
	assert.NotEmpty(t, stats, "Should have recorded stats")
}

// TestRetryHandler_EdgeCases covers unusual inputs to ExecuteWithRetry: a
// nil operation (expected to panic), an empty operation type, and a
// 1000-byte operation type.
func TestRetryHandler_EdgeCases(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	t.Run("nil operation", func(t *testing.T) {
		assert.Panics(t, func() {
			handler.ExecuteWithRetry(context.Background(), "nil_op", nil)
		})
	})

	t.Run("empty operation type", func(t *testing.T) {
		operation := func(ctx context.Context, attempt int) error {
			return nil
		}

		result := handler.ExecuteWithRetry(context.Background(), "", operation)
		assert.True(t, result.Success)
	})

	t.Run("very long operation type", func(t *testing.T) {
		// A zeroed byte slice converts to a string of 1000 NUL bytes.
		longName := string(make([]byte, 1000))
		operation := func(ctx context.Context, attempt int) error {
			return nil
		}

		result := handler.ExecuteWithRetry(context.Background(), longName, operation)
		assert.True(t, result.Success)
	})
}