// mev-beta/orig/internal/recovery/retry_handler_test.go

package recovery

import (
	"context"
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/fraktal/mev-beta/internal/logger"
)

func TestRetryHandler_ExecuteWithRetry_Success(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		if attempts == 2 {
			return nil // Success on second attempt
		}
		return errors.New("temporary failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.True(t, result.Success)
	assert.Equal(t, 2, result.Attempts)
	assert.Nil(t, result.LastError)
	assert.Equal(t, 2, attempts)
}

// TestRetryHandler_ExecuteWithRetry_MaxAttemptsReached checks that an
// operation which always fails is retried up to the default limit (three
// attempts, per the assertions below) and that the result carries the last
// error returned by the operation.
func TestRetryHandler_ExecuteWithRetry_MaxAttemptsReached(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	attempts := 0
	operation := func(_ context.Context, _ int) error {
		attempts++
		return errors.New("persistent failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.False(t, result.Success)
	assert.Equal(t, 3, result.Attempts) // Default max attempts
	// require aborts the test on a nil error instead of letting the message
	// comparison below dereference it and panic.
	require.NotNil(t, result.LastError)
	assert.EqualError(t, result.LastError, "persistent failure")
	assert.Equal(t, 3, attempts)
}

// TestRetryHandler_ExecuteWithRetry_ContextCanceled checks that cancelling the
// context from inside the operation (on its second call) produces a failed
// result with a non-nil error and no more than three recorded attempts.
func TestRetryHandler_ExecuteWithRetry_ContextCanceled(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	ctx, cancel := context.WithCancel(context.Background())
	// Release the context even if the operation never reaches its second
	// call; calling cancel more than once is safe.
	defer cancel()

	attempts := 0
	// The context parameter is ignored on purpose so it cannot be confused
	// with the outer ctx being cancelled.
	operation := func(_ context.Context, _ int) error {
		attempts++
		if attempts == 2 {
			cancel() // Cancel context on second attempt
		}
		return errors.New("failure")
	}

	result := handler.ExecuteWithRetry(ctx, "test_operation", operation)

	assert.False(t, result.Success)
	assert.LessOrEqual(t, result.Attempts, 3)
	assert.NotNil(t, result.LastError)
}

func TestRetryHandler_ExecuteWithRetry_CustomConfig(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Set custom configuration
	customConfig := RetryConfig{
		MaxAttempts:       5,
		InitialDelay:      10 * time.Millisecond,
		MaxDelay:          100 * time.Millisecond,
		BackoffFactor:     2.0,
		JitterEnabled:     false,
		TimeoutPerAttempt: 1 * time.Second,
	}
	handler.SetConfig("custom_operation", customConfig)

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		return errors.New("persistent failure")
	}

	start := time.Now()
	result := handler.ExecuteWithRetry(context.Background(), "custom_operation", operation)
	duration := time.Since(start)

	assert.False(t, result.Success)
	assert.Equal(t, 5, result.Attempts) // Custom max attempts
	assert.Equal(t, 5, attempts)

	// Should have taken some time due to delays (at least 150ms for delays)
	expectedMinDuration := 10*time.Millisecond + 20*time.Millisecond + 40*time.Millisecond + 80*time.Millisecond
	assert.GreaterOrEqual(t, duration, expectedMinDuration)
}

func TestRetryHandler_ExecuteWithRetry_Disabled(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)
	handler.Disable()

	attempts := 0
	operation := func(ctx context.Context, attempt int) error {
		attempts++
		return errors.New("failure")
	}

	result := handler.ExecuteWithRetry(context.Background(), "test_operation", operation)

	assert.False(t, result.Success)
	assert.Equal(t, 1, result.Attempts) // Only one attempt when disabled
	assert.Equal(t, 1, attempts)
}

func TestRetryHandler_CalculateDelay(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	config := RetryConfig{
		InitialDelay:  100 * time.Millisecond,
		MaxDelay:      1 * time.Second,
		BackoffFactor: 2.0,
		JitterEnabled: false,
	}

	tests := []struct {
		attempt     int
		expectedMin time.Duration
		expectedMax time.Duration
	}{
		{1, 100 * time.Millisecond, 100 * time.Millisecond},
		{2, 200 * time.Millisecond, 200 * time.Millisecond},
		{3, 400 * time.Millisecond, 400 * time.Millisecond},
		{4, 800 * time.Millisecond, 800 * time.Millisecond},
		{5, 1 * time.Second, 1 * time.Second}, // Should be capped at MaxDelay
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
			delay := handler.calculateDelay(config, tt.attempt)
			assert.GreaterOrEqual(t, delay, tt.expectedMin)
			assert.LessOrEqual(t, delay, tt.expectedMax)
		})
	}
}

func TestRetryHandler_CalculateDelay_WithJitter(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	config := RetryConfig{
		InitialDelay:  100 * time.Millisecond,
		MaxDelay:      1 * time.Second,
		BackoffFactor: 2.0,
		JitterEnabled: true,
	}

	// Test jitter variation
	delays := make([]time.Duration, 10)
	for i := 0; i < 10; i++ {
		delays[i] = handler.calculateDelay(config, 2) // 200ms base
	}

	// Should have some variation due to jitter
	allSame := true
	for i := 1; i < len(delays); i++ {
		if delays[i] != delays[0] {
			allSame = false
			break
		}
	}
	assert.False(t, allSame, "Jitter should cause variation in delays")

	// All delays should be reasonable (within 10% of base)
	baseDelay := 200 * time.Millisecond
	for _, delay := range delays {
		assert.GreaterOrEqual(t, delay, baseDelay*9/10) // 10% below
		assert.LessOrEqual(t, delay, baseDelay*11/10)   // 10% above
	}
}

func TestRetryHandler_GetStats(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Execute some operations
	successOp := func(ctx context.Context, attempt int) error {
		return nil
	}
	failOp := func(ctx context.Context, attempt int) error {
		return errors.New("failure")
	}

	handler.ExecuteWithRetry(context.Background(), "test_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "test_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "test_fail", failOp)

	stats := handler.GetStats()

	// Check success stats
	successStats := stats["test_success"]
	require.NotNil(t, successStats)
	assert.Equal(t, 2, successStats.TotalAttempts)
	assert.Equal(t, 2, successStats.SuccessfulRetries)
	assert.Equal(t, 0, successStats.FailedRetries)

	// Check failure stats
	failStats := stats["test_fail"]
	require.NotNil(t, failStats)
	assert.Equal(t, 3, failStats.TotalAttempts) // Default max attempts
	assert.Equal(t, 0, failStats.SuccessfulRetries)
	assert.Equal(t, 1, failStats.FailedRetries)
}

func TestRetryHandler_GetHealthSummary(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	// Execute some operations to generate stats
	successOp := func(ctx context.Context, attempt int) error {
		return nil
	}
	partialFailOp := func(ctx context.Context, attempt int) error {
		if attempt < 2 {
			return errors.New("temporary failure")
		}
		return nil
	}

	// 2 immediate successes
	handler.ExecuteWithRetry(context.Background(), "immediate_success", successOp)
	handler.ExecuteWithRetry(context.Background(), "immediate_success", successOp)

	// 1 success after retry
	handler.ExecuteWithRetry(context.Background(), "retry_success", partialFailOp)

	summary := handler.GetHealthSummary()

	assert.True(t, summary["enabled"].(bool))
	assert.Equal(t, 2, summary["total_operations"].(int))
	assert.Equal(t, 2, summary["healthy_operations"].(int))
	assert.Equal(t, 0, summary["unhealthy_operations"].(int))

	// Check operation details
	details := summary["operation_details"].(map[string]interface{})

	immediateDetails := details["immediate_success"].(map[string]interface{})
	assert.Equal(t, 1.0, immediateDetails["success_rate"].(float64))
	assert.Equal(t, 1.0, immediateDetails["average_attempts"].(float64))
	assert.True(t, immediateDetails["is_healthy"].(bool))

	retryDetails := details["retry_success"].(map[string]interface{})
	assert.Equal(t, 1.0, retryDetails["success_rate"].(float64))
	assert.Equal(t, 2.0, retryDetails["average_attempts"].(float64))
	assert.True(t, retryDetails["is_healthy"].(bool)) // Still healthy despite retries
}

// TestRetryHandler_ConcurrentExecution drives one shared handler from many
// goroutines at once, each using its own operation type, and checks that the
// overall success rate is at least 80% and that stats were recorded. Run with
// -race to surface data races in the handler.
func TestRetryHandler_ConcurrentExecution(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	const numGoroutines = 50
	const operationsPerGoroutine = 20

	// Buffered to numGoroutines so a worker can always deliver its tally and
	// exit, even if the test has already given up waiting.
	successCount := make(chan int, numGoroutines)

	operation := func(_ context.Context, attempt int) error {
		// Roughly 80% of first attempts succeed; the clock's nanosecond
		// value serves as a cheap source of variation.
		if attempt <= 1 && time.Now().UnixNano()%5 != 0 {
			return nil
		}
		if attempt == 2 {
			return nil // Always succeed on second attempt
		}
		return errors.New("failure")
	}

	// Launch concurrent retry operations
	for i := 0; i < numGoroutines; i++ {
		go func(id int) {
			opType := fmt.Sprintf("concurrent_op_%d", id)

			successes := 0
			for j := 0; j < operationsPerGoroutine; j++ {
				result := handler.ExecuteWithRetry(context.Background(), opType, operation)
				if result.Success {
					successes++
				}
			}
			successCount <- successes
		}(i)
	}

	// Collect results under one deadline for the whole test; a timer created
	// inside the loop would restart the 30s window after every result.
	timeout := time.After(30 * time.Second)
	totalSuccesses := 0
	for i := 0; i < numGoroutines; i++ {
		select {
		case n := <-successCount:
			totalSuccesses += n
		case <-timeout:
			t.Fatal("Concurrent retry test timed out")
		}
	}

	totalOperations := numGoroutines * operationsPerGoroutine
	successRate := float64(totalSuccesses) / float64(totalOperations)

	t.Logf("Concurrent execution: %d/%d operations succeeded (%.2f%%)",
		totalSuccesses, totalOperations, successRate*100)

	// Should have high success rate due to retries
	assert.GreaterOrEqual(t, successRate, 0.8, "Success rate should be at least 80%")

	// Verify stats are consistent
	stats := handler.GetStats()
	assert.NotEmpty(t, stats, "Should have recorded stats")
}

func TestRetryHandler_EdgeCases(t *testing.T) {
	log := logger.New("debug", "text", "")
	handler := NewRetryHandler(log)

	t.Run("nil operation", func(t *testing.T) {
		assert.Panics(t, func() {
			handler.ExecuteWithRetry(context.Background(), "nil_op", nil)
		})
	})

	t.Run("empty operation type", func(t *testing.T) {
		operation := func(ctx context.Context, attempt int) error {
			return nil
		}
		result := handler.ExecuteWithRetry(context.Background(), "", operation)
		assert.True(t, result.Success)
	})

	t.Run("very long operation type", func(t *testing.T) {
		longName := string(make([]byte, 1000))
		operation := func(ctx context.Context, attempt int) error {
			return nil
		}
		result := handler.ExecuteWithRetry(context.Background(), longName, operation)
		assert.True(t, result.Success)
	})
}