mev-beta/pkg/calldata/multicall.go

package calldata

import (
	"context"
	"encoding/binary"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"math/big"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethereum/go-ethereum/accounts/abi"
	"github.com/ethereum/go-ethereum/common"

	"github.com/fraktal/mev-beta/internal/validation"
	"github.com/fraktal/mev-beta/pkg/common/selectors"
)

// safeConvertUint64ToInt narrows v to an int. Values above math.MaxInt32 are
// saturated to math.MaxInt32, so the result is always non-negative and fits in
// an int regardless of the platform's int width.
func safeConvertUint64ToInt(v uint64) int {
	const ceiling = math.MaxInt32
	if v <= ceiling {
		return int(v)
	}
	return ceiling
}

// Sentinel errors returned by decodeMulticallCalls:
// errInvalidMulticallPayload when the ABI argument candidates could not be
// prepared, and errNoMulticallData when no candidate layout yields a non-empty
// list of calls.
var (
	errInvalidMulticallPayload = errors.New("invalid multicall payload")
	errNoMulticallData         = errors.New("no multicall data found")
)

// AddressValidationCache memoizes the outcome of token-address validation so
// the same address is not validated repeatedly.
//
// goodAddresses and badAddresses map an address to the time its verdict was
// recorded; a verdict is only honored while it is younger than cacheTimeout.
// stats counts lookups that were answered from the cache (hits) and lookups
// that were not (misses).
type AddressValidationCache struct {
	goodAddresses sync.Map // map[common.Address]time.Time
	badAddresses  sync.Map // map[common.Address]time.Time
	cacheTimeout  time.Duration
	stats         struct {
		hits   atomic.Int64
		misses atomic.Int64
	}
}

// Process-wide validation cache. validationCacheInstance is created lazily by
// getValidationCache; validationCacheOnce guarantees it is built exactly once.
var (
	validationCacheInstance *AddressValidationCache
	validationCacheOnce     sync.Once
)

// getValidationCache returns the shared AddressValidationCache, building it on
// first use with a ten-minute entry lifetime.
func getValidationCache() *AddressValidationCache {
	initCache := func() {
		cache := new(AddressValidationCache)
		cache.cacheTimeout = 10 * time.Minute // Cache results for 10 minutes
		validationCacheInstance = cache
	}
	validationCacheOnce.Do(initCache)
	return validationCacheInstance
}

// IsKnownGood reports whether addr has a cached "good" verdict that is still
// younger than the cache timeout. An expired verdict is evicted. Every call
// increments either the hit counter (true) or the miss counter (false).
func (c *AddressValidationCache) IsKnownGood(addr common.Address) bool {
	if cached, found := c.goodAddresses.Load(addr); found {
		if storedAt, isTime := cached.(time.Time); isTime {
			if age := time.Since(storedAt); age >= c.cacheTimeout {
				// The verdict has outlived its lifetime: evict it and fall
				// through to the miss path.
				c.goodAddresses.Delete(addr)
			} else {
				c.stats.hits.Add(1)
				return true
			}
		}
	}
	c.stats.misses.Add(1)
	return false
}

// IsKnownBad reports whether addr has a cached "bad" verdict that is still
// younger than the cache timeout. An expired verdict is evicted. Every call
// increments either the hit counter (true) or the miss counter (false).
func (c *AddressValidationCache) IsKnownBad(addr common.Address) bool {
	cached, found := c.badAddresses.Load(addr)
	// When nothing was found cached is nil and the assertion simply fails.
	storedAt, isTime := cached.(time.Time)

	switch {
	case !found || !isTime:
		// No usable entry for this address.
	case time.Since(storedAt) < c.cacheTimeout:
		c.stats.hits.Add(1)
		return true
	default:
		// Expired, remove from cache
		c.badAddresses.Delete(addr)
	}

	c.stats.misses.Add(1)
	return false
}

// MarkGood records addr as valid as of now and drops any "bad" verdict that
// was cached for it.
func (c *AddressValidationCache) MarkGood(addr common.Address) {
	recordedAt := time.Now()
	c.goodAddresses.Store(addr, recordedAt)
	// A good verdict supersedes an earlier bad one.
	c.badAddresses.Delete(addr)
}

// MarkBad records addr as invalid as of now and drops any "good" verdict that
// was cached for it.
func (c *AddressValidationCache) MarkBad(addr common.Address) {
	recordedAt := time.Now()
	c.badAddresses.Store(addr, recordedAt)
	// A bad verdict supersedes an earlier good one.
	c.goodAddresses.Delete(addr)
}

// GetStats returns the cumulative number of cache lookups that hit and missed.
func (c *AddressValidationCache) GetStats() (hits, misses int64) {
	hits = c.stats.hits.Load()
	misses = c.stats.misses.Load()
	return hits, misses
}

// CleanupCorruptedAddresses evicts every cached verdict, bad and good alike,
// whose age exceeds the cache timeout. Entries whose stored value is not a
// time.Time are left untouched.
func (c *AddressValidationCache) CleanupCorruptedAddresses() {
	sweepStart := time.Now()

	// purge removes the expired entries of a single verdict map.
	purge := func(entries *sync.Map) {
		entries.Range(func(addr, recorded interface{}) bool {
			storedAt, isTime := recorded.(time.Time)
			if isTime && sweepStart.Sub(storedAt) > c.cacheTimeout {
				entries.Delete(addr)
			}
			return true
		})
	}

	purge(&c.badAddresses)
	purge(&c.goodAddresses)
}

// ClearAllBadAddresses drops every cached "bad" verdict regardless of age
// (intended for emergency cleanup). Good verdicts are not affected.
func (c *AddressValidationCache) ClearAllBadAddresses() {
	bad := &c.badAddresses
	bad.Range(func(addr, _ interface{}) bool {
		bad.Delete(addr)
		return true
	})
}

// GetCacheHealth returns a snapshot of cache health metrics: the number of
// cached bad/good verdicts, how many of each have outlived the cache timeout
// but are still stored, the hit/miss counters, the hit rate as a percentage,
// and the configured timeout in minutes.
//
// The hit rate is reported as 0 when no lookups have been recorded yet, which
// matches GetCacheStats and avoids producing NaN (a value encoding/json cannot
// marshal).
func (c *AddressValidationCache) GetCacheHealth() map[string]interface{} {
	now := time.Now()

	// tally counts the entries of one verdict map and how many of them are
	// older than the cache timeout.
	tally := func(entries *sync.Map) (total, expired int) {
		entries.Range(func(_, recorded interface{}) bool {
			total++
			if storedAt, ok := recorded.(time.Time); ok && now.Sub(storedAt) > c.cacheTimeout {
				expired++
			}
			return true
		})
		return total, expired
	}

	badCount, expiredBadCount := tally(&c.badAddresses)
	goodCount, expiredGoodCount := tally(&c.goodAddresses)

	hits, misses := c.GetStats()
	hitRate := 0.0
	if total := hits + misses; total > 0 {
		hitRate = float64(hits) / float64(total) * 100
	}

	return map[string]interface{}{
		"bad_addresses":         badCount,
		"good_addresses":        goodCount,
		"expired_bad":           expiredBadCount,
		"expired_good":          expiredGoodCount,
		"hit_rate_percent":      hitRate,
		"cache_hits":            hits,
		"cache_misses":          misses,
		"cache_timeout_minutes": c.cacheTimeout.Minutes(),
	}
}

// MulticallContext carries metadata useful when extracting tokens and logging diagnostics.
// All fields are optional: the diagnostic recorders in this file either omit an
// empty string field or log it as "n/a".
type MulticallContext struct {
	TxHash      string
	Protocol    string
	Stage       string
	BlockNumber uint64
}

// ExtractTokensFromMulticall walks a multicall payload (function parameters without selector)
// and returns up to two unique ERC-20 style token addresses discovered inside
// the inner calls. The function understands the common Uniswap v2/v3 selectors.
//
// It is shorthand for ExtractTokensFromMulticallWithContext with a nil context.
func ExtractTokensFromMulticall(data []byte) ([]common.Address, error) {
	return ExtractTokensFromMulticallWithContext(data, nil)
}

// trimSelector normalizes a hex selector for comparison: it lower-cases s and
// then removes a single leading "0x" if one is present.
func trimSelector(s string) string {
	lowered := strings.ToLower(s)
	return strings.TrimPrefix(lowered, "0x")
}

// ExtractTokensFromMulticallWithContext behaves like ExtractTokensFromMulticall while including contextual metadata.
// It delegates to ExtractTokensFromMulticallWithRecovery with recovery enabled;
// ctx may be nil.
func ExtractTokensFromMulticallWithContext(data []byte, ctx *MulticallContext) ([]common.Address, error) {
	return ExtractTokensFromMulticallWithRecovery(data, ctx, true)
}

// ExtractTokensFromMulticallWithRecovery extracts token addresses from a
// multicall payload, optionally falling back to looser strategies when the
// structured decode yields nothing.
//
// Strategy order:
//  1. Structured decoding via DecodeSwapCallsFromMulticall, validated at the
//     moderate level.
//  2. (recovery only) Heuristic word scanning of each inner call; used only if
//     it proposes at least two candidates, which are then re-validated at the
//     moderate level.
//  3. (recovery only) Pattern scan over the raw payload when it is at least
//     64 bytes long.
//
// With enableRecovery false the function returns the structured decode error,
// or an empty non-nil slice with a nil error if decoding succeeded but found
// no tokens. With recovery enabled and every strategy exhausted it returns
// the wrapped decode error, or a generic error if decoding itself succeeded.
func ExtractTokensFromMulticallWithRecovery(data []byte, ctx *MulticallContext, enableRecovery bool) ([]common.Address, error) {
	// Primary attempt: structured ABI decoding.
	swaps, decodeErr := DecodeSwapCallsFromMulticall(data, ctx)
	if decodeErr == nil && len(swaps) > 0 {
		if tokens := extractTokensFromSwaps(swaps, data, ctx, ValidationLevelModerate); len(tokens) > 0 {
			return tokens, nil
		}
	}

	if !enableRecovery {
		if decodeErr != nil {
			return nil, decodeErr
		}
		return []common.Address{}, nil
	}

	// Recovery 1: decode the inner calls and scan their words heuristically.
	if calls, callErr := decodeMulticallCalls(data); callErr == nil && len(calls) > 0 {
		validator := getAddressValidator()
		if candidates := heuristicExtractTokens(calls, validator); len(candidates) >= 2 {
			// Re-check the heuristic findings against the moderate threshold.
			confirmed := make([]common.Address, 0, 2)
			for _, candidate := range candidates {
				ok, _ := validateTokenCandidateWithLevel(candidate, validator, ValidationLevelModerate)
				if !ok {
					continue
				}
				confirmed = append(confirmed, candidate)
				if len(confirmed) >= 2 {
					break
				}
			}
			if len(confirmed) > 0 {
				return confirmed, nil
			}
		}
	}

	// Recovery 2: pattern scan for payloads of unknown structure.
	if len(data) >= 64 {
		if tokens := extractTokensByPattern(data, ctx); len(tokens) > 0 {
			return tokens, nil
		}
	}

	// Everything failed: surface the original decode error when there is one.
	if decodeErr != nil {
		return nil, fmt.Errorf("multicall parsing failed, recovery unsuccessful: %w", decodeErr)
	}

	return []common.Address{}, fmt.Errorf("no tokens extracted from multicall data")
}

// extractTokensFromSwaps collects up to two distinct token addresses from the
// TokenIn/TokenOut fields of swaps, in encounter order. Each address is
// validated at the given level; nil swaps are skipped, and a rejected address
// is reported through logRejectedTokenCandidate at most once per invocation.
func extractTokensFromSwaps(swaps []*SwapCall, data []byte, ctx *MulticallContext, level ValidationLevel) []common.Address {
	validator := getAddressValidator()
	accepted := make(map[common.Address]struct{})
	reported := make(map[common.Address]struct{})
	tokens := make([]common.Address, 0, 2)

	for _, swap := range swaps {
		if swap == nil {
			continue
		}
		pair := [2]common.Address{swap.TokenIn, swap.TokenOut}
		for _, candidate := range pair {
			ok, why := validateTokenCandidateWithLevel(candidate, validator, level)
			if !ok {
				if _, already := reported[candidate]; already {
					continue
				}
				logRejectedTokenCandidate(candidate, why, data, ctx)
				reported[candidate] = struct{}{}
				continue
			}
			if _, dup := accepted[candidate]; dup {
				continue
			}
			accepted[candidate] = struct{}{}
			tokens = append(tokens, candidate)
			if len(tokens) >= 2 {
				return tokens[:2]
			}
		}
	}

	return tokens
}

// extractTokensByPattern is the last-resort extractor. It walks data in
// 32-byte words, interprets the low 20 bytes of each word as an address, and
// keeps the first two that pass strict validation and are not made of a single
// repeated byte value. ctx is currently unused.
func extractTokensByPattern(data []byte, ctx *MulticallContext) []common.Address {
	const wordSize = 32

	validator := getAddressValidator()
	found := make([]common.Address, 0, 2)

	for offset := 0; offset+wordSize <= len(data); offset += wordSize {
		tail := data[offset+12 : offset+wordSize]
		candidate := common.BytesToAddress(tail)

		// Pattern matches are the least trustworthy source, so validate strictly.
		ok, _ := validateTokenCandidateWithLevel(candidate, validator, ValidationLevelStrict)
		if !ok {
			continue
		}
		// Reject byte patterns that cannot be a real contract address.
		if isAllZeros(tail) || isAllSame(tail) {
			continue
		}

		found = append(found, candidate)
		if len(found) >= 2 {
			break
		}
	}

	return found
}

// isAllSame reports whether every byte of data has the same value. Empty and
// single-byte slices trivially satisfy this and yield true.
func isAllSame(data []byte) bool {
	for i := 1; i < len(data); i++ {
		if data[i] != data[0] {
			return false
		}
	}
	return true
}

// DecodeMulticallCalls returns the raw call payloads contained within a multicall parameter blob.
// The provided data must exclude the 4-byte function selector (i.e., start at the encoded arguments).
// It is the exported entry point for decodeMulticallCalls and returns its
// result and error unchanged.
func DecodeMulticallCalls(data []byte) ([][]byte, error) {
	return decodeMulticallCalls(data)
}

// decodeMulticallCalls decodes the ABI-encoded bytes[] argument of a multicall
// invocation. data must not include the 4-byte function selector.
//
// Each known argument layout is tried in order and the first one that unpacks
// to a non-empty list of calls wins. It returns errInvalidMulticallPayload if
// the layouts could not be prepared and errNoMulticallData if none of them
// produced any calls.
func decodeMulticallCalls(data []byte) ([][]byte, error) {
	layouts, prepErr := prepareMulticallCandidates()
	if prepErr != nil {
		return nil, errInvalidMulticallPayload
	}

	for idx := range layouts {
		layout := &layouts[idx]

		unpacked, unpackErr := layout.args.Unpack(data)
		if unpackErr != nil {
			continue
		}
		if layout.dataIndex >= len(unpacked) {
			continue
		}

		calls, ok := toByteSliceSlice(unpacked[layout.dataIndex])
		if ok && len(calls) > 0 {
			return calls, nil
		}
	}

	return nil, errNoMulticallData
}

// extractTokensFromCall dispatches on the 4-byte selector of call and, for the
// swap and multicall selectors it recognizes, returns the token addresses found
// in the remaining payload. Calls shorter than a selector yield an error;
// unrecognized selectors yield (nil, nil).
//
// NOTE(review): Uniswap V3 exactOutput encodes its path in reverse (tokenOut
// first), yet it shares the exactInput extractor here — confirm callers expect
// path order rather than in/out order.
// NOTE(review): swapExactETHForTokens presumably has a single leading uint
// argument, so its path offset would sit in the second word rather than the
// third one that extractSwapPathTokens reads — verify against the router ABI.
func extractTokensFromCall(call []byte) ([]common.Address, error) {
	const selectorLen = 4
	if len(call) < selectorLen {
		return nil, errors.New("call too short")
	}

	payload := call[selectorLen:]

	switch hex.EncodeToString(call[:selectorLen]) {
	case trimSelector(selectors.UniswapV3MulticallWithDeadline),
		trimSelector(selectors.UniswapV3MulticallWithBlockhash),
		trimSelector(selectors.UniswapV3Multicall):
		// Nested multicall: recurse into the inner payload.
		return ExtractTokensFromMulticall(payload)

	case trimSelector(selectors.UniswapV3ExactInputSingle),
		trimSelector(selectors.UniswapV3ExactInputSingleLegacy),
		trimSelector(selectors.UniswapV3ExactOutputSingle):
		return extractExactInputSingleTokens(payload), nil

	case trimSelector(selectors.UniswapV3ExactInput),
		trimSelector(selectors.UniswapV3ExactInputLegacy),
		trimSelector(selectors.UniswapV3ExactOutput):
		return extractExactInputTokens(payload), nil

	case trimSelector(selectors.UniswapV2SwapExactTokensForTokens),
		trimSelector(selectors.UniswapV2SwapTokensForExactTokens),
		trimSelector(selectors.UniswapV2SwapExactETHForTokens),
		trimSelector(selectors.UniswapV2SwapExactTokensForETH),
		trimSelector(selectors.UniswapV2SwapExactTokensForTokensSupportingFee):
		return extractSwapPathTokens(payload), nil

	default:
		return nil, nil
	}
}

// extractExactInputSingleTokens reads the low 20 bytes of the first two 32-byte
// words of payload as the input and output token addresses, in that order. It
// returns nil when payload holds fewer than two words.
func extractExactInputSingleTokens(payload []byte) []common.Address {
	const twoWords = 64
	if len(payload) < twoWords {
		return nil
	}

	firstWord, secondWord := payload[:32], payload[32:twoWords]
	return []common.Address{
		common.BytesToAddress(firstWord[12:]),
		common.BytesToAddress(secondWord[12:]),
	}
}

// extractExactInputTokens extracts the first and last token of a packed swap
// path embedded in payload.
//
// Layout read by this function: the first 32-byte word is the byte offset of
// the path's length word; the path bytes follow that length word directly.
// The path must be at least 40 bytes long (two 20-byte addresses). The first
// 20 bytes are returned as the first token and the last 20 bytes as the
// second.
//
// It returns nil when payload is shorter than 64 bytes, when an offset or
// length word does not fit in 64 bits, or when the described region does not
// lie entirely inside payload. All bounds are checked with subtraction on
// already-validated values so the arithmetic cannot overflow int, including on
// 32-bit platforms.
func extractExactInputTokens(payload []byte) []common.Address {
	const (
		wordSize   = 32
		addrSize   = 20
		minPathLen = 2 * addrSize
	)

	if len(payload) < 2*wordSize {
		return nil
	}

	pathOffset, ok := readUint64(payload[:wordSize])
	// The length word must fit entirely inside the payload.
	if !ok || pathOffset > uint64(len(payload)-wordSize) {
		return nil
	}
	start := int(pathOffset) // safe: bounded by len(payload) above

	length, ok := readUint64(payload[start : start+wordSize])
	if !ok || length < minPathLen {
		return nil
	}

	pathStart := start + wordSize
	if length > uint64(len(payload)-pathStart) {
		return nil
	}

	path := payload[pathStart : pathStart+int(length)]
	tokenIn := common.BytesToAddress(path[:addrSize])
	tokenOut := common.BytesToAddress(path[len(path)-addrSize:])
	return []common.Address{tokenIn, tokenOut}
}

// extractSwapPathTokens extracts the first and last entries of an ABI-encoded
// address[] path embedded in payload.
//
// Layout read by this function: the third 32-byte word (bytes 64..96) is the
// byte offset of the array's length word; the array elements follow as one
// 32-byte word each, with the address in the low 20 bytes.
//
// It returns nil when payload is shorter than 96 bytes, when an offset or
// length word does not fit in 64 bits, when the array has fewer than two
// elements, or when any element would lie outside payload. The whole array
// range is validated once up front (without overflow-prone additions), after
// which only the two needed elements are read.
func extractSwapPathTokens(payload []byte) []common.Address {
	const wordSize = 32

	if len(payload) < 3*wordSize {
		return nil
	}

	pathOffset, ok := readUint64(payload[2*wordSize : 3*wordSize])
	// The length word must fit entirely inside the payload.
	if !ok || pathOffset > uint64(len(payload)-wordSize) {
		return nil
	}
	start := int(pathOffset) // safe: bounded by len(payload) above

	count, ok := readUint64(payload[start : start+wordSize])
	if !ok || count < 2 {
		return nil
	}

	first := start + wordSize
	// Every one of the count elements must be present in the payload.
	if count > uint64(len(payload)-first)/wordSize {
		return nil
	}
	last := first + (int(count)-1)*wordSize

	return []common.Address{
		common.BytesToAddress(payload[first+12 : first+wordSize]),
		common.BytesToAddress(payload[last+12 : last+wordSize]),
	}
}

// readUint64 decodes a 32-byte big-endian word whose value fits in 64 bits.
// It returns (0, false) when word is not exactly 32 bytes long or when any of
// its upper 24 bytes is non-zero; otherwise it returns the value of the low
// 8 bytes and true.
func readUint64(word []byte) (uint64, bool) {
	const (
		wordSize = 32
		lowBytes = 8
	)
	if len(word) != wordSize {
		return 0, false
	}

	high, low := word[:wordSize-lowBytes], word[wordSize-lowBytes:]
	for i := range high {
		if high[i] != 0 {
			return 0, false
		}
	}

	return binary.BigEndian.Uint64(low), true
}

// Lazily initialized package state and static lookup tables.
//
// addressValidator is built once by getAddressValidator. multicallCandidates
// and multicallABIError are populated once by prepareMulticallCandidates.
// knownNonTokenAddresses lists addresses that validateTokenCandidateWithLevel
// always rejects as token candidates (router-style contracts and the zero
// address).
var (
	addressValidatorOnce   sync.Once
	addressValidator       *validation.AddressValidator
	multicallABIOnce       sync.Once
	multicallCandidates    []abiArgumentCandidate
	multicallABIError      error
	knownNonTokenAddresses = map[common.Address]struct{}{
		common.HexToAddress("0xE592427A0AEce92De3Edee1F18E0157C05861564"): {}, // Uniswap V3 Router
		common.HexToAddress("0x4752ba5dbc23f44d87826276bf6fd6b1c372ad24"): {}, // Uniswap V2 Router
		common.HexToAddress("0x1b02dA8Cb0d097eB8D57A175b88c7D8b47997506"): {}, // SushiSwap Router
		common.HexToAddress("0xA51afAFe0263b40EdaEf0Df8781eA9aa03E381a3"): {}, // Universal Router
		common.HexToAddress("0x1111111254EEB25477B68fb85Ed929f73A960582"): {}, // 1inch Router v5
		common.HexToAddress("0xC36442b4a4522E871399CD717aBDD847Ab11FE88"): {}, // Uniswap V3 Position Manager
		common.HexToAddress("0x0000000000000000000000000000000000000000"): {}, // Zero address
	}
)

// getAddressValidator returns the shared validation.AddressValidator, creating
// it on first use.
func getAddressValidator() *validation.AddressValidator {
	build := func() {
		addressValidator = validation.NewAddressValidator()
	}
	addressValidatorOnce.Do(build)
	return addressValidator
}

// abiArgumentCandidate describes one possible argument layout of a multicall
// function: args is the ABI argument list used for unpacking, and dataIndex is
// the position of the bytes[] call array within the unpacked values.
type abiArgumentCandidate struct {
	args      abi.Arguments
	dataIndex int
}

// prepareMulticallCandidates builds, exactly once, the list of argument layouts
// that decodeMulticallCalls tries when unpacking a multicall payload:
//
//	(bytes[] data)
//	(uint256 deadline, bytes[] data)
//	(bytes32 previousBlockhash, bytes[] data)
//	(uint256 deadline, bytes[] data, bool revertOnFail)
//
// If any ABI type cannot be constructed the error is remembered and returned
// on this and every later call, alongside a nil candidate list.
func prepareMulticallCandidates() ([]abiArgumentCandidate, error) {
	multicallABIOnce.Do(func() {
		// Resolve the ABI types in a fixed order, stopping at the first failure.
		typeNames := [4]string{"uint256", "bytes32", "bytes[]", "bool"}
		var resolved [4]abi.Type
		for i, name := range typeNames {
			parsed, err := abi.NewType(name, "", nil)
			if err != nil {
				multicallABIError = err
				return
			}
			resolved[i] = parsed
		}

		deadline := abi.Argument{Name: "deadline", Type: resolved[0]}
		blockhash := abi.Argument{Name: "previousBlockhash", Type: resolved[1]}
		calls := abi.Argument{Name: "data", Type: resolved[2]}
		revertFlag := abi.Argument{Name: "revertOnFail", Type: resolved[3]}

		multicallCandidates = []abiArgumentCandidate{
			{args: abi.Arguments{calls}, dataIndex: 0},
			{args: abi.Arguments{deadline, calls}, dataIndex: 1},
			{args: abi.Arguments{blockhash, calls}, dataIndex: 1},
			{args: abi.Arguments{deadline, calls, revertFlag}, dataIndex: 1},
		}
	})

	return multicallCandidates, multicallABIError
}

// toByteSliceSlice converts an unpacked ABI value into [][]byte. A [][]byte is
// returned as-is; a []interface{} is converted element by element and rejected
// if any element is not a []byte. Any other dynamic type yields (nil, false).
func toByteSliceSlice(value interface{}) ([][]byte, bool) {
	if direct, ok := value.([][]byte); ok {
		return direct, true
	}

	items, ok := value.([]interface{})
	if !ok {
		return nil, false
	}

	converted := make([][]byte, len(items))
	for i, item := range items {
		chunk, isBytes := item.([]byte)
		if !isBytes {
			return nil, false
		}
		converted[i] = chunk
	}
	return converted, true
}

// heuristicExtractTokens scans the raw words of each inner call for values
// that look like token addresses and returns up to two distinct ones, in
// encounter order.
//
// For every call whose payload (after the 4-byte selector) is a non-empty
// multiple of 32 bytes, each word is inspected twice:
//   - as a potential left-padded address, unless it looks like an ABI offset;
//   - as a potential pointer to a length-prefixed array of at most 100 words,
//     whose elements are then inspected as potential addresses.
//
// Candidates are validated at the permissive level; rejected ones are reported
// through logRejectedTokenCandidate at most once per invocation.
func heuristicExtractTokens(calls [][]byte, validator *validation.AddressValidator) []common.Address {
	const (
		wordSize         = 32
		maxArrayElements = 100 // Reasonable limit for token arrays
	)

	accepted := make(map[common.Address]struct{})
	reported := make(map[common.Address]struct{})
	tokens := make([]common.Address, 0, 2)

	// consider inspects one 32-byte word as a potential address and records it
	// when it validates. It reports whether two tokens have now been collected.
	consider := func(word, source []byte, payloadLen int) bool {
		if looksLikeOffsetWord(word, payloadLen) {
			return false
		}
		candidate := common.BytesToAddress(word[12:wordSize])
		if candidate == (common.Address{}) {
			return false
		}

		ok, why := validateTokenCandidateWithLevel(candidate, validator, ValidationLevelPermissive)
		if !ok {
			if _, already := reported[candidate]; !already {
				logRejectedTokenCandidate(candidate, why, source, nil)
				reported[candidate] = struct{}{}
			}
			return false
		}

		if _, dup := accepted[candidate]; !dup {
			accepted[candidate] = struct{}{}
			tokens = append(tokens, candidate)
		}
		return len(tokens) >= 2
	}

	for _, call := range calls {
		if len(call) < 4 {
			continue
		}

		payload := call[4:]
		payloadLen := len(payload)

		// Only word-aligned, non-empty payloads are scanned.
		if payloadLen < wordSize || payloadLen%wordSize != 0 {
			continue
		}

		for start := 0; start+wordSize <= payloadLen; start += wordSize {
			word := payload[start : start+wordSize]

			if consider(word, call, payloadLen) {
				return tokens
			}

			// Second interpretation: the word may point at a length-prefixed array.
			ptr, fits := readUint64(word)
			if !fits {
				continue
			}
			offset := safeConvertUint64ToInt(ptr)
			if offset < 0 || offset >= payloadLen || offset+wordSize > payloadLen {
				continue
			}

			length, lengthOK := readUint64(payload[offset : offset+wordSize])
			if !lengthOK {
				continue
			}
			count := safeConvertUint64ToInt(length)
			if count > maxArrayElements || count < 0 {
				continue
			}

			for j := 0; j < count; j++ {
				elemStart := offset + wordSize + j*wordSize
				elemEnd := elemStart + wordSize

				// Stop at the first element that would fall outside the payload.
				if elemStart < 0 || elemEnd > payloadLen || elemStart >= elemEnd {
					break
				}

				if consider(payload[elemStart:elemEnd], call, payloadLen) {
					return tokens
				}
			}
		}
	}

	return tokens
}

// ValidationLevel represents different validation strictness levels.
// validateTokenCandidateWithLevel maps each level to the maximum corruption
// score it tolerates; an unrecognized level is treated as moderate.
type ValidationLevel int

const (
	// ValidationLevelStrict requires corruption score < 15 (for critical operations)
	ValidationLevelStrict ValidationLevel = iota
	// ValidationLevelModerate requires corruption score < 30 (for normal operations)
	ValidationLevelModerate
	// ValidationLevelPermissive requires corruption score < 50 (for heuristic operations)
	ValidationLevelPermissive
)

// validateTokenCandidate validates addr at ValidationLevelModerate. It returns
// whether the address is acceptable and, when it is not, a reason string.
func validateTokenCandidate(addr common.Address, validator *validation.AddressValidator) (bool, string) {
	return validateTokenCandidateWithLevel(addr, validator, ValidationLevelModerate)
}

// validateTokenCandidateWithLevel decides whether addr is an acceptable token
// candidate at the given strictness level. It returns the verdict and, for a
// rejection, a reason string ("" on acceptance).
//
// Checks, in order: zero address, cached verdict, the knownNonTokenAddresses
// blocklist, the looksSuspicious byte-pattern heuristic, and finally the
// validator's own result and corruption score. A nil validator skips the
// final step and accepts the address.
//
// The verdict cache is keyed by address only, so only verdicts that hold at
// every level are cached: rejections that do not depend on the level, scores
// at or above the permissive threshold (bad everywhere), and scores below the
// strict threshold (good everywhere). Scores in between are re-evaluated on
// each call so that a permissive pass can never satisfy a later strict check,
// and a strict rejection can never block a later permissive one.
func validateTokenCandidateWithLevel(addr common.Address, validator *validation.AddressValidator, level ValidationLevel) (bool, string) {
	const (
		strictMaxScore     = 15 // For critical financial operations
		moderateMaxScore   = 30 // For normal address validation
		permissiveMaxScore = 50 // For heuristic extraction
	)

	if addr == (common.Address{}) {
		return false, "zero_address"
	}

	// Consult the cache first to avoid repeated validation.
	cache := getValidationCache()
	if cache.IsKnownGood(addr) {
		return true, ""
	}
	if cache.IsKnownBad(addr) {
		return false, "cached_invalid"
	}

	if _, blocked := knownNonTokenAddresses[addr]; blocked {
		cache.MarkBad(addr)
		return false, "known_non_token_address"
	}

	if looksSuspicious(addr) {
		cache.MarkBad(addr)
		return false, "suspicious_pattern"
	}

	if validator == nil {
		cache.MarkGood(addr)
		return true, ""
	}

	result := validator.ValidateAddress(addr.Hex())
	if !result.IsValid {
		reason := "invalid_address"
		if len(result.ErrorMessages) > 0 {
			reason = fmt.Sprintf("invalid_address: %s", strings.Join(result.ErrorMessages, "; "))
		}
		cache.MarkBad(addr)
		return false, reason
	}

	// Graduated thresholds based on the requested level.
	var maxCorruptionScore int
	switch level {
	case ValidationLevelStrict:
		maxCorruptionScore = strictMaxScore
	case ValidationLevelModerate:
		maxCorruptionScore = moderateMaxScore
	case ValidationLevelPermissive:
		maxCorruptionScore = permissiveMaxScore
	default:
		maxCorruptionScore = moderateMaxScore // Default to moderate
	}

	if result.CorruptionScore >= maxCorruptionScore {
		reason := fmt.Sprintf("corruption_score_%d_exceeds_threshold_%d", result.CorruptionScore, maxCorruptionScore)
		if len(result.ErrorMessages) > 0 {
			reason = fmt.Sprintf("%s: %s", reason, strings.Join(result.ErrorMessages, "; "))
		}
		// Cache the rejection only if it would be a rejection at every level.
		if result.CorruptionScore >= permissiveMaxScore {
			cache.MarkBad(addr)
		}
		return false, reason
	}

	// Cache the acceptance only if it would be an acceptance at every level.
	if result.CorruptionScore < strictMaxScore {
		cache.MarkGood(addr)
	}
	return true, ""
}

// logRejectedTokenCandidate forwards a rejected candidate to
// recordCorruptedTokenCandidate when its reason is worth capturing, i.e. when
// the reason (case-insensitively) mentions "corruption", "invalid", "zero" or
// "suspicious". An empty reason, or one without any of those words, is ignored.
func logRejectedTokenCandidate(addr common.Address, reason string, payload []byte, ctx *MulticallContext) {
	if reason == "" {
		return
	}

	normalized := strings.ToLower(reason)
	for _, keyword := range [...]string{"corruption", "invalid", "zero", "suspicious"} {
		if strings.Contains(normalized, keyword) {
			recordCorruptedTokenCandidate(addr, reason, payload, ctx)
			return
		}
	}
}

// isLikelyValidToken reports whether addr passes validateTokenCandidate
// (moderate level), discarding the rejection reason.
func isLikelyValidToken(addr common.Address, validator *validation.AddressValidator) bool {
	valid, _ := validateTokenCandidate(addr, validator)
	return valid
}

// looksSuspicious applies byte-pattern heuristics to addr and reports whether
// it is unlikely to be a real contract address. An address is suspicious when
// any of the following holds:
//   - fewer than 4 of its bytes are non-zero;
//   - its first 4 bytes are zero and at most 6 bytes are non-zero;
//   - it ends in 8 or more zero bytes;
//   - more than 70% of its bytes are zero.
func looksSuspicious(addr common.Address) bool {
	raw := addr.Bytes()

	zeroCount := 0
	for i := range raw {
		if raw[i] == 0 {
			zeroCount++
		}
	}
	nonZero := len(raw) - zeroCount

	trailingZeros := 0
	for i := len(raw) - 1; i >= 0 && raw[i] == 0; i-- {
		trailingZeros++
	}

	switch {
	case nonZero < 4:
		return true
	case raw[0] == 0 && raw[1] == 0 && raw[2] == 0 && raw[3] == 0 && nonZero <= 6:
		return true
	case trailingZeros >= 8:
		return true
	case float64(len(raw)-nonZero)/float64(len(raw)) > 0.7:
		return true
	default:
		return false
	}
}

// looksLikeOffsetWord reports whether a 32-byte word is more plausibly an ABI
// offset (pointer) than an address or other value. A word qualifies when its
// numeric value fits in 64 bits, is non-zero, is a multiple of 32, and is no
// greater than payloadLen+64.
//
// It returns false for words that are not exactly 32 bytes long and for
// non-positive payload lengths.
//
// The value is range-checked with IsUint64 before it is narrowed: big.Int's
// Uint64 result is undefined for larger values, and narrowing blindly would
// misclassify an address whose low 8 bytes happen to look like a small offset.
func looksLikeOffsetWord(word []byte, payloadLen int) bool {
	if len(word) != 32 || payloadLen <= 0 {
		return false
	}

	value := new(big.Int).SetBytes(word)
	if !value.IsUint64() {
		// Some of the upper 24 bytes are set: too large to be a payload offset.
		return false
	}

	offset := value.Uint64()
	if offset == 0 || offset%32 != 0 {
		return false
	}

	// Pointers in ABI encoding are multiples of 32 and typically within payload length
	return offset <= uint64(payloadLen)+64
}

// Bookkeeping for the diagnostic recorders. The counters track how many
// samples recordSuspiciousMulticall and recordCorruptedTokenCandidate have been
// asked to capture; each Once guards the one-time creation of the diagnostics
// log directory by the corresponding recorder.
var (
	suspiciousMulticallCaptureCount atomic.Int32
	suspiciousMulticallOnce         sync.Once
	corruptedCandidateCaptureCount  atomic.Int32
	corruptedCandidateOnce          sync.Once
)

// recordSuspiciousMulticall appends a one-line diagnostic sample describing a
// suspicious multicall payload to logs/diagnostics/multicall_samples.log.
//
// At most 50 samples are written per process. The payload is hex-encoded and
// truncated to 2048 hex characters; missing context strings are logged as
// "n/a". Logging is best-effort: failures to create the directory or open the
// file are silently ignored.
//
// The counter is read before it is incremented so that it stops growing once
// the limit is reached; incrementing unconditionally would eventually wrap the
// 32-bit counter to a negative value and re-enable capturing.
func recordSuspiciousMulticall(data []byte, invalidCount int, ctx *MulticallContext) {
	const maxCaptures = 50
	if suspiciousMulticallCaptureCount.Load() >= maxCaptures {
		return
	}
	if suspiciousMulticallCaptureCount.Add(1) > maxCaptures {
		return
	}

	logDir := filepath.Join("logs", "diagnostics")
	suspiciousMulticallOnce.Do(func() {
		// Best effort: a failure here surfaces as an OpenFile error below.
		_ = os.MkdirAll(logDir, 0o755)
	})

	logFile := filepath.Join(logDir, "multicall_samples.log")
	f, err := os.OpenFile(logFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return
	}
	defer f.Close()

	hexPayload := hex.EncodeToString(data)
	const maxHexLen = 2048
	if len(hexPayload) > maxHexLen {
		hexPayload = hexPayload[:maxHexLen] + "...(truncated)"
	}

	var txHash, protocol, stage string
	var blockNumber uint64
	if ctx != nil {
		txHash = ctx.TxHash
		protocol = ctx.Protocol
		stage = ctx.Stage
		blockNumber = ctx.BlockNumber
	}
	if txHash == "" {
		txHash = "n/a"
	}
	if protocol == "" {
		protocol = "n/a"
	}
	if stage == "" {
		stage = "n/a"
	}

	entry := fmt.Sprintf("%s invalid_addresses=%d payload_len=%d tx_hash=%s protocol=%s stage=%s block=%d hex=%s\n",
		time.Now().Format(time.RFC3339), invalidCount, len(data),
		txHash, protocol, stage, blockNumber, hexPayload)
	// Diagnostics are best-effort; a failed write is not actionable here.
	_, _ = f.WriteString(entry)
}

// recordCorruptedTokenCandidate captures a rejected token candidate for later
// analysis. It appends a JSON line (address, reason, payload length and hex,
// capture index, and any non-empty context fields) to
// logs/diagnostics/corrupted_token_candidates.log and a human-readable warning
// to logs/mev_bot_errors.log.
//
// At most 200 candidates are captured per process. The payload hex is
// truncated to 2048 characters. Logging is best-effort: directory, encoding
// and write failures are silently ignored.
//
// The counter is read before it is incremented so that it stops growing once
// the limit is reached; incrementing unconditionally would eventually wrap the
// 32-bit counter to a negative value and re-enable capturing.
func recordCorruptedTokenCandidate(addr common.Address, reason string, payload []byte, ctx *MulticallContext) {
	const maxCaptures = 200
	if corruptedCandidateCaptureCount.Load() >= maxCaptures {
		return
	}
	captureIdx := corruptedCandidateCaptureCount.Add(1)
	if captureIdx > maxCaptures {
		return
	}

	logDir := filepath.Join("logs", "diagnostics")
	corruptedCandidateOnce.Do(func() {
		// Best effort: a failure here surfaces as append errors below.
		_ = os.MkdirAll(logDir, 0o755)
	})

	entry := map[string]interface{}{
		"timestamp":     time.Now().UTC().Format(time.RFC3339),
		"address":       addr.Hex(),
		"reason":        reason,
		"payload_len":   len(payload),
		"capture_index": captureIdx,
	}

	if ctx != nil {
		if ctx.TxHash != "" {
			entry["tx_hash"] = ctx.TxHash
		}
		if ctx.Protocol != "" {
			entry["protocol"] = ctx.Protocol
		}
		if ctx.Stage != "" {
			entry["stage"] = ctx.Stage
		}
		if ctx.BlockNumber != 0 {
			entry["block_number"] = ctx.BlockNumber
		}
	}

	if len(payload) > 0 {
		hexPayload := hex.EncodeToString(payload)
		const maxHexLen = 2048
		if len(hexPayload) > maxHexLen {
			hexPayload = hexPayload[:maxHexLen] + "...(truncated)"
		}
		entry["payload_hex"] = hexPayload
	}

	if data, err := json.Marshal(entry); err == nil {
		// Diagnostics are best-effort; a failed append is not actionable here.
		_ = appendToFile(filepath.Join(logDir, "corrupted_token_candidates.log"), append(data, '\n'))
	}

	errorLogMessage := fmt.Sprintf("%s [WARN] extractTokensGeneric: rejected candidate %s (%s) capture=%d payload_log=%s\n",
		time.Now().Format("2006/01/02 15:04:05"),
		addr.Hex(),
		reason,
		captureIdx,
		filepath.Join("logs", "diagnostics", "corrupted_token_candidates.log"),
	)
	// Diagnostics are best-effort; a failed append is not actionable here.
	_ = appendToFile(filepath.Join("logs", "mev_bot_errors.log"), []byte(errorLogMessage))
}

// appendToFile appends data to the file at path, creating the file with mode
// 0644 if it does not exist. Parent directories are not created.
//
// It returns the first error encountered while opening, writing or closing
// the file. The close error is reported because, for a file opened for
// writing, it can be the only indication that the data did not reach storage.
func appendToFile(path string, data []byte) (err error) {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier write error; otherwise surface the close error.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = f.Write(data)
	return err
}

// isAllZeros reports whether data consists solely of zero bytes; an empty
// slice yields true. It is used to reject zero addresses during extraction.
func isAllZeros(data []byte) bool {
	var combined byte
	for _, b := range data {
		combined |= b
	}
	return combined == 0
}

// GetCacheStats returns cache performance statistics: hit/miss counters, their
// sum, the hit rate as a percentage (0 when no lookups were recorded), the
// number of stored good and bad verdicts, and the timeout in whole minutes.
// Stored verdicts are counted regardless of whether they have expired.
func (c *AddressValidationCache) GetCacheStats() map[string]interface{} {
	// size counts the entries of one verdict map.
	size := func(entries *sync.Map) int {
		n := 0
		entries.Range(func(_, _ interface{}) bool {
			n++
			return true
		})
		return n
	}

	hits, misses := c.stats.hits.Load(), c.stats.misses.Load()
	total := hits + misses

	var hitRate float64
	if total > 0 {
		hitRate = float64(hits) / float64(total) * 100
	}

	goodCount := size(&c.goodAddresses)
	badCount := size(&c.badAddresses)

	stats := make(map[string]interface{}, 8)
	stats["cache_hits"] = hits
	stats["cache_misses"] = misses
	stats["total_requests"] = total
	stats["hit_rate_pct"] = hitRate
	stats["good_addresses"] = goodCount
	stats["bad_addresses"] = badCount
	stats["total_cached"] = goodCount + badCount
	stats["cache_timeout_min"] = int(c.cacheTimeout.Minutes())
	return stats
}

// StartAutomaticCleanup starts a background goroutine that calls
// CleanupCorruptedAddresses every cleanupInterval. The goroutine stops, and
// its ticker is released, when ctx is cancelled.
//
// time.NewTicker panics on a non-positive interval, so a cleanupInterval of
// zero or less falls back to the cache timeout. If that is not positive
// either, no goroutine is started.
func (c *AddressValidationCache) StartAutomaticCleanup(ctx context.Context, cleanupInterval time.Duration) {
	if cleanupInterval <= 0 {
		cleanupInterval = c.cacheTimeout
	}
	if cleanupInterval <= 0 {
		return
	}

	ticker := time.NewTicker(cleanupInterval)
	go func() {
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				c.CleanupCorruptedAddresses()
			}
		}
	}()
}