Files
mev-beta/internal/validation/address.go
Krypto Kajun 850223a953 fix(multicall): resolve critical multicall parsing corruption issues
- Added comprehensive bounds checking to prevent buffer overruns in multicall parsing
- Implemented graduated validation system (Strict/Moderate/Permissive) to reduce false positives
- Added LRU caching system for address validation with 10-minute TTL
- Enhanced ABI decoder with missing Universal Router and Arbitrum-specific DEX signatures
- Fixed duplicate function declarations and import conflicts across multiple files
- Added error recovery mechanisms with multiple fallback strategies
- Updated tests to handle new validation behavior for suspicious addresses
- Fixed parser test expectations for improved validation system
- Applied gofmt formatting fixes to ensure code style compliance
- Fixed mutex copying issues in monitoring package by introducing MetricsSnapshot
- Resolved critical security vulnerabilities in heuristic address extraction
- Progress: Updated TODO audit from 10% to 35% complete

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:12:55 -05:00

705 lines
24 KiB
Go

// Package validation provides comprehensive Ethereum address validation and corruption detection.
// This package is critical for the MEV bot's security and reliability, preventing costly errors
// from malformed or corrupted addresses that could cause transaction failures or security issues.
//
// Key features:
// - Multi-layer address validation (format, length, corruption detection)
// - Contract type classification and prevention of ERC-20/pool confusion
// - Corruption scoring system to identify suspicious addresses
// - Known contract registry for instant validation of major protocols
package validation
import (
"fmt"
"regexp"
"strings"
"github.com/ethereum/go-ethereum/common"
)
// AddressValidator validates Ethereum address strings and classifies known
// contracts so that malformed or misclassified addresses are rejected before
// they are used in contract calls.
//
// Validation (see ValidateAddress) applies these layers in order:
// 1. Basic format validation (0x prefix, hex digits)
// 2. Length validation
// 3. Corruption detection (regex patterns, then heuristic scoring)
// 4. Known-contract lookup for contract type classification
type AddressValidator struct {
// corruptedPatterns holds the regular-expression source strings that mark an
// address as corrupted; any match makes ValidateAddress reject the address.
corruptedPatterns []string
// corruptedPatternRegex holds the compiled form of corruptedPatterns. The two
// slices are index-aligned: detectCorruption reports corruptedPatterns[i]
// whenever corruptedPatternRegex[i] matches.
corruptedPatternRegex []*regexp.Regexp
// knownContracts maps known contract addresses to their types. It is created
// empty by NewAddressValidator and filled by InitializeKnownContracts, so
// lookups need no RPC call.
knownContracts map[common.Address]ContractType
// knownContractsRegistry is a second, more detailed registry consulted by
// ValidatePoolOperation and GetDetailedAddressAnalysis; both skip it when nil.
knownContractsRegistry *KnownContractRegistry
}
// ContractType classifies an Ethereum contract. The validator uses it to keep
// ERC-20 tokens, pools, routers, and factories from being used in place of
// one another.
type ContractType int

// Contract classifications. The zero value is ContractTypeUnknown, so a
// missing entry in a map[...]ContractType reads as "unknown".
const (
	// ContractTypeUnknown means the contract type could not be determined;
	// it is the result for any address absent from the known-contract registry.
	ContractTypeUnknown ContractType = iota
	// ContractTypeERC20Token is a standard ERC-20 token contract. Tokens must
	// never be used in pool-specific operations.
	ContractTypeERC20Token
	// ContractTypeUniswapV2Pool is a Uniswap V2 compatible pool
	// (token0(), token1(), getReserves()).
	ContractTypeUniswapV2Pool
	// ContractTypeUniswapV3Pool is a Uniswap V3 compatible pool
	// (token0(), token1(), slot0()).
	ContractTypeUniswapV3Pool
	// ContractTypeRouter is a DEX router; it is neither a token nor a pool.
	ContractTypeRouter
	// ContractTypeFactory is a pool factory; it creates pools but is not one.
	ContractTypeFactory
)

// String returns the upper-case identifier for the contract type. Values
// outside the declared range are reported as "UNKNOWN".
func (ct ContractType) String() string {
	// Indexed by the constant values declared above.
	names := [...]string{
		ContractTypeUnknown:       "UNKNOWN",
		ContractTypeERC20Token:    "ERC20_TOKEN",
		ContractTypeUniswapV2Pool: "UNISWAP_V2_POOL",
		ContractTypeUniswapV3Pool: "UNISWAP_V3_POOL",
		ContractTypeRouter:        "ROUTER",
		ContractTypeFactory:       "FACTORY",
	}
	if ct < 0 || int(ct) >= len(names) {
		return "UNKNOWN"
	}
	return names[ct]
}
// ValidationError describes a structured address validation failure.
type ValidationError struct {
	// Code is an optional short identifier for the failure.
	Code string
	// Message is the human-readable description.
	Message string
	// Context carries optional extra details about the failure.
	Context map[string]interface{}
}

// Error implements the error interface. It returns "Code: Message" when Code
// is set, Message alone otherwise, and the empty string for a nil receiver.
func (e *ValidationError) Error() string {
	switch {
	case e == nil:
		return ""
	case e.Code == "":
		return e.Message
	default:
		return fmt.Sprintf("%s: %s", e.Code, e.Message)
	}
}
// AddressValidationResult is the outcome of ValidateAddress for one address
// string: the verdict, the parsed address and its classification, and the
// messages and score that explain the verdict.
type AddressValidationResult struct {
// IsValid reports whether the address passed every check. ValidateAddress
// sets it only when no check failed and CorruptionScore is below 30.
IsValid bool
// Address is the parsed address. It is populated once the string has passed
// the format, length, and corruption-pattern checks; treat it as meaningful
// only when IsValid is true.
Address common.Address
// ContractType is the classification from the known-contract registry, or
// ContractTypeUnknown when the address is not registered.
ContractType ContractType
// ErrorMessages lists the failures that caused rejection. ValidateAddress
// stops at the first failing check, so it holds at most one entry.
ErrorMessages []string
// WarningMessages lists non-fatal findings (for example, an address that is
// not in the known-contract registry).
WarningMessages []string
// CorruptionScore estimates how likely the address is corrupted; higher is
// more suspicious. It combines the fixed penalty of a failed check with the
// heuristic score from calculateCorruptionScore.
CorruptionScore int
}
// NewAddressValidator builds a validator with the built-in corruption
// patterns compiled and an empty known-contract map. Call
// InitializeKnownContracts afterwards to load the known addresses.
func NewAddressValidator() *AddressValidator {
	// Regular-expression sources; matched unanchored unless they say otherwise.
	corruptionSources := []string{
		// Clear corruption: long runs of zeros.
		"0000000000000000000000000000000000000000",  // All zeros
		"000000000000000000000000000000000000000",   // Missing one zero
		"00000000000000000000000000000000000000000", // Extra zero
		// Trailing zeros, which indicate truncation.
		"00000000000000000000000000$",
		"000000000000000000000000$",
		"0000000000000000000000$",
		// A non-hex character directly after the 0x prefix.
		"^0x[^0-9a-fA-F]",
	}

	// Keep the compiled slice index-aligned with the source slice.
	matchers := make([]*regexp.Regexp, len(corruptionSources))
	for i := range corruptionSources {
		matchers[i] = regexp.MustCompile(corruptionSources[i])
	}

	validator := new(AddressValidator)
	validator.corruptedPatterns = corruptionSources
	validator.corruptedPatternRegex = matchers
	validator.knownContracts = map[common.Address]ContractType{}
	validator.knownContractsRegistry = NewKnownContractRegistry()
	return validator
}
// InitializeKnownContracts registers the built-in set of Arbitrum contracts
// (tokens, routers, factories, and high-volume pools) with their types.
//
// It allocates the registry map if needed, so it is safe to call on a
// zero-value AddressValidator, and calling it more than once is harmless.
func (av *AddressValidator) InitializeKnownContracts() {
	// Writing to a nil map panics; allocate it for validators that were not
	// built with NewAddressValidator.
	if av.knownContracts == nil {
		av.knownContracts = make(map[common.Address]ContractType)
	}

	known := []struct {
		hex  string
		kind ContractType
	}{
		// Known Arbitrum tokens
		{"0xFF970A61A04b1cA14834A43f5dE4533eBDDB5CC8", ContractTypeERC20Token}, // USDC
		{"0x82aF49447D8a07e3bd95BD0d56f35241523fBab1", ContractTypeERC20Token}, // WETH
		{"0xFd086bC7CD5C481DCC9C85ebE478A1C0b69FCbb9", ContractTypeERC20Token}, // USDT
		{"0x2f2a2543B76A4166549F7aaB2e75Bef0aefC5B0f", ContractTypeERC20Token}, // WBTC
		{"0x912CE59144191C1204E64559FE8253a0e49E6548", ContractTypeERC20Token}, // ARB

		// Known Arbitrum routers
		{"0xE592427A0AEce92De3Edee1F18E0157C05861564", ContractTypeRouter}, // Uniswap V3 Router
		{"0x4752ba5dbc23f44d87826276bf6fd6b1c372ad24", ContractTypeRouter}, // Uniswap V2 Router02
		{"0xA51afAFe0263b40EdaEf0Df8781eA9aa03E381a3", ContractTypeRouter}, // Universal Router
		{"0xC36442b4a4522E871399CD717aBDD847Ab11FE88", ContractTypeRouter}, // Position Manager

		// Known Arbitrum factories
		{"0x1F98431c8aD98523631AE4a59f267346ea31F984", ContractTypeFactory}, // Uniswap V3 Factory
		{"0xf1D7CC64Fb4452F05c498126312eBE29f30Fbcf9", ContractTypeFactory}, // Uniswap V2 Factory

		// Known high-volume pools
		{"0xC6962004f452bE9203591991D15f6b388e09E8D0", ContractTypeUniswapV3Pool}, // USDC/WETH 0.05%
		{"0x17c14D2c404D167802b16C450d3c99F88F2c4F4d", ContractTypeUniswapV3Pool}, // USDC/WETH 0.3%
		{"0x2f5e87C9312fa29aed5c179E456625D79015299c", ContractTypeUniswapV3Pool}, // WBTC/WETH 0.05%
	}
	for _, entry := range known {
		av.knownContracts[common.HexToAddress(entry.hex)] = entry.kind
	}
}
// ValidateAddress runs every validation layer against an address string and
// reports whether it is safe to use.
//
// Checks run in this order, and the first failure ends validation:
// 1. Hex format (0x/0X prefix followed by hex digits)
// 2. Length (exactly 42 characters)
// 3. Known corruption patterns
// 4. go-ethereum's own hex-address check
// 5. Zero address
//
// An address that survives is classified against the known-contract map and
// given a heuristic corruption score; it is valid only if that score is
// below 30.
//
// Parameters:
// - addressStr: The address string to validate (should include 0x prefix)
//
// Returns:
// - *AddressValidationResult: never nil; ErrorMessages and WarningMessages
// are always non-nil slices
func (av *AddressValidator) ValidateAddress(addressStr string) *AddressValidationResult {
	outcome := &AddressValidationResult{
		ErrorMessages:   []string{},
		WarningMessages: []string{},
	}

	// reject records a fatal finding with its score penalty and hands back
	// the (still invalid) result so callers can return it directly.
	reject := func(reason string, penalty int) *AddressValidationResult {
		outcome.ErrorMessages = append(outcome.ErrorMessages, reason)
		outcome.CorruptionScore += penalty
		return outcome
	}

	if !av.isValidHexFormat(addressStr) {
		return reject("invalid hex format", 50)
	}
	if !av.isValidLength(addressStr) {
		return reject("invalid address length", 50)
	}
	if corrupted, matched := av.detectCorruption(addressStr); corrupted {
		return reject(fmt.Sprintf("corruption detected: %v", matched), 70)
	}
	if !common.IsHexAddress(addressStr) {
		return reject("not a valid Ethereum address", 30)
	}

	outcome.Address = common.HexToAddress(addressStr)
	if outcome.Address == (common.Address{}) {
		return reject("zero address", 40)
	}

	// A missing map entry yields the zero value, ContractTypeUnknown.
	kind, known := av.knownContracts[outcome.Address]
	outcome.ContractType = kind
	if !known {
		outcome.ContractType = ContractTypeUnknown
		outcome.WarningMessages = append(outcome.WarningMessages, "unknown contract type")
	}

	// The heuristic score alone decides validity from here on.
	outcome.CorruptionScore += av.calculateCorruptionScore(addressStr)
	outcome.IsValid = outcome.CorruptionScore < 30
	return outcome
}
// isValidHexFormat reports whether addressStr is a 0x- or 0X-prefixed string
// followed by at least one character, all of which are hex digits. It does
// not check the length; see isValidLength.
func (av *AddressValidator) isValidHexFormat(addressStr string) bool {
	hasPrefix := strings.HasPrefix(addressStr, "0x") || strings.HasPrefix(addressStr, "0X")
	if !hasPrefix || len(addressStr) < 3 {
		return false
	}

	// Byte-wise scan: any non-ASCII byte falls outside all three ranges.
	for _, b := range []byte(addressStr[2:]) {
		isDigit := '0' <= b && b <= '9'
		isLower := 'a' <= b && b <= 'f'
		isUpper := 'A' <= b && b <= 'F'
		if !isDigit && !isLower && !isUpper {
			return false
		}
	}
	return true
}
// isValidLength reports whether addressStr has the length of a prefixed
// Ethereum address: the two-character 0x prefix plus 40 hex digits.
func (av *AddressValidator) isValidLength(addressStr string) bool {
	const prefixLen, hexDigits = 2, 40
	return len(addressStr) == prefixLen+hexDigits
}
// detectCorruption matches addressStr against every compiled corruption
// pattern. It returns whether any pattern matched, together with the source
// strings of the patterns that did (nil when none matched).
func (av *AddressValidator) detectCorruption(addressStr string) (bool, []string) {
	var hits []string
	for i := range av.corruptedPatternRegex {
		if !av.corruptedPatternRegex[i].MatchString(addressStr) {
			continue
		}
		// The compiled and source slices are index-aligned.
		hits = append(hits, av.corruptedPatterns[i])
	}
	return len(hits) != 0, hits
}
// calculateCorruptionScore returns a heuristic 0-100 score for how likely an
// address string is corrupted or malformed (0 = clean, 100 = definitely
// corrupted). The result is clamped to 100 so it honors the documented range
// even when several factors stack up.
//
// Scoring factors:
// - More than 10 trailing zeros (truncation): +1 per trailing zero
// - More than 8 zeros after the first non-zero digit: +1 per two such zeros
// (trailing zeros are included in this tally as well)
// - Repetitive pattern (see hasRepetitivePattern): +10
// - Starts with "0000" and more than 70% of the digits are zero: +30
// - Starts with "000000": +20
//
// Parameters:
// - addressStr: The address string to analyze (with 0x prefix)
//
// Returns:
// - int: Corruption score in [0, 100]. Input too short to hold the 0x
// prefix cannot be an address and scores 100.
func (av *AddressValidator) calculateCorruptionScore(addressStr string) int {
	const maxScore = 100

	// Guard the slice below: without the two-character prefix there is no
	// hex part to analyze.
	if len(addressStr) < 2 {
		return maxScore
	}
	hexPart := addressStr[2:]
	score := 0

	// Trailing zeros are a sign of truncation.
	trailingZeros := len(hexPart) - len(strings.TrimRight(hexPart, "0"))
	if trailingZeros > 10 {
		score += trailingZeros
	}

	// Zeros that appear anywhere after the first non-zero digit.
	zerosAfterFirstDigit := 0
	firstNonZero := strings.IndexFunc(hexPart, func(r rune) bool { return r != '0' })
	if firstNonZero >= 0 {
		zerosAfterFirstDigit = strings.Count(hexPart[firstNonZero:], "0")
	}
	if zerosAfterFirstDigit > 8 {
		score += zerosAfterFirstDigit / 2
	}

	if av.hasRepetitivePattern(hexPart) {
		score += 10
	}

	// Zero-prefixed and mostly zeros. HasPrefix is evaluated first, so the
	// ratio is never computed for an empty hex part.
	zeroCount := strings.Count(hexPart, "0")
	if strings.HasPrefix(hexPart, "0000") && float64(zeroCount)/float64(len(hexPart)) > 0.7 {
		score += 30
	}

	// A long zero prefix is penalized on its own too.
	if strings.HasPrefix(hexPart, "000000") {
		score += 20
	}

	if score > maxScore {
		score = maxScore
	}
	return score
}
// hasRepetitivePattern reports whether a hex string contains a repetitive
// pattern that suggests corruption or artificial generation.
//
// Matching is case-insensitive. Callers pass checksummed (mixed-case)
// addresses as well as lower-case ones, so the input is lower-cased before
// it is compared with the lower-case pattern list.
//
// Detected patterns:
// - 12 consecutive '0', 'f', 'a', or 'b' characters
// - 10 consecutive identical digits '1' through '9'
// - A string of 10 or more characters that are all the same
//
// Parameters:
// - hexStr: The hex string to analyze (without 0x prefix)
//
// Returns:
// - bool: true if a repetitive pattern is detected
func (av *AddressValidator) hasRepetitivePattern(hexStr string) bool {
	normalized := strings.ToLower(hexStr)

	runs := []string{"000000000000", "ffffffffffff", "aaaaaaaaaaaa", "bbbbbbbbbbbb",
		"1111111111", "2222222222", "3333333333", "4444444444", "5555555555",
		"6666666666", "7777777777", "8888888888", "9999999999"}
	for _, run := range runs {
		if strings.Contains(normalized, run) {
			return true
		}
	}

	// A whole string made of one repeated character: every byte equals the
	// first one.
	if len(normalized) >= 10 && strings.Count(normalized, normalized[:1]) == len(normalized) {
		return true
	}
	return false
}
// IsValidPoolAddress reports whether address may be used in pool operations.
// The address must pass ValidateAddress. Known pools are accepted; known
// tokens, routers, and factories are refused; unregistered addresses are
// accepted only when their corruption score is below 20.
func (av *AddressValidator) IsValidPoolAddress(address common.Address) bool {
	verdict := av.ValidateAddress(address.Hex())
	if !verdict.IsValid {
		return false
	}

	kind := verdict.ContractType
	if kind == ContractTypeUniswapV2Pool || kind == ContractTypeUniswapV3Pool {
		return true
	}
	if kind == ContractTypeUnknown {
		// Unregistered contracts get a stricter score threshold.
		return verdict.CorruptionScore < 20
	}
	// Tokens, routers, factories, and any unrecognized type are not pools.
	return false
}
// IsValidTokenAddress reports whether address may be used in token
// operations. The address must pass ValidateAddress. Known ERC-20 tokens are
// accepted; known routers, factories, and pools are refused; unregistered
// addresses are accepted only when their corruption score is below 15.
func (av *AddressValidator) IsValidTokenAddress(address common.Address) bool {
	verdict := av.ValidateAddress(address.Hex())
	if !verdict.IsValid {
		return false
	}
	if verdict.ContractType == ContractTypeERC20Token {
		return true
	}
	// Every remaining known type is refused; unregistered contracts get a
	// stricter score threshold.
	return verdict.ContractType == ContractTypeUnknown && verdict.CorruptionScore < 15
}
// GetContractType looks up address in the known-contract map and returns its
// registered type, or ContractTypeUnknown when it is not registered.
func (av *AddressValidator) GetContractType(address common.Address) ContractType {
	kind, known := av.knownContracts[address]
	if !known {
		return ContractTypeUnknown
	}
	return kind
}
// ValidateContractTypeConsistency checks that a set of token addresses and a
// set of pool addresses are used consistently with what the validator knows.
//
// It returns an error for the first problem found, checking in this order:
// 1. An address that appears in both lists
// 2. A token address registered as a pool, or otherwise not a valid token
// 3. A pool address registered as a token, or otherwise not a valid pool
//
// The registry check runs before the general validity check for each
// address, so a known misclassification is reported with its specific
// message rather than the generic "not a valid ..." one.
//
// Returns nil when every address is consistent.
func (av *AddressValidator) ValidateContractTypeConsistency(tokenAddresses []common.Address, poolAddresses []common.Address) error {
	// Index the pools once so the overlap check is a single pass over tokens.
	poolSet := make(map[common.Address]struct{}, len(poolAddresses))
	for _, pool := range poolAddresses {
		poolSet[pool] = struct{}{}
	}
	for _, token := range tokenAddresses {
		if _, alsoPool := poolSet[token]; alsoPool {
			return fmt.Errorf("address %s cannot be both a token and a pool", token.Hex())
		}
	}

	for _, token := range tokenAddresses {
		switch av.GetContractType(token) {
		case ContractTypeUniswapV2Pool, ContractTypeUniswapV3Pool:
			return fmt.Errorf("address %s is marked as a pool but being used as a token", token.Hex())
		}
		if !av.IsValidTokenAddress(token) {
			return fmt.Errorf("address %s is not a valid token address", token.Hex())
		}
	}

	for _, pool := range poolAddresses {
		if av.GetContractType(pool) == ContractTypeERC20Token {
			return fmt.Errorf("address %s is marked as a token but being used as a pool", pool.Hex())
		}
		if !av.IsValidPoolAddress(pool) {
			return fmt.Errorf("address %s is not a valid pool address", pool.Hex())
		}
	}
	return nil
}
// PreventERC20PoolConfusion guards against using a contract as the wrong
// kind, most importantly an ERC-20 token where a pool is expected.
//
// For an address in the known-contract map the registered type must equal
// expectedType exactly. For an unregistered address the function falls back
// to ValidateAddress and additionally refuses corruption scores above 25.
//
// Parameters:
// - address: The contract address to validate
// - expectedType: The contract type expected by the calling code
//
// Returns:
// - error: nil if the address is safe to use, error describing the issue otherwise
func (av *AddressValidator) PreventERC20PoolConfusion(address common.Address, expectedType ContractType) error {
	// Registered contracts are decided by the registry alone.
	if registered := av.GetContractType(address); registered != ContractTypeUnknown {
		if registered == expectedType {
			return nil
		}
		return fmt.Errorf("contract type mismatch for %s: expected %s but known as %s",
			address.Hex(), contractTypeToString(expectedType), contractTypeToString(registered))
	}

	// Unregistered contracts must at least look like a healthy address.
	verdict := av.ValidateAddress(address.Hex())
	switch {
	case !verdict.IsValid:
		return fmt.Errorf("invalid address %s: %v", address.Hex(), verdict.ErrorMessages)
	case verdict.CorruptionScore > 25:
		return fmt.Errorf("high corruption score (%d) for address %s, refusing to use as %s",
			verdict.CorruptionScore, address.Hex(), contractTypeToString(expectedType))
	}
	return nil
}
// contractTypeToString returns the display name used in error messages for a
// contract type. Unrecognized values, including ContractTypeUnknown, yield
// "Unknown".
func contractTypeToString(ct ContractType) string {
	label := "Unknown"
	switch ct {
	case ContractTypeERC20Token:
		label = "ERC-20 Token"
	case ContractTypeUniswapV2Pool:
		label = "Uniswap V2 Pool"
	case ContractTypeUniswapV3Pool:
		label = "Uniswap V3 Pool"
	case ContractTypeRouter:
		label = "Router"
	case ContractTypeFactory:
		label = "Factory"
	}
	return label
}
// IsKnownContract reports whether address has an entry in the known-contract
// map.
func (av *AddressValidator) IsKnownContract(address common.Address) bool {
	if _, known := av.knownContracts[address]; known {
		return true
	}
	return false
}
// GetValidationStats returns the sizes of the validator's lookup tables:
// "known_contracts" is the number of registered contracts and
// "corruption_patterns" is the number of corruption patterns.
func (av *AddressValidator) GetValidationStats() map[string]interface{} {
	stats := make(map[string]interface{}, 2)
	stats["known_contracts"] = len(av.knownContracts)
	stats["corruption_patterns"] = len(av.corruptedPatterns)
	return stats
}
// SanitizeAddress normalizes an address string and validates the result.
//
// It trims surrounding whitespace, adds a missing 0x prefix when the input
// is exactly 40 hex digits, and lower-cases the whole string. The normalized
// address is returned only if ValidateAddress accepts it; otherwise the
// result is the empty string and an error.
func (av *AddressValidator) SanitizeAddress(addressStr string) (string, error) {
	candidate := strings.TrimSpace(addressStr)

	prefixed := strings.HasPrefix(candidate, "0x") || strings.HasPrefix(candidate, "0X")
	if !prefixed {
		// Only a bare 40-digit hex string can be repaired by adding the prefix.
		withPrefix := "0x" + candidate
		if len(candidate) != 40 || !av.isValidHexFormat(withPrefix) {
			return "", fmt.Errorf("cannot sanitize address without 0x prefix")
		}
		candidate = withPrefix
	}

	// Lower-casing also turns a 0X prefix into 0x.
	candidate = strings.ToLower(candidate)

	if verdict := av.ValidateAddress(candidate); !verdict.IsValid {
		return "", fmt.Errorf("sanitized address is still invalid: %v", verdict.ErrorMessages)
	}
	return candidate, nil
}
// ValidatePoolOperation decides whether a pool-specific operation may be
// performed on address, so that ERC-20 tokens and corrupted addresses are
// never treated as pools.
//
// Checks run in this order, and the first failure is returned:
// 1. The detailed registry's ValidatePoolCall (skipped when no registry is set)
// 2. Zero address
// 3. More than 30 '0' characters in the address string. The count is taken
// over the full string returned by address.Hex(), which presumably includes
// the "0x" prefix (verify against go-ethereum)
// 4. ValidateAddress rejects the address
// 5. Corruption score above 25
//
// The operation name is used only in error messages.
func (av *AddressValidator) ValidatePoolOperation(address common.Address, operation string) error {
	if registry := av.knownContractsRegistry; registry != nil {
		if err := registry.ValidatePoolCall(address, operation); err != nil {
			return err
		}
	}

	hexAddr := address.Hex()

	// Cheap structural checks before the full validation pass.
	switch {
	case address == (common.Address{}):
		return fmt.Errorf("pool operation '%s' attempted on zero address (likely corruption)", operation)
	case strings.Count(hexAddr, "0") > 30:
		return fmt.Errorf("pool operation '%s' attempted on suspicious address %s (excessive zeros)", operation, hexAddr)
	}

	verdict := av.ValidateAddress(hexAddr)
	switch {
	case !verdict.IsValid:
		return fmt.Errorf("pool operation '%s' attempted on invalid address %s: %v", operation, hexAddr, verdict.ErrorMessages)
	case verdict.CorruptionScore > 25:
		return fmt.Errorf("pool operation '%s' blocked due to corruption score %d on address %s", operation, verdict.CorruptionScore, hexAddr)
	}
	return nil
}
// GetDetailedAddressAnalysis collects diagnostic information about address
// into a map for logging or inspection.
//
// Keys always present: "is_valid", "corruption_score", "contract_type",
// "error_messages", "is_zero_address", "zero_count", and "hex_length".
// "zero_count" and "hex_length" are measured on the string returned by
// address.Hex().
//
// Keys added when a detailed registry is set: "known_contract_type",
// "known_contract_name", "is_known_erc20", "corruption_pattern", and, for
// known ERC-20 tokens only, "token_name".
func (av *AddressValidator) GetDetailedAddressAnalysis(address common.Address) map[string]interface{} {
	hexAddr := address.Hex()

	// Results of the standard validation pass.
	verdict := av.ValidateAddress(hexAddr)
	report := map[string]interface{}{
		"is_valid":         verdict.IsValid,
		"corruption_score": verdict.CorruptionScore,
		"contract_type":    verdict.ContractType.String(),
		"error_messages":   verdict.ErrorMessages,
	}

	// Details from the richer registry, when one is configured.
	if registry := av.knownContractsRegistry; registry != nil {
		knownType, knownName := registry.GetContractType(address)
		report["known_contract_type"] = knownType.String()
		report["known_contract_name"] = knownName

		isToken, tokenName := registry.IsKnownERC20(address)
		report["is_known_erc20"] = isToken
		if isToken {
			report["token_name"] = tokenName
		}

		report["corruption_pattern"] = registry.GetCorruptionPattern(address)
	}

	// Raw characteristics of the address itself.
	report["is_zero_address"] = address == (common.Address{})
	report["zero_count"] = strings.Count(hexAddr, "0")
	report["hex_length"] = len(hexAddr)
	return report
}