// Package validation provides comprehensive Ethereum address validation and corruption detection. // This package is critical for the MEV bot's security and reliability, preventing costly errors // from malformed or corrupted addresses that could cause transaction failures or security issues. // // Key features: // - Multi-layer address validation (format, length, corruption detection) // - Contract type classification and prevention of ERC-20/pool confusion // - Corruption scoring system to identify suspicious addresses // - Known contract registry for instant validation of major protocols package validation import ( "fmt" "regexp" "strings" "github.com/ethereum/go-ethereum/common" ) // AddressValidator provides comprehensive Ethereum address validation with advanced // corruption detection and contract type classification. This validator is designed // to prevent the costly errors that can occur when malformed addresses are used // in contract calls, particularly in high-frequency MEV operations. // // The validator implements multiple validation layers: // 1. Basic format validation (hex format, length, prefix) // 2. Corruption pattern detection (repetitive patterns, suspicious zeros) // 3. Contract type classification to prevent misuse // 4. Known contract registry for instant validation type AddressValidator struct { // Known corrupted patterns that should be immediately rejected // These patterns are derived from observed corruption incidents in production corruptedPatterns []string // Precompiled regex equivalents for efficient matching corruptedPatternRegex []*regexp.Regexp // Registry of known contract addresses and their verified types // This enables instant validation without requiring RPC calls knownContracts map[common.Address]ContractType // Enhanced known contract registry for detailed validation knownContractsRegistry *KnownContractRegistry } // ContractType represents the classification of an Ethereum contract. // This classification is critical for preventing the ERC-20/pool confusion // that was causing massive log spam and transaction failures in production. type ContractType int const ( // ContractTypeUnknown indicates the contract type could not be determined // This is the default for addresses not in the known contracts registry ContractTypeUnknown ContractType = iota // ContractTypeERC20Token indicates a standard ERC-20 token contract // These contracts should never be used in pool-specific operations ContractTypeERC20Token // ContractTypeUniswapV2Pool indicates a Uniswap V2 compatible pool contract // These support token0(), token1(), and getReserves() functions ContractTypeUniswapV2Pool // ContractTypeUniswapV3Pool indicates a Uniswap V3 compatible pool contract // These support token0(), token1(), and slot0() functions ContractTypeUniswapV3Pool // ContractTypeRouter indicates a DEX router contract // These should not be used directly as token or pool addresses ContractTypeRouter // ContractTypeFactory indicates a pool factory contract // These create pools but are not pools themselves ContractTypeFactory ) // String returns a human-readable representation of the contract type. func (ct ContractType) String() string { switch ct { case ContractTypeERC20Token: return "ERC20_TOKEN" case ContractTypeUniswapV2Pool: return "UNISWAP_V2_POOL" case ContractTypeUniswapV3Pool: return "UNISWAP_V3_POOL" case ContractTypeRouter: return "ROUTER" case ContractTypeFactory: return "FACTORY" case ContractTypeUnknown: fallthrough default: return "UNKNOWN" } } // ValidationError represents a structured validation failure for an address. type ValidationError struct { Code string Message string Context map[string]interface{} } // Error implements the error interface. func (e *ValidationError) Error() string { if e == nil { return "" } if e.Code != "" { return fmt.Sprintf("%s: %s", e.Code, e.Message) } return e.Message } // AddressValidationResult contains comprehensive validation results and metadata // for an Ethereum address. This structure provides detailed information about // the validation process, including any issues found and confidence metrics. type AddressValidationResult struct { // IsValid indicates whether the address passed all validation checks // A false value means the address should not be used in transactions IsValid bool // Address contains the parsed Ethereum address (only valid if IsValid is true) Address common.Address // ContractType indicates the classification of the contract (if known) ContractType ContractType // ErrorMessages contains detailed descriptions of validation failures // These are critical issues that prevent the address from being used ErrorMessages []string // WarningMessages contains non-critical issues or concerns // These don't prevent usage but should be logged for monitoring WarningMessages []string // CorruptionScore is a 0-100 metric indicating likelihood of corruption // Higher scores indicate more suspicious patterns (0=clean, 100=definitely corrupted) // Addresses with scores >30 are typically rejected in critical operations CorruptionScore int } // NewAddressValidator creates a new address validator func NewAddressValidator() *AddressValidator { patterns := []string{ // Patterns indicating clear corruption "0000000000000000000000000000000000000000", // All zeros "000000000000000000000000000000000000000", // Missing one zero "00000000000000000000000000000000000000000", // Extra zero // Patterns with trailing zeros indicating truncation "00000000000000000000000000$", "000000000000000000000000$", "0000000000000000000000$", // Patterns with leading non-hex after 0x "^0x[^0-9a-fA-F]", } compiled := make([]*regexp.Regexp, 0, len(patterns)) for _, pattern := range patterns { compiled = append(compiled, regexp.MustCompile(pattern)) } return &AddressValidator{ corruptedPatterns: patterns, corruptedPatternRegex: compiled, knownContracts: make(map[common.Address]ContractType), knownContractsRegistry: NewKnownContractRegistry(), } } // InitializeKnownContracts populates the validator with known Arbitrum contracts func (av *AddressValidator) InitializeKnownContracts() { // Known Arbitrum tokens av.knownContracts[common.HexToAddress("0xFF970A61A04b1cA14834A43f5dE4533eBDDB5CC8")] = ContractTypeERC20Token // USDC av.knownContracts[common.HexToAddress("0x82aF49447D8a07e3bd95BD0d56f35241523fBab1")] = ContractTypeERC20Token // WETH av.knownContracts[common.HexToAddress("0xFd086bC7CD5C481DCC9C85ebE478A1C0b69FCbb9")] = ContractTypeERC20Token // USDT av.knownContracts[common.HexToAddress("0x2f2a2543B76A4166549F7aaB2e75Bef0aefC5B0f")] = ContractTypeERC20Token // WBTC av.knownContracts[common.HexToAddress("0x912CE59144191C1204E64559FE8253a0e49E6548")] = ContractTypeERC20Token // ARB // Known Arbitrum routers av.knownContracts[common.HexToAddress("0xE592427A0AEce92De3Edee1F18E0157C05861564")] = ContractTypeRouter // Uniswap V3 Router av.knownContracts[common.HexToAddress("0x4752ba5dbc23f44d87826276bf6fd6b1c372ad24")] = ContractTypeRouter // Uniswap V2 Router02 av.knownContracts[common.HexToAddress("0xA51afAFe0263b40EdaEf0Df8781eA9aa03E381a3")] = ContractTypeRouter // Universal Router av.knownContracts[common.HexToAddress("0xC36442b4a4522E871399CD717aBDD847Ab11FE88")] = ContractTypeRouter // Position Manager // Known Arbitrum factories av.knownContracts[common.HexToAddress("0x1F98431c8aD98523631AE4a59f267346ea31F984")] = ContractTypeFactory // Uniswap V3 Factory av.knownContracts[common.HexToAddress("0xf1D7CC64Fb4452F05c498126312eBE29f30Fbcf9")] = ContractTypeFactory // Uniswap V2 Factory // Known high-volume pools av.knownContracts[common.HexToAddress("0xC6962004f452bE9203591991D15f6b388e09E8D0")] = ContractTypeUniswapV3Pool // USDC/WETH 0.05% av.knownContracts[common.HexToAddress("0x17c14D2c404D167802b16C450d3c99F88F2c4F4d")] = ContractTypeUniswapV3Pool // USDC/WETH 0.3% av.knownContracts[common.HexToAddress("0x2f5e87C9312fa29aed5c179E456625D79015299c")] = ContractTypeUniswapV3Pool // WBTC/WETH 0.05% } // ValidateAddress performs comprehensive validation of an Ethereum address string. // This is the primary validation function that applies all validation layers // in sequence to determine if an address is safe to use in transactions. // // The validation process includes: // 1. Basic format validation (hex format, 0x prefix) // 2. Length validation (must be exactly 42 characters) // 3. Corruption pattern detection // 4. Contract type classification // 5. Corruption scoring // // Parameters: // - addressStr: The address string to validate (should include 0x prefix) // // Returns: // - *AddressValidationResult: Comprehensive validation results func (av *AddressValidator) ValidateAddress(addressStr string) *AddressValidationResult { // Initialize the result structure with safe defaults result := &AddressValidationResult{ IsValid: false, // Default to invalid until all checks pass ErrorMessages: make([]string, 0), WarningMessages: make([]string, 0), CorruptionScore: 0, // Start with zero corruption score } // Basic format validation if !av.isValidHexFormat(addressStr) { result.ErrorMessages = append(result.ErrorMessages, "invalid hex format") result.CorruptionScore += 50 return result } // Length validation if !av.isValidLength(addressStr) { result.ErrorMessages = append(result.ErrorMessages, "invalid address length") result.CorruptionScore += 50 return result } // Corruption pattern detection corruptionDetected, patterns := av.detectCorruption(addressStr) if corruptionDetected { result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("corruption detected: %v", patterns)) result.CorruptionScore += 70 return result } // Convert to common.Address for further validation if !common.IsHexAddress(addressStr) { result.ErrorMessages = append(result.ErrorMessages, "not a valid Ethereum address") result.CorruptionScore += 30 return result } address := common.HexToAddress(addressStr) result.Address = address // Check for zero address if address == (common.Address{}) { result.ErrorMessages = append(result.ErrorMessages, "zero address") result.CorruptionScore += 40 return result } // Check known contract types if contractType, exists := av.knownContracts[address]; exists { result.ContractType = contractType } else { result.ContractType = ContractTypeUnknown result.WarningMessages = append(result.WarningMessages, "unknown contract type") } // Additional pattern-based corruption detection result.CorruptionScore += av.calculateCorruptionScore(addressStr) // Mark as valid if corruption score is low enough if result.CorruptionScore < 30 { result.IsValid = true } return result } // isValidHexFormat checks if the string is a valid hex format func (av *AddressValidator) isValidHexFormat(addressStr string) bool { if len(addressStr) < 3 { return false } if !strings.HasPrefix(addressStr, "0x") && !strings.HasPrefix(addressStr, "0X") { return false } // Check if all characters after 0x are valid hex hexPart := addressStr[2:] if len(hexPart) == 0 { return false } for i := 0; i < len(hexPart); i++ { switch { case hexPart[i] >= '0' && hexPart[i] <= '9': case hexPart[i] >= 'a' && hexPart[i] <= 'f': case hexPart[i] >= 'A' && hexPart[i] <= 'F': default: return false } } return true } // isValidLength checks if the address has the correct length func (av *AddressValidator) isValidLength(addressStr string) bool { // Ethereum addresses should be 42 characters (0x + 40 hex chars) return len(addressStr) == 42 } // detectCorruption checks for known corruption patterns func (av *AddressValidator) detectCorruption(addressStr string) (bool, []string) { var detectedPatterns []string for idx, re := range av.corruptedPatternRegex { if re.MatchString(addressStr) { detectedPatterns = append(detectedPatterns, av.corruptedPatterns[idx]) } } return len(detectedPatterns) > 0, detectedPatterns } // calculateCorruptionScore calculates a 0-100 score indicating the likelihood // that an address has been corrupted or malformed. This scoring system is based // on patterns observed in production corruption incidents. // // Scoring factors: // - Trailing zeros (indicates truncation): +1 per excess zero // - Leading zeros in middle (unusual pattern): +0.5 per zero // - Repetitive patterns (indicates generation errors): +10 // - Other suspicious patterns: variable points // // Parameters: // - addressStr: The address string to analyze (with 0x prefix) // // Returns: // - int: Corruption score (0=clean, 100=definitely corrupted) func (av *AddressValidator) calculateCorruptionScore(addressStr string) int { score := 0 // Extract the hex part (remove 0x prefix) hexPart := addressStr[2:] // Count trailing zeros (sign of truncation) trailingZeros := 0 for i := len(hexPart) - 1; i >= 0; i-- { if hexPart[i] == '0' { trailingZeros++ } else { break } } // More than 10 trailing zeros is suspicious if trailingZeros > 10 { score += trailingZeros } // Count leading zeros after first non-zero leadingZeros := 0 foundNonZero := false for _, char := range hexPart { if char != '0' { foundNonZero = true } else if foundNonZero { leadingZeros++ } } // Large blocks of zeros in the middle are suspicious if leadingZeros > 8 { score += leadingZeros / 2 } // Check for repetitive patterns if av.hasRepetitivePattern(hexPart) { score += 10 } // Overall zero density check for leading-zero patterns (common corruption) zeroCount := strings.Count(hexPart, "0") if strings.HasPrefix(hexPart, "0000") && float64(zeroCount)/float64(len(hexPart)) > 0.7 { score += 30 } // Leading zero prefix (common corruption pattern from truncated data) if strings.HasPrefix(hexPart, "000000") { score += 20 } return score } // hasRepetitivePattern detects repetitive patterns in hex strings that indicate // corruption or artificial generation. These patterns are rarely seen in legitimate // Ethereum addresses and often indicate data corruption or malicious generation. // // Detected patterns include: // - Long sequences of the same character (000000000000, ffffffffffff) // - Addresses where all characters are identical // - Other suspicious repetitive patterns // // Parameters: // - hexStr: The hex string to analyze (without 0x prefix) // // Returns: // - bool: true if repetitive patterns are detected func (av *AddressValidator) hasRepetitivePattern(hexStr string) bool { // Define patterns that indicate corruption or artificial generation // These patterns are extremely rare in legitimate Ethereum addresses patterns := []string{"000000000000", "ffffffffffff", "aaaaaaaaaaaa", "bbbbbbbbbbbb", "1111111111", "2222222222", "3333333333", "4444444444", "5555555555", "6666666666", "7777777777", "8888888888", "9999999999"} for _, pattern := range patterns { if strings.Contains(hexStr, pattern) { return true } } // Additional check for address with same character repeated throughout if len(hexStr) >= 10 { firstChar := hexStr[0] allSame := true for i := 1; i < len(hexStr); i++ { if hexStr[i] != firstChar { allSame = false break } } if allSame { return true } } return false } // IsValidPoolAddress checks if an address is valid for pool operations func (av *AddressValidator) IsValidPoolAddress(address common.Address) bool { result := av.ValidateAddress(address.Hex()) if !result.IsValid { return false } // Must not be a token, router, or factory for pool operations switch result.ContractType { case ContractTypeERC20Token, ContractTypeRouter, ContractTypeFactory: return false case ContractTypeUniswapV2Pool, ContractTypeUniswapV3Pool: return true case ContractTypeUnknown: // Allow unknown contracts but warn return result.CorruptionScore < 20 } return false } // IsValidTokenAddress checks if an address is valid for token operations func (av *AddressValidator) IsValidTokenAddress(address common.Address) bool { result := av.ValidateAddress(address.Hex()) if !result.IsValid { return false } // Prefer known tokens, but allow unknown contracts with low corruption scores switch result.ContractType { case ContractTypeERC20Token: return true case ContractTypeRouter, ContractTypeFactory, ContractTypeUniswapV2Pool, ContractTypeUniswapV3Pool: return false case ContractTypeUnknown: return result.CorruptionScore < 15 } return false } // GetContractType returns the contract type for a given address func (av *AddressValidator) GetContractType(address common.Address) ContractType { if contractType, exists := av.knownContracts[address]; exists { return contractType } return ContractTypeUnknown } // ValidateContractTypeConsistency validates that addresses don't have conflicting contract types func (av *AddressValidator) ValidateContractTypeConsistency(tokenAddresses []common.Address, poolAddresses []common.Address) error { // CRITICAL: Ensure no address appears in both token and pool lists for _, token := range tokenAddresses { for _, pool := range poolAddresses { if token == pool { return fmt.Errorf("address %s cannot be both a token and a pool", token.Hex()) } } } // CRITICAL: Validate each token address is actually a token for _, token := range tokenAddresses { if !av.IsValidTokenAddress(token) { return fmt.Errorf("address %s is not a valid token address", token.Hex()) } // Additional check: ensure it's not marked as a pool in known contracts if contractType := av.GetContractType(token); contractType == ContractTypeUniswapV2Pool || contractType == ContractTypeUniswapV3Pool { return fmt.Errorf("address %s is marked as a pool but being used as a token", token.Hex()) } } // CRITICAL: Validate each pool address is actually a pool for _, pool := range poolAddresses { if !av.IsValidPoolAddress(pool) { return fmt.Errorf("address %s is not a valid pool address", pool.Hex()) } // Additional check: ensure it's not marked as a token in known contracts if contractType := av.GetContractType(pool); contractType == ContractTypeERC20Token { return fmt.Errorf("address %s is marked as a token but being used as a pool", pool.Hex()) } } return nil } // PreventERC20PoolConfusion is a critical safety function that prevents the costly // error where ERC-20 token contracts are incorrectly used as pool contracts. // This was the root cause of the 535K+ log spam incident in production. // // The function performs a type safety check to ensure that: // - ERC-20 tokens are not used in pool operations // - Pool contracts are not used in token operations // - Unknown contracts meet safety thresholds // // This is a mandatory check for all contract address usage in critical operations. // // Parameters: // - address: The contract address to validate // - expectedType: The contract type expected by the calling code // // Returns: // - error: nil if the address is safe to use, error describing the issue otherwise func (av *AddressValidator) PreventERC20PoolConfusion(address common.Address, expectedType ContractType) error { // Check if we have prior knowledge about this contract knownType := av.GetContractType(address) // If we have knowledge about this contract, use it if knownType != ContractTypeUnknown { if knownType != expectedType { return fmt.Errorf("contract type mismatch for %s: expected %s but known as %s", address.Hex(), contractTypeToString(expectedType), contractTypeToString(knownType)) } return nil } // For unknown contracts, perform basic validation result := av.ValidateAddress(address.Hex()) if !result.IsValid { return fmt.Errorf("invalid address %s: %v", address.Hex(), result.ErrorMessages) } // High corruption score indicates potential misclassification if result.CorruptionScore > 25 { return fmt.Errorf("high corruption score (%d) for address %s, refusing to use as %s", result.CorruptionScore, address.Hex(), contractTypeToString(expectedType)) } return nil } // contractTypeToString converts ContractType to string representation func contractTypeToString(ct ContractType) string { switch ct { case ContractTypeERC20Token: return "ERC-20 Token" case ContractTypeUniswapV2Pool: return "Uniswap V2 Pool" case ContractTypeUniswapV3Pool: return "Uniswap V3 Pool" case ContractTypeRouter: return "Router" case ContractTypeFactory: return "Factory" default: return "Unknown" } } // IsKnownContract checks if we have specific knowledge about a contract func (av *AddressValidator) IsKnownContract(address common.Address) bool { _, exists := av.knownContracts[address] return exists } // GetValidationStats returns statistics about validation results func (av *AddressValidator) GetValidationStats() map[string]interface{} { return map[string]interface{}{ "known_contracts": len(av.knownContracts), "corruption_patterns": len(av.corruptedPatterns), } } // SanitizeAddress attempts to clean up a potentially corrupted address func (av *AddressValidator) SanitizeAddress(addressStr string) (string, error) { // Remove common prefixes that might be corrupted cleaned := strings.TrimSpace(addressStr) // Ensure 0x prefix if !strings.HasPrefix(cleaned, "0x") && !strings.HasPrefix(cleaned, "0X") { if len(cleaned) == 40 && av.isValidHexFormat("0x"+cleaned) { cleaned = "0x" + cleaned } else { return "", fmt.Errorf("cannot sanitize address without 0x prefix") } } // Normalize to lowercase cleaned = strings.ToLower(cleaned) // Validate the sanitized address result := av.ValidateAddress(cleaned) if !result.IsValid { return "", fmt.Errorf("sanitized address is still invalid: %v", result.ErrorMessages) } return cleaned, nil } // ValidatePoolOperation validates if a pool-specific operation should be allowed on an address // This prevents the critical error where ERC-20 tokens are treated as pools func (av *AddressValidator) ValidatePoolOperation(address common.Address, operation string) error { // Check if this is a known ERC-20 token if av.knownContractsRegistry != nil { if err := av.knownContractsRegistry.ValidatePoolCall(address, operation); err != nil { return err } } // Additional validation for suspicious addresses addressStr := address.Hex() // Check for zero address (common corruption) if address == (common.Address{}) { return fmt.Errorf("pool operation '%s' attempted on zero address (likely corruption)", operation) } // Check for addresses with excessive zeros (likely corruption) if strings.Count(addressStr, "0") > 30 { return fmt.Errorf("pool operation '%s' attempted on suspicious address %s (excessive zeros)", operation, addressStr) } // Fallback to general validation result := av.ValidateAddress(addressStr) if !result.IsValid { return fmt.Errorf("pool operation '%s' attempted on invalid address %s: %v", operation, addressStr, result.ErrorMessages) } if result.CorruptionScore > 25 { return fmt.Errorf("pool operation '%s' blocked due to corruption score %d on address %s", operation, result.CorruptionScore, addressStr) } return nil } // GetDetailedAddressAnalysis provides comprehensive analysis of an address including corruption patterns func (av *AddressValidator) GetDetailedAddressAnalysis(address common.Address) map[string]interface{} { analysis := make(map[string]interface{}) addressStr := address.Hex() // Basic validation result := av.ValidateAddress(addressStr) analysis["is_valid"] = result.IsValid analysis["corruption_score"] = result.CorruptionScore analysis["contract_type"] = result.ContractType.String() analysis["error_messages"] = result.ErrorMessages // Known contract information if av.knownContractsRegistry != nil { contractType, name := av.knownContractsRegistry.GetContractType(address) analysis["known_contract_type"] = contractType.String() analysis["known_contract_name"] = name isERC20, tokenName := av.knownContractsRegistry.IsKnownERC20(address) analysis["is_known_erc20"] = isERC20 if isERC20 { analysis["token_name"] = tokenName } // Corruption pattern analysis corruptionPattern := av.knownContractsRegistry.GetCorruptionPattern(address) analysis["corruption_pattern"] = corruptionPattern } // Address characteristics analysis["is_zero_address"] = address == (common.Address{}) analysis["zero_count"] = strings.Count(addressStr, "0") analysis["hex_length"] = len(addressStr) return analysis }