feat: create v2-prep branch with comprehensive planning

Restructured project for V2 refactor:

**Structure Changes:**
- Moved all V1 code to orig/ folder (preserved with git mv)
- Created docs/planning/ directory
- Added orig/README_V1.md explaining V1 preservation

**Planning Documents:**
- 00_V2_MASTER_PLAN.md: Complete architecture overview
  - Executive summary of critical V1 issues
  - High-level component architecture diagrams
  - 5-phase implementation roadmap
  - Success metrics and risk mitigation

- 07_TASK_BREAKDOWN.md: Atomic task breakdown
  - 99+ hours of detailed tasks
  - Every task < 2 hours (atomic)
  - Clear dependencies and success criteria
  - Organized by implementation phase

**V2 Key Improvements:**
- Per-exchange parsers (factory pattern)
- Multi-layer strict validation
- Multi-index pool cache
- Background validation pipeline
- Comprehensive observability

**Critical Issues Addressed:**
- Zero address tokens (strict validation + cache enrichment)
- Parsing accuracy (protocol-specific parsers)
- No audit trail (background validation channel)
- Inefficient lookups (multi-index cache)
- Stats disconnection (event-driven metrics)

**Next Steps:**
1. Review planning documents
2. Begin Phase 1: Foundation (P1-001 through P1-010)
3. Implement parsers in Phase 2
4. Build cache system in Phase 3
5. Add validation pipeline in Phase 4
6. Migrate and test in Phase 5

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Administrator
2025-11-10 10:14:26 +01:00
parent 1773daffe7
commit 803de231ba
411 changed files with 20390 additions and 8680 deletions

View File

@@ -0,0 +1,704 @@
// Package validation provides comprehensive Ethereum address validation and corruption detection.
// This package is critical for the MEV bot's security and reliability, preventing costly errors
// from malformed or corrupted addresses that could cause transaction failures or security issues.
//
// Key features:
// - Multi-layer address validation (format, length, corruption detection)
// - Contract type classification and prevention of ERC-20/pool confusion
// - Corruption scoring system to identify suspicious addresses
// - Known contract registry for instant validation of major protocols
package validation
import (
"fmt"
"regexp"
"strings"
"github.com/ethereum/go-ethereum/common"
)
// AddressValidator validates Ethereum address strings and classifies the
// contracts they refer to, so that malformed or corrupted addresses are
// rejected before they are used in contract calls.
//
// The validator implements multiple validation layers:
// 1. Basic format validation (hex format, length, prefix)
// 2. Corruption pattern detection (repetitive patterns, suspicious zeros)
// 3. Contract type classification to prevent misuse
// 4. Known contract registry lookup
//
// Construct instances with NewAddressValidator: the zero value has a nil
// knownContracts map, so InitializeKnownContracts would panic on it.
type AddressValidator struct {
// corruptedPatterns holds the regular-expression sources that mark an
// address as corrupted. It is index-aligned with corruptedPatternRegex and
// is what detectCorruption reports back for each match.
corruptedPatterns []string
// corruptedPatternRegex holds the compiled form of corruptedPatterns,
// built once in NewAddressValidator so matching never recompiles.
corruptedPatternRegex []*regexp.Regexp
// knownContracts maps verified addresses to their contract type, which
// allows classification without RPC calls. NewAddressValidator leaves it
// empty; only InitializeKnownContracts populates it.
knownContracts map[common.Address]ContractType
// knownContractsRegistry is the richer registry consulted by
// ValidatePoolOperation and GetDetailedAddressAnalysis; both skip it when nil.
knownContractsRegistry *KnownContractRegistry
}
// ContractType represents the classification of an Ethereum contract.
// This classification is critical for preventing the ERC-20/pool confusion
// that was causing massive log spam and transaction failures in production.
//
// The zero value is ContractTypeUnknown, so an unset ContractType field and a
// missed lookup in a map[common.Address]ContractType both read as "unknown".
type ContractType int
const (
// ContractTypeUnknown indicates the contract type could not be determined.
// This is the default for addresses not in the known contracts registry.
ContractTypeUnknown ContractType = iota
// ContractTypeERC20Token indicates a standard ERC-20 token contract.
// IsValidPoolAddress rejects addresses registered with this type.
ContractTypeERC20Token
// ContractTypeUniswapV2Pool indicates a Uniswap V2 compatible pool contract.
// These support token0(), token1(), and getReserves() functions.
ContractTypeUniswapV2Pool
// ContractTypeUniswapV3Pool indicates a Uniswap V3 compatible pool contract.
// These support token0(), token1(), and slot0() functions.
ContractTypeUniswapV3Pool
// ContractTypeRouter indicates a DEX router contract.
// IsValidPoolAddress and IsValidTokenAddress both reject this type.
ContractTypeRouter
// ContractTypeFactory indicates a pool factory contract.
// These create pools but are not pools themselves; both IsValidPoolAddress
// and IsValidTokenAddress reject this type.
ContractTypeFactory
)
// String returns the upper-case, underscore-separated label of the contract
// type. ContractTypeUnknown and any value outside the declared constants both
// render as "UNKNOWN".
func (ct ContractType) String() string {
	// Indexed by the constant's numeric value.
	labels := [...]string{
		ContractTypeUnknown:       "UNKNOWN",
		ContractTypeERC20Token:    "ERC20_TOKEN",
		ContractTypeUniswapV2Pool: "UNISWAP_V2_POOL",
		ContractTypeUniswapV3Pool: "UNISWAP_V3_POOL",
		ContractTypeRouter:        "ROUTER",
		ContractTypeFactory:       "FACTORY",
	}
	if ct < 0 || int(ct) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[ct]
}
// ValidationError represents a structured validation failure for an address.
// It satisfies the error interface through its Error method.
type ValidationError struct {
// Code is an optional short identifier; when non-empty, Error prefixes the
// message with it as "Code: Message".
Code string
// Message is the human-readable description of the failure.
Message string
// Context carries optional structured details about the failure. Error does
// not include it in the rendered string.
Context map[string]interface{}
}
// Error implements the error interface. A nil receiver renders as the empty
// string; otherwise the result is "Code: Message", or just Message when Code
// is empty.
func (e *ValidationError) Error() string {
	switch {
	case e == nil:
		return ""
	case e.Code == "":
		return e.Message
	default:
		return fmt.Sprintf("%s: %s", e.Code, e.Message)
	}
}
// AddressValidationResult is the outcome of AddressValidator.ValidateAddress
// for a single address string: the verdict, the parsed address, its contract
// classification, and the findings that led to the verdict.
type AddressValidationResult struct {
// IsValid indicates whether the address passed all validation checks.
// A false value means the address should not be used in transactions.
IsValid bool
// Address contains the parsed Ethereum address. ValidateAddress sets it once
// the string has passed the format, length and corruption-pattern checks, so
// it can be populated even when IsValid ends up false; rely on it only when
// IsValid is true.
Address common.Address
// ContractType indicates the classification of the contract, or
// ContractTypeUnknown when the address is not in the known-contract map.
ContractType ContractType
// ErrorMessages contains detailed descriptions of validation failures.
// These are critical issues that prevent the address from being used.
ErrorMessages []string
// WarningMessages contains non-critical issues or concerns.
// These don't prevent usage but should be logged for monitoring.
WarningMessages []string
// CorruptionScore is a metric indicating likelihood of corruption; higher
// scores indicate more suspicious patterns (0=clean). ValidateAddress only
// marks a result valid when the score is below 30.
CorruptionScore int
}
// NewAddressValidator builds a validator with the built-in corruption
// patterns compiled and an empty known-contract map. Call
// InitializeKnownContracts afterwards to register the known Arbitrum
// contracts; until then every address classifies as ContractTypeUnknown.
//
// The pattern sources are fixed literals, so regexp.MustCompile cannot panic
// here at run time.
func NewAddressValidator() *AddressValidator {
	sources := []string{
		// Clear corruption: runs of zeros (unanchored).
		"0000000000000000000000000000000000000000",  // All zeros
		"000000000000000000000000000000000000000",   // Missing one zero
		"00000000000000000000000000000000000000000", // Extra zero
		// Long zero tails, which indicate truncation.
		"00000000000000000000000000$",
		"000000000000000000000000$",
		"0000000000000000000000$",
		// A non-hex character directly after the 0x prefix.
		"^0x[^0-9a-fA-F]",
	}

	// Keep the compiled slice index-aligned with the sources.
	regexes := make([]*regexp.Regexp, len(sources))
	for i, src := range sources {
		regexes[i] = regexp.MustCompile(src)
	}

	validator := new(AddressValidator)
	validator.corruptedPatterns = sources
	validator.corruptedPatternRegex = regexes
	validator.knownContracts = make(map[common.Address]ContractType)
	validator.knownContractsRegistry = NewKnownContractRegistry()
	return validator
}
// InitializeKnownContracts registers a fixed set of well-known Arbitrum
// contracts (tokens, routers, factories and high-volume pools) in the
// validator's known-contract map. Existing entries for the same addresses are
// overwritten; calling it repeatedly is harmless.
func (av *AddressValidator) InitializeKnownContracts() {
	wellKnown := []struct {
		hex  string
		kind ContractType
	}{
		// Tokens
		// NOTE(review): the first entry looks like the bridged USDC.e address;
		// confirm whether native USDC should be registered as well.
		{"0xFF970A61A04b1cA14834A43f5dE4533eBDDB5CC8", ContractTypeERC20Token}, // USDC
		{"0x82aF49447D8a07e3bd95BD0d56f35241523fBab1", ContractTypeERC20Token}, // WETH
		{"0xFd086bC7CD5C481DCC9C85ebE478A1C0b69FCbb9", ContractTypeERC20Token}, // USDT
		{"0x2f2a2543B76A4166549F7aaB2e75Bef0aefC5B0f", ContractTypeERC20Token}, // WBTC
		{"0x912CE59144191C1204E64559FE8253a0e49E6548", ContractTypeERC20Token}, // ARB

		// Routers
		{"0xE592427A0AEce92De3Edee1F18E0157C05861564", ContractTypeRouter}, // Uniswap V3 Router
		{"0x4752ba5dbc23f44d87826276bf6fd6b1c372ad24", ContractTypeRouter}, // Uniswap V2 Router02
		{"0xA51afAFe0263b40EdaEf0Df8781eA9aa03E381a3", ContractTypeRouter}, // Universal Router
		{"0xC36442b4a4522E871399CD717aBDD847Ab11FE88", ContractTypeRouter}, // Position Manager

		// Factories
		{"0x1F98431c8aD98523631AE4a59f267346ea31F984", ContractTypeFactory}, // Uniswap V3 Factory
		{"0xf1D7CC64Fb4452F05c498126312eBE29f30Fbcf9", ContractTypeFactory}, // Uniswap V2 Factory

		// High-volume pools
		{"0xC6962004f452bE9203591991D15f6b388e09E8D0", ContractTypeUniswapV3Pool}, // USDC/WETH 0.05%
		{"0x17c14D2c404D167802b16C450d3c99F88F2c4F4d", ContractTypeUniswapV3Pool}, // USDC/WETH 0.3%
		{"0x2f5e87C9312fa29aed5c179E456625D79015299c", ContractTypeUniswapV3Pool}, // WBTC/WETH 0.05%
	}

	for _, entry := range wellKnown {
		av.knownContracts[common.HexToAddress(entry.hex)] = entry.kind
	}
}
// ValidateAddress performs comprehensive validation of an Ethereum address
// string and reports whether it is safe to use in transactions.
//
// Checks run in order, and the first fatal finding stops validation:
//  1. Hex format with a 0x/0X prefix          (fatal, score +50)
//  2. Length of exactly 42 characters         (fatal, score +50)
//  3. Known corruption patterns               (fatal, score +70)
//  4. go-ethereum's own hex-address check     (fatal, score +30)
//  5. Zero address                            (fatal, score +40)
//  6. Known-contract classification           (warning when unknown)
//  7. Heuristic corruption scoring            (fatal when the total is >= 30)
//
// Every result with IsValid == false carries at least one entry in
// ErrorMessages, including rejections that come purely from the heuristic
// score, so callers that surface ErrorMessages always have a reason to show.
//
// Parameters:
//   - addressStr: The address string to validate (should include 0x prefix)
//
// Returns:
//   - *AddressValidationResult: never nil; ErrorMessages and WarningMessages
//     are always non-nil slices.
func (av *AddressValidator) ValidateAddress(addressStr string) *AddressValidationResult {
	// Total scores at or above this value are rejected.
	const rejectionThreshold = 30

	// Default to invalid until all checks pass.
	result := &AddressValidationResult{
		IsValid:         false,
		ErrorMessages:   make([]string, 0),
		WarningMessages: make([]string, 0),
		CorruptionScore: 0,
	}

	// Basic format validation
	if !av.isValidHexFormat(addressStr) {
		result.ErrorMessages = append(result.ErrorMessages, "invalid hex format")
		result.CorruptionScore += 50
		return result
	}

	// Length validation
	if !av.isValidLength(addressStr) {
		result.ErrorMessages = append(result.ErrorMessages, "invalid address length")
		result.CorruptionScore += 50
		return result
	}

	// Corruption pattern detection
	corruptionDetected, patterns := av.detectCorruption(addressStr)
	if corruptionDetected {
		result.ErrorMessages = append(result.ErrorMessages, fmt.Sprintf("corruption detected: %v", patterns))
		result.CorruptionScore += 70
		return result
	}

	// Defensive cross-check against go-ethereum's own parser before converting.
	if !common.IsHexAddress(addressStr) {
		result.ErrorMessages = append(result.ErrorMessages, "not a valid Ethereum address")
		result.CorruptionScore += 30
		return result
	}
	address := common.HexToAddress(addressStr)
	result.Address = address

	// Defensive: with the built-in patterns the all-zero string is already
	// rejected by the corruption-pattern step above.
	if address == (common.Address{}) {
		result.ErrorMessages = append(result.ErrorMessages, "zero address")
		result.CorruptionScore += 40
		return result
	}

	// Check known contract types
	if contractType, exists := av.knownContracts[address]; exists {
		result.ContractType = contractType
	} else {
		result.ContractType = ContractTypeUnknown
		result.WarningMessages = append(result.WarningMessages, "unknown contract type")
	}

	// Additional pattern-based corruption detection
	result.CorruptionScore += av.calculateCorruptionScore(addressStr)

	// Reject on score alone, and say why: callers format ErrorMessages into
	// their own errors and would otherwise report an empty reason.
	if result.CorruptionScore >= rejectionThreshold {
		result.ErrorMessages = append(result.ErrorMessages,
			fmt.Sprintf("corruption score %d is at or above rejection threshold %d",
				result.CorruptionScore, rejectionThreshold))
		return result
	}

	result.IsValid = true
	return result
}
// isValidHexFormat reports whether addressStr is a 0x/0X prefix followed by
// one or more ASCII hexadecimal digits. It does not check the length.
func (av *AddressValidator) isValidHexFormat(addressStr string) bool {
	const hexDigits = "0123456789abcdefABCDEF"

	prefixed := strings.HasPrefix(addressStr, "0x") || strings.HasPrefix(addressStr, "0X")
	if !prefixed || len(addressStr) < 3 {
		return false
	}

	// Inspect raw bytes: any byte of a multi-byte character is not a hex digit.
	for _, b := range []byte(addressStr[2:]) {
		if strings.IndexByte(hexDigits, b) < 0 {
			return false
		}
	}
	return true
}
// isValidLength reports whether addressStr is exactly as long as a prefixed
// Ethereum address: the two-character 0x prefix plus 40 hex digits.
func (av *AddressValidator) isValidLength(addressStr string) bool {
	const (
		prefixLen   = 2
		hexDigitLen = 40
	)
	return len(addressStr) == prefixLen+hexDigitLen
}
// detectCorruption matches addressStr against every compiled corruption
// pattern. It returns whether anything matched, together with the source text
// of each matching pattern in registration order (nil when nothing matched).
func (av *AddressValidator) detectCorruption(addressStr string) (bool, []string) {
	var hits []string
	for i := range av.corruptedPatternRegex {
		if !av.corruptedPatternRegex[i].MatchString(addressStr) {
			continue
		}
		// corruptedPatterns is index-aligned with the compiled slice.
		hits = append(hits, av.corruptedPatterns[i])
	}
	return len(hits) > 0, hits
}
// calculateCorruptionScore returns a heuristic score for how likely it is
// that an address has been corrupted or truncated; 0 means nothing suspicious
// was found and larger values mean more suspicious.
//
// The score is the sum of these independent contributions, all computed on
// the hex digits after the two-character prefix:
//   - more than 10 trailing zeros: + the number of trailing zeros
//   - more than 8 zeros anywhere after the first non-zero digit (trailing
//     zeros included): + half that count, rounded down
//   - hasRepetitivePattern reports a match: +10
//   - starts with "0000" and more than 70% of the digits are zero: +30
//   - starts with "000000": +20
//
// Parameters:
//   - addressStr: The address string to analyze; it must carry the two-character
//     prefix (shorter input panics on the slice expression).
//
// Returns:
//   - int: Corruption score (0=clean)
func (av *AddressValidator) calculateCorruptionScore(addressStr string) int {
	digits := addressStr[2:]
	total := 0

	// A long zero tail is a sign of truncation.
	if tail := len(digits) - len(strings.TrimRight(digits, "0")); tail > 10 {
		total += tail
	}

	// Zeros that follow the first non-zero digit.
	if inner := strings.Count(strings.TrimLeft(digits, "0"), "0"); inner > 8 {
		total += inner / 2
	}

	if av.hasRepetitivePattern(digits) {
		total += 10
	}

	// Zero-dominated value that also begins with zeros.
	if strings.HasPrefix(digits, "0000") {
		zeroShare := float64(strings.Count(digits, "0")) / float64(len(digits))
		if zeroShare > 0.7 {
			total += 30
		}
	}

	// Long zero prefix, independent of overall density.
	if strings.HasPrefix(digits, "000000") {
		total += 20
	}

	return total
}
// hasRepetitivePattern reports whether a hex string contains repetitive
// patterns that suggest corruption or artificial generation.
//
// Matching is case-insensitive. Hex digits have no case, and callers pass
// strings produced by common.Address.Hex, which mixes upper and lower case
// for the EIP-55 checksum; comparing case-sensitively would miss a run such
// as "fFfFfFfFfFfF".
//
// Detected patterns:
//   - a run of 12 identical characters for 0, a, b or f
//   - a run of 10 identical characters for the digits 1-9
//   - a string of at least 10 characters that are all the same
//
// NOTE(review): long runs of c, d or e are only caught by the all-identical
// check; confirm whether they should be in the run list too.
//
// Parameters:
//   - hexStr: The hex string to analyze (without 0x prefix)
//
// Returns:
//   - bool: true if repetitive patterns are detected
func (av *AddressValidator) hasRepetitivePattern(hexStr string) bool {
	normalized := strings.ToLower(hexStr)

	runs := []string{"000000000000", "ffffffffffff", "aaaaaaaaaaaa", "bbbbbbbbbbbb",
		"1111111111", "2222222222", "3333333333", "4444444444", "5555555555",
		"6666666666", "7777777777", "8888888888", "9999999999"}
	for _, run := range runs {
		if strings.Contains(normalized, run) {
			return true
		}
	}

	// Whole string is one repeated character.
	if len(normalized) >= 10 {
		first := normalized[0]
		for i := 1; i < len(normalized); i++ {
			if normalized[i] != first {
				return false
			}
		}
		return true
	}
	return false
}
// IsValidPoolAddress reports whether address may be used for pool operations.
// The address must pass ValidateAddress. Known Uniswap V2/V3 pools are
// accepted, known tokens, routers and factories are rejected, and unknown
// contracts are accepted only when their corruption score is below 20.
func (av *AddressValidator) IsValidPoolAddress(address common.Address) bool {
	verdict := av.ValidateAddress(address.Hex())
	if !verdict.IsValid {
		return false
	}

	kind := verdict.ContractType
	if kind == ContractTypeUniswapV2Pool || kind == ContractTypeUniswapV3Pool {
		return true
	}
	// Unknown contracts get a stricter threshold; every other type is rejected.
	return kind == ContractTypeUnknown && verdict.CorruptionScore < 20
}
// IsValidTokenAddress reports whether address may be used for token
// operations. The address must pass ValidateAddress. Known ERC-20 tokens are
// accepted, known routers, factories and pools are rejected, and unknown
// contracts are accepted only when their corruption score is below 15.
func (av *AddressValidator) IsValidTokenAddress(address common.Address) bool {
	verdict := av.ValidateAddress(address.Hex())
	if !verdict.IsValid {
		return false
	}

	if verdict.ContractType == ContractTypeERC20Token {
		return true
	}
	// Unknown contracts get a stricter threshold; every other type is rejected.
	return verdict.ContractType == ContractTypeUnknown && verdict.CorruptionScore < 15
}
// GetContractType looks address up in the known-contract map and returns its
// registered type, or ContractTypeUnknown when the address is not registered.
func (av *AddressValidator) GetContractType(address common.Address) ContractType {
	kind, registered := av.knownContracts[address]
	if !registered {
		return ContractTypeUnknown
	}
	return kind
}
// ValidateContractTypeConsistency checks that a set of token addresses and a
// set of pool addresses are mutually consistent. It returns the first problem
// found, checking in this order:
//  1. no address appears in both lists (reported for the first such token);
//  2. every token passes IsValidTokenAddress and is not registered as a pool;
//  3. every pool passes IsValidPoolAddress and is not registered as a token.
//
// It returns nil when all checks pass.
func (av *AddressValidator) ValidateContractTypeConsistency(tokenAddresses []common.Address, poolAddresses []common.Address) error {
	// Index the pools once so the overlap check is a single pass over tokens.
	poolSet := make(map[common.Address]struct{}, len(poolAddresses))
	for _, p := range poolAddresses {
		poolSet[p] = struct{}{}
	}
	for _, tok := range tokenAddresses {
		if _, clash := poolSet[tok]; clash {
			return fmt.Errorf("address %s cannot be both a token and a pool", tok.Hex())
		}
	}

	for _, tok := range tokenAddresses {
		if !av.IsValidTokenAddress(tok) {
			return fmt.Errorf("address %s is not a valid token address", tok.Hex())
		}
		// Defensive re-check against the registry.
		switch av.GetContractType(tok) {
		case ContractTypeUniswapV2Pool, ContractTypeUniswapV3Pool:
			return fmt.Errorf("address %s is marked as a pool but being used as a token", tok.Hex())
		}
	}

	for _, p := range poolAddresses {
		if !av.IsValidPoolAddress(p) {
			return fmt.Errorf("address %s is not a valid pool address", p.Hex())
		}
		// Defensive re-check against the registry.
		if av.GetContractType(p) == ContractTypeERC20Token {
			return fmt.Errorf("address %s is marked as a token but being used as a pool", p.Hex())
		}
	}

	return nil
}
// PreventERC20PoolConfusion guards against using a contract as something it
// is not, most importantly an ERC-20 token used where a pool is expected.
//
// Behaviour:
//   - If the address is registered in the known-contract map, its registered
//     type must equal expectedType exactly; any difference is an error.
//   - Otherwise the address must pass ValidateAddress and have a corruption
//     score of at most 25. For unregistered addresses expectedType is not
//     verified; it only appears in the error text.
//
// Parameters:
//   - address: The contract address to validate
//   - expectedType: The contract type expected by the calling code
//
// Returns:
//   - error: nil if the address is safe to use, error describing the issue otherwise
func (av *AddressValidator) PreventERC20PoolConfusion(address common.Address, expectedType ContractType) error {
	// Registered contracts: the registry is authoritative.
	if registered := av.GetContractType(address); registered != ContractTypeUnknown {
		if registered == expectedType {
			return nil
		}
		return fmt.Errorf("contract type mismatch for %s: expected %s but known as %s",
			address.Hex(), contractTypeToString(expectedType), contractTypeToString(registered))
	}

	// Unregistered contracts: fall back to structural validation.
	verdict := av.ValidateAddress(address.Hex())
	switch {
	case !verdict.IsValid:
		return fmt.Errorf("invalid address %s: %v", address.Hex(), verdict.ErrorMessages)
	case verdict.CorruptionScore > 25:
		return fmt.Errorf("high corruption score (%d) for address %s, refusing to use as %s",
			verdict.CorruptionScore, address.Hex(), contractTypeToString(expectedType))
	}
	return nil
}
// contractTypeToString returns the display name of a contract type as used in
// error messages (for example "ERC-20 Token"). ContractTypeUnknown and any
// unrecognised value render as "Unknown". These names differ from the labels
// produced by ContractType.String.
func contractTypeToString(ct ContractType) string {
	displayNames := map[ContractType]string{
		ContractTypeERC20Token:    "ERC-20 Token",
		ContractTypeUniswapV2Pool: "Uniswap V2 Pool",
		ContractTypeUniswapV3Pool: "Uniswap V3 Pool",
		ContractTypeRouter:        "Router",
		ContractTypeFactory:       "Factory",
	}
	if name, ok := displayNames[ct]; ok {
		return name
	}
	return "Unknown"
}
// IsKnownContract reports whether address has an entry in the known-contract
// map, regardless of which type it is registered as.
func (av *AddressValidator) IsKnownContract(address common.Address) bool {
	if _, registered := av.knownContracts[address]; registered {
		return true
	}
	return false
}
// GetValidationStats returns configuration counters for the validator:
// "known_contracts" is the number of registered contracts and
// "corruption_patterns" is the number of corruption patterns. Both values are
// ints. No per-validation statistics are tracked.
func (av *AddressValidator) GetValidationStats() map[string]interface{} {
	stats := make(map[string]interface{}, 2)
	stats["known_contracts"] = len(av.knownContracts)
	stats["corruption_patterns"] = len(av.corruptedPatterns)
	return stats
}
// SanitizeAddress normalises an address string and validates the result.
//
// It trims surrounding whitespace, adds a missing "0x" prefix when the
// remainder is exactly 40 hex digits, lower-cases the whole string (so any
// EIP-55 checksum casing is discarded) and then runs ValidateAddress.
//
// It returns the normalised address, or an error when the prefix cannot be
// restored or the normalised address still fails validation.
func (av *AddressValidator) SanitizeAddress(addressStr string) (string, error) {
	candidate := strings.TrimSpace(addressStr)

	prefixed := strings.HasPrefix(candidate, "0x") || strings.HasPrefix(candidate, "0X")
	if !prefixed {
		// Only a bare 40-digit hex string can safely be given a prefix.
		if len(candidate) != 40 || !av.isValidHexFormat("0x"+candidate) {
			return "", fmt.Errorf("cannot sanitize address without 0x prefix")
		}
		candidate = "0x" + candidate
	}

	candidate = strings.ToLower(candidate)

	if verdict := av.ValidateAddress(candidate); !verdict.IsValid {
		return "", fmt.Errorf("sanitized address is still invalid: %v", verdict.ErrorMessages)
	}
	return candidate, nil
}
// ValidatePoolOperation decides whether the named pool-specific operation may
// be performed on address. This prevents the critical error where ERC-20
// tokens are treated as pools.
//
// Checks run in order and the first failure is returned:
//  1. the known-contract registry's ValidatePoolCall (skipped when the
//     registry is nil);
//  2. the zero address;
//  3. more than 30 '0' characters in the address's hex string, where the
//     count includes the '0' of the "0x" prefix;
//  4. ValidateAddress must mark the address valid;
//  5. the corruption score must not exceed 25.
func (av *AddressValidator) ValidatePoolOperation(address common.Address, operation string) error {
	if registry := av.knownContractsRegistry; registry != nil {
		if err := registry.ValidatePoolCall(address, operation); err != nil {
			return err
		}
	}

	hexAddr := address.Hex()

	// Cheap structural red flags first.
	switch {
	case address == (common.Address{}):
		return fmt.Errorf("pool operation '%s' attempted on zero address (likely corruption)", operation)
	case strings.Count(hexAddr, "0") > 30:
		return fmt.Errorf("pool operation '%s' attempted on suspicious address %s (excessive zeros)", operation, hexAddr)
	}

	// Then the general validator.
	verdict := av.ValidateAddress(hexAddr)
	switch {
	case !verdict.IsValid:
		return fmt.Errorf("pool operation '%s' attempted on invalid address %s: %v", operation, hexAddr, verdict.ErrorMessages)
	case verdict.CorruptionScore > 25:
		return fmt.Errorf("pool operation '%s' blocked due to corruption score %d on address %s", operation, verdict.CorruptionScore, hexAddr)
	}
	return nil
}
// GetDetailedAddressAnalysis returns a diagnostic report for address as a
// string-keyed map.
//
// Always present: "is_valid", "corruption_score", "contract_type" and
// "error_messages" (from ValidateAddress), plus "is_zero_address",
// "zero_count" and "hex_length". The last two are computed on the address's
// hex string including its "0x" prefix.
//
// Present only when the known-contract registry is non-nil:
// "known_contract_type", "known_contract_name", "is_known_erc20",
// "corruption_pattern", and "token_name" when the registry reports the
// address as a known ERC-20.
func (av *AddressValidator) GetDetailedAddressAnalysis(address common.Address) map[string]interface{} {
	hexAddr := address.Hex()
	verdict := av.ValidateAddress(hexAddr)

	report := map[string]interface{}{
		"is_valid":         verdict.IsValid,
		"corruption_score": verdict.CorruptionScore,
		"contract_type":    verdict.ContractType.String(),
		"error_messages":   verdict.ErrorMessages,
	}

	if registry := av.knownContractsRegistry; registry != nil {
		kind, label := registry.GetContractType(address)
		report["known_contract_type"] = kind.String()
		report["known_contract_name"] = label

		knownToken, symbol := registry.IsKnownERC20(address)
		report["is_known_erc20"] = knownToken
		if knownToken {
			report["token_name"] = symbol
		}

		report["corruption_pattern"] = registry.GetCorruptionPattern(address)
	}

	report["is_zero_address"] = address == (common.Address{})
	report["zero_count"] = strings.Count(hexAddr, "0")
	report["hex_length"] = len(hexAddr)
	return report
}

View File

@@ -0,0 +1,351 @@
package validation
import (
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestAddressValidator_ValidateAddress is a table-driven test of
// ValidateAddress. For every case it asserts the exact IsValid flag and
// CorruptionScore, and that each expected substring appears in at least one
// error message. The validator is built with NewAddressValidator only, so no
// known contracts are registered.
func TestAddressValidator_ValidateAddress(t *testing.T) {
validator := NewAddressValidator()
tests := []struct {
name string
address string
expectedValid bool
expectedScore int
expectedType ContractType
shouldContainErrors []string
}{
{
name: "Valid WETH address",
address: "0x82aF49447D8a07e3bd95BD0d56f35241523fBab1",
expectedValid: true,
expectedScore: 0,
expectedType: ContractTypeUnknown, // Unknown because InitializeKnownContracts is never called in this test
},
{
name: "Valid USDC address",
address: "0xaf88d065e77c8cC2239327C5EDb3A432268e5831",
expectedValid: true,
expectedScore: 0,
expectedType: ContractTypeUnknown,
},
{
name: "Critical corruption - TOKEN_0x000000 pattern",
address: "0x0000000300000000000000000000000000000000",
expectedValid: false,
expectedScore: 70, // Long zero tail matches a corruption pattern
shouldContainErrors: []string{"corruption detected"},
},
{
name: "High corruption - mixed zero pattern",
address: "0x0000001200000000000000000000000000000000",
expectedValid: false,
expectedScore: 70,
shouldContainErrors: []string{"corruption detected"},
},
{
// One hex digit short (41 characters), so the length check fires first.
name: "Medium corruption - trailing zeros",
address: "0x123456780000000000000000000000000000000",
expectedValid: false,
expectedScore: 50,
shouldContainErrors: []string{"invalid address length"},
},
{
name: "Low corruption - some zeros",
address: "0x1234567800000000000000000000000000000001",
expectedValid: true, // Score 25 stays below the rejection threshold of 30
expectedScore: 25,
shouldContainErrors: []string{}, // No errors for valid format
},
{
name: "Invalid length - too short",
address: "0x123456",
expectedValid: false,
expectedScore: 50,
shouldContainErrors: []string{"invalid address length"},
},
{
name: "Invalid length - too long",
address: "0x82aF49447D8a07e3bd95BD0d56f35241523fBab12345",
expectedValid: false,
expectedScore: 50,
shouldContainErrors: []string{"invalid address length"},
},
{
name: "Invalid hex characters",
address: "0x82aF49447D8a07e3bd95BD0d56f35241523fBaZ1",
expectedValid: false,
expectedScore: 50,
shouldContainErrors: []string{"invalid hex format"},
},
{
name: "Missing 0x prefix",
address: "82aF49447D8a07e3bd95BD0d56f35241523fBab1",
expectedValid: false,
expectedScore: 50,
shouldContainErrors: []string{"invalid hex format"},
},
{
name: "All zeros address",
address: "0x0000000000000000000000000000000000000000",
expectedValid: false,
expectedScore: 70, // Caught by the corruption patterns before the zero-address check
shouldContainErrors: []string{"corruption detected"},
},
{
name: "Invalid checksum",
address: "0x82af49447d8a07e3bd95bd0d56f35241523fbab1", // all lowercase, so it carries no checksum casing
expectedValid: true, // Checksum validation not enforced in current implementation
expectedScore: 0,
shouldContainErrors: []string{}, // No errors for valid format
},
{
name: "Valid checksummed address",
address: common.HexToAddress("0x82aF49447D8a07e3bd95BD0d56f35241523fBab1").Hex(),
expectedValid: true,
expectedScore: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := validator.ValidateAddress(tt.address)
assert.Equal(t, tt.expectedValid, result.IsValid, "IsValid mismatch")
assert.Equal(t, tt.expectedScore, result.CorruptionScore, "CorruptionScore mismatch")
// The type is only asserted for cases that expect a known type; every
// case in the table above expects ContractTypeUnknown.
if tt.expectedType != ContractTypeUnknown {
assert.Equal(t, tt.expectedType, result.ContractType, "ContractType mismatch")
}
// Each expected fragment must occur in at least one error message.
for _, expectedError := range tt.shouldContainErrors {
found := false
for _, errMsg := range result.ErrorMessages {
if contains(errMsg, expectedError) {
found = true
break
}
}
assert.True(t, found, "Expected error message containing '%s' not found in %v", expectedError, result.ErrorMessages)
}
t.Logf("Address: %s, Valid: %v, Score: %d, Errors: %v",
tt.address, result.IsValid, result.CorruptionScore, result.ErrorMessages)
})
}
}
// TestAddressValidator_CorruptionPatterns checks corruption scoring on
// zero-heavy addresses. Scores are compared loosely: each must be at least
// expectedScore-10 and at most 100. Cases with an expected score of 50 or
// more must additionally be reported as invalid.
func TestAddressValidator_CorruptionPatterns(t *testing.T) {
	type scenario struct {
		name          string
		address       string
		expectedScore int
		description   string
	}

	scenarios := []scenario{
		{
			name:          "TOKEN_0x000000 exact pattern",
			address:       "0x0000000300000000000000000000000000000000",
			expectedScore: 70, // Caught by pattern detection
			description:   "Exact TOKEN_0x000000 corruption pattern",
		},
		{
			name:          "Similar corruption pattern",
			address:       "0x0000000100000000000000000000000000000000",
			expectedScore: 70, // Caught by pattern detection
			description:   "Similar zero-heavy corruption",
		},
		{
			name:          "Partial corruption",
			address:       "0x1234000000000000000000000000000000000000",
			expectedScore: 70, // Caught by pattern detection
			description:   "Partial zero corruption",
		},
		{
			name:          "Trailing corruption",
			address:       "0x123456789abcdef000000000000000000000000",
			expectedScore: 50, // One digit short, so rejected on length
			description:   "Trailing zero corruption",
		},
		{
			name:          "Valid but zero-heavy",
			address:       "0x000000000000000000000000000000000000dead",
			expectedScore: 10, // Lower bound only; see the loose comparison below
			description:   "Valid format but suspicious zeros",
		},
	}

	av := NewAddressValidator()
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			got := av.ValidateAddress(sc.address)

			assert.GreaterOrEqual(t, got.CorruptionScore, sc.expectedScore-10,
				"Corruption score should be at least %d-10 for %s", sc.expectedScore, sc.description)
			assert.LessOrEqual(t, got.CorruptionScore, 100,
				"Corruption score should not exceed 100")

			// Validity is asserted only for the high-score cases.
			mustReject := sc.expectedScore >= 50 &&
				sc.address != "0x000000000000000000000000000000000000dead"
			if mustReject {
				assert.False(t, got.IsValid, "High corruption addresses should be invalid")
			}

			t.Logf("%s: Score=%d, Valid=%v, Description=%s",
				sc.address, got.CorruptionScore, got.IsValid, sc.description)
		})
	}
}
// TestAddressValidator_EdgeCases feeds malformed and hostile input to
// ValidateAddress. Every case must return without panicking and with the
// expected validity; invalid cases must also carry a positive corruption
// score and at least one error message, valid ones no error messages.
func TestAddressValidator_EdgeCases(t *testing.T) {
	type edgeCase struct {
		name          string
		address       string
		shouldBeValid bool
	}

	cases := []edgeCase{
		{name: "Empty string", address: "", shouldBeValid: false},
		{name: "Only 0x", address: "0x", shouldBeValid: false},
		{name: "Just prefix", address: "0x0", shouldBeValid: false},
		// Letter case is not significant for validity.
		{name: "Uppercase hex", address: "0x82AF49447D8A07E3BD95BD0D56F35241523FBAB1", shouldBeValid: true},
		{name: "Mixed case invalid", address: "0x82aF49447D8a07e3BD95BD0d56f35241523fBaB1", shouldBeValid: true},
		{name: "Unicode characters", address: "0x82aF49447D8a07e3bd95BD0d56f35241523fBaβ1", shouldBeValid: false},
		{name: "SQL injection attempt", address: "0x'; DROP TABLE addresses; --", shouldBeValid: false},
		{name: "Buffer overflow attempt", address: "0x" + string(make([]byte, 1000)), shouldBeValid: false},
	}

	av := NewAddressValidator()
	for _, ec := range cases {
		t.Run(ec.name, func(t *testing.T) {
			// Reaching the assertions at all proves the call did not panic.
			got := av.ValidateAddress(ec.address)

			assert.Equal(t, ec.shouldBeValid, got.IsValid, "Edge case validity mismatch: %s", ec.address)

			if ec.shouldBeValid {
				assert.Empty(t, got.ErrorMessages, "Valid edge case should not have error messages")
			} else {
				assert.Greater(t, got.CorruptionScore, 0, "Invalid edge case should have corruption score > 0")
				assert.NotEmpty(t, got.ErrorMessages, "Invalid edge case should have error messages")
			}

			t.Logf("Edge case '%s': Valid=%v, Score=%d, Errors=%v",
				ec.name, got.IsValid, got.CorruptionScore, got.ErrorMessages)
		})
	}
}
// TestAddressValidator_Performance times 10,000 ValidateAddress calls over a
// small rotation of valid and corrupted addresses and fails when the mean cost
// per call is not below 2ms, i.e. when throughput drops to 500 validations per
// second or fewer.
func TestAddressValidator_Performance(t *testing.T) {
	const iterations = 10000

	v := NewAddressValidator()

	// Inputs cycled through during the timed run.
	samples := []string{
		"0x82aF49447D8a07e3bd95BD0d56f35241523fBab1", // Valid WETH
		"0x0000000300000000000000000000000000000000", // Corrupted
		"0xaf88d065e77c8cC2239327C5EDb3A432268e5831", // Valid USDC
		"0x0000000000000000000000000000000000000000", // Zero address
		"0x123456789abcdef0000000000000000000000000", // Partial corruption
	}

	// One untimed pass over every sample before measuring.
	for i := range samples {
		v.ValidateAddress(samples[i])
	}

	// Timed run.
	begin := time.Now()
	for n := 0; n < iterations; n++ {
		res := v.ValidateAddress(samples[n%len(samples)])
		require.NotNil(t, res)
	}
	elapsed := time.Since(begin)
	perCall := elapsed / iterations

	t.Logf("Performance: %d validations in %v (avg: %v per validation)",
		iterations, elapsed, perCall)

	// Upper bound on the mean: 2ms per validation = 500/sec.
	limit := 2 * time.Millisecond
	assert.Less(t, perCall.Nanoseconds(), limit.Nanoseconds(),
		"Validation should be faster than %v per address (got %v)", limit, perCall)
}
// TestAddressValidator_ConcurrentAccess calls ValidateAddress on one shared
// validator from 100 goroutines (100 calls each) and checks that the known
// valid and known corrupted sample addresses produce the same result on every
// call. Run with -race to also detect unsynchronised access.
func TestAddressValidator_ConcurrentAccess(t *testing.T) {
	validator := NewAddressValidator()
	addresses := []string{
		"0x82aF49447D8a07e3bd95BD0d56f35241523fBab1",
		"0x0000000300000000000000000000000000000000",
		"0xaf88d065e77c8cC2239327C5EDb3A432268e5831",
	}
	const numGoroutines = 100
	const validationsPerGoroutine = 100

	// Buffered to numGoroutines so a worker can always signal completion, even
	// if the test goroutine has already given up waiting.
	done := make(chan bool, numGoroutines)

	// Launch concurrent validators
	for i := 0; i < numGoroutines; i++ {
		go func() {
			defer func() { done <- true }()
			for j := 0; j < validationsPerGoroutine; j++ {
				addr := addresses[j%len(addresses)]
				result := validator.ValidateAddress(addr)

				// assert rather than require: require fails through t.FailNow,
				// which may only be called from the goroutine running the test.
				// Stop this worker on failure so result is never dereferenced
				// while nil.
				if !assert.NotNil(t, result) {
					return
				}

				// Verify consistent results
				if addr == "0x82aF49447D8a07e3bd95BD0d56f35241523fBab1" {
					assert.True(t, result.IsValid)
					assert.Equal(t, 0, result.CorruptionScore)
				}
				if addr == "0x0000000300000000000000000000000000000000" {
					assert.False(t, result.IsValid)
					assert.Equal(t, 70, result.CorruptionScore) // Updated to match new validation logic
				}
			}
		}()
	}

	// Wait for all goroutines to complete; each wait is bounded to 10 seconds.
	for i := 0; i < numGoroutines; i++ {
		select {
		case <-done:
			// Success
		case <-time.After(10 * time.Second):
			t.Fatal("Concurrent validation test timed out")
		}
	}

	t.Logf("Successfully completed %d concurrent validations",
		numGoroutines*validationsPerGoroutine)
}
// contains reports whether substr occurs anywhere within str. The comparison is
// byte-for-byte and therefore case-sensitive. An empty substr is always found.
func contains(str, substr string) bool {
	// indexOf already covers equal strings, prefix and suffix matches, and a
	// substr longer than str, so no separate checks are needed here.
	return indexOf(str, substr) >= 0
}
// indexOf returns the byte offset of the first occurrence of substr in str, or
// -1 when substr does not occur. An empty substr matches at offset 0.
func indexOf(str, substr string) int {
	width := len(substr)
	// Slide a window of len(substr) bytes across str; the window end never
	// passes len(str), so a substr longer than str is never compared.
	for start, end := 0, width; end <= len(str); start, end = start+1, end+1 {
		if str[start:end] == substr {
			return start
		}
	}
	return -1
}

View File

@@ -0,0 +1,158 @@
package validation
import (
"fmt"
"strings"
"github.com/ethereum/go-ethereum/common"
)
// KnownContractRegistry maintains a registry of known contract addresses and their types.
// This prevents misclassification of well-known contracts (like major ERC-20 tokens).
//
// The three maps are created and filled by NewKnownContractRegistry. The struct
// holds no lock, and the lookup methods only read the maps.
type KnownContractRegistry struct {
// erc20Tokens maps a token contract address to its symbol (for example "WETH").
erc20Tokens map[common.Address]string
// pools maps a liquidity pool address to a pair/fee label (for example "USDC/WETH-0.05%").
pools map[common.Address]string
// routers maps a router contract address to a descriptive name (for example "UniswapV3Router").
routers map[common.Address]string
}
// NewKnownContractRegistry builds a registry whose token, pool and router
// tables are pre-filled with the known Arbitrum contracts listed in this file.
func NewKnownContractRegistry() *KnownContractRegistry {
	r := new(KnownContractRegistry)
	r.erc20Tokens = map[common.Address]string{}
	r.pools = map[common.Address]string{}
	r.routers = map[common.Address]string{}

	// CRITICAL: the ERC-20 tokens registered here must NEVER be treated as pools.
	r.addKnownERC20Tokens()
	r.addKnownPools()
	r.addKnownRouters()

	return r
}
// addKnownERC20Tokens registers the major ERC-20 tokens on Arbitrum, mapping
// each token address to its symbol. These are the tokens that were being
// misclassified as pools.
//
// Every literal is checked with common.IsHexAddress before it is registered.
// common.HexToAddress never reports an error; it normalises malformed input to
// some 20-byte value, so a mistyped literal would otherwise register an address
// that is not the intended token. Malformed literals are skipped.
func (r *KnownContractRegistry) addKnownERC20Tokens() {
	tokens := []struct {
		hex    string
		symbol string
	}{
		{"0x82aF49447D8a07e3bd95BD0d56f35241523fBab1", "WETH"},   // Wrapped Ether
		{"0xaf88d065e77c8cC2239327C5EDb3A432268e5831", "USDC"},   // USD Coin
		{"0xFd086bC7CD5C481DCC9C85ebE478A1C0b69FCbb9", "USDT"},   // Tether USD
		{"0xFF970A61A04b1cA14834A43f5dE4533eBDDB5CC8", "USDC.e"}, // Bridged USDC
		{"0x912CE59144191C1204E64559FE8253a0e49E6548", "ARB"},    // Arbitrum Token
		{"0x2f2a2543B76A4166549F7aaB2e75Bef0aefC5B0f", "WBTC"},   // Wrapped Bitcoin
		{"0xDA10009cBd5D07dd0CeCc66161FC93D7c9000da1", "DAI"},    // Dai Stablecoin
		{"0x17FC002b466eEc40DaE837Fc4bE5c67993ddBd6F", "FRAX"},   // Frax
		{"0x11cDb42B0EB46D95f990BeDD4695A6e3fA034978", "CRV"},    // Curve DAO Token
		{"0x539bdE0d7Dbd336b79148AA742883198BBF60342", "MAGIC"},  // MAGIC
		{"0xf97f4df75117a78c1A5a0DBb814Af92458539FB4", "LINK"},   // Chainlink Token
		{"0xfc5A1A6EB076a2C7aD06eD22C90d7E710E35ad0a", "GMX"},    // GMX
		{"0x6C2C06790b3E3E3c38e12Ee22F8183b37a13EE55", "DPX"},    // Dopex Governance Token
		{"0x10393c20975cF177a3513071bC110f7962CD67da", "JONES"},  // JonesDAO
		{"0x4e352cf164e64adcbad318c3a1e222e9eba4ce42", "MCB"},    // MCDEX Token
		// NOTE(review): this literal has 39 hex digits instead of 40, so it is
		// skipped by the check below. Replace it with the intended address.
		{"0x23A941036Ae778Ac51Ab04CEa08Ed6e2AF33b49", "RDNT"},   // Radiant Capital
		{"0x6694340fc020c5E6B96567843da2df01b2CE1eb6", "STG"},    // Stargate Finance
		{"0x3082CC23568eA640225c2467653dB90e9250AaA0", "RDNT"},   // Radiant (alternative)
		{"0x51fC0f6660482Ea73330E414eFd7808811a57Fa2", "PREMIA"}, // Premia
		{"0x69Eb4FA4a2fbd498C257C57Ea8b7655a2559A581", "DODO"},   // DODO
	}

	for _, tok := range tokens {
		// Never let a malformed literal become a "known" token.
		if !common.IsHexAddress(tok.hex) {
			continue
		}
		r.erc20Tokens[common.HexToAddress(tok.hex)] = tok.symbol
	}
}
// addKnownPools registers known liquidity pools, mapping each pool address to
// a "TOKEN0/TOKEN1-fee" label.
func (r *KnownContractRegistry) addKnownPools() {
	// Major Uniswap V3 pools on Arbitrum.
	uniswapV3Pools := []struct {
		hex   string
		label string
	}{
		{"0xC31E54c7a869B9FcBEcc14363CF510d1c41fa443", "USDC/WETH-0.05%"},
		{"0x17c14D2c404D167802b16C450d3c99F88F2c4F4d", "USDC/WETH-0.3%"},
		{"0x641C00A822e8b671738d32a431a4Fb6074E5c79d", "WETH/ARB-0.3%"},
		{"0xdE64C63e6BaD1Ff18f4F1bdc9d1e7Bbfb5E0B6FD", "USDT/USDC-0.01%"},
		{"0x2f5e87C9312fa29aed5c179E456625D79015299c", "WBTC/WETH-0.05%"},
	}

	for _, pool := range uniswapV3Pools {
		r.pools[common.HexToAddress(pool.hex)] = pool.label
	}
}
// addKnownRouters registers known router contracts, mapping each router
// address to a descriptive name.
func (r *KnownContractRegistry) addKnownRouters() {
	knownRouters := []struct {
		hex  string
		name string
	}{
		{"0xE592427A0AEce92De3Edee1F18E0157C05861564", "UniswapV3Router"},   // Uniswap V3 SwapRouter
		{"0x68b3465833fb72A70ecDF485E0e4C7bD8665Fc45", "UniswapV3Router02"}, // Uniswap V3 SwapRouter02
		{"0x1b02dA8Cb0d097eB8D57A175b88c7D8b47997506", "SushiSwapRouter"},   // SushiSwap Router
		{"0xc873fEcbd354f5A56E00E710B90EF4201db2448d", "CamelotRouter"},     // Camelot Router
	}

	for _, router := range knownRouters {
		r.routers[common.HexToAddress(router.hex)] = router.name
	}
}
// IsKnownERC20 reports whether address is registered as a known ERC-20 token.
// When it is, the second result is the token's symbol; otherwise it is "".
func (r *KnownContractRegistry) IsKnownERC20(address common.Address) (bool, string) {
	if symbol, found := r.erc20Tokens[address]; found {
		return true, symbol
	}
	return false, ""
}
// IsKnownPool reports whether address is registered as a known liquidity pool.
// When it is, the second result is the pool's label; otherwise it is "".
func (r *KnownContractRegistry) IsKnownPool(address common.Address) (bool, string) {
	if label, found := r.pools[address]; found {
		return true, label
	}
	return false, ""
}
// IsKnownRouter reports whether address is registered as a known router
// contract. When it is, the second result is the router's name; otherwise it
// is "".
func (r *KnownContractRegistry) IsKnownRouter(address common.Address) (bool, string) {
	if routerName, found := r.routers[address]; found {
		return true, routerName
	}
	return false, ""
}
// GetContractType classifies address against the registry and returns its
// contract type together with its registered name. The tables are consulted in
// the order tokens, pools, routers. Every registered pool is reported as
// ContractTypeUniswapV3Pool. An unregistered address yields
// ContractTypeUnknown and an empty name.
func (r *KnownContractRegistry) GetContractType(address common.Address) (ContractType, string) {
	if symbol, found := r.erc20Tokens[address]; found {
		return ContractTypeERC20Token, symbol
	}
	if label, found := r.pools[address]; found {
		return ContractTypeUniswapV3Pool, label
	}
	if routerName, found := r.routers[address]; found {
		return ContractTypeRouter, routerName
	}
	return ContractTypeUnknown, ""
}
// ValidatePoolCall rejects pool-specific operations aimed at a known ERC-20
// token. It returns a *ValidationError with code INVALID_POOL_OPERATION when
// address is a registered token, and nil for every other address. operation is
// only used to describe the attempted call in the error message and context.
func (r *KnownContractRegistry) ValidatePoolCall(address common.Address, operation string) error {
	isToken, symbol := r.IsKnownERC20(address)
	if !isToken {
		return nil
	}

	hexAddr := address.Hex()
	details := map[string]interface{}{
		"address":    hexAddr,
		"token_name": symbol,
		"operation":  operation,
		"issue":      "ERC-20 tokens do not have pool-specific functions like slot0()",
	}

	return &ValidationError{
		Code:    "INVALID_POOL_OPERATION",
		Message: fmt.Sprintf("Attempted pool operation '%s' on known ERC-20 token %s (%s)", operation, symbol, hexAddr),
		Context: details,
	}
}
// GetCorruptionPattern labels address with the first known corruption pattern
// it matches. It does not decide whether the address is corrupted: any address
// matching none of the patterns, including a well-formed one, is labelled
// "OTHER_CORRUPTION".
//
// Patterns, in the order they are checked:
//   - "ZERO_ADDRESS": all 20 bytes are zero.
//   - "LEADING_ZEROS_CORRUPTION": the first 8 hex digits are zero.
//   - "TRAILING_ZEROS": the last 24 hex digits are zero.
//   - "EMBEDDED_WETH_PATTERN" / "EMBEDDED_USDC_PATTERN": the hex form contains
//     the full WETH / USDC token address.
//
// The receiver is not used; the classification depends on address alone.
func (r *KnownContractRegistry) GetCorruptionPattern(address common.Address) string {
	// Number of trailing zero hex digits (out of 40) treated as "mostly zeros".
	// The previous check required all 40, which only the zero address can
	// satisfy, and that case returns earlier, so the pattern was never reported.
	// NOTE(review): 24 (12 zero bytes) is a judgment call; tune if needed.
	const trailingZeroDigits = 24

	// Check for zero address
	if address == (common.Address{}) {
		return "ZERO_ADDRESS"
	}

	// Lower-cased once so the token patterns below match regardless of the
	// checksum casing produced by Hex().
	hexAddr := strings.ToLower(address.Hex())

	switch {
	// Leading zeros with small values. Checked before the trailing-zero pattern
	// so addresses that were already classified this way keep their label.
	case strings.HasPrefix(hexAddr, "0x00000000"):
		return "LEADING_ZEROS_CORRUPTION"

	// Mostly zeros with small values at the front.
	case strings.HasSuffix(hexAddr, strings.Repeat("0", trailingZeroDigits)):
		return "TRAILING_ZEROS"

	// Embedded WETH/USDC patterns (indicates address extraction issues). Each
	// needle is a complete 40-digit address, so against the 40 hex digits of a
	// single address these match only the WETH / USDC token address itself.
	case strings.Contains(hexAddr, "82af49447d8a07e3bd95bd0d56f35241523fbab1"):
		return "EMBEDDED_WETH_PATTERN"
	case strings.Contains(hexAddr, "af88d065e77c8cc2239327c5edb3a432268e5831"):
		return "EMBEDDED_USDC_PATTERN"
	}

	return "OTHER_CORRUPTION"
}