feat: create v2-prep branch with comprehensive planning
Restructured project for V2 refactor:

**Structure Changes:**
- Moved all V1 code to orig/ folder (preserved with git mv)
- Created docs/planning/ directory
- Added orig/README_V1.md explaining V1 preservation

**Planning Documents:**
- 00_V2_MASTER_PLAN.md: Complete architecture overview
  - Executive summary of critical V1 issues
  - High-level component architecture diagrams
  - 5-phase implementation roadmap
  - Success metrics and risk mitigation
- 07_TASK_BREAKDOWN.md: Atomic task breakdown
  - 99+ hours of detailed tasks
  - Every task < 2 hours (atomic)
  - Clear dependencies and success criteria
  - Organized by implementation phase

**V2 Key Improvements:**
- Per-exchange parsers (factory pattern; see the sketch after this message)
- Multi-layer strict validation
- Multi-index pool cache
- Background validation pipeline
- Comprehensive observability

**Critical Issues Addressed:**
- Zero-address tokens (strict validation + cache enrichment)
- Parsing accuracy (protocol-specific parsers)
- No audit trail (background validation channel)
- Inefficient lookups (multi-index cache)
- Stats disconnection (event-driven metrics)

Next Steps:
1. Review planning documents
2. Begin Phase 1: Foundation (P1-001 through P1-010)
3. Implement parsers in Phase 2
4. Build cache system in Phase 3
5. Add validation pipeline in Phase 4
6. Migrate and test in Phase 5

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
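To make the "per-exchange parsers (factory pattern)" item concrete, here is a minimal sketch of the planned shape. All names (`Parser`, `SwapEvent`, `NewParser`, the `parsers` package) are hypothetical; nothing in this commit defines them yet.

```go
// Package parsers sketches the planned V2 factory pattern:
// one parser per exchange protocol, selected by a factory.
package parsers

import (
	"fmt"

	"github.com/ethereum/go-ethereum/core/types"
)

// SwapEvent is a placeholder for the normalized output every parser emits.
type SwapEvent struct {
	Protocol string
	Pool     string
}

// Parser is the interface each protocol-specific parser implements.
type Parser interface {
	Parse(log types.Log) (*SwapEvent, error)
}

type uniswapV2Parser struct{}

func (p *uniswapV2Parser) Parse(log types.Log) (*SwapEvent, error) {
	// A real implementation would decode the Uniswap V2 Swap event ABI here.
	return &SwapEvent{Protocol: "uniswap-v2", Pool: log.Address.Hex()}, nil
}

// NewParser is the factory: callers never construct parsers directly,
// so adding an exchange means adding one case here plus one parser type.
func NewParser(protocol string) (Parser, error) {
	switch protocol {
	case "uniswap-v2":
		return &uniswapV2Parser{}, nil
	default:
		return nil, fmt.Errorf("no parser registered for %q", protocol)
	}
}
```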
orig/internal/monitoring/alert_handlers.go (new file, 400 lines)
@@ -0,0 +1,400 @@
package monitoring

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/fraktal/mev-beta/internal/logger"
)

// LogAlertHandler logs alerts to the application logger
type LogAlertHandler struct {
	logger *logger.Logger
}

// NewLogAlertHandler creates a new log-based alert handler
func NewLogAlertHandler(logger *logger.Logger) *LogAlertHandler {
	return &LogAlertHandler{
		logger: logger,
	}
}

// HandleAlert logs the alert using structured logging
func (lah *LogAlertHandler) HandleAlert(alert CorruptionAlert) error {
	switch alert.Severity {
	case AlertSeverityEmergency:
		lah.logger.Error("🚨 EMERGENCY ALERT",
			"message", alert.Message,
			"severity", alert.Severity.String(),
			"timestamp", alert.Timestamp,
			"context", alert.Context)
	case AlertSeverityCritical:
		lah.logger.Error("🔴 CRITICAL ALERT",
			"message", alert.Message,
			"severity", alert.Severity.String(),
			"timestamp", alert.Timestamp,
			"context", alert.Context)
	case AlertSeverityWarning:
		lah.logger.Warn("🟡 WARNING ALERT",
			"message", alert.Message,
			"severity", alert.Severity.String(),
			"timestamp", alert.Timestamp,
			"context", alert.Context)
	default:
		lah.logger.Info("ℹ️ INFO ALERT",
			"message", alert.Message,
			"severity", alert.Severity.String(),
			"timestamp", alert.Timestamp,
			"context", alert.Context)
	}

	return nil
}

// FileAlertHandler writes alerts to a file in JSON format
type FileAlertHandler struct {
	mu       sync.Mutex
	filePath string
	logger   *logger.Logger
}

// NewFileAlertHandler creates a new file-based alert handler
func NewFileAlertHandler(filePath string, logger *logger.Logger) *FileAlertHandler {
	return &FileAlertHandler{
		filePath: filePath,
		logger:   logger,
	}
}

// HandleAlert writes the alert to a file
func (fah *FileAlertHandler) HandleAlert(alert CorruptionAlert) error {
	fah.mu.Lock()
	defer fah.mu.Unlock()

	// Create alert record for file
	alertRecord := map[string]interface{}{
		"timestamp":        alert.Timestamp.Format(time.RFC3339),
		"severity":         alert.Severity.String(),
		"message":          alert.Message,
		"address":          alert.Address.Hex(),
		"corruption_score": alert.CorruptionScore,
		"source":           alert.Source,
		"context":          alert.Context,
	}

	// Convert to JSON
	alertJSON, err := json.Marshal(alertRecord)
	if err != nil {
		return fmt.Errorf("failed to marshal alert: %w", err)
	}

	// Open file for appending
	file, err := os.OpenFile(fah.filePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("failed to open alert file: %w", err)
	}
	defer file.Close()

	// Write alert with newline
	if _, err := file.Write(append(alertJSON, '\n')); err != nil {
		return fmt.Errorf("failed to write alert to file: %w", err)
	}

	fah.logger.Debug("Alert written to file",
		"file", fah.filePath,
		"severity", alert.Severity.String())

	return nil
}

// HTTPAlertHandler sends alerts to an HTTP endpoint (e.g., Slack, Discord, PagerDuty)
type HTTPAlertHandler struct {
	mu         sync.Mutex
	webhookURL string
	client     *http.Client
	logger     *logger.Logger
	retryCount int
}

// NewHTTPAlertHandler creates a new HTTP-based alert handler
func NewHTTPAlertHandler(webhookURL string, logger *logger.Logger) *HTTPAlertHandler {
	return &HTTPAlertHandler{
		webhookURL: webhookURL,
		client: &http.Client{
			Timeout: 10 * time.Second,
		},
		logger:     logger,
		retryCount: 3,
	}
}

// HandleAlert sends the alert to the configured HTTP endpoint
func (hah *HTTPAlertHandler) HandleAlert(alert CorruptionAlert) error {
	if hah.webhookURL == "" {
		return fmt.Errorf("webhook URL not configured")
	}

	// Create payload based on webhook type
	payload := hah.createPayload(alert)

	// Convert payload to JSON
	payloadJSON, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("failed to marshal webhook payload: %w", err)
	}

	// Send with retries
	for attempt := 1; attempt <= hah.retryCount; attempt++ {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		req, err := http.NewRequestWithContext(ctx, "POST", hah.webhookURL, strings.NewReader(string(payloadJSON)))
		cancel()

		if err != nil {
			return fmt.Errorf("failed to create HTTP request: %w", err)
		}

		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("User-Agent", "MEV-Bot-AlertHandler/1.0")

		resp, err := hah.client.Do(req)
		if err != nil {
			hah.logger.Warn("Failed to send alert to webhook",
				"attempt", attempt,
				"error", err)
			if attempt == hah.retryCount {
				return fmt.Errorf("failed to send alert after %d attempts: %w", hah.retryCount, err)
			}
			time.Sleep(time.Duration(attempt) * time.Second)
			continue
		}

		defer resp.Body.Close()

		// Read response body for debugging
		body, _ := io.ReadAll(resp.Body)

		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
			hah.logger.Debug("Alert sent successfully",
				"webhook_url", hah.webhookURL,
				"status_code", resp.StatusCode,
				"response", string(body))
			return nil
		}

		hah.logger.Warn("Webhook returned error status",
			"attempt", attempt,
			"status_code", resp.StatusCode,
			"response", string(body))

		if attempt == hah.retryCount {
			return fmt.Errorf("webhook returned status %d after %d attempts", resp.StatusCode, hah.retryCount)
		}

		time.Sleep(time.Duration(attempt) * time.Second)
	}

	return nil
}

// createPayload creates the webhook payload based on the webhook type
func (hah *HTTPAlertHandler) createPayload(alert CorruptionAlert) map[string]interface{} {
	// Detect webhook type based on URL
	if strings.Contains(hah.webhookURL, "slack.com") {
		return hah.createSlackPayload(alert)
	} else if strings.Contains(hah.webhookURL, "discord.com") {
		return hah.createDiscordPayload(alert)
	}

	// Generic webhook payload
	return map[string]interface{}{
		"timestamp":        alert.Timestamp.Format(time.RFC3339),
		"severity":         alert.Severity.String(),
		"message":          alert.Message,
		"address":          alert.Address.Hex(),
		"corruption_score": alert.CorruptionScore,
		"source":           alert.Source,
		"context":          alert.Context,
	}
}

// createSlackPayload creates a Slack-compatible webhook payload
func (hah *HTTPAlertHandler) createSlackPayload(alert CorruptionAlert) map[string]interface{} {
	color := "good"
	switch alert.Severity {
	case AlertSeverityWarning:
		color = "warning"
	case AlertSeverityCritical:
		color = "danger"
	case AlertSeverityEmergency:
		color = "#FF0000" // Bright red for emergency
	}

	attachment := map[string]interface{}{
		"color":     color,
		"title":     fmt.Sprintf("%s Alert - MEV Bot", alert.Severity.String()),
		"text":      alert.Message,
		"timestamp": alert.Timestamp.Unix(),
		"fields": []map[string]interface{}{
			{
				"title": "Address",
				"value": alert.Address.Hex(),
				"short": true,
			},
			{
				"title": "Corruption Score",
				"value": fmt.Sprintf("%d", alert.CorruptionScore),
				"short": true,
			},
			{
				"title": "Source",
				"value": alert.Source,
				"short": true,
			},
		},
	}

	return map[string]interface{}{
		"text":        fmt.Sprintf("MEV Bot Alert: %s", alert.Severity.String()),
		"attachments": []map[string]interface{}{attachment},
	}
}

// createDiscordPayload creates a Discord-compatible webhook payload
func (hah *HTTPAlertHandler) createDiscordPayload(alert CorruptionAlert) map[string]interface{} {
	color := 0x00FF00 // Green
	switch alert.Severity {
	case AlertSeverityWarning:
		color = 0xFFFF00 // Yellow
	case AlertSeverityCritical:
		color = 0xFF8000 // Orange
	case AlertSeverityEmergency:
		color = 0xFF0000 // Red
	}

	embed := map[string]interface{}{
		"title":       fmt.Sprintf("%s Alert - MEV Bot", alert.Severity.String()),
		"description": alert.Message,
		"color":       color,
		"timestamp":   alert.Timestamp.Format(time.RFC3339),
		"fields": []map[string]interface{}{
			{
				"name":   "Address",
				"value":  alert.Address.Hex(),
				"inline": true,
			},
			{
				"name":   "Corruption Score",
				"value":  fmt.Sprintf("%d", alert.CorruptionScore),
				"inline": true,
			},
			{
				"name":   "Source",
				"value":  alert.Source,
				"inline": true,
			},
		},
		"footer": map[string]interface{}{
			"text": "MEV Bot Integrity Monitor",
		},
	}

	return map[string]interface{}{
		"embeds": []map[string]interface{}{embed},
	}
}

// MetricsAlertHandler integrates with metrics systems (Prometheus, etc.)
type MetricsAlertHandler struct {
	mu       sync.Mutex
	logger   *logger.Logger
	counters map[string]int64
}

// NewMetricsAlertHandler creates a new metrics-based alert handler
func NewMetricsAlertHandler(logger *logger.Logger) *MetricsAlertHandler {
	return &MetricsAlertHandler{
		logger:   logger,
		counters: make(map[string]int64),
	}
}

// HandleAlert updates metrics counters based on alert
func (mah *MetricsAlertHandler) HandleAlert(alert CorruptionAlert) error {
	mah.mu.Lock()
	defer mah.mu.Unlock()

	// Increment counters
	mah.counters["total_alerts"]++
	mah.counters[fmt.Sprintf("alerts_%s", strings.ToLower(alert.Severity.String()))]++

	if alert.CorruptionScore > 0 {
		mah.counters["corruption_alerts"]++
	}

	mah.logger.Debug("Metrics updated for alert",
		"severity", alert.Severity.String(),
		"total_alerts", mah.counters["total_alerts"])

	return nil
}

// GetCounters returns the current alert counters
func (mah *MetricsAlertHandler) GetCounters() map[string]int64 {
	mah.mu.Lock()
	defer mah.mu.Unlock()

	// Return a copy
	counters := make(map[string]int64)
	for k, v := range mah.counters {
		counters[k] = v
	}

	return counters
}

// CompositeAlertHandler combines multiple alert handlers
type CompositeAlertHandler struct {
	handlers []AlertSubscriber
	logger   *logger.Logger
}

// NewCompositeAlertHandler creates a composite alert handler
func NewCompositeAlertHandler(logger *logger.Logger, handlers ...AlertSubscriber) *CompositeAlertHandler {
	return &CompositeAlertHandler{
		handlers: handlers,
		logger:   logger,
	}
}

// HandleAlert sends the alert to all configured handlers
func (cah *CompositeAlertHandler) HandleAlert(alert CorruptionAlert) error {
	errors := make([]error, 0)

	for i, handler := range cah.handlers {
		if err := handler.HandleAlert(alert); err != nil {
			cah.logger.Error("Alert handler failed",
				"handler_index", i,
				"handler_type", fmt.Sprintf("%T", handler),
				"error", err)
			errors = append(errors, fmt.Errorf("handler %d (%T): %w", i, handler, err))
		}
	}

	if len(errors) > 0 {
		return fmt.Errorf("alert handler errors: %v", errors)
	}

	return nil
}

// AddHandler adds a new handler to the composite
func (cah *CompositeAlertHandler) AddHandler(handler AlertSubscriber) {
	cah.handlers = append(cah.handlers, handler)
}
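Each handler above implements the `AlertSubscriber` interface declared in integrity_monitor.go further down, so they compose freely. A minimal wiring sketch; `logger.New()` is a hypothetical constructor, since the logger package's API is not part of this diff:

```go
package main

import (
	"github.com/fraktal/mev-beta/internal/logger"
	"github.com/fraktal/mev-beta/internal/monitoring"
)

func main() {
	log := logger.New() // hypothetical constructor; real signature not shown in this commit

	monitor := monitoring.NewIntegrityMonitor(log)

	// Fan each alert out to the structured log and a JSON-lines audit file.
	composite := monitoring.NewCompositeAlertHandler(log,
		monitoring.NewLogAlertHandler(log),
		monitoring.NewFileAlertHandler("alerts.jsonl", log),
	)
	monitor.AddAlertSubscriber(composite)
}
```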
orig/internal/monitoring/dashboard.go (new file, 549 lines)
@@ -0,0 +1,549 @@
package monitoring

import (
	"encoding/json"
	"fmt"
	"html/template"
	"net/http"
	"strconv"
	"time"

	"github.com/fraktal/mev-beta/internal/logger"
)

// DashboardServer provides a web-based monitoring dashboard
type DashboardServer struct {
	logger           *logger.Logger
	integrityMonitor *IntegrityMonitor
	healthChecker    *HealthCheckRunner
	port             int
	server           *http.Server
}

// NewDashboardServer creates a new dashboard server
func NewDashboardServer(logger *logger.Logger, integrityMonitor *IntegrityMonitor, healthChecker *HealthCheckRunner, port int) *DashboardServer {
	return &DashboardServer{
		logger:           logger,
		integrityMonitor: integrityMonitor,
		healthChecker:    healthChecker,
		port:             port,
	}
}

// Start starts the dashboard HTTP server
func (ds *DashboardServer) Start() error {
	mux := http.NewServeMux()

	// Register endpoints
	mux.HandleFunc("/", ds.handleDashboard)
	mux.HandleFunc("/api/health", ds.handleAPIHealth)
	mux.HandleFunc("/api/metrics", ds.handleAPIMetrics)
	mux.HandleFunc("/api/history", ds.handleAPIHistory)
	mux.HandleFunc("/api/alerts", ds.handleAPIAlerts)
	mux.HandleFunc("/static/", ds.handleStatic)

	ds.server = &http.Server{
		Addr:    fmt.Sprintf(":%d", ds.port),
		Handler: mux,
	}

	ds.logger.Info("Starting monitoring dashboard",
		"port", ds.port,
		"url", fmt.Sprintf("http://localhost:%d", ds.port))

	return ds.server.ListenAndServe()
}

// Stop stops the dashboard server
func (ds *DashboardServer) Stop() error {
	if ds.server != nil {
		return ds.server.Close()
	}
	return nil
}

// handleDashboard serves the main dashboard HTML page
func (ds *DashboardServer) handleDashboard(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/html")

	// Get current metrics and health data
	metrics := ds.integrityMonitor.GetMetrics()
	healthSummary := ds.integrityMonitor.GetHealthSummary()
	healthHistory := ds.healthChecker.GetRecentSnapshots(20)

	// Render dashboard template
	tmpl := ds.getDashboardTemplate()
	data := struct {
		Metrics       MetricsSnapshot
		HealthSummary map[string]interface{}
		HealthHistory []HealthSnapshot
		Timestamp     time.Time
	}{
		Metrics:       metrics,
		HealthSummary: healthSummary,
		HealthHistory: healthHistory,
		Timestamp:     time.Now(),
	}

	if err := tmpl.Execute(w, data); err != nil {
		ds.logger.Error("Failed to render dashboard", "error", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
		return
	}
}

// handleAPIHealth provides JSON health endpoint
func (ds *DashboardServer) handleAPIHealth(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	healthSummary := ds.integrityMonitor.GetHealthSummary()
	healthCheckerSummary := ds.healthChecker.GetHealthSummary()

	// Combine summaries
	response := map[string]interface{}{
		"integrity_monitor": healthSummary,
		"health_checker":    healthCheckerSummary,
		"timestamp":         time.Now(),
	}

	if err := json.NewEncoder(w).Encode(response); err != nil {
		ds.logger.Error("Failed to encode health response", "error", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
	}
}

// handleAPIMetrics provides JSON metrics endpoint
func (ds *DashboardServer) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	metrics := ds.integrityMonitor.GetMetrics()

	if err := json.NewEncoder(w).Encode(metrics); err != nil {
		ds.logger.Error("Failed to encode metrics response", "error", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
	}
}

// handleAPIHistory provides JSON health history endpoint
func (ds *DashboardServer) handleAPIHistory(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	// Get count parameter (default 20)
	countStr := r.URL.Query().Get("count")
	count := 20
	if countStr != "" {
		if c, err := strconv.Atoi(countStr); err == nil && c > 0 && c <= 100 {
			count = c
		}
	}

	history := ds.healthChecker.GetRecentSnapshots(count)

	if err := json.NewEncoder(w).Encode(history); err != nil {
		ds.logger.Error("Failed to encode history response", "error", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
	}
}

// handleAPIAlerts provides recent alerts for integrity and health monitoring.
func (ds *DashboardServer) handleAPIAlerts(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	limit := 20
	if q := r.URL.Query().Get("limit"); q != "" {
		if parsed, err := strconv.Atoi(q); err == nil && parsed > 0 && parsed <= 200 {
			limit = parsed
		}
	}

	alerts := ds.integrityMonitor.GetRecentAlerts(limit)

	payload := map[string]interface{}{
		"alerts":    alerts,
		"count":     len(alerts),
		"timestamp": time.Now(),
	}

	if err := json.NewEncoder(w).Encode(payload); err != nil {
		ds.logger.Error("Failed to encode alerts response", "error", err)
		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
	}
}

// handleStatic serves static assets (CSS, JS)
func (ds *DashboardServer) handleStatic(w http.ResponseWriter, r *http.Request) {
	// For simplicity, we'll inline CSS and JS in the HTML template
	// In a production system, you'd serve actual static files
	http.NotFound(w, r)
}

// getDashboardTemplate returns the HTML template for the dashboard
func (ds *DashboardServer) getDashboardTemplate() *template.Template {
	htmlTemplate := `
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>MEV Bot - Data Integrity Monitor</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #f5f5f5;
            color: #333;
            line-height: 1.6;
        }

        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 1rem 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 0 1rem;
        }

        .header h1 {
            font-size: 2rem;
            font-weight: 300;
        }

        .header .subtitle {
            opacity: 0.9;
            margin-top: 0.5rem;
        }

        .dashboard {
            padding: 2rem 0;
        }

        .grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
            gap: 1.5rem;
            margin-bottom: 2rem;
        }

        .card {
            background: white;
            border-radius: 8px;
            padding: 1.5rem;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            border-left: 4px solid #667eea;
        }

        .card h3 {
            color: #333;
            margin-bottom: 1rem;
            font-size: 1.25rem;
        }

        .metric {
            display: flex;
            justify-content: space-between;
            align-items: center;
            padding: 0.5rem 0;
            border-bottom: 1px solid #eee;
        }

        .metric:last-child {
            border-bottom: none;
        }

        .metric-label {
            font-weight: 500;
            color: #666;
        }

        .metric-value {
            font-weight: 600;
            color: #333;
        }

        .health-score {
            font-size: 2rem;
            font-weight: bold;
            text-align: center;
            padding: 1rem;
            border-radius: 50%;
            width: 100px;
            height: 100px;
            display: flex;
            align-items: center;
            justify-content: center;
            margin: 0 auto 1rem;
        }

        .health-excellent { background: #4CAF50; color: white; }
        .health-good { background: #8BC34A; color: white; }
        .health-fair { background: #FF9800; color: white; }
        .health-poor { background: #F44336; color: white; }

        .status-indicator {
            display: inline-block;
            width: 12px;
            height: 12px;
            border-radius: 50%;
            margin-right: 8px;
        }

        .status-healthy { background: #4CAF50; }
        .status-warning { background: #FF9800; }
        .status-critical { background: #F44336; }

        .chart-container {
            background: white;
            border-radius: 8px;
            padding: 1.5rem;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            margin-top: 1.5rem;
        }

        .refresh-indicator {
            position: fixed;
            top: 20px;
            right: 20px;
            background: #667eea;
            color: white;
            padding: 0.5rem 1rem;
            border-radius: 4px;
            font-size: 0.875rem;
        }

        .timestamp {
            text-align: center;
            color: #666;
            font-size: 0.875rem;
            margin-top: 2rem;
        }

        .recovery-actions {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 1rem;
            margin-top: 1rem;
        }

        .recovery-action {
            background: #f8f9fa;
            padding: 0.75rem;
            border-radius: 4px;
            text-align: center;
        }

        .recovery-action-count {
            font-size: 1.5rem;
            font-weight: bold;
            color: #667eea;
        }

        .recovery-action-label {
            font-size: 0.875rem;
            color: #666;
            text-transform: uppercase;
        }
    </style>
</head>
<body>
    <div class="header">
        <div class="container">
            <h1>🤖 MEV Bot - Data Integrity Monitor</h1>
            <p class="subtitle">Real-time monitoring of corruption detection and recovery systems</p>
        </div>
    </div>

    <div class="dashboard">
        <div class="container">
            <div class="grid">
                <!-- Health Score Card -->
                <div class="card">
                    <h3>System Health</h3>
                    <div class="health-score {{.HealthSummary.health_score | healthClass}}">
                        {{.HealthSummary.health_score | printf "%.1f"}}
                    </div>
                    <div class="metric">
                        <span class="metric-label">Status</span>
                        <span class="metric-value">
                            <span class="status-indicator {{.HealthSummary.health_score | statusClass}}"></span>
                            {{.HealthSummary.health_score | healthStatus}}
                        </span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Monitor Enabled</span>
                        <span class="metric-value">{{if .HealthSummary.enabled}}✅ Yes{{else}}❌ No{{end}}</span>
                    </div>
                </div>

                <!-- Processing Statistics -->
                <div class="card">
                    <h3>Processing Statistics</h3>
                    <div class="metric">
                        <span class="metric-label">Total Addresses</span>
                        <span class="metric-value">{{.Metrics.TotalAddressesProcessed | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Corruption Detected</span>
                        <span class="metric-value">{{.Metrics.CorruptAddressesDetected | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Corruption Rate</span>
                        <span class="metric-value">{{.HealthSummary.corruption_rate | printf "%.4f%%"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Avg Corruption Score</span>
                        <span class="metric-value">{{.Metrics.AverageCorruptionScore | printf "%.1f"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Max Corruption Score</span>
                        <span class="metric-value">{{.Metrics.MaxCorruptionScore}}</span>
                    </div>
                </div>

                <!-- Validation Results -->
                <div class="card">
                    <h3>Validation Results</h3>
                    <div class="metric">
                        <span class="metric-label">Validation Passed</span>
                        <span class="metric-value">{{.Metrics.AddressValidationPassed | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Validation Failed</span>
                        <span class="metric-value">{{.Metrics.AddressValidationFailed | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Success Rate</span>
                        <span class="metric-value">{{.HealthSummary.validation_success_rate | printf "%.2f%%"}}</span>
                    </div>
                </div>

                <!-- Contract Calls -->
                <div class="card">
                    <h3>Contract Calls</h3>
                    <div class="metric">
                        <span class="metric-label">Successful Calls</span>
                        <span class="metric-value">{{.Metrics.ContractCallsSucceeded | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Failed Calls</span>
                        <span class="metric-value">{{.Metrics.ContractCallsFailed | printf "%d"}}</span>
                    </div>
                    <div class="metric">
                        <span class="metric-label">Success Rate</span>
                        <span class="metric-value">{{.HealthSummary.contract_call_success_rate | printf "%.2f%%"}}</span>
                    </div>
                </div>
            </div>

            <!-- Recovery Actions -->
            <div class="chart-container">
                <h3>Recovery System Activity</h3>
                <div class="recovery-actions">
                    <div class="recovery-action">
                        <div class="recovery-action-count">{{.Metrics.RetryOperationsTriggered}}</div>
                        <div class="recovery-action-label">Retry Operations</div>
                    </div>
                    <div class="recovery-action">
                        <div class="recovery-action-count">{{.Metrics.FallbackOperationsUsed}}</div>
                        <div class="recovery-action-label">Fallback Used</div>
                    </div>
                    <div class="recovery-action">
                        <div class="recovery-action-count">{{.Metrics.CircuitBreakersTripped}}</div>
                        <div class="recovery-action-label">Circuit Breakers</div>
                    </div>
                </div>
            </div>

            <div class="timestamp">
                Last updated: {{.Timestamp.Format "2006-01-02 15:04:05 UTC"}}
                <br>
                Auto-refresh every 30 seconds
            </div>
        </div>
    </div>

    <div class="refresh-indicator">🔄 Live</div>

    <script>
        // Auto-refresh every 30 seconds
        setInterval(function() {
            window.location.reload();
        }, 30000);

        // Add smooth transitions
        document.addEventListener('DOMContentLoaded', function() {
            const cards = document.querySelectorAll('.card');
            cards.forEach((card, index) => {
                card.style.animationDelay = (index * 0.1) + 's';
                card.style.animation = 'fadeInUp 0.6s ease forwards';
            });
        });
    </script>

    <style>
        @keyframes fadeInUp {
            from {
                opacity: 0;
                transform: translateY(20px);
            }
            to {
                opacity: 1;
                transform: translateY(0);
            }
        }
    </style>
</body>
</html>
`

	// Create template with custom functions
	funcMap := template.FuncMap{
		"healthClass": func(score interface{}) string {
			s := score.(float64)
			if s >= 0.9 {
				return "health-excellent"
			} else if s >= 0.7 {
				return "health-good"
			} else if s >= 0.5 {
				return "health-fair"
			}
			return "health-poor"
		},
		"statusClass": func(score interface{}) string {
			s := score.(float64)
			if s >= 0.7 {
				return "status-healthy"
			} else if s >= 0.5 {
				return "status-warning"
			}
			return "status-critical"
		},
		"healthStatus": func(score interface{}) string {
			s := score.(float64)
			if s >= 0.9 {
				return "Excellent"
			} else if s >= 0.7 {
				return "Good"
			} else if s >= 0.5 {
				return "Fair"
			}
			return "Poor"
		},
	}

	return template.Must(template.New("dashboard").Funcs(funcMap).Parse(htmlTemplate))
}

// GetDashboardURL returns the dashboard URL
func (ds *DashboardServer) GetDashboardURL() string {
	return fmt.Sprintf("http://localhost:%d", ds.port)
}
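The `/api/*` endpoints registered in `Start` serve plain JSON, so they can be polled without loading the HTML page. A client sketch, assuming the dashboard was constructed with port 8080 (the port is a constructor argument, not a fixed value):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// handleAPIHistory accepts a "count" query parameter (1-100, default 20).
	resp, err := http.Get("http://localhost:8080/api/history?count=50")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON array of HealthSnapshot objects
}
```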
orig/internal/monitoring/health_checker.go (new file, 447 lines)
@@ -0,0 +1,447 @@
package monitoring

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/fraktal/mev-beta/internal/logger"
)

// HealthCheckRunner performs periodic health checks and monitoring
type HealthCheckRunner struct {
	mu                    sync.RWMutex
	logger                *logger.Logger
	integrityMonitor      *IntegrityMonitor
	checkInterval         time.Duration
	running               bool
	stopChan              chan struct{}
	lastHealthCheck       time.Time
	healthHistory         []HealthSnapshot
	maxHistorySize        int
	warmupSamples         int
	minAddressesForAlerts int64
}

// HealthSnapshot represents a point-in-time health snapshot
type HealthSnapshot struct {
	Timestamp           time.Time
	HealthScore         float64
	CorruptionRate      float64
	ValidationSuccess   float64
	ContractCallSuccess float64
	ActiveAlerts        int
	Trend               HealthTrend
}

// HealthTrend indicates the direction of health metrics
type HealthTrend int

const (
	HealthTrendUnknown HealthTrend = iota
	HealthTrendImproving
	HealthTrendStable
	HealthTrendDeclining
	HealthTrendCritical
)

func (t HealthTrend) String() string {
	switch t {
	case HealthTrendImproving:
		return "IMPROVING"
	case HealthTrendStable:
		return "STABLE"
	case HealthTrendDeclining:
		return "DECLINING"
	case HealthTrendCritical:
		return "CRITICAL"
	default:
		return "UNKNOWN"
	}
}

// NewHealthCheckRunner creates a new health check runner
func NewHealthCheckRunner(logger *logger.Logger, integrityMonitor *IntegrityMonitor) *HealthCheckRunner {
	return &HealthCheckRunner{
		logger:                logger,
		integrityMonitor:      integrityMonitor,
		checkInterval:         30 * time.Second, // Default 30 second intervals
		stopChan:              make(chan struct{}),
		healthHistory:         make([]HealthSnapshot, 0),
		maxHistorySize:        100, // Keep last 100 snapshots (50 minutes at 30s intervals)
		warmupSamples:         3,
		minAddressesForAlerts: 25,
	}
}

// Start begins the periodic health checking routine
func (hcr *HealthCheckRunner) Start(ctx context.Context) {
	hcr.mu.Lock()
	if hcr.running {
		hcr.mu.Unlock()
		return
	}
	hcr.running = true
	hcr.mu.Unlock()

	hcr.logger.Info("Starting health check runner",
		"interval", hcr.checkInterval)

	go hcr.healthCheckLoop(ctx)
}

// Stop stops the health checking routine
func (hcr *HealthCheckRunner) Stop() {
	hcr.mu.Lock()
	defer hcr.mu.Unlock()

	if !hcr.running {
		return
	}

	hcr.running = false
	close(hcr.stopChan)
	hcr.logger.Info("Health check runner stopped")
}

// healthCheckLoop runs the periodic health checking
func (hcr *HealthCheckRunner) healthCheckLoop(ctx context.Context) {
	ticker := time.NewTicker(hcr.checkInterval)
	defer ticker.Stop()

	// Perform initial health check
	hcr.performHealthCheck()

	for {
		select {
		case <-ctx.Done():
			hcr.logger.Info("Health check runner stopped due to context cancellation")
			return
		case <-hcr.stopChan:
			hcr.logger.Info("Health check runner stopped")
			return
		case <-ticker.C:
			hcr.performHealthCheck()
		}
	}
}

// performHealthCheck executes a comprehensive health check
func (hcr *HealthCheckRunner) performHealthCheck() {
	start := time.Now()
	hcr.lastHealthCheck = start

	if !hcr.integrityMonitor.IsEnabled() {
		hcr.logger.Debug("Skipping health check - integrity monitor disabled")
		return
	}

	// Get current metrics
	metrics := hcr.integrityMonitor.GetMetrics()
	healthSummary := hcr.integrityMonitor.GetHealthSummary()

	// Calculate rates
	corruptionRate := 0.0
	if metrics.TotalAddressesProcessed > 0 {
		corruptionRate = float64(metrics.CorruptAddressesDetected) / float64(metrics.TotalAddressesProcessed)
	}

	validationSuccessRate := 0.0
	totalValidations := metrics.AddressValidationPassed + metrics.AddressValidationFailed
	if totalValidations > 0 {
		validationSuccessRate = float64(metrics.AddressValidationPassed) / float64(totalValidations)
	}

	contractCallSuccessRate := 0.0
	totalCalls := metrics.ContractCallsSucceeded + metrics.ContractCallsFailed
	if totalCalls > 0 {
		contractCallSuccessRate = float64(metrics.ContractCallsSucceeded) / float64(totalCalls)
	}

	// Create health snapshot
	snapshot := HealthSnapshot{
		Timestamp:           start,
		HealthScore:         metrics.HealthScore,
		CorruptionRate:      corruptionRate,
		ValidationSuccess:   validationSuccessRate,
		ContractCallSuccess: contractCallSuccessRate,
		ActiveAlerts:        0, // Will be calculated based on current conditions
		Trend:               hcr.calculateHealthTrend(metrics.HealthScore),
	}

	// Add to history
	hcr.addHealthSnapshot(snapshot)

	// Check for threshold violations and generate alerts
	hcr.checkThresholds(healthSummary, snapshot)

	// Log health status periodically
	hcr.logHealthStatus(snapshot, time.Since(start))
}

// addHealthSnapshot adds a snapshot to the health history
func (hcr *HealthCheckRunner) addHealthSnapshot(snapshot HealthSnapshot) {
	hcr.mu.Lock()
	defer hcr.mu.Unlock()

	hcr.healthHistory = append(hcr.healthHistory, snapshot)

	// Trim history if it exceeds max size
	if len(hcr.healthHistory) > hcr.maxHistorySize {
		hcr.healthHistory = hcr.healthHistory[len(hcr.healthHistory)-hcr.maxHistorySize:]
	}
}

// calculateHealthTrend analyzes recent health scores to determine trend
func (hcr *HealthCheckRunner) calculateHealthTrend(currentScore float64) HealthTrend {
	hcr.mu.RLock()
	defer hcr.mu.RUnlock()

	if len(hcr.healthHistory) < 3 {
		return HealthTrendUnknown
	}

	// Get last few scores for trend analysis
	recentScores := make([]float64, 0, 5)
	start := len(hcr.healthHistory) - 5
	if start < 0 {
		start = 0
	}

	for i := start; i < len(hcr.healthHistory); i++ {
		recentScores = append(recentScores, hcr.healthHistory[i].HealthScore)
	}
	recentScores = append(recentScores, currentScore)

	// Calculate trend
	if currentScore < 0.5 {
		return HealthTrendCritical
	}

	// Simple linear trend calculation
	if len(recentScores) >= 3 {
		first := recentScores[0]
		last := recentScores[len(recentScores)-1]
		diff := last - first

		if diff > 0.05 {
			return HealthTrendImproving
		} else if diff < -0.05 {
			return HealthTrendDeclining
		} else {
			return HealthTrendStable
		}
	}

	return HealthTrendUnknown
}

// checkThresholds checks for threshold violations and generates alerts
func (hcr *HealthCheckRunner) checkThresholds(healthSummary map[string]interface{}, snapshot HealthSnapshot) {
	if !hcr.readyForAlerts(healthSummary, snapshot) {
		hcr.logger.Debug("Health alerts suppressed during warm-up",
			"health_score", snapshot.HealthScore,
			"total_addresses_processed", safeNumericLookup(healthSummary, "total_addresses_processed"),
			"history_size", hcr.historySize())
		return
	}

	// Critical health score alert
	if snapshot.HealthScore < 0.5 {
		alert := CorruptionAlert{
			Timestamp: time.Now(),
			Severity:  AlertSeverityEmergency,
			Message:   fmt.Sprintf("CRITICAL: System health score is %.2f (below 0.5)", snapshot.HealthScore),
			Context: map[string]interface{}{
				"health_score":          snapshot.HealthScore,
				"corruption_rate":       snapshot.CorruptionRate,
				"validation_success":    snapshot.ValidationSuccess,
				"contract_call_success": snapshot.ContractCallSuccess,
				"trend":                 snapshot.Trend.String(),
			},
		}
		hcr.integrityMonitor.sendAlert(alert)
	}

	// High corruption rate alert
	if snapshot.CorruptionRate > 0.10 { // 10% corruption rate
		alert := CorruptionAlert{
			Timestamp: time.Now(),
			Severity:  AlertSeverityCritical,
			Message:   fmt.Sprintf("High corruption rate detected: %.2f%%", snapshot.CorruptionRate*100),
			Context: map[string]interface{}{
				"corruption_rate":    snapshot.CorruptionRate,
				"threshold":          0.10,
				"addresses_affected": snapshot.CorruptionRate,
			},
		}
		hcr.integrityMonitor.sendAlert(alert)
	}

	// Declining trend alert
	if snapshot.Trend == HealthTrendDeclining || snapshot.Trend == HealthTrendCritical {
		alert := CorruptionAlert{
			Timestamp: time.Now(),
			Severity:  AlertSeverityWarning,
			Message:   fmt.Sprintf("System health trend is %s (current score: %.2f)", snapshot.Trend.String(), snapshot.HealthScore),
			Context: map[string]interface{}{
				"trend":            snapshot.Trend.String(),
				"health_score":     snapshot.HealthScore,
				"recent_snapshots": hcr.getRecentSnapshots(5),
			},
		}
		hcr.integrityMonitor.sendAlert(alert)
	}
}

func (hcr *HealthCheckRunner) readyForAlerts(healthSummary map[string]interface{}, snapshot HealthSnapshot) bool {
	hcr.mu.RLock()
	historyLen := len(hcr.healthHistory)
	hcr.mu.RUnlock()

	if historyLen < hcr.warmupSamples {
		return false
	}

	totalProcessed := safeNumericLookup(healthSummary, "total_addresses_processed")
	if totalProcessed >= 0 && totalProcessed < float64(hcr.minAddressesForAlerts) {
		return false
	}

	// Require at least one validation or contract call attempt before alarming.
	if snapshot.ValidationSuccess == 0 && snapshot.ContractCallSuccess == 0 && totalProcessed == 0 {
		return false
	}

	return true
}

func safeNumericLookup(summary map[string]interface{}, key string) float64 {
	if summary == nil {
		return -1
	}

	value, ok := summary[key]
	if !ok {
		return -1
	}

	switch v := value.(type) {
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case uint:
		return float64(v)
	case uint32:
		return float64(v)
	case uint64:
		return float64(v)
	case float32:
		return float64(v)
	case float64:
		return v
	default:
		return -1
	}
}

func (hcr *HealthCheckRunner) historySize() int {
	hcr.mu.RLock()
	defer hcr.mu.RUnlock()
	return len(hcr.healthHistory)
}

// logHealthStatus logs periodic health status information
func (hcr *HealthCheckRunner) logHealthStatus(snapshot HealthSnapshot, duration time.Duration) {
	// Log detailed status every 5 minutes (10 checks at 30s intervals)
	if len(hcr.healthHistory)%10 == 0 {
		hcr.logger.Info("System health status",
			"health_score", snapshot.HealthScore,
			"corruption_rate", fmt.Sprintf("%.4f", snapshot.CorruptionRate),
			"validation_success", fmt.Sprintf("%.4f", snapshot.ValidationSuccess),
			"contract_call_success", fmt.Sprintf("%.4f", snapshot.ContractCallSuccess),
			"trend", snapshot.Trend.String(),
			"check_duration", duration)
	} else {
		// Brief status for regular checks
		hcr.logger.Debug("Health check completed",
			"health_score", snapshot.HealthScore,
			"trend", snapshot.Trend.String(),
			"duration", duration)
	}
}

// GetRecentSnapshots returns the most recent health snapshots
func (hcr *HealthCheckRunner) GetRecentSnapshots(count int) []HealthSnapshot {
	return hcr.getRecentSnapshots(count)
}

// getRecentSnapshots internal implementation
func (hcr *HealthCheckRunner) getRecentSnapshots(count int) []HealthSnapshot {
	hcr.mu.RLock()
	defer hcr.mu.RUnlock()

	if len(hcr.healthHistory) == 0 {
		return []HealthSnapshot{}
	}

	start := len(hcr.healthHistory) - count
	if start < 0 {
		start = 0
	}

	// Create a copy to avoid external modification
	snapshots := make([]HealthSnapshot, len(hcr.healthHistory[start:]))
	copy(snapshots, hcr.healthHistory[start:])

	return snapshots
}

// GetHealthSummary returns a comprehensive health summary
func (hcr *HealthCheckRunner) GetHealthSummary() map[string]interface{} {
	hcr.mu.RLock()
	defer hcr.mu.RUnlock()

	if len(hcr.healthHistory) == 0 {
		return map[string]interface{}{
			"running":        hcr.running,
			"check_interval": hcr.checkInterval.String(),
			"history_size":   0,
			"last_check":     nil,
		}
	}

	lastSnapshot := hcr.healthHistory[len(hcr.healthHistory)-1]

	return map[string]interface{}{
		"running":               hcr.running,
		"check_interval":        hcr.checkInterval.String(),
		"history_size":          len(hcr.healthHistory),
		"last_check":            hcr.lastHealthCheck,
		"current_health_score":  lastSnapshot.HealthScore,
		"current_trend":         lastSnapshot.Trend.String(),
		"corruption_rate":       lastSnapshot.CorruptionRate,
		"validation_success":    lastSnapshot.ValidationSuccess,
		"contract_call_success": lastSnapshot.ContractCallSuccess,
		"recent_snapshots":      hcr.getRecentSnapshots(10),
	}
}

// SetCheckInterval sets the health check interval
func (hcr *HealthCheckRunner) SetCheckInterval(interval time.Duration) {
	hcr.mu.Lock()
	defer hcr.mu.Unlock()
	hcr.checkInterval = interval
	hcr.logger.Info("Health check interval updated", "interval", interval)
}

// IsRunning returns whether the health checker is running
func (hcr *HealthCheckRunner) IsRunning() bool {
	hcr.mu.RLock()
	defer hcr.mu.RUnlock()
	return hcr.running
}
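The runner shuts down either when its context is cancelled or when `Stop` is called. A lifecycle sketch, again treating `logger.New()` as a hypothetical constructor:

```go
package main

import (
	"context"
	"time"

	"github.com/fraktal/mev-beta/internal/logger"
	"github.com/fraktal/mev-beta/internal/monitoring"
)

func main() {
	log := logger.New() // hypothetical constructor
	monitor := monitoring.NewIntegrityMonitor(log)
	runner := monitoring.NewHealthCheckRunner(log, monitor)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runner.SetCheckInterval(10 * time.Second) // override the 30s default before starting
	runner.Start(ctx)                         // spawns healthCheckLoop in a goroutine

	time.Sleep(time.Minute)
	runner.Stop() // safe: a second call returns early because running is already false
}
```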
orig/internal/monitoring/integrity_monitor.go (new file, 533 lines)
@@ -0,0 +1,533 @@
|
||||
package monitoring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
|
||||
"github.com/fraktal/mev-beta/internal/logger"
|
||||
"github.com/fraktal/mev-beta/internal/recovery"
|
||||
)
|
||||
|
||||
// IntegrityMetrics tracks data integrity statistics
|
||||
type IntegrityMetrics struct {
|
||||
mu sync.RWMutex
|
||||
TotalAddressesProcessed int64
|
||||
CorruptAddressesDetected int64
|
||||
AddressValidationPassed int64
|
||||
AddressValidationFailed int64
|
||||
ContractCallsSucceeded int64
|
||||
ContractCallsFailed int64
|
||||
RetryOperationsTriggered int64
|
||||
FallbackOperationsUsed int64
|
||||
CircuitBreakersTripped int64
|
||||
LastCorruptionDetection time.Time
|
||||
AverageCorruptionScore float64
|
||||
MaxCorruptionScore int
|
||||
HealthScore float64
|
||||
HighScore float64
|
||||
RecoveryActions map[recovery.RecoveryAction]int64
|
||||
ErrorsByType map[recovery.ErrorType]int64
|
||||
}
|
||||
|
||||
// MetricsSnapshot represents a copy of metrics without mutex for safe external access
|
||||
type MetricsSnapshot struct {
|
||||
TotalAddressesProcessed int64 `json:"total_addresses_processed"`
|
||||
CorruptAddressesDetected int64 `json:"corrupt_addresses_detected"`
|
||||
AddressValidationPassed int64 `json:"address_validation_passed"`
|
||||
AddressValidationFailed int64 `json:"address_validation_failed"`
|
||||
ContractCallsSucceeded int64 `json:"contract_calls_succeeded"`
|
||||
ContractCallsFailed int64 `json:"contract_calls_failed"`
|
||||
RetryOperationsTriggered int64 `json:"retry_operations_triggered"`
|
||||
FallbackOperationsUsed int64 `json:"fallback_operations_used"`
|
||||
CircuitBreakersTripped int64 `json:"circuit_breakers_tripped"`
|
||||
LastCorruptionDetection time.Time `json:"last_corruption_detection"`
|
||||
AverageCorruptionScore float64 `json:"average_corruption_score"`
|
||||
MaxCorruptionScore int `json:"max_corruption_score"`
|
||||
HealthScore float64 `json:"health_score"`
|
||||
HighScore float64 `json:"high_score"`
|
||||
RecoveryActions map[recovery.RecoveryAction]int64 `json:"recovery_actions"`
|
||||
ErrorsByType map[recovery.ErrorType]int64 `json:"errors_by_type"`
|
||||
}
|
||||
|
||||
// CorruptionAlert represents a corruption detection alert
|
||||
type CorruptionAlert struct {
|
||||
Timestamp time.Time
|
||||
Address common.Address
|
||||
CorruptionScore int
|
||||
Source string
|
||||
Severity AlertSeverity
|
||||
Message string
|
||||
Context map[string]interface{}
|
||||
}
|
||||
|
||||
// AlertSeverity defines alert severity levels
|
||||
type AlertSeverity int
|
||||
|
||||
const (
|
||||
AlertSeverityInfo AlertSeverity = iota
|
||||
AlertSeverityWarning
|
||||
AlertSeverityCritical
|
||||
AlertSeverityEmergency
|
||||
)
|
||||
|
||||
func (s AlertSeverity) String() string {
|
||||
switch s {
|
||||
case AlertSeverityInfo:
|
||||
return "INFO"
|
||||
case AlertSeverityWarning:
|
||||
return "WARNING"
|
||||
case AlertSeverityCritical:
|
||||
return "CRITICAL"
|
||||
case AlertSeverityEmergency:
|
||||
return "EMERGENCY"
|
||||
default:
|
||||
return "UNKNOWN"
|
||||
}
|
||||
}
|
||||
|
||||
// IntegrityMonitor monitors and tracks data integrity metrics
|
||||
type IntegrityMonitor struct {
|
||||
mu sync.RWMutex
|
||||
logger *logger.Logger
|
||||
metrics *IntegrityMetrics
|
||||
alertThresholds map[string]float64
|
||||
alertSubscribers []AlertSubscriber
|
||||
healthCheckRunner *HealthCheckRunner
|
||||
enabled bool
|
||||
alerts []CorruptionAlert
|
||||
alertsMutex sync.RWMutex
|
||||
}
|
||||
|
||||
// AlertSubscriber defines the interface for alert handlers
|
||||
type AlertSubscriber interface {
|
||||
HandleAlert(alert CorruptionAlert) error
|
||||
}
|
||||
|
||||
// NewIntegrityMonitor creates a new integrity monitoring system
|
||||
func NewIntegrityMonitor(logger *logger.Logger) *IntegrityMonitor {
|
||||
monitor := &IntegrityMonitor{
|
||||
logger: logger,
|
||||
metrics: &IntegrityMetrics{
|
||||
RecoveryActions: make(map[recovery.RecoveryAction]int64),
|
||||
ErrorsByType: make(map[recovery.ErrorType]int64),
|
||||
HealthScore: 1.0,
|
||||
HighScore: 1.0,
|
||||
},
|
||||
alertThresholds: make(map[string]float64),
|
||||
enabled: true,
|
||||
alerts: make([]CorruptionAlert, 0, 256),
|
||||
}
|
||||
|
||||
// Set default thresholds
|
||||
monitor.setDefaultThresholds()
|
||||
|
||||
// Initialize health check runner
|
||||
monitor.healthCheckRunner = NewHealthCheckRunner(logger, monitor)
|
||||
|
||||
return monitor
|
||||
}
|
||||
|
||||
// setDefaultThresholds configures default alert thresholds
|
||||
func (im *IntegrityMonitor) setDefaultThresholds() {
|
||||
im.alertThresholds["corruption_rate"] = 0.05 // 5% corruption rate
|
||||
im.alertThresholds["failure_rate"] = 0.10 // 10% failure rate
|
||||
im.alertThresholds["health_score_min"] = 0.80 // 80% minimum health
|
||||
im.alertThresholds["max_corruption_score"] = 70.0 // Maximum individual corruption score
|
||||
im.alertThresholds["circuit_breaker_rate"] = 0.02 // 2% circuit breaker rate
|
||||
}
|
||||
|
||||
// RecordAddressProcessed increments the counter for processed addresses
|
||||
func (im *IntegrityMonitor) RecordAddressProcessed() {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
im.metrics.TotalAddressesProcessed++
|
||||
im.metrics.mu.Unlock()
|
||||
|
||||
im.updateHealthScore()
|
||||
}
|
||||
|
||||
// RecordCorruptionDetected records a corruption detection event
|
||||
func (im *IntegrityMonitor) RecordCorruptionDetected(address common.Address, corruptionScore int, source string) {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
im.metrics.CorruptAddressesDetected++
|
||||
im.metrics.LastCorruptionDetection = time.Now()
|
||||
|
||||
// Update corruption statistics
|
||||
if corruptionScore > im.metrics.MaxCorruptionScore {
|
||||
im.metrics.MaxCorruptionScore = corruptionScore
|
||||
}
|
||||
|
||||
// Calculate rolling average corruption score
|
||||
total := float64(im.metrics.CorruptAddressesDetected)
|
||||
im.metrics.AverageCorruptionScore = ((im.metrics.AverageCorruptionScore * (total - 1)) + float64(corruptionScore)) / total
|
||||
im.metrics.mu.Unlock()
|
||||
|
||||
// Generate alert based on corruption score
|
||||
severity := im.getCorruptionSeverity(corruptionScore)
|
||||
alert := CorruptionAlert{
|
||||
Timestamp: time.Now(),
|
||||
Address: address,
|
||||
CorruptionScore: corruptionScore,
|
||||
Source: source,
|
||||
Severity: severity,
|
||||
Message: fmt.Sprintf("Corruption detected: address %s, score %d, source %s", address.Hex(), corruptionScore, source),
|
||||
Context: map[string]interface{}{
|
||||
"address": address.Hex(),
|
||||
"corruption_score": corruptionScore,
|
||||
"source": source,
|
||||
"timestamp": time.Now().Unix(),
|
||||
},
|
||||
}
|
||||
|
||||
im.sendAlert(alert)
|
||||
im.updateHealthScore()
|
||||
|
||||
im.logger.Warn("Corruption detected",
|
||||
"address", address.Hex(),
|
||||
"corruption_score", corruptionScore,
|
||||
"source", source,
|
||||
"severity", severity.String())
|
||||
}
|
||||
|
||||
// RecordValidationResult records address validation results
|
||||
func (im *IntegrityMonitor) RecordValidationResult(passed bool) {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
if passed {
|
||||
im.metrics.AddressValidationPassed++
|
||||
} else {
|
||||
im.metrics.AddressValidationFailed++
|
||||
}
|
||||
im.metrics.mu.Unlock()
|
||||
|
||||
im.updateHealthScore()
|
||||
}
|
||||
|
||||
// RecordContractCallResult records contract call success/failure
|
||||
func (im *IntegrityMonitor) RecordContractCallResult(succeeded bool) {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
if succeeded {
|
||||
im.metrics.ContractCallsSucceeded++
|
||||
} else {
|
||||
im.metrics.ContractCallsFailed++
|
||||
}
|
||||
im.metrics.mu.Unlock()
|
||||
|
||||
im.updateHealthScore()
|
||||
}
|
||||
|
||||
// RecordRecoveryAction records recovery action usage
|
||||
func (im *IntegrityMonitor) RecordRecoveryAction(action recovery.RecoveryAction) {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
im.metrics.RecoveryActions[action]++
|
||||
|
||||
// Track specific metrics
|
||||
switch action {
|
||||
case recovery.ActionRetryWithBackoff:
|
||||
im.metrics.RetryOperationsTriggered++
|
||||
case recovery.ActionUseFallbackData:
|
||||
im.metrics.FallbackOperationsUsed++
|
||||
case recovery.ActionCircuitBreaker:
|
||||
im.metrics.CircuitBreakersTripped++
|
||||
}
|
||||
im.metrics.mu.Unlock()
|
||||
|
||||
im.updateHealthScore()
|
||||
}
|
||||
|
||||
// RecordErrorType records error by type
|
||||
func (im *IntegrityMonitor) RecordErrorType(errorType recovery.ErrorType) {
|
||||
if !im.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
im.metrics.mu.Lock()
|
||||
im.metrics.ErrorsByType[errorType]++
|
||||
im.metrics.mu.Unlock()
|
||||
}
|
||||
|
||||
// getCorruptionSeverity determines alert severity based on corruption score
|
||||
func (im *IntegrityMonitor) getCorruptionSeverity(corruptionScore int) AlertSeverity {
|
||||
if corruptionScore >= 90 {
|
||||
return AlertSeverityEmergency
|
||||
} else if corruptionScore >= 70 {
|
||||
return AlertSeverityCritical
|
||||
} else if corruptionScore >= 40 {
|
||||
return AlertSeverityWarning
|
||||
}
|
||||
return AlertSeverityInfo
|
||||
}

// updateHealthScore calculates overall system health score
func (im *IntegrityMonitor) updateHealthScore() {
	im.metrics.mu.Lock()
	defer im.metrics.mu.Unlock()

	if im.metrics.TotalAddressesProcessed == 0 {
		im.metrics.HealthScore = 1.0
		return
	}

	// Calculate component scores
	corruptionRate := float64(im.metrics.CorruptAddressesDetected) / float64(im.metrics.TotalAddressesProcessed)

	var validationSuccessRate float64 = 1.0
	validationTotal := im.metrics.AddressValidationPassed + im.metrics.AddressValidationFailed
	if validationTotal > 0 {
		validationSuccessRate = float64(im.metrics.AddressValidationPassed) / float64(validationTotal)
	}

	var contractCallSuccessRate float64 = 1.0
	contractTotal := im.metrics.ContractCallsSucceeded + im.metrics.ContractCallsFailed
	if contractTotal > 0 {
		contractCallSuccessRate = float64(im.metrics.ContractCallsSucceeded) / float64(contractTotal)
	}

	// Weighted health score calculation
	healthScore := 0.0
	healthScore += (1.0 - corruptionRate) * 0.4  // 40% weight on corruption prevention
	healthScore += validationSuccessRate * 0.3   // 30% weight on validation success
	healthScore += contractCallSuccessRate * 0.3 // 30% weight on contract call success

	// Clamp to [0.0, 1.0] to handle edge cases
	if healthScore > 1.0 {
		healthScore = 1.0
	} else if healthScore < 0.0 {
		healthScore = 0.0
	}

	im.metrics.HealthScore = healthScore
	if healthScore > im.metrics.HighScore {
		im.metrics.HighScore = healthScore
	}

	// Check for health score threshold alerts
	if healthScore < im.alertThresholds["health_score_min"] {
		alert := CorruptionAlert{
			Timestamp: time.Now(),
			Severity:  AlertSeverityCritical,
			Message:   fmt.Sprintf("System health score dropped to %.2f (threshold: %.2f)", healthScore, im.alertThresholds["health_score_min"]),
			Context: map[string]interface{}{
				"health_score":          healthScore,
				"threshold":             im.alertThresholds["health_score_min"],
				"corruption_rate":       corruptionRate,
				"validation_success":    validationSuccessRate,
				"contract_call_success": contractCallSuccessRate,
			},
		}
		im.sendAlert(alert)
	}
}
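
// Worked example of the weighting above: with a 10% corruption rate, 95%
// validation success, and 90% contract-call success, the score is
// (1.0-0.10)*0.4 + 0.95*0.3 + 0.90*0.3 = 0.36 + 0.285 + 0.27 = 0.915.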

// sendAlert sends alerts to all subscribers
func (im *IntegrityMonitor) sendAlert(alert CorruptionAlert) {
	im.alertsMutex.Lock()
	im.alerts = append(im.alerts, alert)
	if len(im.alerts) > 1000 {
		trimmed := make([]CorruptionAlert, 1000)
		copy(trimmed, im.alerts[len(im.alerts)-1000:])
		im.alerts = trimmed
	}
	im.alertsMutex.Unlock()

	// Snapshot subscribers under the read lock so a concurrent
	// AddAlertSubscriber cannot race this iteration.
	im.mu.RLock()
	subscribers := make([]AlertSubscriber, len(im.alertSubscribers))
	copy(subscribers, im.alertSubscribers)
	im.mu.RUnlock()

	for _, subscriber := range subscribers {
		if err := subscriber.HandleAlert(alert); err != nil {
			im.logger.Error("Failed to send alert",
				"subscriber", fmt.Sprintf("%T", subscriber),
				"error", err)
		}
	}
}
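
// Note: the in-memory history is bounded to the most recent 1000 alerts, so
// anything older is dropped here; durable retention belongs in a subscriber
// such as the file-based handler in this package.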

// AddAlertSubscriber adds an alert subscriber
func (im *IntegrityMonitor) AddAlertSubscriber(subscriber AlertSubscriber) {
	im.mu.Lock()
	defer im.mu.Unlock()
	im.alertSubscribers = append(im.alertSubscribers, subscriber)
}
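
// Usage sketch (a minimal example, not from the original file): wiring the
// log-based handler defined in this package as a subscriber. Construction of
// the monitor follows the pattern used in the tests below.
//
//	log := logger.New("info", "text", "")
//	monitor := NewIntegrityMonitor(log)
//	monitor.AddAlertSubscriber(NewLogAlertHandler(log))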

// GetMetrics returns a copy of current metrics
func (im *IntegrityMonitor) GetMetrics() MetricsSnapshot {
	im.metrics.mu.RLock()
	defer im.metrics.mu.RUnlock()

	// Deep-copy the maps so callers cannot mutate internal state
	recoveryActions := make(map[recovery.RecoveryAction]int64, len(im.metrics.RecoveryActions))
	for k, v := range im.metrics.RecoveryActions {
		recoveryActions[k] = v
	}
	errorsByType := make(map[recovery.ErrorType]int64, len(im.metrics.ErrorsByType))
	for k, v := range im.metrics.ErrorsByType {
		errorsByType[k] = v
	}

	// Return a safe, mutex-free snapshot
	return MetricsSnapshot{
		TotalAddressesProcessed:  im.metrics.TotalAddressesProcessed,
		CorruptAddressesDetected: im.metrics.CorruptAddressesDetected,
		AddressValidationPassed:  im.metrics.AddressValidationPassed,
		AddressValidationFailed:  im.metrics.AddressValidationFailed,
		ContractCallsSucceeded:   im.metrics.ContractCallsSucceeded,
		ContractCallsFailed:      im.metrics.ContractCallsFailed,
		RetryOperationsTriggered: im.metrics.RetryOperationsTriggered,
		FallbackOperationsUsed:   im.metrics.FallbackOperationsUsed,
		CircuitBreakersTripped:   im.metrics.CircuitBreakersTripped,
		LastCorruptionDetection:  im.metrics.LastCorruptionDetection,
		AverageCorruptionScore:   im.metrics.AverageCorruptionScore,
		MaxCorruptionScore:       im.metrics.MaxCorruptionScore,
		HealthScore:              im.metrics.HealthScore,
		HighScore:                im.metrics.HighScore,
		RecoveryActions:          recoveryActions,
		ErrorsByType:             errorsByType,
	}
}

// GetHealthSummary returns a comprehensive health summary
func (im *IntegrityMonitor) GetHealthSummary() map[string]interface{} {
	metrics := im.GetMetrics()

	corruptionRate := 0.0
	if metrics.TotalAddressesProcessed > 0 {
		corruptionRate = float64(metrics.CorruptAddressesDetected) / float64(metrics.TotalAddressesProcessed)
	}

	validationSuccessRate := 0.0
	totalValidations := metrics.AddressValidationPassed + metrics.AddressValidationFailed
	if totalValidations > 0 {
		validationSuccessRate = float64(metrics.AddressValidationPassed) / float64(totalValidations)
	}

	contractCallSuccessRate := 0.0
	totalCalls := metrics.ContractCallsSucceeded + metrics.ContractCallsFailed
	if totalCalls > 0 {
		contractCallSuccessRate = float64(metrics.ContractCallsSucceeded) / float64(totalCalls)
	}

	return map[string]interface{}{
		"enabled":                    im.enabled,
		"health_score":               metrics.HealthScore,
		"total_addresses_processed":  metrics.TotalAddressesProcessed,
		"corruption_detections":      metrics.CorruptAddressesDetected,
		"corruption_rate":            corruptionRate,
		"validation_success_rate":    validationSuccessRate,
		"contract_call_success_rate": contractCallSuccessRate,
		"average_corruption_score":   metrics.AverageCorruptionScore,
		"max_corruption_score":       metrics.MaxCorruptionScore,
		"retry_operations":           metrics.RetryOperationsTriggered,
		"fallback_operations":        metrics.FallbackOperationsUsed,
		"circuit_breakers_tripped":   metrics.CircuitBreakersTripped,
		"last_corruption":            metrics.LastCorruptionDetection,
		"recovery_actions":           metrics.RecoveryActions,
		"errors_by_type":             metrics.ErrorsByType,
		"alert_thresholds":           im.alertThresholds,
		"alert_subscribers":          len(im.alertSubscribers),
	}
}
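
// Reading the summary requires type assertions on the interface values, e.g.
// (a minimal sketch, assuming a monitor constructed as in the tests below):
//
//	summary := monitor.GetHealthSummary()
//	if score := summary["health_score"].(float64); score < 0.9 {
//		// react to degraded integrity health
//	}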

// GetRecentAlerts returns the most recent corruption alerts up to the specified limit.
func (im *IntegrityMonitor) GetRecentAlerts(limit int) []CorruptionAlert {
	im.alertsMutex.RLock()
	defer im.alertsMutex.RUnlock()

	if limit <= 0 || limit > len(im.alerts) {
		limit = len(im.alerts)
	}

	if limit == 0 {
		return []CorruptionAlert{}
	}

	start := len(im.alerts) - limit
	alertsCopy := make([]CorruptionAlert, limit)
	copy(alertsCopy, im.alerts[start:])
	return alertsCopy
}
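
// Usage sketch: fetch the last 10 alerts for display; a limit <= 0, or one
// larger than the stored history, returns the whole bounded history.
//
//	for _, a := range monitor.GetRecentAlerts(10) {
//		fmt.Printf("%s [%s] %s\n", a.Timestamp.Format(time.RFC3339), a.Severity.String(), a.Message)
//	}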

// SetThreshold sets an alert threshold
func (im *IntegrityMonitor) SetThreshold(name string, value float64) {
	im.mu.Lock()
	defer im.mu.Unlock()
	im.alertThresholds[name] = value
}

// Enable enables the integrity monitor
func (im *IntegrityMonitor) Enable() {
	im.mu.Lock()
	defer im.mu.Unlock()
	im.enabled = true
	im.logger.Info("Integrity monitor enabled")
}

// Disable disables the integrity monitor
func (im *IntegrityMonitor) Disable() {
	im.mu.Lock()
	defer im.mu.Unlock()
	im.enabled = false
	im.logger.Info("Integrity monitor disabled")
}

// IsEnabled returns whether the monitor is enabled
func (im *IntegrityMonitor) IsEnabled() bool {
	im.mu.RLock()
	defer im.mu.RUnlock()
	return im.enabled
}

// StartHealthCheckRunner starts the periodic health check routine
func (im *IntegrityMonitor) StartHealthCheckRunner(ctx context.Context) {
	if im.healthCheckRunner != nil {
		im.healthCheckRunner.Start(ctx)
	}
}

// StopHealthCheckRunner stops the periodic health check routine
func (im *IntegrityMonitor) StopHealthCheckRunner() {
	if im.healthCheckRunner != nil {
		im.healthCheckRunner.Stop()
	}
}
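
// Lifecycle sketch (assumes the runner is wired up during construction):
// start periodic checks under a cancellable context and stop them on shutdown.
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	monitor.StartHealthCheckRunner(ctx)
//	defer monitor.StopHealthCheckRunner()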

// GetHealthCheckRunner returns the health check runner
func (im *IntegrityMonitor) GetHealthCheckRunner() *HealthCheckRunner {
	return im.healthCheckRunner
}
391
orig/internal/monitoring/integrity_monitor_test.go
Normal file
@@ -0,0 +1,391 @@
package monitoring

import (
	"fmt"
	"testing"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/fraktal/mev-beta/internal/logger"
	"github.com/fraktal/mev-beta/internal/recovery"
)

// MockAlertSubscriber for testing
type MockAlertSubscriber struct {
	alerts []CorruptionAlert
}

func (m *MockAlertSubscriber) HandleAlert(alert CorruptionAlert) error {
	m.alerts = append(m.alerts, alert)
	return nil
}

func (m *MockAlertSubscriber) GetAlerts() []CorruptionAlert {
	return m.alerts
}

func (m *MockAlertSubscriber) Reset() {
	m.alerts = nil
}

func TestIntegrityMonitor_RecordCorruptionDetected(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)
	mockSubscriber := &MockAlertSubscriber{}
	monitor.AddAlertSubscriber(mockSubscriber)

	// Test various corruption scenarios
	testCases := []struct {
		name             string
		address          string
		corruptionScore  int
		source           string
		expectedSeverity AlertSeverity
	}{
		{
			name:             "Low corruption",
			address:          "0x1234567890123456789012345678901234567890",
			corruptionScore:  30,
			source:           "test_source",
			expectedSeverity: AlertSeverityInfo,
		},
		{
			name:             "Medium corruption",
			address:          "0x1234000000000000000000000000000000000000",
			corruptionScore:  50,
			source:           "token_extraction",
			expectedSeverity: AlertSeverityWarning,
		},
		{
			name:             "High corruption",
			address:          "0x0000001000000000000000000000000000000000",
			corruptionScore:  80,
			source:           "abi_decoder",
			expectedSeverity: AlertSeverityCritical,
		},
		{
			name:             "Critical corruption - TOKEN_0x000000",
			address:          "0x0000000300000000000000000000000000000000",
			corruptionScore:  100,
			source:           "generic_extraction",
			expectedSeverity: AlertSeverityEmergency,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			mockSubscriber.Reset()

			addr := common.HexToAddress(tc.address)
			monitor.RecordCorruptionDetected(addr, tc.corruptionScore, tc.source)

			// Verify metrics were updated
			metrics := monitor.GetMetrics()
			assert.Greater(t, metrics.CorruptAddressesDetected, int64(0))
			assert.GreaterOrEqual(t, metrics.MaxCorruptionScore, tc.corruptionScore)

			// Verify alert was generated
			alerts := mockSubscriber.GetAlerts()
			require.Len(t, alerts, 1)

			alert := alerts[0]
			assert.Equal(t, tc.expectedSeverity, alert.Severity)
			assert.Equal(t, addr, alert.Address)
			assert.Equal(t, tc.corruptionScore, alert.CorruptionScore)
			assert.Equal(t, tc.source, alert.Source)
			assert.Contains(t, alert.Message, "Corruption detected")
		})
	}
}

func TestIntegrityMonitor_HealthScoreCalculation(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	// Test initial health score
	metrics := monitor.GetMetrics()
	assert.Equal(t, 1.0, metrics.HealthScore) // Perfect health initially

	// Record some activity
	monitor.RecordAddressProcessed()
	monitor.RecordAddressProcessed()
	monitor.RecordValidationResult(true)
	monitor.RecordValidationResult(true)
	monitor.RecordContractCallResult(true)
	monitor.RecordContractCallResult(true)

	// Health should still be perfect
	metrics = monitor.GetMetrics()
	assert.Equal(t, 1.0, metrics.HealthScore)

	// Introduce some corruption
	addr := common.HexToAddress("0x0000000300000000000000000000000000000000")
	monitor.RecordCorruptionDetected(addr, 80, "test")

	// Health score should decrease
	metrics = monitor.GetMetrics()
	assert.Less(t, metrics.HealthScore, 1.0)
	assert.Greater(t, metrics.HealthScore, 0.0)

	// Add validation failures
	monitor.RecordValidationResult(false)
	monitor.RecordValidationResult(false)

	// Health should decrease further
	newMetrics := monitor.GetMetrics()
	assert.Less(t, newMetrics.HealthScore, metrics.HealthScore)
}

func TestIntegrityMonitor_RecoveryActionTracking(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	// Record various recovery actions
	monitor.RecordRecoveryAction(recovery.ActionRetryWithBackoff)
	monitor.RecordRecoveryAction(recovery.ActionRetryWithBackoff)
	monitor.RecordRecoveryAction(recovery.ActionUseFallbackData)
	monitor.RecordRecoveryAction(recovery.ActionCircuitBreaker)

	metrics := monitor.GetMetrics()

	// Verify action counts
	assert.Equal(t, int64(2), metrics.RecoveryActions[recovery.ActionRetryWithBackoff])
	assert.Equal(t, int64(1), metrics.RecoveryActions[recovery.ActionUseFallbackData])
	assert.Equal(t, int64(1), metrics.RecoveryActions[recovery.ActionCircuitBreaker])

	// Verify specific counters
	assert.Equal(t, int64(2), metrics.RetryOperationsTriggered)
	assert.Equal(t, int64(1), metrics.FallbackOperationsUsed)
	assert.Equal(t, int64(1), metrics.CircuitBreakersTripped)
}

func TestIntegrityMonitor_ErrorTypeTracking(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	// Record various error types
	errorTypes := []recovery.ErrorType{
		recovery.ErrorTypeAddressCorruption,
		recovery.ErrorTypeContractCallFailed,
		recovery.ErrorTypeRPCConnectionFailed,
		recovery.ErrorTypeDataParsingFailed,
		recovery.ErrorTypeValidationFailed,
		recovery.ErrorTypeAddressCorruption, // Duplicate
	}

	for _, errorType := range errorTypes {
		monitor.RecordErrorType(errorType)
	}

	metrics := monitor.GetMetrics()

	// Verify error type counts
	assert.Equal(t, int64(2), metrics.ErrorsByType[recovery.ErrorTypeAddressCorruption])
	assert.Equal(t, int64(1), metrics.ErrorsByType[recovery.ErrorTypeContractCallFailed])
	assert.Equal(t, int64(1), metrics.ErrorsByType[recovery.ErrorTypeRPCConnectionFailed])
	assert.Equal(t, int64(1), metrics.ErrorsByType[recovery.ErrorTypeDataParsingFailed])
	assert.Equal(t, int64(1), metrics.ErrorsByType[recovery.ErrorTypeValidationFailed])
}

func TestIntegrityMonitor_GetHealthSummary(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	// Generate some activity
	for i := 0; i < 100; i++ {
		monitor.RecordAddressProcessed()
		if i%10 == 0 { // 10% corruption rate
			addr := common.HexToAddress(fmt.Sprintf("0x%040d", i))
			monitor.RecordCorruptionDetected(addr, 50, "test")
		}
		monitor.RecordValidationResult(i%20 != 0)   // 95% success rate
		monitor.RecordContractCallResult(i%10 != 0) // 90% success rate
	}

	summary := monitor.GetHealthSummary()

	// Verify summary structure
	assert.True(t, summary["enabled"].(bool))
	assert.Equal(t, int64(100), summary["total_addresses_processed"].(int64))
	assert.Equal(t, int64(10), summary["corruption_detections"].(int64))
	assert.InDelta(t, 0.1, summary["corruption_rate"].(float64), 0.01)
	assert.InDelta(t, 0.95, summary["validation_success_rate"].(float64), 0.01)
	assert.InDelta(t, 0.9, summary["contract_call_success_rate"].(float64), 0.01)

	// Health score should be reasonable
	healthScore := summary["health_score"].(float64)
	assert.Greater(t, healthScore, 0.7) // Should be decent despite some issues
	assert.Less(t, healthScore, 1.0)    // Not perfect due to corruption
}

func TestIntegrityMonitor_AlertThresholds(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)
	mockSubscriber := &MockAlertSubscriber{}
	monitor.AddAlertSubscriber(mockSubscriber)

	// Test health score threshold
	monitor.SetThreshold("health_score_min", 0.8)

	// Generate activity that drops health below threshold
	for i := 0; i < 50; i++ {
		monitor.RecordAddressProcessed()
		// High corruption rate to drop health score
		addr := common.HexToAddress(fmt.Sprintf("0x%040d", i))
		monitor.RecordCorruptionDetected(addr, 80, "test")
	}

	// Should trigger health score alert
	alerts := mockSubscriber.GetAlerts()
	healthAlerts := 0
	for _, alert := range alerts {
		if alert.Severity == AlertSeverityCritical &&
			alert.Context != nil &&
			alert.Context["health_score"] != nil {
			healthAlerts++
		}
	}
	assert.Greater(t, healthAlerts, 0, "Should have triggered health score alerts")
}

func TestIntegrityMonitor_ConcurrentAccess(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	const numGoroutines = 50
	const operationsPerGoroutine = 100

	done := make(chan bool, numGoroutines)

	// Launch concurrent operations
	for i := 0; i < numGoroutines; i++ {
		go func(id int) {
			defer func() { done <- true }()

			for j := 0; j < operationsPerGoroutine; j++ {
				// Perform various operations
				monitor.RecordAddressProcessed()
				monitor.RecordValidationResult(j%10 != 0)
				monitor.RecordContractCallResult(j%5 != 0)

				if j%20 == 0 { // Occasional corruption
					addr := common.HexToAddress(fmt.Sprintf("0x%020d%020d", id, j))
					monitor.RecordCorruptionDetected(addr, 60, fmt.Sprintf("goroutine_%d", id))
				}

				// Recovery actions
				if j%15 == 0 {
					monitor.RecordRecoveryAction(recovery.ActionRetryWithBackoff)
				}
				if j%25 == 0 {
					monitor.RecordErrorType(recovery.ErrorTypeAddressCorruption)
				}
			}
		}(i)
	}

	// Wait for completion
	for i := 0; i < numGoroutines; i++ {
		select {
		case <-done:
			// Success
		case <-time.After(10 * time.Second):
			t.Fatal("Concurrent test timed out")
		}
	}

	// Verify final metrics are consistent
	metrics := monitor.GetMetrics()
	expectedAddresses := int64(numGoroutines * operationsPerGoroutine)
	assert.Equal(t, expectedAddresses, metrics.TotalAddressesProcessed)

	// Should have some corruption detections
	assert.Greater(t, metrics.CorruptAddressesDetected, int64(0))

	// Should have recorded recovery actions
	assert.Greater(t, metrics.RetryOperationsTriggered, int64(0))

	// Health score should be calculated
	assert.GreaterOrEqual(t, metrics.HealthScore, 0.0)
	assert.LessOrEqual(t, metrics.HealthScore, 1.0)

	t.Logf("Final metrics: Processed=%d, Corrupted=%d, Health=%.3f",
		metrics.TotalAddressesProcessed,
		metrics.CorruptAddressesDetected,
		metrics.HealthScore)
}

func TestIntegrityMonitor_DisableEnable(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	// Should be enabled by default
	assert.True(t, monitor.IsEnabled())

	// Record some activity
	monitor.RecordAddressProcessed()
	monitor.RecordValidationResult(true)

	initialMetrics := monitor.GetMetrics()
	assert.Greater(t, initialMetrics.TotalAddressesProcessed, int64(0))

	// Disable monitor
	monitor.Disable()
	assert.False(t, monitor.IsEnabled())

	// Activity should not be recorded when disabled
	monitor.RecordAddressProcessed()
	monitor.RecordValidationResult(true)

	disabledMetrics := monitor.GetMetrics()
	assert.Equal(t, initialMetrics.TotalAddressesProcessed, disabledMetrics.TotalAddressesProcessed)

	// Re-enable
	monitor.Enable()
	assert.True(t, monitor.IsEnabled())

	// Activity should be recorded again
	monitor.RecordAddressProcessed()
	enabledMetrics := monitor.GetMetrics()
	assert.Greater(t, enabledMetrics.TotalAddressesProcessed, disabledMetrics.TotalAddressesProcessed)
}

func TestIntegrityMonitor_Performance(t *testing.T) {
	log := logger.New("error", "text", "")
	monitor := NewIntegrityMonitor(log)

	const iterations = 10000

	// Benchmark recording operations
	start := time.Now()

	for i := 0; i < iterations; i++ {
		monitor.RecordAddressProcessed()
		monitor.RecordValidationResult(i%10 != 0)
		monitor.RecordContractCallResult(i%5 != 0)

		if i%100 == 0 {
			addr := common.HexToAddress(fmt.Sprintf("0x%040d", i))
			monitor.RecordCorruptionDetected(addr, 50, "benchmark")
		}
	}

	duration := time.Since(start)
	avgTime := duration / iterations

	t.Logf("Performance: %d operations in %v (avg: %v per operation)",
		iterations, duration, avgTime)

	// Should be reasonably fast (under 500 microseconds per operation is acceptable)
	maxTime := 500 * time.Microsecond
	assert.Less(t, avgTime.Nanoseconds(), maxTime.Nanoseconds(),
		"Recording should be faster than %v per operation (got %v)", maxTime, avgTime)

	// Verify metrics are accurate
	metrics := monitor.GetMetrics()
	assert.Equal(t, int64(iterations), metrics.TotalAddressesProcessed)
	assert.Equal(t, int64(100), metrics.CorruptAddressesDetected) // Every 100th iteration
}