#!/bin/bash
# =============================================================================
# SQL/DATABASE AUDIT SCRIPT
# Comprehensive analysis of PostgreSQL schemas, migrations, and queries
# =============================================================================

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
MIGRATIONS_DIR="$PROJECT_ROOT/backend/migrations"
BACKEND_DIR="$PROJECT_ROOT/backend/functions"
OUTPUT_DIR="$PROJECT_ROOT/audit-reports/database-audit"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE} SQL/DATABASE COMPREHENSIVE AUDIT${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

mkdir -p "$OUTPUT_DIR"

# =============================================================================
# 1. MIGRATION FILE ANALYSIS
# =============================================================================
echo -e "${YELLOW}[1/10] Analyzing migration files...${NC}"

MIGRATION_OUTPUT="$OUTPUT_DIR/migrations-$TIMESTAMP.txt"
echo "# Migration Analysis - $TIMESTAMP" > "$MIGRATION_OUTPUT"

echo "== Migration Files Found ==" >> "$MIGRATION_OUTPUT"
if [ -d "$MIGRATIONS_DIR" ]; then
    ls -la "$MIGRATIONS_DIR"/*.sql 2>/dev/null >> "$MIGRATION_OUTPUT" || echo "No SQL files found" >> "$MIGRATION_OUTPUT"

    echo "" >> "$MIGRATION_OUTPUT"
    echo "== Migration Pairs Check (up/down) ==" >> "$MIGRATION_OUTPUT"
    for up_file in "$MIGRATIONS_DIR"/*.up.sql; do
        if [ -f "$up_file" ]; then
            base_name=$(basename "$up_file" .up.sql)
            down_file="$MIGRATIONS_DIR/${base_name}.down.sql"
            if [ -f "$down_file" ]; then
                echo "[OK] $base_name has both up and down migrations" >> "$MIGRATION_OUTPUT"
            else
                echo "[MISSING] $base_name is missing its DOWN migration!" >> "$MIGRATION_OUTPUT"
            fi
        fi
    done

    echo "" >> "$MIGRATION_OUTPUT"
    echo "== Destructive Operations (DROP, TRUNCATE) ==" >> "$MIGRATION_OUTPUT"
    grep -rn "DROP TABLE\|DROP COLUMN\|TRUNCATE\|DROP INDEX" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$MIGRATION_OUTPUT" || echo "None found" >> "$MIGRATION_OUTPUT"
else
    echo "No migrations directory found at $MIGRATIONS_DIR" >> "$MIGRATION_OUTPUT"
fi

echo -e "${GREEN}  Output: $MIGRATION_OUTPUT${NC}"
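# -----------------------------------------------------------------------------
# Supplementary check (sketch): on PostgreSQL, CREATE INDEX without
# CONCURRENTLY holds a lock that blocks writes for the duration of the build.
# This is only a heuristic grep over the migration files; whether a hit
# matters depends on table size and how migrations are deployed, so treat
# matches as review items, not errors.
# -----------------------------------------------------------------------------
if [ -d "$MIGRATIONS_DIR" ]; then
    echo "" >> "$MIGRATION_OUTPUT"
    echo "== Index builds without CONCURRENTLY (write-locking on large tables) ==" >> "$MIGRATION_OUTPUT"
    grep -rnE "CREATE (UNIQUE )?INDEX" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null \
        | grep -v "CONCURRENTLY" >> "$MIGRATION_OUTPUT" || echo "None found" >> "$MIGRATION_OUTPUT"
fi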
# =============================================================================
# 2. SQL INJECTION ANALYSIS
# =============================================================================
echo -e "${YELLOW}[2/10] Scanning for SQL injection vulnerabilities...${NC}"

SQLI_OUTPUT="$OUTPUT_DIR/sql-injection-$TIMESTAMP.txt"
echo "# SQL Injection Analysis - $TIMESTAMP" > "$SQLI_OUTPUT"

echo "== String formatting (fmt.Sprintf) in SQL queries (HIGH RISK) ==" >> "$SQLI_OUTPUT"
grep -rn 'fmt.Sprintf.*SELECT\|fmt.Sprintf.*INSERT\|fmt.Sprintf.*UPDATE\|fmt.Sprintf.*DELETE' "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$SQLI_OUTPUT" || echo "None found" >> "$SQLI_OUTPUT"

echo "" >> "$SQLI_OUTPUT"
echo "== String concatenation with + in queries ==" >> "$SQLI_OUTPUT"
grep -rn '"\s*+\s*.*SELECT\|"\s*+\s*.*INSERT\|"\s*+\s*.*UPDATE' "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$SQLI_OUTPUT" || echo "None found" >> "$SQLI_OUTPUT"

echo "" >> "$SQLI_OUTPUT"
echo "== Raw SQL execution (review each call site for parameterization) ==" >> "$SQLI_OUTPUT"
grep -rn 'db.Exec\|db.Query\|db.QueryRow' "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$SQLI_OUTPUT" || echo "None found" >> "$SQLI_OUTPUT"

echo "" >> "$SQLI_OUTPUT"
echo "== Parameterized queries (GOOD - verify all use \$1, \$2 placeholders) ==" >> "$SQLI_OUTPUT"
PARAM_COUNT=$(grep -rn '\$[0-9]' "$BACKEND_DIR" --include="*.go" 2>/dev/null | wc -l)
echo "$PARAM_COUNT parameterized query usages found" >> "$SQLI_OUTPUT"

echo -e "${GREEN}  Output: $SQLI_OUTPUT${NC}"

# =============================================================================
# 3. SCHEMA ANALYSIS
# =============================================================================
echo -e "${YELLOW}[3/10] Analyzing schema design...${NC}"

SCHEMA_OUTPUT="$OUTPUT_DIR/schema-analysis-$TIMESTAMP.txt"
echo "# Schema Design Analysis - $TIMESTAMP" > "$SCHEMA_OUTPUT"

if [ -d "$MIGRATIONS_DIR" ]; then
    echo "== Tables Created ==" >> "$SCHEMA_OUTPUT"
    grep -rn "CREATE TABLE" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SCHEMA_OUTPUT" || echo "None found" >> "$SCHEMA_OUTPUT"

    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Foreign Key Constraints ==" >> "$SCHEMA_OUTPUT"
    grep -rn "REFERENCES\|FOREIGN KEY" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SCHEMA_OUTPUT" || echo "None found" >> "$SCHEMA_OUTPUT"

    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Indexes Created ==" >> "$SCHEMA_OUTPUT"
    grep -rn "CREATE INDEX\|CREATE UNIQUE INDEX" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SCHEMA_OUTPUT" || echo "None found" >> "$SCHEMA_OUTPUT"

    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Nullable columns (verify NOT NULL was omitted intentionally) ==" >> "$SCHEMA_OUTPUT"
    grep -rn "VARCHAR\|TEXT\|INTEGER\|BOOLEAN\|TIMESTAMP" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null | grep -v "NOT NULL" >> "$SCHEMA_OUTPUT" || echo "All columns have NOT NULL or are intentionally nullable" >> "$SCHEMA_OUTPUT"

    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Timestamp columns without DEFAULT values ==" >> "$SCHEMA_OUTPUT"
    grep -rn "created_at\|updated_at" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null | grep -v "DEFAULT" >> "$SCHEMA_OUTPUT" || echo "All timestamp fields have defaults" >> "$SCHEMA_OUTPUT"

    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Enum Types ==" >> "$SCHEMA_OUTPUT"
    grep -rn "CREATE TYPE.*ENUM\|AS ENUM" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SCHEMA_OUTPUT" || echo "None found" >> "$SCHEMA_OUTPUT"
fi

echo -e "${GREEN}  Output: $SCHEMA_OUTPUT${NC}"
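# -----------------------------------------------------------------------------
# Supplementary check (sketch): PostgreSQL does not automatically index
# foreign key columns, which can make cascades and joins slow. This heuristic
# extracts column names that appear before REFERENCES and looks for a matching
# CREATE INDEX; it assumes one-column-per-line DDL and will miss multi-column
# keys and table-level constraint styles, so hits need manual confirmation.
# -----------------------------------------------------------------------------
if [ -d "$MIGRATIONS_DIR" ]; then
    echo "" >> "$SCHEMA_OUTPUT"
    echo "== Foreign key columns with no apparent index (heuristic) ==" >> "$SCHEMA_OUTPUT"
    grep -rhoE "[a-z_]+[[:space:]]+[A-Z]+[^,]*REFERENCES" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null \
        | awk '{print $1}' | sort -u | while read -r fk_col; do
            grep -rq "CREATE.*INDEX.*$fk_col" "$MIGRATIONS_DIR" --include="*.sql" \
                || echo "[REVIEW] no index found for FK column: $fk_col" >> "$SCHEMA_OUTPUT"
        done
fi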
# =============================================================================
# 4. QUERY PERFORMANCE ANALYSIS
# =============================================================================
echo -e "${YELLOW}[4/10] Analyzing query patterns for performance...${NC}"

QUERY_OUTPUT="$OUTPUT_DIR/query-performance-$TIMESTAMP.txt"
echo "# Query Performance Analysis - $TIMESTAMP" > "$QUERY_OUTPUT"

echo "== SELECT * usage (fetches all columns; often wasteful) ==" >> "$QUERY_OUTPUT"
grep -rn "SELECT \*" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$QUERY_OUTPUT" || echo "None found - good practice!" >> "$QUERY_OUTPUT"

echo "" >> "$QUERY_OUTPUT"
echo "== SELECT queries without LIMIT (potential unbounded result sets) ==" >> "$QUERY_OUTPUT"
UNBOUNDED=$(grep -rn "SELECT.*FROM" "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -v "LIMIT\|WHERE.*id\s*=" | head -50)
[ -n "$UNBOUNDED" ] && echo "$UNBOUNDED" >> "$QUERY_OUTPUT" || echo "All queries appear bounded" >> "$QUERY_OUTPUT"

echo "" >> "$QUERY_OUTPUT"
echo "== Possible N+1 pattern (queries inside loops) ==" >> "$QUERY_OUTPUT"
NPLUSONE=$(grep -rn -A 10 "for.*{" "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -E "db\.(Query|Exec)" | head -30)
[ -n "$NPLUSONE" ] && echo "$NPLUSONE" >> "$QUERY_OUTPUT" || echo "None detected (manual review recommended)" >> "$QUERY_OUTPUT"

echo "" >> "$QUERY_OUTPUT"
echo "== SELECT queries without ORDER BY (non-deterministic row order) ==" >> "$QUERY_OUTPUT"
UNORDERED=$(grep -rn "SELECT.*FROM" "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -v "ORDER BY\|LIMIT 1\|WHERE.*id" | head -30)
[ -n "$UNORDERED" ] && echo "$UNORDERED" >> "$QUERY_OUTPUT" || echo "All queries appear to have ordering" >> "$QUERY_OUTPUT"

echo -e "${GREEN}  Output: $QUERY_OUTPUT${NC}"

# =============================================================================
# 5. CONNECTION MANAGEMENT
# =============================================================================
echo -e "${YELLOW}[5/10] Analyzing database connection handling...${NC}"

CONN_OUTPUT="$OUTPUT_DIR/connection-management-$TIMESTAMP.txt"
echo "# Database Connection Management - $TIMESTAMP" > "$CONN_OUTPUT"

echo "== Connection pool settings ==" >> "$CONN_OUTPUT"
grep -rn "SetMaxOpenConns\|SetMaxIdleConns\|SetConnMaxLifetime" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$CONN_OUTPUT" || echo "No pool configuration found - using driver defaults (potential issue)" >> "$CONN_OUTPUT"

echo "" >> "$CONN_OUTPUT"
echo "== rows.Close() calls (resource cleanup) ==" >> "$CONN_OUTPUT"
CLOSE_COUNT=$(grep -rn "rows.Close\|defer rows.Close" "$BACKEND_DIR" --include="*.go" 2>/dev/null | wc -l)
echo "$CLOSE_COUNT rows.Close() calls found" >> "$CONN_OUTPUT"

echo "" >> "$CONN_OUTPUT"
echo "== db.Query call sites without a nearby rows.Close() (potential leak) ==" >> "$CONN_OUTPUT"
LEAKS=$(grep -rn -A 5 "db.Query" "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -v "defer.*Close\|rows.Close" | head -30)
[ -n "$LEAKS" ] && echo "$LEAKS" >> "$CONN_OUTPUT" || echo "All queries appear to close rows" >> "$CONN_OUTPUT"

echo "" >> "$CONN_OUTPUT"
echo "== Transaction handling ==" >> "$CONN_OUTPUT"
grep -rn "db.Begin\|tx.Commit\|tx.Rollback" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$CONN_OUTPUT" || echo "No explicit transactions found" >> "$CONN_OUTPUT"

echo -e "${GREEN}  Output: $CONN_OUTPUT${NC}"
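# -----------------------------------------------------------------------------
# Reference note (illustrative, not from this repo): append an example of the
# database/sql pool settings the grep above looks for, so the report shows
# reviewers what a configured pool looks like. The specific values are
# placeholders to be tuned per workload, not recommendations.
# -----------------------------------------------------------------------------
cat >> "$CONN_OUTPUT" <<'EOF'

== Reference: example pool configuration (Go database/sql) ==
    db.SetMaxOpenConns(25)                  // cap concurrent connections
    db.SetMaxIdleConns(5)                   // keep a small idle pool
    db.SetConnMaxLifetime(30 * time.Minute) // recycle before server/LB timeouts
EOF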
# =============================================================================
# 6. DATA INTEGRITY CHECKS
# =============================================================================
echo -e "${YELLOW}[6/10] Checking data integrity patterns...${NC}"

INTEGRITY_OUTPUT="$OUTPUT_DIR/data-integrity-$TIMESTAMP.txt"
echo "# Data Integrity Analysis - $TIMESTAMP" > "$INTEGRITY_OUTPUT"

echo "== Soft delete patterns ==" >> "$INTEGRITY_OUTPUT"
grep -rn "deleted_at\|is_deleted\|soft.delete" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$INTEGRITY_OUTPUT" || echo "No soft delete pattern found" >> "$INTEGRITY_OUTPUT"
grep -rn "deleted_at\|is_deleted" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$INTEGRITY_OUTPUT" || true

echo "" >> "$INTEGRITY_OUTPUT"
echo "== Audit trail columns ==" >> "$INTEGRITY_OUTPUT"
grep -rn "created_by\|updated_by\|audit" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$INTEGRITY_OUTPUT" || echo "No audit columns found" >> "$INTEGRITY_OUTPUT"

echo "" >> "$INTEGRITY_OUTPUT"
echo "== Cascading deletes (review for unintended data loss) ==" >> "$INTEGRITY_OUTPUT"
grep -rn "ON DELETE CASCADE" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$INTEGRITY_OUTPUT" || echo "No cascading deletes found" >> "$INTEGRITY_OUTPUT"

echo "" >> "$INTEGRITY_OUTPUT"
echo "== Check constraints ==" >> "$INTEGRITY_OUTPUT"
grep -rnE "CHECK[[:space:]]*\(" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$INTEGRITY_OUTPUT" || echo "No check constraints found" >> "$INTEGRITY_OUTPUT"

echo -e "${GREEN}  Output: $INTEGRITY_OUTPUT${NC}"

# =============================================================================
# 7. SENSITIVE DATA HANDLING
# =============================================================================
echo -e "${YELLOW}[7/10] Checking sensitive data handling...${NC}"

SENSITIVE_OUTPUT="$OUTPUT_DIR/sensitive-data-$TIMESTAMP.txt"
echo "# Sensitive Data Handling - $TIMESTAMP" > "$SENSITIVE_OUTPUT"

echo "== Password storage columns ==" >> "$SENSITIVE_OUTPUT"
grep -rn "password\|passwd\|pwd" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SENSITIVE_OUTPUT" || echo "None found" >> "$SENSITIVE_OUTPUT"

echo "" >> "$SENSITIVE_OUTPUT"
echo "== bcrypt/hash usage (password hashing) ==" >> "$SENSITIVE_OUTPUT"
grep -rn "bcrypt\|hash\|GenerateFromPassword" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$SENSITIVE_OUTPUT" || echo "None found - CRITICAL if passwords are stored" >> "$SENSITIVE_OUTPUT"

echo "" >> "$SENSITIVE_OUTPUT"
echo "== Encryption usage ==" >> "$SENSITIVE_OUTPUT"
grep -rn "encrypt\|cipher\|aes" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$SENSITIVE_OUTPUT" || echo "No encryption found" >> "$SENSITIVE_OUTPUT"

echo "" >> "$SENSITIVE_OUTPUT"
echo "== PII columns (email, phone, address, ssn) ==" >> "$SENSITIVE_OUTPUT"
grep -rn "email\|phone\|address\|ssn\|social_security" "$MIGRATIONS_DIR" --include="*.sql" 2>/dev/null >> "$SENSITIVE_OUTPUT" || echo "None found" >> "$SENSITIVE_OUTPUT"

echo -e "${GREEN}  Output: $SENSITIVE_OUTPUT${NC}"
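# -----------------------------------------------------------------------------
# Supplementary check (sketch): credentials committed alongside the schema are
# a common leak path. This greps env/compose files for hardcoded credential
# assignments; the file globs are an assumption about this repo's layout, and
# matches may be harmless placeholders, so review each hit.
# -----------------------------------------------------------------------------
echo "" >> "$SENSITIVE_OUTPUT"
echo "== Possible hardcoded credentials in config files ==" >> "$SENSITIVE_OUTPUT"
SECRETS=$(grep -rnEi '(password|secret|api_key)[[:space:]]*[:=]' "$PROJECT_ROOT" --include="*.env" --include="*.yml" --include="*.yaml" 2>/dev/null | head -30)
[ -n "$SECRETS" ] && echo "$SECRETS" >> "$SENSITIVE_OUTPUT" || echo "None found" >> "$SENSITIVE_OUTPUT"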
# =============================================================================
# 8. ERROR HANDLING IN QUERIES
# =============================================================================
echo -e "${YELLOW}[8/10] Analyzing SQL error handling...${NC}"

ERROR_OUTPUT="$OUTPUT_DIR/sql-error-handling-$TIMESTAMP.txt"
echo "# SQL Error Handling Analysis - $TIMESTAMP" > "$ERROR_OUTPUT"

echo "== sql.ErrNoRows handling ==" >> "$ERROR_OUTPUT"
grep -rn "sql.ErrNoRows" "$BACKEND_DIR" --include="*.go" 2>/dev/null >> "$ERROR_OUTPUT" || echo "No ErrNoRows checks found - empty results may surface as errors" >> "$ERROR_OUTPUT"

echo "" >> "$ERROR_OUTPUT"
echo "== Generic error returns without added context ==" >> "$ERROR_OUTPUT"
BARE_RETURNS=$(grep -rn 'return.*err$' "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -i "db\|sql\|query" | head -30)
[ -n "$BARE_RETURNS" ] && echo "$BARE_RETURNS" >> "$ERROR_OUTPUT" || echo "None found" >> "$ERROR_OUTPUT"

echo "" >> "$ERROR_OUTPUT"
echo "== Error logging on DB operations ==" >> "$ERROR_OUTPUT"
DB_LOGS=$(grep -rn "log.*err\|log\.Printf.*err" "$BACKEND_DIR" --include="*.go" 2>/dev/null | grep -i "db\|sql\|query" | head -30)
[ -n "$DB_LOGS" ] && echo "$DB_LOGS" >> "$ERROR_OUTPUT" || echo "None found" >> "$ERROR_OUTPUT"

echo -e "${GREEN}  Output: $ERROR_OUTPUT${NC}"

# =============================================================================
# 9. LIVE DATABASE AUDIT (if accessible)
# =============================================================================
echo -e "${YELLOW}[9/10] Attempting live database audit...${NC}"

LIVE_OUTPUT="$OUTPUT_DIR/live-db-audit-$TIMESTAMP.txt"
echo "# Live Database Audit - $TIMESTAMP" > "$LIVE_OUTPUT"

# Check whether we can connect to the database
if command -v psql &> /dev/null; then
    DB_HOST="${DB_HOST:-localhost}"
    DB_USER="${DB_USER:-user}"
    DB_NAME="${DB_NAME:-coppertone_db}"
    DB_PASSWORD="${DB_PASSWORD:-password}"

    export PGPASSWORD="$DB_PASSWORD"
    echo "Attempting to connect to $DB_NAME@$DB_HOST..." >> "$LIVE_OUTPUT"

    # Run analysis queries (stderr is captured in the report on failure)
    psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "
        SELECT 'Tables' AS type, count(*) AS count
        FROM information_schema.tables
        WHERE table_schema = 'public';
    " >> "$LIVE_OUTPUT" 2>&1 || echo "Could not connect to database" >> "$LIVE_OUTPUT"

    psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "
        SELECT table_name,
               pg_size_pretty(pg_total_relation_size(quote_ident(table_name))) AS size
        FROM information_schema.tables
        WHERE table_schema = 'public'
        ORDER BY pg_total_relation_size(quote_ident(table_name)) DESC;
    " >> "$LIVE_OUTPUT" 2>&1 || true

    psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "
        SELECT indexname, indexdef
        FROM pg_indexes
        WHERE schemaname = 'public';
    " >> "$LIVE_OUTPUT" 2>&1 || true

    unset PGPASSWORD
else
    echo "psql not available - skipping live database audit" >> "$LIVE_OUTPUT"
fi

echo -e "${GREEN}  Output: $LIVE_OUTPUT${NC}"
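# -----------------------------------------------------------------------------
# Supplementary live check (sketch): pg_stat_user_indexes records how often
# each index has been scanned since statistics were last reset, so rows with
# idx_scan = 0 are candidates for removal. Recently reset stats or reads
# served by a replica both produce false positives; runs only when psql is
# available, mirroring section 9's connection handling.
# -----------------------------------------------------------------------------
if command -v psql &> /dev/null; then
    export PGPASSWORD="${DB_PASSWORD:-password}"
    psql -h "${DB_HOST:-localhost}" -U "${DB_USER:-user}" -d "${DB_NAME:-coppertone_db}" -c "
        SELECT relname AS table_name, indexrelname AS index_name, idx_scan
        FROM pg_stat_user_indexes
        WHERE idx_scan = 0
        ORDER BY relname;
    " >> "$LIVE_OUTPUT" 2>&1 || true
    unset PGPASSWORD
fi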
# =============================================================================
# 10. BACKUP AND RECOVERY
# =============================================================================
echo -e "${YELLOW}[10/10] Checking backup/recovery patterns...${NC}"

BACKUP_OUTPUT="$OUTPUT_DIR/backup-recovery-$TIMESTAMP.txt"
echo "# Backup and Recovery Analysis - $TIMESTAMP" > "$BACKUP_OUTPUT"

echo "== Backup scripts ==" >> "$BACKUP_OUTPUT"
BACKUP_FILES=$(find "$PROJECT_ROOT" \( -name "*backup*" -o -name "*dump*" \) 2>/dev/null)
[ -n "$BACKUP_FILES" ] && echo "$BACKUP_FILES" >> "$BACKUP_OUTPUT" || echo "No backup scripts found" >> "$BACKUP_OUTPUT"

echo "" >> "$BACKUP_OUTPUT"
echo "== Point-in-time recovery configuration ==" >> "$BACKUP_OUTPUT"
grep -rn "wal\|archive\|recovery" "$PROJECT_ROOT" --include="*.yml" --include="*.yaml" --include="*.conf" 2>/dev/null >> "$BACKUP_OUTPUT" || echo "No WAL/archive configuration found" >> "$BACKUP_OUTPUT"

echo "" >> "$BACKUP_OUTPUT"
echo "== Database volume persistence ==" >> "$BACKUP_OUTPUT"
grep -rn "db_data\|postgres.*volume" "$PROJECT_ROOT/podman-compose.yml" 2>/dev/null >> "$BACKUP_OUTPUT" || echo "Check podman-compose.yml manually" >> "$BACKUP_OUTPUT"

echo -e "${GREEN}  Output: $BACKUP_OUTPUT${NC}"

# =============================================================================
# SUMMARY
# =============================================================================
echo ""
echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE} DATABASE AUDIT COMPLETE${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""
echo -e "Reports generated in: ${GREEN}$OUTPUT_DIR${NC}"
echo ""
echo "Files generated:"
ls -la "$OUTPUT_DIR"/*"$TIMESTAMP"* 2>/dev/null || echo "No files generated"
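# -----------------------------------------------------------------------------
# Optional roll-up (sketch): count the obvious red flags recorded above so the
# console output ends with a rough severity signal. The markers counted here
# ([MISSING], ON DELETE CASCADE) are just the ones this script emits; the
# counts are indicative, not a pass/fail verdict.
# -----------------------------------------------------------------------------
MISSING_DOWN=$(grep -c "\[MISSING\]" "$MIGRATION_OUTPUT" 2>/dev/null || true)
CASCADES=$(grep -c "ON DELETE CASCADE" "$INTEGRITY_OUTPUT" 2>/dev/null || true)
echo ""
echo "Quick counts: ${MISSING_DOWN:-0} missing down migration(s), ${CASCADES:-0} cascading delete(s) flagged"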