345 lines
11 KiB
Bash
Executable File
345 lines
11 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Health Check Script for all web services
|
|
# Checks containers, ports, and URLs
|
|
#
|
|
|
|
# Individual checks are allowed to fail: keep running so that every result
# is collected and reported in the summary.
set +e

# Absolute path of the directory containing this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# ANSI color sequences, stored with a literal backslash so they are expanded
# at print time.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Running totals of check outcomes.
PASS=0 FAIL=0 WARN=0
|
|
|
|
# Print a green "[PASS]" line for the message in $1 and count it.
# Globals:   GREEN, NC (read); PASS (incremented)
# Arguments: $1 - message text (printed literally)
# Returns:   0
log_pass() {
    # Color codes go through %b so their escapes are expanded; the message
    # goes through %s so backslashes inside it are not interpreted.
    printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$1"
    # Plain assignment instead of ((PASS++)): the arithmetic command yields
    # exit status 1 when the old value is 0, making the first call "fail".
    PASS=$((PASS + 1))
}
|
|
|
|
# Print a red "[FAIL]" line for the message in $1 and count it.
# Globals:   RED, NC (read); FAIL (incremented)
# Arguments: $1 - message text (printed literally)
# Returns:   0
log_fail() {
    # Color codes go through %b so their escapes are expanded; the message
    # goes through %s so backslashes inside it are not interpreted.
    printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$1"
    # Plain assignment instead of ((FAIL++)): the arithmetic command yields
    # exit status 1 when the old value is 0, making the first call "fail".
    FAIL=$((FAIL + 1))
}
|
|
|
|
# Print a yellow "[WARN]" line for the message in $1 and count it.
# Globals:   YELLOW, NC (read); WARN (incremented)
# Arguments: $1 - message text (printed literally)
# Returns:   0
log_warn() {
    # Color codes go through %b so their escapes are expanded; the message
    # goes through %s so backslashes inside it are not interpreted.
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
    # Plain assignment instead of ((WARN++)): the arithmetic command yields
    # exit status 1 when the old value is 0, making the first call "fail".
    WARN=$((WARN + 1))
}
|
|
|
|
# Print a blue "[INFO]" line for the message in $1.
# Globals: BLUE, NC (read). No counter is touched.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
|
|
|
|
# Print a blank line followed by a blue "=== <title> ===" heading.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title
section() {
    printf '\n%b\n' "${BLUE}=== $1 ===${NC}"
}
|
|
|
|
# Check whether a container is running under the invoking user's podman.
# Arguments: $1 - container name (matched exactly via an anchored name filter)
# Outputs:   one PASS/WARN/FAIL line through the log_* helpers
# Returns:   0 if the status contains "Up", 2 if it contains "unhealthy",
#            1 if podman lists no such container or the status is anything else
check_container() {
    local name="$1"
    local status

    # Assigned separately from `local` so the declaration does not mask the
    # command's exit status (ShellCheck SC2155).
    status=$(podman ps --filter "name=^${name}$" --format "{{.Status}}" 2>/dev/null | head -1)

    # "unhealthy" is tested before "Up" because an unhealthy container's
    # status can contain both words.
    case "$status" in
        "")
            log_fail "Container '$name' is not running"
            return 1
            ;;
        *unhealthy*)
            log_warn "Container '$name' is unhealthy: $status"
            return 2
            ;;
        *Up*)
            log_pass "Container '$name' is running: $status"
            return 0
            ;;
        *)
            log_fail "Container '$name' status: $status"
            return 1
            ;;
    esac
}
|
|
|
|
# Check whether something accepts TCP connections on 127.0.0.1:<port>.
# Arguments: $1 - TCP port number
#            $2 - human-readable service label used in the log line
# Outputs:   one PASS/FAIL line through the log_* helpers
# Returns:   0 if the port accepts a connection, 1 otherwise
check_port() {
    local port="$1"
    local service="$2"
    local listening=1

    if command -v nc >/dev/null 2>&1; then
        if nc -z 127.0.0.1 "$port" 2>/dev/null; then
            listening=0
        fi
    # Without nc every port would be reported as down; fall back to bash's
    # /dev/tcp redirection. The subshell closes the descriptor on exit.
    elif (exec 3<>"/dev/tcp/127.0.0.1/${port}") 2>/dev/null; then
        listening=0
    fi

    if [ "$listening" -eq 0 ]; then
        log_pass "Port $port ($service) is listening"
        return 0
    fi

    log_fail "Port $port ($service) is not listening"
    return 1
}
|
|
|
|
# Issue a HEAD request and compare the HTTP status code with the expected one.
# Arguments: $1 - URL to request
#            $2 - expected HTTP status code (default 200)
#            $3 - curl --max-time value in seconds (default 10)
# Outputs:   one PASS/FAIL line through the log_* helpers
# Returns:   0 when the status matches, 1 otherwise
check_url() {
    local url="$1"
    local expected="${2:-200}"
    local timeout="${3:-10}"
    local status

    # Assigned separately from `local` so the declaration does not mask the
    # command's exit status (ShellCheck SC2155).
    status=$(curl -sI -o /dev/null -w "%{http_code}" --max-time "$timeout" "$url" 2>/dev/null)

    if [[ "$status" == "$expected" ]]; then
        log_pass "$url returned $status"
        return 0
    fi

    # curl writes 000 when no HTTP response was received; an empty value means
    # curl produced no output at all (e.g. it is not installed). Either way
    # there is no status code to report.
    if [[ -z "$status" || "$status" == "000" ]]; then
        log_fail "$url - connection failed"
        return 1
    fi

    log_fail "$url returned $status (expected $expected)"
    return 1
}
|
|
|
|
# Fetch a URL (following redirects) and look for a pattern in the body.
# Useful for sites whose status code varies.
# Arguments: $1 - URL to fetch
#            $2 - grep pattern to look for, matched case-insensitively
#            $3 - curl --max-time value in seconds (default 10)
# Outputs:   one PASS/FAIL line through the log_* helpers
# Returns:   0 when the pattern is found, 1 otherwise
check_url_content() {
    local url="$1"
    local search="$2"
    local timeout="${3:-10}"
    local content

    # Assigned separately from `local` so the declaration does not mask the
    # command's exit status (ShellCheck SC2155).
    content=$(curl -sL --max-time "$timeout" "$url" 2>/dev/null)

    # `--` keeps a pattern that starts with "-" from being parsed as a grep
    # option; the here-string avoids echo mangling a body such as "-n".
    if grep -qi -- "$search" <<<"$content"; then
        log_pass "$url contains expected content"
        return 0
    fi

    log_fail "$url missing expected content '$search'"
    return 1
}
|
|
|
|
# Report the state of a systemd unit as given by `systemctl is-active`.
# Arguments: $1 - unit name
#            $2 - pass the literal "user" to query with --user (optional)
# Outputs:   one PASS/WARN/FAIL line through the log_* helpers
# Returns:   0 when active, 2 when inactive, 1 for any other state
check_systemd_service() {
    local service="$1"
    local user="${2:-}"
    local -a scope=()
    local state

    if [ "$user" = "user" ]; then
        scope=(--user)
    fi
    state=$(systemctl "${scope[@]}" is-active "$service" 2>/dev/null)

    case "$state" in
        active)
            log_pass "Service '$service' is active"
            return 0
            ;;
        inactive)
            log_warn "Service '$service' is inactive"
            return 2
            ;;
        *)
            log_fail "Service '$service' status: $state"
            return 1
            ;;
    esac
}
|
|
|
|
# Succeed when the health-endpoint body in $1 contains "ok" or "healthy" as a
# whole word (case-insensitive). Whole-word matching keeps "unhealthy" and
# "broken" from being mistaken for a healthy answer.
_health_body_ok() {
    grep -qiwE 'ok|healthy' <<<"$1"
}

# Main health checks: run every check group in order and print a summary.
# Globals: BLUE, GREEN, RED, YELLOW, NC, PASS, FAIL, WARN (read)
# Outputs: banner, per-section PASS/WARN/FAIL lines and a "Results:" line
# Returns: 1 if FAIL is greater than 0 at the end, 0 otherwise
main() {
    local gitea_api auth_health auth_status chuckie_health api_health
    local repo_count domain expiry expiry_epoch now_epoch days_left
    # Upper bound in seconds for the direct curl probes below, so an
    # unresponsive endpoint cannot stall the whole run.
    local -r api_timeout=10

    echo ""
    echo -e "${BLUE}╔════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║ Web Services Health Check ║${NC}"
    echo -e "${BLUE}║ $(date '+%Y-%m-%d %H:%M:%S') ║${NC}"
    echo -e "${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"

    # =========================================
    section "Systemd Services (User)"
    # =========================================
    check_systemd_service "podman-compose-postgres.service" "user"
    check_systemd_service "podman-compose-gitea.service" "user"
    check_systemd_service "web-hosts.service" "user"
    check_systemd_service "test-coppertone-webhook.service" "user"
    check_systemd_service "web-hosts-webhook.service" "user"

    # =========================================
    section "Core Infrastructure Containers"
    # =========================================
    check_container "postgres"
    check_container "gitea"
    # gitea-nginx runs under root podman
    if sudo -n podman ps --filter "name=gitea-nginx" --format "{{.Status}}" 2>/dev/null | grep -q "Up"; then
        log_pass "Container 'gitea-nginx' (root) is running"
    elif sudo -n podman ps --format "{{.ID}}" >/dev/null 2>&1; then
        # Root podman is reachable, so the container really is down.
        log_fail "Container 'gitea-nginx' (root) is not running"
    else
        # Non-interactive sudo failed; the container state is unknown.
        log_warn "Skipping gitea-nginx root container check (sudo required)"
    fi

    # =========================================
    section "Web Host Containers"
    # =========================================

    # Chuckie (MarketManager)
    check_container "chuckie-redis"
    check_container "chuckie-api"
    check_container "chuckie-frontend"

    # Games (Spades)
    check_container "games-spades-backend"
    check_container "games-spades-frontend"

    # Test.coppertone.tech
    check_container "test-coppertone-tech-frontend"
    check_container "test-coppertone-tech-db"
    check_container "test-coppertone-tech-auth"
    check_container "test-coppertone-tech-work-mgmt"
    check_container "test-coppertone-tech-blog"

    # Coppertone.tech (if running)
    check_container "coppertonetech_frontend_1" || true
    check_container "coppertonetech_auth-service_1" || true

    # =========================================
    section "Port Connectivity"
    # =========================================
    check_port 80 "HTTP (nginx)"
    check_port 443 "HTTPS (nginx)"
    check_port 3000 "Gitea"
    check_port 5432 "PostgreSQL"
    check_port 2222 "Gitea SSH"
    check_port 9100 "test.coppertone.tech frontend"
    check_port 9102 "test.coppertone.tech auth"
    check_port 9200 "chuckie.coppertone.tech backend"
    check_port 9201 "chuckie.coppertone.tech frontend"
    check_port 9300 "games.coppertone.tech frontend"

    # =========================================
    section "Website Accessibility (HTTPS)"
    # =========================================
    check_url "https://coppertone.tech" "200"
    check_url "https://test.coppertone.tech" "200"
    check_url "https://chuckie.coppertone.tech" "200"
    check_url "https://api.chuckie.coppertone.tech/health" "200"
    check_url "https://canva.chuckie.coppertone.tech" "200"
    check_url "https://games.coppertone.tech" "200"
    check_url "https://git.coppertone.tech" "200"

    # =========================================
    section "Website Content Verification"
    # =========================================
    check_url_content "https://git.coppertone.tech" "Gitea"
    check_url_content "https://chuckie.coppertone.tech" "html"
    check_url_content "https://canva.chuckie.coppertone.tech" "html"
    check_url_content "https://games.coppertone.tech" "html"
    check_url_content "https://test.coppertone.tech" "html"

    # =========================================
    section "API Health Endpoints"
    # =========================================
    # Gitea API
    gitea_api=$(curl -s --max-time "$api_timeout" "https://git.coppertone.tech/api/v1/version" 2>/dev/null)
    if grep -q "version" <<<"$gitea_api"; then
        log_pass "Gitea API is responsive"
    else
        log_fail "Gitea API not responding"
    fi

    # Test auth service health (fallback to root if /health is missing)
    auth_health=$(curl -s --max-time "$api_timeout" "http://127.0.0.1:9102/health" 2>/dev/null)
    if _health_body_ok "$auth_health"; then
        log_pass "test.coppertone.tech auth service healthy"
    else
        auth_status=$(curl -s -o /dev/null -w "%{http_code}" --max-time "$api_timeout" "http://127.0.0.1:9102/" 2>/dev/null)
        # An empty code means curl printed nothing at all; that is not
        # evidence that the service responded.
        if [[ -n "$auth_status" && "$auth_status" != "000" ]]; then
            log_pass "test.coppertone.tech auth service responding (/) "
        else
            log_warn "test.coppertone.tech auth service health unknown"
        fi
    fi

    # Chuckie backend health
    chuckie_health=$(curl -s --max-time "$api_timeout" "http://127.0.0.1:9200/health" 2>/dev/null)
    if _health_body_ok "$chuckie_health"; then
        log_pass "chuckie.coppertone.tech backend healthy"
    else
        log_warn "chuckie.coppertone.tech backend health unknown"
    fi

    # API subdomain health (via HTTPS)
    api_health=$(curl -s --max-time "$api_timeout" "https://api.chuckie.coppertone.tech/health" 2>/dev/null)
    if _health_body_ok "$api_health"; then
        log_pass "api.chuckie.coppertone.tech backend healthy"
    else
        log_warn "api.chuckie.coppertone.tech backend health unknown"
    fi

    # =========================================
    section "Database Connectivity"
    # =========================================
    if podman exec postgres pg_isready -U gitea -d gitea >/dev/null 2>&1; then
        log_pass "PostgreSQL is accepting connections"
    else
        log_fail "PostgreSQL is not accepting connections"
    fi

    # Check gitea database
    repo_count=$(podman exec postgres psql -U gitea -d gitea -c "SELECT count(*) FROM repository;" -t 2>/dev/null | tr -d '[:space:]')
    # Only compare numerically once the value is known to be a number, so
    # unexpected psql output cannot trigger an "integer expression" error.
    if [[ "$repo_count" =~ ^[0-9]+$ ]] && [ "$repo_count" -gt 0 ]; then
        log_pass "Gitea database has $repo_count repositories"
    else
        log_warn "Gitea database may be empty or inaccessible"
    fi

    # =========================================
    section "SSL Certificates"
    # =========================================
    for domain in coppertone.tech test.coppertone.tech chuckie.coppertone.tech api.chuckie.coppertone.tech canva.chuckie.coppertone.tech games.coppertone.tech git.coppertone.tech; do
        expiry=$(echo | openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2)
        if [[ -z "$expiry" ]]; then
            log_fail "$domain SSL certificate check failed"
            continue
        fi

        # Without this guard an unparsable date would leave expiry_epoch
        # empty and yield a bogus large negative "days left".
        expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null)
        if [[ ! "$expiry_epoch" =~ ^-?[0-9]+$ ]]; then
            log_warn "$domain SSL expiry date could not be parsed ('$expiry')"
            continue
        fi

        now_epoch=$(date +%s)
        days_left=$(( (expiry_epoch - now_epoch) / 86400 ))

        if [ "$days_left" -lt 7 ]; then
            log_fail "$domain SSL expires in $days_left days"
        elif [ "$days_left" -lt 30 ]; then
            log_warn "$domain SSL expires in $days_left days"
        else
            log_pass "$domain SSL valid ($days_left days)"
        fi
    done

    # =========================================
    section "Summary"
    # =========================================
    echo ""
    echo -e "Results: ${GREEN}$PASS passed${NC}, ${RED}$FAIL failed${NC}, ${YELLOW}$WARN warnings${NC}"
    echo ""

    if [ "$FAIL" -gt 0 ]; then
        echo -e "${RED}Health check completed with failures!${NC}"
        return 1
    elif [ "$WARN" -gt 0 ]; then
        echo -e "${YELLOW}Health check completed with warnings.${NC}"
        return 0
    else
        echo -e "${GREEN}All health checks passed!${NC}"
        return 0
    fi
}
|
|
|
|
# Run with optional flags:
#   --quiet | -q   print only FAIL/WARN lines and the "Results:" summary
#   --json  | -j   print only a one-line JSON summary of the counters
#   (anything else) full colored report
case "${1:-}" in
    --quiet|-q)
        # The filter below is anchored at "[FAIL]"/"[WARN]", but colored
        # lines start with an escape sequence and would never match, so
        # colors are blanked for this mode.
        RED='' GREEN='' YELLOW='' BLUE='' NC=''
        main 2>&1 | grep -E "^\[(FAIL|WARN)\]|^Results:"
        # Exit with main's status; grep's is 0 whenever the Results line
        # is printed and would hide failures.
        exit "${PIPESTATUS[0]}"
        ;;
    --json|-j)
        # JSON output for monitoring systems. main runs in the current shell
        # (no pipeline) so the counters it updates are visible afterwards.
        main >/dev/null 2>&1
        echo "{\"pass\": $PASS, \"fail\": $FAIL, \"warn\": $WARN, \"timestamp\": \"$(date -Iseconds)\"}"
        ;;
    *)
        main
        ;;
esac
|