Files
web-hosts/scripts/health-check.sh
2025-12-26 13:17:19 +01:00

345 lines
11 KiB
Bash
Executable File

#!/bin/bash
#
# Health Check Script for all web services
# Checks containers, ports, and URLs
#
# Keep going after a failing command: every check has to run so that the
# final summary reflects all of them.
set +e

# Absolute path of the directory holding this script (not referenced in the
# visible part of the file).
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# ANSI colour sequences, stored as literal backslash escapes and interpreted
# when they are printed.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Result tallies, bumped by log_pass / log_fail / log_warn.
PASS=0 FAIL=0 WARN=0
# Print a green "[PASS]" line and count it.
# Globals:   GREEN, NC (read), PASS (incremented)
# Arguments: $1 - message text, printed verbatim
# Returns:   0
log_pass() {
  # %s keeps backslashes in the message (URLs, container status text) literal;
  # only the colour variables are escape-expanded via %b.
  printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$1"
  # ((PASS++)) evaluates to 0 on the first call and so returns status 1;
  # an arithmetic assignment always succeeds.
  PASS=$((PASS + 1))
}
# Print a red "[FAIL]" line and count it.
# Globals:   RED, NC (read), FAIL (incremented)
# Arguments: $1 - message text, printed verbatim
# Returns:   0
log_fail() {
  # %s keeps backslashes in the message literal; only the colour variables
  # are escape-expanded via %b.
  printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$1"
  # ((FAIL++)) evaluates to 0 on the first call and so returns status 1;
  # an arithmetic assignment always succeeds.
  FAIL=$((FAIL + 1))
}
# Print a yellow "[WARN]" line and count it.
# Globals:   YELLOW, NC (read), WARN (incremented)
# Arguments: $1 - message text, printed verbatim
# Returns:   0
log_warn() {
  # %s keeps backslashes in the message literal; only the colour variables
  # are escape-expanded via %b.
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
  # ((WARN++)) evaluates to 0 on the first call and so returns status 1;
  # an arithmetic assignment always succeeds.
  WARN=$((WARN + 1))
}
# Print a blue "[INFO]" line. Informational only: no counter is touched.
# Arguments: $1 - message text
log_info() {
  local message="$1"
  echo -e "${BLUE}[INFO]${NC} ${message}"
}
# Print a blank line followed by a blue "=== <title> ===" heading.
# Arguments: $1 - section title
section() {
  local title="$1"
  echo -e "\n${BLUE}=== ${title} ===${NC}"
}
# Report the state of the podman container whose name is exactly $1.
# Only the first status line returned by `podman ps` is considered.
# Arguments: $1 - container name
# Returns:   0 when the status contains "Up",
#            2 when the status contains "unhealthy" (logged as a warning),
#            1 when no status is returned or the status is anything else.
check_container() {
  local name="$1"
  local state
  state=$(podman ps --filter "name=^${name}$" --format "{{.Status}}" 2>/dev/null | head -1)

  # "unhealthy" is tested before "Up" so that a status containing both is
  # reported as a warning rather than a pass.
  case "$state" in
    "")
      log_fail "Container '$name' is not running"
      return 1
      ;;
    *unhealthy*)
      log_warn "Container '$name' is unhealthy: $state"
      return 2
      ;;
    *Up*)
      log_pass "Container '$name' is running: $state"
      return 0
      ;;
    *)
      log_fail "Container '$name' status: $state"
      return 1
      ;;
  esac
}
# Probe a TCP port on 127.0.0.1 with `nc -z` and log the outcome.
# Arguments: $1 - port number
#            $2 - human-readable service label used in the log line
# Returns:   0 when the probe succeeds, 1 otherwise
check_port() {
  local port="$1"
  local service="$2"

  if ! nc -z 127.0.0.1 "$port" 2>/dev/null; then
    log_fail "Port $port ($service) is not listening"
    return 1
  fi

  log_pass "Port $port ($service) is listening"
  return 0
}
# Send a HEAD request (curl -I) and compare the HTTP status code with the
# expected one. Redirects are not followed.
# Arguments: $1 - URL to probe
#            $2 - expected status code (default 200)
#            $3 - curl --max-time value in seconds (default 10)
# Returns:   0 when the code matches, 1 otherwise
check_url() {
  local url="$1"
  local expected="${2:-200}"
  local timeout="${3:-10}"
  local code
  code=$(curl -sI -o /dev/null -w "%{http_code}" --max-time "$timeout" "$url" 2>/dev/null)

  if [[ "$code" == "$expected" ]]; then
    log_pass "$url returned $code"
    return 0
  fi

  # curl writes 000 as the code when no HTTP response was received.
  if [[ "$code" == "000" ]]; then
    log_fail "$url - connection failed"
  else
    log_fail "$url returned $code (expected $expected)"
  fi
  return 1
}
# Check URL returns content (for sites that may return different codes).
# Fetches the URL with GET, following redirects, and looks for a pattern in
# the response body.
# Arguments: $1 - URL to fetch
#            $2 - grep pattern (basic regex, matched case-insensitively)
#            $3 - curl --max-time value in seconds (default 10)
# Returns:   0 when the body matches the pattern, 1 otherwise
check_url_content() {
  local url="$1"
  local search="$2"
  local timeout="${3:-10}"
  local content
  content=$(curl -sL --max-time "$timeout" "$url" 2>/dev/null)

  # '--' stops option parsing so a pattern that starts with '-' (such as
  # '-->') is searched for instead of being rejected as an unknown grep
  # option. The here-string feeds the body without passing it through echo.
  if grep -qi -- "$search" <<<"$content"; then
    log_pass "$url contains expected content"
    return 0
  else
    log_fail "$url missing expected content '$search'"
    return 1
  fi
}
# Ask systemctl whether a unit is active and log the answer.
# Arguments: $1 - unit name
#            $2 - pass the literal string "user" to query the per-user
#                 manager (systemctl --user); anything else queries the
#                 system manager
# Returns:   0 when the unit is "active",
#            2 when it is "inactive" (logged as a warning),
#            1 for any other answer
check_systemd_service() {
  local service="$1"
  local user="${2:-}"

  local -a query=(systemctl)
  if [ "$user" = "user" ]; then
    query+=(--user)
  fi

  local state
  state=$("${query[@]}" is-active "$service" 2>/dev/null)

  case "$state" in
    active)
      log_pass "Service '$service' is active"
      return 0
      ;;
    inactive)
      log_warn "Service '$service' is inactive"
      return 2
      ;;
    *)
      log_fail "Service '$service' status: $state"
      return 1
      ;;
  esac
}
# Main health checks
#
# Runs every check group in order (systemd units, containers, ports, URLs,
# API health endpoints, database, TLS certificates) and prints a summary.
# Globals: PASS, FAIL, WARN (read here; updated by the log_* helpers)
# Returns: 1 if at least one check was logged as a failure, otherwise 0
#          (warnings alone do not fail the run).
main() {
  # Upper bounds, in seconds, for the probes issued directly from this
  # function. Without them a single stalled endpoint blocks the whole run.
  local -r http_timeout=10
  local -r tls_timeout=10

  echo ""
  echo -e "${BLUE}╔════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${BLUE}║ Web Services Health Check ║${NC}"
  echo -e "${BLUE}$(date '+%Y-%m-%d %H:%M:%S')${NC}"
  echo -e "${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"

  # =========================================
  section "Systemd Services (User)"
  # =========================================
  check_systemd_service "podman-compose-postgres.service" "user"
  check_systemd_service "podman-compose-gitea.service" "user"
  check_systemd_service "web-hosts.service" "user"
  check_systemd_service "test-coppertone-webhook.service" "user"
  check_systemd_service "web-hosts-webhook.service" "user"

  # =========================================
  section "Core Infrastructure Containers"
  # =========================================
  check_container "postgres"
  check_container "gitea"
  # gitea-nginx runs under root podman
  if sudo -n podman ps --filter "name=gitea-nginx" --format "{{.Status}}" 2>/dev/null | grep -q "Up"; then
    log_pass "Container 'gitea-nginx' (root) is running"
  else
    # Tell "container is down" apart from "sudo cannot run without a
    # password": only the former is a failure.
    if sudo -n podman ps --format "{{.ID}}" >/dev/null 2>&1; then
      log_fail "Container 'gitea-nginx' (root) is not running"
    else
      log_warn "Skipping gitea-nginx root container check (sudo required)"
    fi
  fi

  # =========================================
  section "Web Host Containers"
  # =========================================
  # Chuckie (MarketManager)
  check_container "chuckie-redis"
  check_container "chuckie-api"
  check_container "chuckie-frontend"
  # Games (Spades)
  check_container "games-spades-backend"
  check_container "games-spades-frontend"
  # Test.coppertone.tech
  check_container "test-coppertone-tech-frontend"
  check_container "test-coppertone-tech-db"
  check_container "test-coppertone-tech-auth"
  check_container "test-coppertone-tech-work-mgmt"
  check_container "test-coppertone-tech-blog"
  # Coppertone.tech (if running)
  check_container "coppertonetech_frontend_1" || true
  check_container "coppertonetech_auth-service_1" || true

  # =========================================
  section "Port Connectivity"
  # =========================================
  check_port 80 "HTTP (nginx)"
  check_port 443 "HTTPS (nginx)"
  check_port 3000 "Gitea"
  check_port 5432 "PostgreSQL"
  check_port 2222 "Gitea SSH"
  check_port 9100 "test.coppertone.tech frontend"
  check_port 9102 "test.coppertone.tech auth"
  check_port 9200 "chuckie.coppertone.tech backend"
  check_port 9201 "chuckie.coppertone.tech frontend"
  check_port 9300 "games.coppertone.tech frontend"

  # =========================================
  section "Website Accessibility (HTTPS)"
  # =========================================
  check_url "https://coppertone.tech" "200"
  check_url "https://test.coppertone.tech" "200"
  check_url "https://chuckie.coppertone.tech" "200"
  check_url "https://api.chuckie.coppertone.tech/health" "200"
  check_url "https://canva.chuckie.coppertone.tech" "200"
  check_url "https://games.coppertone.tech" "200"
  check_url "https://git.coppertone.tech" "200"

  # =========================================
  section "Website Content Verification"
  # =========================================
  check_url_content "https://git.coppertone.tech" "Gitea"
  check_url_content "https://chuckie.coppertone.tech" "html"
  check_url_content "https://canva.chuckie.coppertone.tech" "html"
  check_url_content "https://games.coppertone.tech" "html"
  check_url_content "https://test.coppertone.tech" "html"

  # =========================================
  section "API Health Endpoints"
  # =========================================
  # NOTE(review): the substring matches on "ok"/"healthy" below also accept
  # bodies such as "unhealthy" or "token"; tighten them once the response
  # format of each endpoint is confirmed.

  # Gitea API
  local gitea_api
  gitea_api=$(curl -s --max-time "$http_timeout" "https://git.coppertone.tech/api/v1/version" 2>/dev/null)
  if echo "$gitea_api" | grep -q "version"; then
    log_pass "Gitea API is responsive"
  else
    log_fail "Gitea API not responding"
  fi

  # Test auth service health (fallback to root if /health is missing)
  local auth_health auth_status
  auth_health=$(curl -s --max-time "$http_timeout" "http://127.0.0.1:9102/health" 2>/dev/null)
  if [ "$auth_health" = "OK" ] || echo "$auth_health" | grep -qi "healthy\|ok"; then
    log_pass "test.coppertone.tech auth service healthy"
  else
    auth_status=$(curl -s -o /dev/null -w "%{http_code}" --max-time "$http_timeout" "http://127.0.0.1:9102/" 2>/dev/null)
    # Any HTTP answer at all (code other than 000) counts as "responding".
    if [ "$auth_status" != "000" ]; then
      log_pass "test.coppertone.tech auth service responding (/) "
    else
      log_warn "test.coppertone.tech auth service health unknown"
    fi
  fi

  # Chuckie backend health
  local chuckie_health
  chuckie_health=$(curl -s --max-time "$http_timeout" "http://127.0.0.1:9200/health" 2>/dev/null)
  if echo "$chuckie_health" | grep -qi "ok\|healthy"; then
    log_pass "chuckie.coppertone.tech backend healthy"
  else
    log_warn "chuckie.coppertone.tech backend health unknown"
  fi

  # API subdomain health (via HTTPS)
  local api_health
  api_health=$(curl -s --max-time "$http_timeout" "https://api.chuckie.coppertone.tech/health" 2>/dev/null)
  if echo "$api_health" | grep -qi "ok\|healthy"; then
    log_pass "api.chuckie.coppertone.tech backend healthy"
  else
    log_warn "api.chuckie.coppertone.tech backend health unknown"
  fi

  # =========================================
  section "Database Connectivity"
  # =========================================
  if podman exec postgres pg_isready -U gitea -d gitea >/dev/null 2>&1; then
    log_pass "PostgreSQL is accepting connections"
  else
    log_fail "PostgreSQL is not accepting connections"
  fi

  # Check gitea database
  local gitea_tables
  gitea_tables=$(podman exec postgres psql -U gitea -d gitea -c "SELECT count(*) FROM repository;" -t 2>/dev/null | tr -d ' ')
  # Validate that the value is numeric first, so an error text or empty
  # result never reaches the integer comparison.
  if [[ "$gitea_tables" =~ ^[0-9]+$ ]] && [ "$gitea_tables" -gt 0 ]; then
    log_pass "Gitea database has $gitea_tables repositories"
  else
    log_warn "Gitea database may be empty or inaccessible"
  fi

  # =========================================
  section "SSL Certificates"
  # =========================================
  local -a domains=(
    coppertone.tech
    test.coppertone.tech
    chuckie.coppertone.tech
    api.chuckie.coppertone.tech
    canva.chuckie.coppertone.tech
    games.coppertone.tech
    git.coppertone.tech
  )
  local domain expiry expiry_epoch now_epoch days_left
  for domain in "${domains[@]}"; do
    # `timeout` bounds the TLS handshake; the piped empty line makes
    # s_client exit once the certificate has been printed.
    expiry=$(echo \
      | timeout "$tls_timeout" openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null \
      | openssl x509 -noout -enddate 2>/dev/null \
      | cut -d= -f2)

    expiry_epoch=""
    if [ -n "$expiry" ]; then
      expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null)
    fi
    # No certificate, or an end date that `date` cannot parse: report the
    # check itself as failed instead of computing a bogus day count.
    if [ -z "$expiry_epoch" ]; then
      log_fail "$domain SSL certificate check failed"
      continue
    fi

    now_epoch=$(date +%s)
    days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
    if [ "$days_left" -lt 7 ]; then
      log_fail "$domain SSL expires in $days_left days"
    elif [ "$days_left" -lt 30 ]; then
      log_warn "$domain SSL expires in $days_left days"
    else
      log_pass "$domain SSL valid ($days_left days)"
    fi
  done

  # =========================================
  section "Summary"
  # =========================================
  echo ""
  echo -e "Results: ${GREEN}$PASS passed${NC}, ${RED}$FAIL failed${NC}, ${YELLOW}$WARN warnings${NC}"
  echo ""
  if [ "$FAIL" -gt 0 ]; then
    echo -e "${RED}Health check completed with failures!${NC}"
    return 1
  elif [ "$WARN" -gt 0 ]; then
    echo -e "${YELLOW}Health check completed with warnings.${NC}"
    return 0
  else
    echo -e "${GREEN}All health checks passed!${NC}"
    return 0
  fi
}
# Run with optional flags
#   --quiet | -q   print only FAIL/WARN lines and the Results summary
#   --json  | -j   suppress normal output and print the counters as JSON
#   (anything else) full report; exit status is main's return value
case "${1:-}" in
  --quiet|-q)
    # FAIL/WARN lines begin with an ANSI colour escape (ESC [ ... m), so the
    # pattern has to allow that prefix before the bracketed tag; anchoring
    # on '[' alone never matches them. $'...' turns \033 into the ESC byte.
    main 2>&1 | grep -E $'^(\033\\[[0-9;]*m)?\\[(FAIL|WARN)\\]|^Results:'
    ;;
  --json|-j)
    # JSON output for monitoring systems.
    # main runs in the current shell (only its output is redirected), so the
    # PASS/FAIL/WARN counters it updates are visible to the echo below.
    main >/dev/null 2>&1
    echo "{\"pass\": $PASS, \"fail\": $FAIL, \"warn\": $WARN, \"timestamp\": \"$(date -Iseconds)\"}"
    ;;
  *)
    main
    ;;
esac