Files
web-hosts/scripts/healthcheck.sh
2025-12-26 13:38:04 +01:00

352 lines
8.5 KiB
Bash
Executable File

#!/bin/bash
#
# Web Hosts Health Check Script
# Checks all domains, restarts unhealthy containers, reloads nginx
#
# Commands: check (default), fix, reload, status
#
# Abort on any unhandled non-zero exit status. This also applies to bare
# arithmetic commands: (( expr )) returns status 1 when expr evaluates to 0.
set -e
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent directory of SCRIPT_DIR.
HOSTS_DIR="$(dirname "$SCRIPT_DIR")"
# Default parent of per-domain compose directories (fallback in restart_domain).
DOMAINS_DIR="$HOSTS_DIR/domains"
# Directory that holds the health check log file.
LOGS_DIR="$HOSTS_DIR/logs"
# Colors: ANSI escape sequences stored as literal backslash text; they are
# expanded only when printed with escape interpretation enabled.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Make sure the log directory exists before any log helper appends to the file.
mkdir -p "$LOGS_DIR"
# File to which every log helper appends its timestamped entry.
LOG_FILE="$LOGS_DIR/healthcheck.log"
# Informational message.
#   $1 - message text
# Prints a colored "[INFO]" line on stdout (backslash escapes expanded) and
# appends a timestamped plain-text copy to $LOG_FILE.
log() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE"
}
# Success message.
#   $1 - message text
# Prints a colored "[OK]" line on stdout (backslash escapes expanded) and
# appends a timestamped "OK: ..." entry to $LOG_FILE.
log_ok() {
  printf '%b\n' "${GREEN}[OK]${NC} $1"
  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] OK: $1" >> "$LOG_FILE"
}
# Warning message.
#   $1 - message text
# Prints a colored "[WARN]" line on stdout (backslash escapes expanded) and
# appends a timestamped "WARN: ..." entry to $LOG_FILE.
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] WARN: $1" >> "$LOG_FILE"
}
# Error message.
#   $1 - message text
# Prints a colored "[ERROR]" line on stdout (backslash escapes expanded) and
# appends a timestamped "ERROR: ..." entry to $LOG_FILE.
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $1" >> "$LOG_FILE"
}
# Domain configurations, keyed by domain name.
# Value format: port:compose_project:compose_dir
#   port            - local port probed on 127.0.0.1 by check_port
#   compose_project - value matched against the io.podman.compose.project label
#   compose_dir     - directory handed to restart_domain as the compose directory
declare -A DOMAINS=(
["chuckie.coppertone.tech"]="9201:chuckiecoppertonetech:/docker/web-hosts/domains/chuckie.coppertone.tech"
["test.coppertone.tech"]="9100:testcoppertonetech:/docker/web-hosts/domains/test.coppertone.tech"
["coppertone.tech"]="8090:coppertonetech:/docker/web-hosts/domains/coppertone.tech"
["ci.coppertone.tech"]="8000:woodpecker:/docker/woodpecker"
)
# Probe a URL with curl and succeed only on an HTTP 200 response.
#   $1 - URL to request
# Returns 0 when the reported status code is exactly "200", 1 otherwise.
# curl is limited to 5s for connecting and 10s overall; its stderr is dropped.
check_url() {
  local url="$1"
  local http_code
  # curl's own exit status is irrelevant here; only the printed code matters.
  http_code="$(curl --silent --output /dev/null --write-out "%{http_code}" \
    --connect-timeout 5 --max-time 10 "$url" 2>/dev/null)" || true
  [[ "$http_code" == "200" ]]
}
# Test whether something accepts connections on a local port.
#   $1 - port number on 127.0.0.1
# Returns the exit status of `nc -z` (0 when the connection succeeds);
# nc's stderr is discarded.
check_port() {
  nc -z 127.0.0.1 "$1" 2>/dev/null
}
# Check the state of every container that belongs to a compose project.
# Arguments:
#   $1 - compose project name (matched against io.podman.compose.project)
# Returns:
#   0 when at least one container exists and all of them are "Up" and not
#   reported as unhealthy; 1 when no container is found or any container is
#   stopped/unhealthy (a warning is logged per offending container).
check_containers() {
  local project="$1"
  local unhealthy=0
  local containers=""
  # Loop variables are local so they cannot clobber same-named globals.
  local name status

  # Get all containers for this project as "name:status" lines. A podman
  # failure is treated the same as "no containers found".
  containers="$(podman ps -a \
    --filter "label=io.podman.compose.project=$project" \
    --format "{{.Names}}:{{.Status}}" 2>/dev/null)" || containers=""
  if [[ -z "$containers" ]]; then
    return 1
  fi

  # Only the first ':' splits the line; the rest of the line is the status.
  while IFS=: read -r name status; do
    if [[ "$status" != Up* ]]; then
      log_warn "Container $name is not running: $status"
      unhealthy=1
    elif [[ "$status" == *"unhealthy"* ]]; then
      log_warn "Container $name is unhealthy"
      unhealthy=1
    fi
  done <<< "$containers"

  return "$unhealthy"
}
# Restart containers for a domain
restart_domain() {
local domain="$1"
local compose_dir="$2"
if [ -z "$compose_dir" ]; then
compose_dir="$DOMAINS_DIR/$domain"
fi
if [ ! -d "$compose_dir" ]; then
log_error "Directory not found: $compose_dir"
return 1
fi
# Check for compose file (compose.yaml or podman-compose.yml)
local compose_file=""
if [ -f "$compose_dir/compose.yaml" ]; then
compose_file="compose.yaml"
elif [ -f "$compose_dir/podman-compose.yml" ]; then
compose_file="podman-compose.yml"
else
log_error "No compose file found for $domain in $compose_dir"
return 1
fi
log "Restarting $domain containers..."
cd "$compose_dir"
# Stop and start (not just restart, to ensure clean state)
podman-compose -f "$compose_file" down 2>/dev/null || true
sleep 2
if podman-compose -f "$compose_file" up -d 2>&1 | tail -3; then
log_ok "$domain containers restarted"
return 0
else
log_error "Failed to restart $domain"
return 1
fi
}
# Ask nginx to reload its configuration.
# Tries `sudo /docker/www/startup.sh reload` first and falls back to sending
# the reload signal inside the gitea-nginx container. stderr of both attempts
# is discarded.
# Returns 0 when either attempt succeeds, 1 when both fail.
reload_nginx() {
  local outcome=""

  log "Reloading nginx..."

  if sudo /docker/www/startup.sh reload 2>/dev/null; then
    outcome="Nginx reloaded"
  elif sudo podman exec gitea-nginx nginx -s reload 2>/dev/null; then
    # Fallback: direct command
    outcome="Nginx reloaded (direct)"
  fi

  if [[ -z "$outcome" ]]; then
    log_error "Failed to reload nginx"
    return 1
  fi

  log_ok "$outcome"
  return 0
}
# Check a single domain and restart its containers when any check fails.
# Arguments:
#   $1 - domain name; must be a key of the DOMAINS array
# Returns:
#   0 when the HTTPS, port and container checks all pass.
#   1 when the domain is unknown, or when any check failed and a restart was
#     attempted (regardless of whether the restart itself succeeded).
check_domain() {
  local domain="$1"
  local config="${DOMAINS[$domain]:-}"
  # Parsed config fields are local so they do not leak into the caller's scope.
  local port project compose_dir
  local needs_restart=0

  if [[ -z "$config" ]]; then
    log_warn "Unknown domain: $domain"
    return 1
  fi
  IFS=':' read -r port project compose_dir <<< "$config"

  echo ""
  log "Checking $domain..."

  # Check 1: HTTPS endpoint
  if check_url "https://$domain/"; then
    log_ok "HTTPS endpoint responding"
  else
    log_warn "HTTPS endpoint not responding (502/503/timeout)"
    needs_restart=1
  fi

  # Check 2: Local port
  if check_port "$port"; then
    log_ok "Port $port is listening"
  else
    log_warn "Port $port is not listening"
    needs_restart=1
  fi

  # Check 3: Container health
  if check_containers "$project"; then
    log_ok "All containers healthy"
  else
    log_warn "Some containers unhealthy or stopped"
    needs_restart=1
  fi

  # Restart if needed. The non-zero return tells the caller that a restart
  # was triggered, so the restart's own status is intentionally not propagated.
  if (( needs_restart == 1 )); then
    log_warn "Issues detected, restarting $domain..."
    restart_domain "$domain" "$compose_dir" || true
    return 1
  fi
  return 0
}
# Main health check: check every configured domain, and if any of them needed
# a restart, wait, reload nginx and re-probe all HTTPS endpoints.
# Returns:
#   the number of domains that still fail the HTTPS probe after the restart
#   pass (0 when nothing was restarted or everything recovered).
healthcheck() {
  local issues=0
  local restarted=0
  local domain

  log "=========================================="
  log "Web Hosts Health Check"
  log "=========================================="

  for domain in "${!DOMAINS[@]}"; do
    if ! check_domain "$domain"; then
      # Plain assignment instead of ((restarted++)): the post-increment of 0
      # yields exit status 1, which would abort the script under `set -e`.
      restarted=$((restarted + 1))
    fi
  done

  # If any domains were restarted, wait and reload nginx
  if (( restarted > 0 )); then
    echo ""
    log "Waiting for containers to initialize..."
    sleep 10
    # Best effort: a failed reload must not prevent the re-check below.
    reload_nginx || true
    echo ""
    log "Re-checking domains after restart..."
    for domain in "${!DOMAINS[@]}"; do
      if check_url "https://$domain/"; then
        log_ok "$domain is now responding"
      else
        log_error "$domain still not responding"
        issues=$((issues + 1))
      fi
    done
  fi

  echo ""
  log "=========================================="
  if (( issues == 0 )); then
    log_ok "All domains healthy"
  else
    log_error "$issues domain(s) still have issues"
  fi
  log "=========================================="
  return "$issues"
}
# Quick fix - restart every configured domain, reload nginx, then report
# whether each HTTPS endpoint answers.
# Failures of individual steps are logged by the helpers and do not stop the
# run, so one broken domain cannot prevent the others from being restarted
# and verified (the script runs under `set -e`).
fix_all() {
  local domain config compose_dir

  log "=========================================="
  log "Restarting ALL domains"
  log "=========================================="

  for domain in "${!DOMAINS[@]}"; do
    config="${DOMAINS[$domain]}"
    # Only the compose directory is needed here; port and project are skipped.
    IFS=':' read -r _ _ compose_dir <<< "$config"
    restart_domain "$domain" "$compose_dir" || true
    echo ""
  done

  log "Waiting for containers..."
  sleep 10
  # Best effort, same as in healthcheck: still verify even if the reload fails.
  reload_nginx || true

  echo ""
  log "Verifying..."
  sleep 3
  for domain in "${!DOMAINS[@]}"; do
    if check_url "https://$domain/"; then
      log_ok "$domain: OK"
    else
      log_error "$domain: FAILED"
    fi
  done
}
# Print the command overview to stdout. The invoking script name ($0) is
# embedded in the "Usage:" line; each argument below becomes one output line.
usage() {
  printf '%s\n' \
    "Web Hosts Health Check Script" \
    "" \
    "Usage: $0 <command>" \
    "" \
    "Commands:" \
    " check Check all domains, restart if unhealthy, reload nginx" \
    " fix Force restart all domains and reload nginx" \
    " reload Just reload nginx" \
    " status Show current status without fixing" \
    ""
}
# Status only (no fixes): for every configured domain print the result of the
# HTTPS probe, the local port probe, and the list of the project's containers.
# Nothing is restarted or reloaded.
status_only() {
  local domain config port project containers

  log "=========================================="
  log "Web Hosts Status"
  log "=========================================="

  for domain in "${!DOMAINS[@]}"; do
    config="${DOMAINS[$domain]}"
    # The compose directory (third field) is not needed for a status report.
    IFS=':' read -r port project _ <<< "$config"

    echo ""
    echo -e "${BLUE}=== $domain ===${NC}"

    # URL check
    if check_url "https://$domain/"; then
      echo -e " HTTPS: ${GREEN}OK${NC}"
    else
      echo -e " HTTPS: ${RED}FAIL${NC}"
    fi

    # Port check
    if check_port "$port"; then
      echo -e " Port $port: ${GREEN}OK${NC}"
    else
      echo -e " Port $port: ${RED}FAIL${NC}"
    fi

    # Container status. `podman ps` exits 0 with empty output when nothing
    # matches the filter, so "None found" has to key off the output rather
    # than the exit status.
    echo " Containers:"
    containers="$(podman ps -a \
      --filter "label=io.podman.compose.project=$project" \
      --format " {{.Names}}: {{.Status}}" 2>/dev/null)" || containers=""
    if [[ -n "$containers" ]]; then
      printf '%s\n' "$containers"
    else
      echo " None found"
    fi
  done
  echo ""
}
# Entry point: dispatch on the first command-line argument, defaulting to
# "check" when none is given. Unknown commands print the usage text and
# exit with status 1.
case "${1:-check}" in
  check)  healthcheck ;;
  fix)    fix_all ;;
  reload) reload_nginx ;;
  status) status_only ;;
  *)
    usage
    exit 1
    ;;
esac