Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Cordon, drain and reboot a Kubernetes node, wait for it to report Ready
# again, then uncordon it. Everything the script prints (stdout and stderr)
# is also appended to a per-node log file that is truncated on every run.
#
# Usage: <script> <node-name> <hostname>
#   node-name  node name as known to kubectl
#   hostname   host handed to ssh for the reboot
#
# Optional environment overrides (defaults equal the former hard-coded values):
#   LOG_DIR         directory holding the log file (/home/k8s/scripts/logs)
#   CORDON_WAIT     seconds to sleep between cordon and drain (3600)
#   READY_TIMEOUT   maximum seconds to wait for Ready after the reboot (600)
#   READY_INTERVAL  seconds between readiness polls (30)
#
# Exit status: 0 when the node came back Ready and was uncordoned; non-zero on
# a usage error, when a mandatory kubectl step fails, or when the node did not
# report Ready within READY_TIMEOUT.

# pipefail is required so that a failure inside the logged group below is not
# masked by the exit status of `tee`.
set -euo pipefail

NODE_NAME="${1:-}"
HOST_NAME="${2:-}"
LOG_DIR="${LOG_DIR:-/home/k8s/scripts/logs}"
LOGFILE="$LOG_DIR/reboot_drain_debug_$NODE_NAME.log"
CORDON_WAIT="${CORDON_WAIT:-3600}"
READY_TIMEOUT="${READY_TIMEOUT:-600}"
READY_INTERVAL="${READY_INTERVAL:-30}"

# Both arguments are mandatory: without a hostname the ssh reboot below would
# fail silently (it is guarded by `|| true`) and the node would never restart.
if [[ -z "$NODE_NAME" || -z "$HOST_NAME" ]]; then
  echo "Usage: $0 <node-name> <hostname>" >&2
  exit 1
fi

#######################################
# Print the STATUS column of `kubectl get node` for one node.
# Prints an empty string when kubectl fails (e.g. API server unreachable).
# Arguments: $1 - node name
# Outputs:   status string such as "Ready,SchedulingDisabled" on stdout
# Returns:   always 0, so callers under `set -e` keep polling
#######################################
node_status() {
  kubectl get node "$1" --no-headers 2>/dev/null | awk '{print $2}' || true
}

#######################################
# Force-delete every pod scheduled on the given node (best effort).
# Arguments: $1 - node name
# Outputs:   one progress line per pod plus kubectl's own output
# Returns:   0; individual delete failures are tolerated
#######################################
force_delete_pods_on_node() {
  local node=$1
  local ns name
  while read -r ns name; do
    [[ -n "$ns" && -n "$name" ]] || continue
    echo "Force deleting pod $name in namespace $ns"
    # stdin is detached so kubectl cannot consume the pod list being read.
    kubectl delete pod "$name" -n "$ns" --grace-period=0 --force </dev/null || true
  done < <(
    kubectl get pods --all-namespaces \
      --field-selector "spec.nodeName=$node" \
      -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name' \
      --no-headers
  )
}

# Ensure log directory exists
mkdir -p "$LOG_DIR"

# Initialize log file (create and truncate)
: > "$LOGFILE"

# The single-quoted echo lines below deliberately print the command template
# unexpanded, as a marker in the log for the step that follows.
# shellcheck disable=SC2016
{
  echo "=== Start time: $(date '+%Y-%m-%d %H:%M:%S %Z %A') ==="
  echo "Host: $HOST_NAME"
  echo "Node: $NODE_NAME"

  echo "Cordoning node $NODE_NAME..."
  echo 'kubectl cordon "$NODE_NAME"'
  kubectl cordon "$NODE_NAME"

  # Pause between cordon and drain; the duration is configurable through
  # CORDON_WAIT and defaults to the original one hour.
  sleep "$CORDON_WAIT"

  echo "Draining node $NODE_NAME..."
  echo 'kubectl drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --force --grace-period=30 --timeout=120s || true'
  if ! kubectl drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --force --grace-period=30 --timeout=120s; then
    echo "Drain failed for node $NODE_NAME. Logging remaining pods."
    # Select by exact node name on the server side; a grep on the table output
    # would match substrings and would abort the script under `set -e` when
    # nothing is left on the node.
    kubectl get pods --all-namespaces -o wide \
      --field-selector "spec.nodeName=$NODE_NAME" || true
    echo "Force deleting stuck pods on $NODE_NAME..."
    force_delete_pods_on_node "$NODE_NAME"
  fi

  echo "Restarting node $NODE_NAME..."
  echo 'ssh -tt -o StrictHostKeyChecking=no "$HOST_NAME" sudo reboot || true'
  # The connection is expected to drop while the host goes down, so a non-zero
  # ssh status is tolerated here.
  ssh -tt -o StrictHostKeyChecking=no "$HOST_NAME" sudo reboot || true

  # NOTE(review): the API may still report the pre-reboot Ready status for a
  # short while after the host goes down - confirm 30s is long enough here.
  sleep 30

  ready=0
  elapsed=0
  while (( elapsed < READY_TIMEOUT )); do
    status=$(node_status "$NODE_NAME")
    # STATUS looks like "Ready,SchedulingDisabled" or "NotReady,...". Compare
    # the first token exactly: a substring match on "Ready" would also accept
    # "NotReady".
    if [[ "${status%%,*}" == "Ready" ]]; then
      echo "Node $NODE_NAME is Ready."
      ready=1
      break
    fi
    echo "Node $NODE_NAME is not ready yet. Waiting... ($elapsed/$READY_TIMEOUT seconds)"
    sleep "$READY_INTERVAL"
    elapsed=$((elapsed + READY_INTERVAL))
  done

  # The node is uncordoned even after a timeout (as before) so that it becomes
  # schedulable on its own once it eventually recovers.
  echo "Uncordoning node $NODE_NAME..."
  echo 'kubectl uncordon "$NODE_NAME"'
  kubectl uncordon "$NODE_NAME"

  if (( ready )); then
    echo "Done. Node $NODE_NAME has been rebooted and is schedulable."
    rc=0
  else
    echo "WARNING: node $NODE_NAME did not report Ready within $READY_TIMEOUT seconds."
    rc=1
  fi
  echo "=== End time: $(date '+%Y-%m-%d %H:%M:%S %Z %A') ==="
  exit "$rc"
} 2>&1 | tee -a "$LOGFILE"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement