Advertisement
Guest User

Cordon and Drain

a guest
May 5th, 2025
8
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.43 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. set -e
  4.  
  5. NODE_NAME="$1"
  6. HOST_NAME="$2"
  7.  
  8. LOGFILE="/home/k8s/scripts/logs/reboot_drain_debug_$NODE_NAME.log"
  9.  
  10. if [ -z "$NODE_NAME" ]; then
  11. echo "Usage: $0 <node-name> <hostname>"
  12. exit 1
  13. fi
  14.  
  15. # Ensure log directory exists
  16. mkdir -p "$(dirname "$LOGFILE")"
  17.  
  18. # Initialize log file (touch and truncate)
  19. > "$LOGFILE"
  20.  
  21.  
  22. {
  23. echo "=== Start time: $(date '+%Y-%m-%d %H:%M:%S %Z %A') ==="
  24. echo "Host: $HOST_NAME"
  25. echo "Node: $NODE_NAME"
  26.  
  27. echo "Cordoning node $NODE_NAME..."
  28. echo 'kubectl cordon "$NODE_NAME"'
  29. kubectl cordon "$NODE_NAME"
  30.  
  31. sleep 3600
  32.  
  33. echo "Draining node $NODE_NAME..."
  34. echo 'kubectl drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --force --grace-period=30 --timeout=120s || true'
  35. if ! kubectl drain "$NODE_NAME" --ignore-daemonsets --delete-emptydir-data --force --grace-period=30 --timeout=120s; then
  36. echo "Drain failed for node $NODE_NAME. Logging remaining pods."
  37. kubectl get pods --all-namespaces -o wide | grep "$NODE_NAME" >> "$LOGFILE"
  38.  
  39. echo "Force deleting stuck pods on $NODE_NAME..."
  40. for pod in $(kubectl get pods --all-namespaces -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name,NODE:.spec.nodeName' --no-headers | awk -v node="$NODE_NAME" '$3 == node {print $1":"$2}'); do
  41. ns=${pod%%:*}
  42. name=${pod##*:}
  43. echo "Force deleting pod $name in namespace $ns" | tee -a "$LOGFILE"
  44. kubectl delete pod "$name" -n "$ns" --grace-period=0 --force >> "$LOGFILE" 2>&1 || true
  45. done
  46.  
  47. fi
  48.  
  49. echo "Restarting node $NODE_NAME..."
  50. echo 'ssh -tt -o StrictHostKeyChecking=no "$HOST_NAME" sudo reboot || true'
  51. ssh -tt -o StrictHostKeyChecking=no "$HOST_NAME" sudo reboot || true
  52.  
  53. sleep 30
  54.  
  55. TIMEOUT=600
  56. INTERVAL=30
  57. ELAPSED=0
  58.  
  59. while [ $ELAPSED -lt $TIMEOUT ]; do
  60. NODE_STATUS=$(kubectl get node "$NODE_NAME" --no-headers 2>/dev/null | awk '{print $2}')
  61. if [[ "$NODE_STATUS" == *"Ready"* ]]; then
  62. echo "Node $NODE_NAME is Ready."
  63. break
  64. else
  65. echo "Node $NODE_NAME is not ready yet. Waiting... ($ELAPSED/$TIMEOUT seconds)"
  66. sleep $INTERVAL
  67. ELAPSED=$((ELAPSED+INTERVAL))
  68. fi
  69. done
  70.  
  71. echo "Uncordoning node $NODE_NAME..."
  72. echo 'kubectl uncordon "$NODE_NAME"'
  73. kubectl uncordon "$NODE_NAME"
  74.  
  75. echo "Done. Node $NODE_NAME has been rebooted and is schedulable."
  76. echo "=== End time: $(date '+%Y-%m-%d %H:%M:%S %Z %A') ==="
  77. } 2>&1 | tee -a "$LOGFILE"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement