Advertisement
Guest User

Untitled

a guest
Jun 29th, 2016
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.59 KB | None | 0 0
  #!/usr/bin/env bash
  #
  # Rolling restart of an Elasticsearch cluster, one server at a time.
  #
  # For every server in the configured index range the script:
  #   1. skips the server if the execution-id marker file is already there,
  #   2. schedules an Icinga downtime for it,
  #   3. disables replication through es-tool,
  #   4. waits for the operator to reboot the server by hand,
  #   5. waits until ssh, then Elasticsearch, answer again,
  #   6. re-enables replication and waits for "green" in _cat/health,
  #   7. writes the execution-id marker file on the server.
  # Once every server has been processed, the marker files are removed.
  #
  # Usage: run without arguments from a machine that has ssh access to the
  # Elasticsearch servers and to the Icinga host.

  set -euo pipefail

  readonly es_server_prefix=elastic10
  readonly es_server_suffix=.eqiad.wmnet
  readonly first_server_index=1
  # NOTE: this value is used as the *last* index of the range, so it only
  # equals the number of servers as long as first_server_index is 1.
  readonly nb_of_servers_in_cluster=47

  # used to keep track on which server this script has already been executed
  # this enables this script to be mostly idempotent, so that it can be
  # relaunched as is in case of error
  # WARN: needs to be changed manually
  readonly execution_id=ZplljWNP9hNobookM8fUhFuivkAKm8w3mVlQawXBg5c4JxzuEPcJcLgtY8ms0Pg

  readonly marker_file=/var/lib/elasticsearch/script_execution_id
  readonly icinga_host=neon.wikimedia.org
  readonly health_url=127.0.0.1:9200/_cat/health
  # Pause between two attempts of a failing es-tool call, so that a persistent
  # failure does not turn into a tight loop of ssh connections.
  readonly retry_delay_seconds=1

  #######################################
  # Runs a command until it succeeds, reporting every failure.
  # Arguments: $1 - message printed after each failed attempt
  #            $2... - command to run, with its arguments
  #######################################
  retry_with_message() {
    local failure_message=$1
    shift
    until "$@"; do
      echo "${failure_message}"
      sleep "${retry_delay_seconds}"
    done
  }

  #######################################
  # Runs a command once per second until it succeeds, printing a dot after
  # each failed attempt.
  # Arguments: $@ - command to run, with its arguments
  #######################################
  wait_with_dots() {
    until "$@"; do
      printf '.'
      sleep 1
    done
  }

  #######################################
  # Succeeds when ssh can run a trivial command on the server.
  # Arguments: $1 - fully qualified server name
  #######################################
  ssh_is_up() {
    ssh "$1" true &> /dev/null
  }

  #######################################
  # Runs the full restart sequence for one server.
  # Arguments: $1 - zero-padded server index (e.g. 07)
  # Returns:   0 when the server was restarted or had already been handled
  #######################################
  restart_server() {
    local index=$1
    local host="${es_server_prefix}${index}"
    local server="${host}${es_server_suffix}"
    local reason

    # -s keeps grep quiet when the marker file does not exist yet, which is
    # the normal case on a first run.
    if ssh "${server}" "grep -qsF ${execution_id} ${marker_file}"; then
      echo "restart already executed on ${host}, skipping..."
      return 0
    fi

    echo "disabling alerts for ${host}"
    # ssh joins its arguments with spaces and hands the result to the remote
    # shell, so the reason has to be escaped for that shell; otherwise only
    # its first word would reach the -r option.
    printf -v reason '%q' "restarting for config change - ${USER:-unknown}"
    ssh "${icinga_host}" "sudo icinga-downtime -h ${host} -d 1800 -r ${reason}"

    echo "disabling replication"
    retry_with_message "failed to stop replication, trying again" \
      ssh "${server}" es-tool stop-replication

    echo "ready to start restart ${host}"

    # echo "rebooting ${host}"
    # ssh neodymium.eqiad.wmnet sudo salt ${server} system.reboot
    # For some reason, rebooting as above does not work, host never completes shutdown
    echo "You can now reboot ${server}"
    echo "Press [enter] when done"
    read -r

    echo "waiting for server to be up"
    wait_with_dots ssh_is_up "${server}"
    echo "server is up"

    echo "waiting for elasticsearch to be started"
    wait_with_dots ssh "${server}" curl -s "${health_url}"
    echo "elasticsearch is started"

    echo "enabling replication"
    retry_with_message "failed to start replication, trying again" \
      ssh "${server}" es-tool start-replication

    echo "waiting for cluster recovery"
    # The polling loop runs on the server itself, inside a single ssh session.
    ssh "${server}" "until curl -s ${health_url} | grep green; do echo -n .; sleep 10; done"
    echo "cluster is green"

    echo "creating file to keep track of script execution"
    ssh "${server}" "echo ${execution_id} | sudo tee ${marker_file}"

    echo "Done for ${host}"
    echo "=============================================="
  }

  #######################################
  # Removes the execution-id marker file from one server.
  # Arguments: $1 - zero-padded server index (e.g. 07)
  #######################################
  remove_marker() {
    local server="${es_server_prefix}$1${es_server_suffix}"

    # -f: a marker that is already gone must not abort the cleanup pass.
    ssh "${server}" "sudo rm -f ${marker_file}"
  }

  #######################################
  # Restarts every server in the configured range, then cleans up the markers.
  #######################################
  main() {
    # Indexes are zero-padded to the width of the last index (1 -> 01 for 47).
    local width=${#nb_of_servers_in_cluster}
    local n index

    for ((n = first_server_index; n <= nb_of_servers_in_cluster; n++)); do
      printf -v index '%0*d' "${width}" "${n}"
      restart_server "${index}"
    done

    echo "Cluster restart completed"
    echo "Cleaning up..."

    for ((n = first_server_index; n <= nb_of_servers_in_cluster; n++)); do
      printf -v index '%0*d' "${width}" "${n}"
      remove_marker "${index}"
    done
  }

  main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement