Guest User

Untitled

a guest
Jul 18th, 2018
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.94 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # this script acts as a watchdog for a specified systemd service
  3. # tested on debian 9 - may work on other linux distros
  4. # this script runs as a systemd service
  5. # configuration of the service is done through the systemd unit file
  6. # email notifications require local smtp services
  7. # Run the following commands to set up and parameterise the watchdog service
  8.  
  9. """
  10. # change this path to where you will store and run this script
  11. watchdogpath=/opt/watchdog.py
  12. # change this variable to specify which service you want the watchdog to check
  13. servicename=apache2
  14. # the following section sets up the systemd unit file for the watchdog service
  15. cat > /etc/systemd/system/watchdog.service << EOF
  16. [Unit]
  17. Description=Watchdog
  18. After=${servicename}.service
  19. [Service]
  20. Type=simple
  21. # change these environment variables to suit your requirements
  22. Environment=SERVICENAME=${servicename}
  23. Environment=CHECKINTERVAL=60
  24. Environment=RESTARTATTEMPTINTERVAL=15
  25. Environment=RESTARTATTEMPTRETRIES=4
  26. Environment=NOTIFICATIONFROMEMAIL=mail@mydomain.com
  27. Environment=NOTIFICATIONTOEMAIL=mail@mydomain.com
  28. ExecStart=${watchdogpath}
  29. Restart=always
  30. [Install]
  31. WantedBy=multi-user.target
  32. EOF
  33. # run these commands to enable the service
  34. chmod 755 ${watchdogpath}
  35. systemctl enable watchdog.service
  36. # note: this command needs to be run any time the watchdog.service systemd script changes
  37. systemctl daemon-reload
  38. # start and check the service with
  39. systemctl restart watchdog.service
  40. journalctl --follow -u watchdog.service
  41. """
  42.  
  43. # Running the following commands will enable you to
  44. # run/test the script directly from the shell
  45. """
  46. # These Variables can be set in the shell if you want
  47. # to test the script directly from the shell
  48. export SERVICENAME=apache2
  49. export CHECKINTERVAL=60
  50. export RESTARTATTEMPTINTERVAL=15
  51. export RESTARTATTEMPTRETRIES=4
  52. export NOTIFICATIONFROMEMAIL=postmaster@fqdn.net
  53. export NOTIFICATIONTOEMAIL=postmaster@fqdn.net
  54. # Otherwise, the variables should be set in the systemd unit file
  55. """
  56.  
  57. import os
  58. import sys
  59. import subprocess
  60. import time
  61. import logging
  62. import re
  63. import smtplib
  64. from email.mime.text import MIMEText
  65.  
  66. emailactive=False # set this to true if if you have email configured locally
  67.  
  68. logging.basicConfig(level="INFO")
  69.  
  70. def checkposint(val):
  71. try:
  72. val=int(val)
  73. val > 0
  74. except:
  75. logging.warning("Config Error: Interval and Attempt variables must be positive integers")
  76. return val
  77.  
  78. servicename = os.environ.get("SERVICENAME", None)
  79. checkinterval = checkposint(os.environ.get("CHECKINTERVAL", None))
  80. restartinterval = checkposint(os.environ.get("RESTARTATTEMPTINTERVAL", None))
  81. restartattemptretries = checkposint(os.environ.get("RESTARTATTEMPTRETRIES", None))
  82. notificationfromemail = os.environ.get("NOTIFICATIONFROMEMAIL", None)
  83. notificationtoemail = os.environ.get("NOTIFICATIONTOEMAIL", None)
  84.  
  85. # check if servicename appears valid
  86. with open(os.devnull, 'wb') as hide_output:
  87. var = subprocess.check_output(['systemctl', 'list-units'])
  88. if not (servicename + ".service" in str(var)) :
  89. logging.warning("Servicename does not appear to be valid. Please check...")
  90. sys.exit(1)
  91.  
  92. # check the timing variables
  93. if None in (servicename, checkinterval, restartinterval, restartattemptretries, notificationfromemail, notificationtoemail):
  94. logging.warning("One or more configuration variables missing. Please check...")
  95. sys.exit(1)
  96.  
  97. # check that the email is valid
  98. for tempemail in [notificationtoemail, notificationfromemail ]:
  99. if not re.match("[^@]+@[^@]+\.[^@]+", tempemail):
  100. logging.warning("An Email configured for notifications is invalid. Please check...")
  101. sys.exit(1)
  102.  
  103. def emailthis(message,fromemail,toemail,servicename):
  104. servicealert = 'ServiceAlert : ' + servicename + " on " + os.uname()[1]
  105. msg['Subject'] = servicealert % message
  106. msg['From'] = fromemail
  107. msg['To'] = toemail
  108. # Send the message via our own SMTP server, but don't include the envelope header.
  109. s = smtplib.SMTP('localhost')
  110. s.sendmail(fromemail, [toemail], msg.as_string())
  111. s.quit()
  112.  
  113. def service_command(servicename,action):
  114. with open(os.devnull, 'wb') as hide_output:
  115. return subprocess.Popen(['service', servicename, action], stdout=hide_output, stderr=hide_output).wait()
  116.  
  117. def service_running(servicename):
  118. exit_code = service_command(servicename,'status')
  119. return exit_code == 0
  120.  
  121. def restart_service(servicename):
  122. exit_code = service_command(servicename,'restart')
  123. return exit_code == 0
  124.  
  125. logging.info('Watchdog service name is ' + servicename)
  126. logging.info('Watchdog check interval is ' + str(checkinterval))
  127. logging.info('Watchdog restart interval is ' + str(restartinterval))
  128. logging.info('Watchdog restart attempt retries is ' + str(restartattemptretries))
  129. logging.info('Watchdog notification from email is ' + notificationfromemail)
  130. logging.info('Watchdog notification to email is ' + notificationtoemail)
  131.  
  132. if service_running(servicename):
  133. message = "Service " + servicename + " is running"
  134. logging.info(message)
  135. while True:
  136. while service_running(servicename):
  137. time.sleep(checkinterval)
  138. message = "Service " + servicename + " is down"
  139. logging.warning(message)
  140. if emailactive: emailthis(message)
  141. restartattemptcount=0
  142. while restartattemptcount < restartattemptretries:
  143. restartattemptcount=restartattemptcount+1
  144. message = "Attempting restart #" + str(restartattemptcount) + " of " + servicename
  145. logging.warning(message)
  146. restart_service(servicename)
  147. if service_running(servicename):
  148. message = "Service " + servicename + " is back up and running after " + str(restartattemptcount) + " attempts"
  149. logging.info(message)
  150. if emailactive: emailthis(message)
  151. break
  152. time.sleep(restartinterval)
  153. if not service_running(servicename):
  154. message = "Service " + servicename + " can\'t be started after " + str(restartattemptcount) + " attempts"
  155. if emailactive: emailthis(message)
  156. logging.warning(message)
  157. time.sleep(checkinterval)
Add Comment
Please, Sign In to add comment