Advertisement
Guest User

Untitled

a guest
May 17th, 2017
542
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.51 KB | None | 0 0
  1. #!/usr/bin/python3.5
  2. import logging
  3. import urllib.request
  4. import urllib.parse
  5. import json
  6. import time
  7. import smtplib
  8. from email.mime.multipart import MIMEMultipart
  9. from email.mime.text import MIMEText
  10.  
  11. # Base URL and Authentication
  12. baseUrl = "https://gis.oiigds.com/arcgis/"
  13. username = "arc"
  14. password = "*********"
  15.  
  16.  
  17. # Thresholds
  18. max_down_num = 2
  19. max_down_time = 300
  20. script_interval = 30
  21.  
  22. # Logging
  23. #logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
  24. #logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
  25. #logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)-5s: %(message)s', datefmt='%Y-%m-%d_%H:%M:%S')
  26. logger = logging.getLogger(__name__)
  27. logger.setLevel(logging.INFO)
  28.  
  29. handler = logging.FileHandler('arcmon.log')
  30. handler.setLevel(logging.INFO)
  31.  
  32. #formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  33. formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
  34. handler.setFormatter(formatter)
  35.  
  36. logger.addHandler(handler)
  37.  
  38. # get token
  39. def getToken():
  40. params = urllib.parse.urlencode({'username' : username, 'password' : password, 'client' : 'requestip', 'expiration': 60, 'f' : 'json'})
  41. params = params.encode('ascii')
  42. try:
  43. generateToken=json.loads(urllib.request.urlopen(baseUrl + 'admin/generateToken', params).read().decode('utf-8'))
  44. if 'token' in generateToken: return generateToken['token']
  45. except:
  46. print('Could not parse json from token response')
  47.  
  48. # start service
  49. def startService(service_full_name,token):
  50. folder,servicename,servicetype = service_full_name.split('.')
  51. params = urllib.parse.urlencode({'f' : 'json', 'token': token})
  52. params = params.encode('ascii')
  53. try:
  54. start_post=json.loads(urllib.request.urlopen(baseUrl + 'admin/services/' + folder + '/' + servicename + '.' + servicetype + '/start', params).read().decode('utf-8'))
  55. if start_post['status'] == 'success': return True
  56. except:
  57. print('Could not parse json from start service response')
  58.  
  59. # send email
  60. def email(service_full_name,type):
  61. from_address = 'noreply@oceaneering.com'
  62. to_address = 'dcorrigan@oceaneering.com'
  63. message = MIMEMultipart()
  64. message['From'] = from_address
  65. message['To'] = to_address
  66. message['Subject'] = 'ArcGIS Monitor Alert'
  67. if (type == 'down'):
  68. body = 'The service: ' + service_full_name + ' is down more than ' + str(max_down_num) + ' times.'
  69. if (type == 'up'):
  70. body = 'The service: ' + service_full_name + ' has recovered.'
  71. if (type == 'fail'):
  72. body = 'The service: ' + service_full_name + ' was attempted to be restarted, but failed restarting.'
  73. if (type == 'unknown'):
  74. body = 'The service: ' + service_full_name + ' is in an unknown state.'
  75. message.attach(MIMEText(body, 'plain'))
  76. server = smtplib.SMTP('localhost', 25)
  77. text = message.as_string()
  78. server.sendmail(from_address, to_address, text)
  79. server.quit()
  80.  
  81. # update all_services dictionary
  82. def update_all_services(service_full_name,configured_state,realtime_state,status,downtime):
  83. status=str(status)
  84. downtime=str(downtime)
  85. logger.info('NEW STATUS: ' + service_full_name + ' - configured_state: ' + configured_state + ' - realtime_state: ' + realtime_state + ' - status: ' + status + ' - downtime: ' + downtime)
  86. service_status={service_full_name: {'configured_state': configured_state, 'realtime_state': realtime_state, 'status': status, 'downtime': downtime}}
  87. logger.debug('UPDATING all_services dist')
  88. all_services.update(service_status)
  89.  
  90. all_services={}
  91. logger.info('SCRIPT START')
  92. while True:
  93. logger.debug('REQUEST TOKEN')
  94. token=getToken()
  95. logger.debug('TOKEN: ' + token)
  96. logger.debug('TOKEN ACQUIRED')
  97. try:
  98. logger.debug('ATTEMPING TO POLL FOR LIST OF FOLDERS AT REST ENDPOINT: ' + baseUrl + 'admin/services/?f=json&token=' + token)
  99. folders=json.loads(urllib.request.urlopen(baseUrl + 'admin/services/?f=json&token=' + token).read().decode('utf-8'))
  100. logger.debug('FOLDER JSON OUTPUT: ' + str(folders))
  101. except:
  102. logger.exception('PROBLEM: UNABLE TO PULL FOLDER LIST')
  103. for folder in folders['folders']:
  104. if folder != 'System' and folder != 'Utilities':
  105. logger.debug('SKIPPING System and Utilities folders')
  106. logger.info('CHECKING ' + folder)
  107. try:
  108. logger.debug('ATTEMPING TO POLL LIST OF SERVICE REPORTS FOR FOLDER \'' + folder + '\' AT REST ENDPOINT: ' + baseUrl + 'admin/services/' + folder + '/report?f=pjson&token=' + token)
  109. services=json.loads(urllib.request.urlopen(baseUrl + 'admin/services/' + folder + '/report?f=pjson&token=' + token).read().decode('utf-8'))
  110. logger.debug(services)
  111. except:
  112. logger.exception('PROBLEM: UNABLE TO PULL REPORT FOR ' + str(folder))
  113. for report in services['reports']:
  114. logger.debug('REPORT OUTPUT: ' + str(report))
  115. service_name=report['instances']['serviceName']
  116. service_type=report['instances']['type']
  117. service_full_name=folder + '.' + service_name + '.' + service_type
  118. logger.debug('SERVICE NAME: ' + service_name + ' SERVICE TYPE: ' + service_type + ' SERVICE FULL NAME: ' + service_full_name)
  119. logger.debug('all_services dict keys' + str(all_services.keys()))
  120. if service_full_name in all_services.keys(): # if previous status collected
  121. logger.debug(service_full_name + ' EXISTS IN ' + str(all_services.keys()) + ' dict')
  122. logger.debug('NEW STATE: ' + service_full_name + ' CONFIGURED: ' + report['status']['configuredState'] + ' REALTIME: ' + report['status']['realTimeState'])
  123. if all_services[service_full_name]['status'] == 'BROKEN': # service was marked broken
  124. logger.debug(all_services[service_full_name]['status'] + ' WAS MARKED AS BROKEN')
  125. if all_services[service_full_name]['realtime_state'] == 'STOPPED' and report['status']['realTimeState'] == 'STARTED':
  126. logger.info('RECOVERED STATE FOR BROKEN SERVICE ' + service_full_name)
  127. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'OK','NA')
  128. logger.info('EMAILING RECOVERY EMAIL for service ' + service_full_name)
  129. email(service_full_name, 'up')
  130. logger.debug('CHECKING IF SERVICE \'' + service_full_name + '\' BROKEN SERVICE stored realtime_state is STOPPED and new realTimeState is STOPPED')
  131. if all_services[service_full_name]['realtime_state'] == 'STOPPED' and report['status']['realTimeState'] == 'STOPPED':
  132. logger.info('STILL BROKEN STATE FOR BROKEN SERVICE ' + service_full_name + ' LAST REALTIME: STOPPED - NEW REALTIME: STARTED')
  133. else: # service was not marked broken
  134. logger.debug('CHECKING STATUS FOR NON-BROKEN SERVICE: ' + service_full_name)
  135. if report['status']['configuredState'] == 'STARTED': # service configured to be started
  136. logger.debug('SERVICE ' + service_full_name + ' CONFIGURED AS STARTED')
  137. if all_services[service_full_name]['realtime_state'] == 'STARTED' and report['status']['realTimeState'] == 'STARTED': # realstate was started and still started
  138. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'OK','NA')
  139. logger.info('SERVICE OK: ' + service_full_name)
  140. if all_services[service_full_name]['realtime_state'] == 'STOPPED' and report['status']['realTimeState'] == 'STARTED': # realtime state was stopped and is now started
  141. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'OK','NA')
  142. logger.info('SERVICE RECOVERED: ' + service_full_name)
  143. # encountered down service
  144. if all_services[service_full_name]['realtime_state'] == 'STARTED' and report['status']['realTimeState'] == 'STOPPED' and all_services[service_full_name]['status'] == 'OK': # realtime state was started and is now stopped first time
  145. logger.info('FIRST SERVICE PROBLEM: ' + service_full_name + ' - ATTEMPTING TO START SERVICE')
  146. if (startService(service_full_name,token) == True):
  147. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'1',int(time.time()))
  148. logger.info('SERVICE SUCCESSFULLY STARTED: ' + service_full_name)
  149. else:
  150. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'BROKEN',int(time.time()))
  151. logger.info('PROBLEM: SERVICE FAILED TO START: ' + service_full_name)
  152. email(service_full_name, 'fail')
  153. if (all_services[service_full_name]['realtime_state'] == 'STOPPED' and report['status']['realTimeState'] == 'STOPPED' and all_services[service_full_name]['status'] == 1): # realtime state was stopped and is still stopped - not first time
  154. service_down_num=all_services[service_full_name]['status'] + 1
  155. logger.info('INCREASING DOWNTIME COUNT FOR ' + service_full_name + ' to ' + service_down_num)
  156. if (service_down_num <= max_down_num):
  157. logger.debug('DOWNTIME COUNT OF: ' + service_down_num + ' FOR ' + service_full_name + ' IS LESS THAN OR EQUAL TO max_down_num OF: ' + max_down_num)
  158. logger.info('ADDITIONAL SERVICE PROBLEM: ' + service_full_name + ' COUNT: ' + service_down_num + ' - ATTEMPTING TO START SERVICE')
  159. if (startService(service_full_name,token) == True):
  160. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],service_down_num,int(time.time()))
  161. logger.info('SERVICE SUCCESSFULLY STARTED: ' + service_full_name)
  162. else:
  163. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'BROKEN',int(time.time()))
  164. logger.info('PROBLEM: SERVICE FAILED TO START: ' + service_full_name)
  165. email(service_full_name, 'fail')
  166. else:
  167. difference = int(time.time()) - all_services[service_full_name]['downtime']
  168. logger.debug('DIFFERENCE BETWEEN FIRST DOWNTIME AND CURRENT DOWNTIME FOR ' + service_full_name + difference + ': SECONDS')
  169. if difference >= max_down_time:
  170. logger.debug('DIFFERENCE IS GREATER THAN MINIMUM THRESHOLD FOR ' + service_full_name)
  171. if (startService(service_full_name,token) == True):
  172. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],service_down_num,int(time.time()))
  173. logger.info('SERVICE SUCCESSFULLY STARTED: ' + service_full_name)
  174. else:
  175. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'BROKEN',int(time.time()))
  176. logger.info('PROBLEM: SERVICE FAILED TO START: ' + service_full_name)
  177. email(service_full_name, 'fail')
  178. else:
  179. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'BROKEN',int(time.time()))
  180. logger.info('PROBLEM: SERVICE DOWN MORE THAN MAX THRESHOLDS: ' + service_full_name + ' COUNT: ' + service_down_num + 'DIFFERENCE: ' + difference + ' MARKING SERVICE AS BROKEN')
  181. email(service_full_name, 'down')
  182. elif report['status']['configuredState'] == 'STOPPED': # service configured to be stopped
  183. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'OK','NA')
  184. logger.info('PROBLEM: SERVICE MANUALLY DOWN: ' + service_full_name)
  185. else:
  186. service_status={service_full_name: {'configured_state': report['status']['configuredState'], 'realtime_state': report['status']['realTimeState'], 'status': 'BROKEN'}}
  187. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'BROKEN',int(time.time()))
  188. logger.info('PROBLEM: SERVICE IS UNKNOWN CONFIGURED STATE: ' + service_full_name) # service configured state not started or stopped
  189. email(service_full_name, 'unknown')
  190. else:
  191. update_all_services(service_full_name,report['status']['configuredState'],report['status']['realTimeState'],'OK','NA')
  192. logger.info('SLEEPING ' + str(script_interval) + ' SECONDS')
  193. time.sleep(script_interval)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement