Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3.5
- import logging
- import urllib.request
- import urllib.parse
- import json
- import time
- import smtplib
- from email.mime.multipart import MIMEMultipart
- from email.mime.text import MIMEText
# ---------------------------------------------------------------------------
# ArcGIS Server connection
# ---------------------------------------------------------------------------
# Root of the ArcGIS Server site. The admin REST paths used by this script
# ('admin/generateToken', 'admin/services/...') are appended directly to it,
# so the trailing slash is required.
baseUrl = "https://gis.oiigds.com/arcgis/"
# NOTE(review): the admin credentials are hard-coded in source. Consider
# loading them from the environment or a protected config file instead.
username = "arc"
password = "*********"

# ---------------------------------------------------------------------------
# Thresholds
# ---------------------------------------------------------------------------
# Highest consecutive-down count for which a restart is still attempted
# unconditionally.
max_down_num = 2
# Seconds: once the down count exceeds max_down_num, a further restart is only
# attempted if at least this long has passed since the recorded downtime;
# otherwise the service is marked BROKEN.
max_down_time = 300
# Seconds slept between two polling passes.
script_interval = 30

# ---------------------------------------------------------------------------
# Logging: INFO and above go to 'arcmon.log' in the current working directory.
# ---------------------------------------------------------------------------
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('arcmon.log')
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
def getToken():
    """Request an admin token from the ArcGIS Server.

    POSTs the module-level ``username`` / ``password`` to
    ``admin/generateToken`` under ``baseUrl`` with ``client=requestip`` and
    ``expiration=60`` (presumably minutes -- confirm against the ArcGIS docs).

    Returns:
        The token string on success, or ``None`` when the request fails, the
        response is not valid JSON, or the response carries no token. Every
        failure is written to the module logger; nothing is raised.
    """
    payload = urllib.parse.urlencode({
        'username': username,
        'password': password,
        'client': 'requestip',
        'expiration': 60,
        'f': 'json',
    }).encode('ascii')

    try:
        # The context manager closes the HTTP response; the timeout keeps a
        # hung server from blocking the monitor forever.
        with urllib.request.urlopen(baseUrl + 'admin/generateToken',
                                    payload, timeout=30) as response:
            reply = json.loads(response.read().decode('utf-8'))
    except Exception:
        # Deliberately broad: the monitor must survive any network, HTTP or
        # parsing failure. Unlike a bare ``except:`` this still lets
        # KeyboardInterrupt / SystemExit through.
        logger.exception('PROBLEM: COULD NOT OBTAIN TOKEN FROM %sadmin/generateToken', baseUrl)
        return None

    token = reply.get('token') if isinstance(reply, dict) else None
    if not token:
        logger.error('PROBLEM: TOKEN RESPONSE CONTAINED NO TOKEN: %s', reply)
        return None
    return token
def startService(service_full_name, token):
    """Ask the ArcGIS Server to start one service.

    Args:
        service_full_name: ``"<folder>.<serviceName>.<serviceType>"``. Only
            the first dot separates the folder from the rest, so a dot inside
            the service name no longer breaks the call.
        token: Admin token as returned by ``getToken()``.

    Returns:
        ``True`` when the server answers ``{"status": "success"}``; ``False``
        for a malformed name, any request or parsing failure, or any other
        server reply. Failures are logged; nothing is raised.
    """
    folder, _, service = service_full_name.partition('.')
    if not folder or not service:
        logger.error('PROBLEM: MALFORMED SERVICE NAME: %s', service_full_name)
        return False

    # Quote the path components so unusual characters cannot corrupt the URL.
    url = (baseUrl + 'admin/services/' + urllib.parse.quote(folder) + '/'
           + urllib.parse.quote(service) + '/start')
    payload = urllib.parse.urlencode({'f': 'json', 'token': token}).encode('ascii')

    try:
        with urllib.request.urlopen(url, payload, timeout=60) as response:
            reply = json.loads(response.read().decode('utf-8'))
    except Exception:
        # Deliberately broad so a failed restart never kills the monitor;
        # KeyboardInterrupt / SystemExit still propagate.
        logger.exception('PROBLEM: START REQUEST FAILED FOR %s', service_full_name)
        return False

    if isinstance(reply, dict) and reply.get('status') == 'success':
        return True
    logger.error('PROBLEM: START REQUEST FOR %s WAS REJECTED: %s', service_full_name, reply)
    return False
def email(service_full_name, type):
    """Send a plain-text alert about one service via the local SMTP server.

    Args:
        service_full_name: Name of the service the alert is about.
        type: Alert kind -- ``'down'``, ``'up'``, ``'fail'`` or ``'unknown'``.
            (The parameter shadows the builtin ``type``; the name is kept so
            existing callers keep working.)

    Returns:
        ``True`` if the message was handed to the SMTP server, ``False`` if
        sending failed. A send failure is logged, not raised, so a mail
        outage cannot stop the monitor.

    Raises:
        ValueError: if ``type`` is not one of the four known alert kinds.
    """
    suffixes = {
        'down': ' is down more than ' + str(max_down_num) + ' times.',
        'up': ' has recovered.',
        'fail': ' was attempted to be restarted, but failed restarting.',
        'unknown': ' is in an unknown state.',
    }
    if type not in suffixes:
        # Previously this fell through and failed on an unbound ``body``.
        raise ValueError('unknown alert type: %r' % (type,))
    body = 'The service: ' + service_full_name + suffixes[type]

    from_address = 'noreply@oceaneering.com'
    to_address = 'dcorrigan@oceaneering.com'

    message = MIMEMultipart()
    message['From'] = from_address
    message['To'] = to_address
    message['Subject'] = 'ArcGIS Monitor Alert'
    message.attach(MIMEText(body, 'plain'))

    try:
        # The context manager issues QUIT and closes the socket even when
        # sendmail() raises.
        with smtplib.SMTP('localhost', 25) as server:
            server.sendmail(from_address, to_address, message.as_string())
    except (smtplib.SMTPException, OSError):
        logger.exception('PROBLEM: COULD NOT SEND %s ALERT FOR %s', type, service_full_name)
        return False
    return True
def update_all_services(service_full_name, configured_state, realtime_state, status, downtime):
    """Record the latest known state of a service in ``all_services``.

    Args:
        service_full_name: Key of the entry to create or overwrite.
        configured_state: ``configuredState`` taken from the server report.
        realtime_state: ``realTimeState`` taken from the server report.
        status: Monitor verdict; callers in this file pass ``'OK'``,
            ``'BROKEN'`` or a down count.
        downtime: Callers in this file pass ``'NA'`` or an epoch timestamp in
            whole seconds.

    ``status`` and ``downtime`` are stored exactly as given. They used to be
    converted to ``str`` first, which broke later numeric comparison and
    arithmetic on the stored values. The logged line is unchanged.
    """
    # Lazy %-formatting stringifies for the log only and cannot fail on a
    # non-string state.
    logger.info(
        'NEW STATUS: %s - configured_state: %s - realtime_state: %s - status: %s - downtime: %s',
        service_full_name, configured_state, realtime_state, status, downtime)
    logger.debug('UPDATING all_services dist')
    all_services[service_full_name] = {
        'configured_state': configured_state,
        'realtime_state': realtime_state,
        'status': status,
        'downtime': downtime,
    }
# Last recorded state per service, keyed by "<folder>.<serviceName>.<type>".
# Written through update_all_services().
all_services = {}

# Folders that are never inspected.
_SKIPPED_FOLDERS = ('System', 'Utilities')


def _fetch_json(url):
    """GET ``url`` and return the decoded JSON document."""
    with urllib.request.urlopen(url, timeout=60) as response:
        return json.loads(response.read().decode('utf-8'))


def _down_count(status):
    """Return a stored status as an int down count, or None for 'OK'/'BROKEN'.

    Accepts both ints and numeric strings, so it works whether the stored
    status was kept as a number or stringified.
    """
    try:
        return int(status)
    except (TypeError, ValueError):
        return None


def _restart_or_mark_broken(service_full_name, configured, realtime, down_count, token):
    """Try to start the service; store the count on success, BROKEN on failure."""
    if startService(service_full_name, token):
        update_all_services(service_full_name, configured, realtime,
                            down_count, int(time.time()))
        logger.info('SERVICE SUCCESSFULLY STARTED: %s', service_full_name)
    else:
        update_all_services(service_full_name, configured, realtime,
                            'BROKEN', int(time.time()))
        logger.info('PROBLEM: SERVICE FAILED TO START: %s', service_full_name)
        email(service_full_name, 'fail')


def _check_service(folder, report, token):
    """Compare one service report with the stored state and act on it.

    Exactly one transition is applied per call. The previous run of
    independent ``if`` statements could count a first failure twice within
    the same pass.
    """
    service_name = report['instances']['serviceName']
    service_type = report['instances']['type']
    service_full_name = folder + '.' + service_name + '.' + service_type
    configured = report['status']['configuredState']
    realtime = report['status']['realTimeState']
    logger.debug('NEW STATE: %s CONFIGURED: %s REALTIME: %s',
                 service_full_name, configured, realtime)

    previous = all_services.get(service_full_name)
    if previous is None:
        # First sighting: only record a baseline.
        # NOTE(review): a service that is already STOPPED when first seen is
        # stored as 'OK' and is never restarted by the rules below -- confirm
        # this is intended.
        update_all_services(service_full_name, configured, realtime, 'OK', 'NA')
        return

    was = previous['realtime_state']
    status = previous['status']

    if status == 'BROKEN':
        # A broken service is left alone until the server reports it STARTED.
        if was == 'STOPPED' and realtime == 'STARTED':
            logger.info('RECOVERED STATE FOR BROKEN SERVICE %s', service_full_name)
            update_all_services(service_full_name, configured, realtime, 'OK', 'NA')
            logger.info('EMAILING RECOVERY EMAIL for service %s', service_full_name)
            email(service_full_name, 'up')
        elif was == 'STOPPED' and realtime == 'STOPPED':
            logger.info('STILL BROKEN STATE FOR BROKEN SERVICE %s'
                        ' LAST REALTIME: STOPPED - NEW REALTIME: STOPPED',
                        service_full_name)
        return

    if configured == 'STOPPED':
        # Stopped on purpose: track it, but do not try to start it.
        update_all_services(service_full_name, configured, realtime, 'OK', 'NA')
        logger.info('PROBLEM: SERVICE MANUALLY DOWN: %s', service_full_name)
        return

    if configured != 'STARTED':
        update_all_services(service_full_name, configured, realtime,
                            'BROKEN', int(time.time()))
        logger.info('PROBLEM: SERVICE IS UNKNOWN CONFIGURED STATE: %s', service_full_name)
        email(service_full_name, 'unknown')
        return

    # From here on the service is configured to be running.
    count = _down_count(status)
    if was == 'STARTED' and realtime == 'STARTED':
        update_all_services(service_full_name, configured, realtime, 'OK', 'NA')
        logger.info('SERVICE OK: %s', service_full_name)
    elif was == 'STOPPED' and realtime == 'STARTED':
        update_all_services(service_full_name, configured, realtime, 'OK', 'NA')
        logger.info('SERVICE RECOVERED: %s', service_full_name)
    elif was == 'STARTED' and realtime == 'STOPPED' and status == 'OK':
        # First time this service is seen down.
        logger.info('FIRST SERVICE PROBLEM: %s - ATTEMPTING TO START SERVICE',
                    service_full_name)
        _restart_or_mark_broken(service_full_name, configured, realtime, 1, token)
    elif was == 'STOPPED' and realtime == 'STOPPED' and count is not None:
        # Still down after an earlier restart attempt.
        service_down_num = count + 1
        logger.info('INCREASING DOWNTIME COUNT FOR %s to %d',
                    service_full_name, service_down_num)
        if service_down_num <= max_down_num:
            logger.info('ADDITIONAL SERVICE PROBLEM: %s COUNT: %d'
                        ' - ATTEMPTING TO START SERVICE',
                        service_full_name, service_down_num)
            _restart_or_mark_broken(service_full_name, configured, realtime,
                                    service_down_num, token)
            return
        # A numeric status is always stored together with an epoch timestamp.
        difference = int(time.time()) - int(previous['downtime'])
        logger.debug('SECONDS SINCE RECORDED DOWNTIME FOR %s: %d',
                     service_full_name, difference)
        if difference >= max_down_time:
            _restart_or_mark_broken(service_full_name, configured, realtime,
                                    service_down_num, token)
        else:
            update_all_services(service_full_name, configured, realtime,
                                'BROKEN', int(time.time()))
            logger.info('PROBLEM: SERVICE DOWN MORE THAN MAX THRESHOLDS: %s COUNT: %d'
                        ' DIFFERENCE: %d MARKING SERVICE AS BROKEN',
                        service_full_name, service_down_num, difference)
            email(service_full_name, 'down')


def _poll_once():
    """Run one pass: get a token, list the folders, check every service report."""
    logger.debug('REQUEST TOKEN')
    token = getToken()
    if not token:
        logger.error('PROBLEM: NO TOKEN AVAILABLE - SKIPPING THIS POLL')
        return
    # The token itself is intentionally never written to the log.
    logger.debug('TOKEN ACQUIRED')

    try:
        query = urllib.parse.urlencode([('f', 'json'), ('token', token)])
        folders = _fetch_json(baseUrl + 'admin/services/?' + query)['folders']
    except Exception:
        logger.exception('PROBLEM: UNABLE TO PULL FOLDER LIST')
        return

    report_query = urllib.parse.urlencode([('f', 'pjson'), ('token', token)])
    for folder in folders:
        if folder in _SKIPPED_FOLDERS:
            logger.debug('SKIPPING System and Utilities folders')
            continue
        logger.info('CHECKING %s', folder)
        try:
            reports = _fetch_json(baseUrl + 'admin/services/' + urllib.parse.quote(folder)
                                  + '/report?' + report_query)['reports']
        except Exception:
            # Skip this folder rather than reuse stale or undefined data.
            logger.exception('PROBLEM: UNABLE TO PULL REPORT FOR %s', folder)
            continue
        for report in reports:
            try:
                _check_service(folder, report, token)
            except Exception:
                # One malformed report must not stop the remaining services.
                logger.exception('PROBLEM: UNABLE TO PROCESS REPORT IN %s: %s', folder, report)


def main():
    """Poll forever, sleeping ``script_interval`` seconds between passes."""
    logger.info('SCRIPT START')
    while True:
        try:
            _poll_once()
        except Exception:
            # Last line of defence for the long-running monitor.
            logger.exception('PROBLEM: POLL FAILED')
        logger.info('SLEEPING %s SECONDS', script_interval)
        time.sleep(script_interval)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement