Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import atexit
- import getpass
- import time
- from pyVim import connect
- from pyVmomi import vmodl
- from pyVmomi import vim
- import tools.cli as cli
# Per-VM monitoring state. Every dict below is keyed by VM name.
vm = {}              # VM name -> VirtualMachine managed object (filled by init)
maxCpuUsage = {}     # CPU ceiling divided by vCPU count (filled by init)
cpuHighCounter = {}  # polls classified as "high CPU"
cpuLowCounter = {}   # polls classified as "low CPU"
memCounter = {}      # samples where the memory-active counter read 0
netCounter = {}      # samples where the network-transmit counter read 0
failCounter = {}     # consecutive polls that were neither high nor low CPU

# Performance counter IDs that refresh() looks for in the query results.
# NOTE(review): these are hard-coded numeric counter keys; presumably they
# match the target vCenter's perfCounter table -- confirm before reuse.
cpuUsageInMHz = 5
memActive = 65545
netBytesTx = 196619

# Counter IDs to request; init() rebinds this to the three IDs above.
counterIDs = {}
def resetCounter(vmname):
    """Zero the CPU-high, CPU-low, memory and network counters of one VM.

    Only those four module-level dicts are written; failCounter is left
    for the caller to manage.
    """
    for counter in (cpuHighCounter, cpuLowCounter, memCounter, netCounter):
        counter[vmname] = 0
def detection(vmname, heartbeat):
    """Report a suspected fault for one VM based on its accumulated counters.

    The checks run in priority order and at most one message is printed
    through print_error():

    * "kernel panic"      -- heartbeat is "red", CPU has been high for at
                             least 15 polls and the memory counter has
                             reached 15 as well.
    * "kernel loop"       -- CPU has been high for at least 15 polls while
                             the heartbeat is anything other than "red".
    * "long time idle vm" -- CPU has been low for at least 60 polls.

    Args:
        vmname: key into the module-level counter dicts.
        heartbeat: guest heartbeat status string of the VM.

    Raises:
        KeyError: if vmname has no entry in the counter dicts.
    """
    # 15 polls is about 5 minutes and 60 polls about 20 minutes at the
    # 20 second poll period used by main().
    cpu_high = cpuHighCounter[vmname] >= 15

    # The memory check must look at this VM's own counter; comparing the
    # whole memCounter dict against an int raises TypeError on Python 3.
    if heartbeat == "red" and cpu_high and memCounter[vmname] >= 15:
        print_error(vmname, "kernel panic")
    elif cpu_high and heartbeat != "red":
        print_error(vmname, "kernel loop")
    elif cpuLowCounter[vmname] >= 60:
        print_error(vmname, "long time idle vm")
def init(service_instance):
    """Discover all VMs and initialise the module-level monitoring state.

    For every VirtualMachine found under the root folder this records the
    managed object in ``vm``, zeroes all of its counters and stores its
    per-vCPU CPU ceiling in ``maxCpuUsage``. Finally ``counterIDs`` is
    rebound to the three performance counter IDs that refresh() queries.

    Args:
        service_instance: connected service instance returned by
            pyVim.connect.
    """
    global counterIDs  # rebound below; the dicts are only mutated in place

    content = service_instance.RetrieveContent()
    containerView = content.viewManager.CreateContainerView(
        content.rootFolder,    # starting point to look into
        [vim.VirtualMachine],  # object types to look for
        True)                  # look into sub-folders recursively
    try:
        # Copy the list so the view can be released straight away.
        children = list(containerView.view)
    finally:
        # Views stay allocated on the server until they are destroyed.
        containerView.DestroyView()

    for virtual_machine in children:
        summary = virtual_machine.summary
        vmname = summary.config.name
        vm[vmname] = virtual_machine
        cpuHighCounter[vmname] = 0
        cpuLowCounter[vmname] = 0
        memCounter[vmname] = 0
        netCounter[vmname] = 0
        failCounter[vmname] = 0

        # NOTE(review): runtime.maxCpuUsage is presumably only populated
        # while the VM is running -- confirm against the vSphere API
        # reference. Store None instead of crashing so the remaining VMs
        # are still registered.
        cpu_ceiling = summary.runtime.maxCpuUsage
        cpu_count = summary.config.numCpu
        if cpu_ceiling and cpu_count:
            # float() keeps the ratio exact under Python 2 as well.
            maxCpuUsage[vmname] = float(cpu_ceiling) / cpu_count
        else:
            maxCpuUsage[vmname] = None

    # cpu usageinmhz, memory active, network bytesTx
    counterIDs = {cpuUsageInMHz, memActive, netBytesTx}
def _update_cpu_counters(vmname, cpuUsage):
    """Classify one poll's CPU samples for a VM and update its counters.

    Does nothing when there are no samples or no usable CPU ceiling for
    the VM, because no ratio can be computed in that case.
    """
    limit = maxCpuUsage.get(vmname)
    if not cpuUsage or not limit:
        return

    ordered = sorted(cpuUsage)
    if len(ordered) > 1:
        # Discard the single largest reading before taking max/min.
        # NOTE(review): presumably that reading is the aggregate over all
        # vCPUs returned alongside the per-instance values -- confirm.
        ordered.pop()
    cpumin, cpumax = ordered[0], ordered[-1]

    if float(cpumax) / float(limit) >= 0.95:    # high cpu usage
        cpuHighCounter[vmname] += 1
        failCounter[vmname] = 0
    elif float(cpumin) / float(limit) <= 0.03:  # low cpu usage
        cpuLowCounter[vmname] += 1
        failCounter[vmname] = 0
    else:
        # Neither high nor low: after 5 such polls in a row the streaks
        # are considered broken and all counters start over.
        failCounter[vmname] += 1
        if failCounter[vmname] >= 5:
            failCounter[vmname] = 0
            resetCounter(vmname)


def refresh(service_instance):
    """Poll every VM once, update its counters and run fault detection.

    For each VirtualMachine under the root folder the latest sample
    (maxSample=1) of every counter in ``counterIDs`` is queried for all
    instances. Zero readings of the memory-active and network-transmit
    counters bump ``memCounter`` / ``netCounter``; the CPU readings are
    classified as high, low or neither; then detection() is called with
    the VM's guest heartbeat status.

    Args:
        service_instance: connected service instance returned by
            pyVim.connect.
    """
    content = service_instance.RetrieveContent()
    perfManager = content.perfManager

    containerView = content.viewManager.CreateContainerView(
        content.rootFolder,    # starting point to look into
        [vim.VirtualMachine],  # object types to look for
        True)                  # look into sub-folders recursively
    try:
        children = list(containerView.view)
    finally:
        # refresh() runs every poll; without this a new server-side view
        # would be leaked each time.
        containerView.DestroyView()

    # The metric list does not depend on the VM, so build it once.
    metricIDs = [vim.PerformanceManager.MetricId(counterId=c, instance="*")
                 for c in counterIDs]

    # Loop through all the VMs
    for child in children:
        vmname = child.summary.config.name

        # VMs that appeared after init() have no counter entries yet.
        for counter in (cpuHighCounter, cpuLowCounter, memCounter,
                        netCounter, failCounter):
            counter.setdefault(vmname, 0)

        # Build the specification and query the performance manager
        spec = vim.PerformanceManager.QuerySpec(maxSample=1,
                                                entity=child,
                                                metricId=metricIDs)
        result = perfManager.QueryStats(querySpec=[spec])

        for r in result:
            cpuUsage = []
            for val in r.value:
                if not val.value:
                    continue  # series came back without any sample
                sample = val.value[0]
                counterId = val.id.counterId
                if counterId == cpuUsageInMHz:
                    cpuUsage.append(sample)
                elif counterId == memActive:
                    if sample == 0:
                        memCounter[vmname] += 1
                elif counterId == netBytesTx:
                    if sample == 0:
                        netCounter[vmname] += 1

            _update_cpu_counters(vmname, cpuUsage)
            detection(vmname, child.guestHeartbeatStatus)
def print_error(vmname, error_msg):
    """Print one report line: the VM name, a tab-padded colon, the message."""
    report = "".join([vmname, " \t\t: ", error_msg])
    print(report)
def main():
    """Connect to the server named on the command line and monitor forever.

    After connecting (with or without SSL verification, depending on the
    parsed arguments) the VM state is initialised once and refresh() is
    run every 20 seconds, each round framed by a timestamp and a
    separator line.

    Returns:
        -1 if a vmodl.MethodFault is caught, otherwise 0.
    """
    args = cli.get_args()
    try:
        # Both connect helpers are called with the same keyword arguments.
        if args.disable_ssl_verification:
            open_session = connect.SmartConnectNoSSL
        else:
            open_session = connect.SmartConnect
        service_instance = open_session(host=args.host,
                                        user=args.user,
                                        pwd=args.password,
                                        port=int(args.port))

        # Make sure the session is closed when the interpreter exits.
        atexit.register(connect.Disconnect, service_instance)

        init(service_instance)
        while True:
            print(time.strftime("%Y-%m-%d %H:%M:%S"))
            refresh(service_instance)
            print("==================================================\n")
            time.sleep(20)
    except vmodl.MethodFault as error:
        print("Caught vmodl fault : " + error.msg)
        return -1
    return 0
# Start program. main() returns a status code (0, or -1 after a vmodl
# fault); hand it to the interpreter so the process exit status reflects
# it instead of always being 0.
if __name__ == "__main__":
    raise SystemExit(main())
Add Comment
Please, Sign In to add comment