Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # Program name: nmon2csv.py
- # Compatibility: Python 2x
- # Purpose - convert nmon files into csv data for Splunk Nmon App, see https://apps.splunk.com/app/1753
- # Author - Guilhem Marchand
- # Disclaimer: This script had been designed to used by Splunk Archive Processor in the context of the Nmon Splunk App, see above
- # Date - July 2014
- # Releases Notes:
- # - 07/27/2014, Guilhem Marchand: Initial version
- # Version: 1.0.0
- # Load libs
- from __future__ import print_function
- import sys
- import re
- import os
- import time
- import datetime
#################################################
## Variables ##
#################################################

# Timestamp used as a prefix for console messages
now = time.strftime("%c")

# The SPLUNK_HOME environment variable is mandatory
if "SPLUNK_HOME" not in os.environ:
    print(now + ', ' + 'ERROR:' + ' Please set the environment variable SPLUNK_HOME')
    sys.exit(1)

# SPLUNK_HOME environment variable
SPLUNK_HOME = os.environ['SPLUNK_HOME']

# Candidate APP directories: standard nmon, TA-nmon, PA-nmon
NMON_APP = SPLUNK_HOME + '/etc/apps/nmon'
TA_NMON_APP = SPLUNK_HOME + '/etc/apps/TA-nmon'
PA_NMON_APP = SPLUNK_HOME + '/etc/slave-apps/PA-nmon'

# Pick the first candidate APP directory that exists on disk
APP = ''
for candidate in (NMON_APP, TA_NMON_APP, PA_NMON_APP):
    if os.path.exists(candidate):
        APP = candidate
        break
else:
    print(now + ', ' + 'ERROR:' + ' The main APP directory could not be verified, is nmon / TA-nmon / PA-nmon installed ?')
    sys.exit(1)

# Working locations under the app's var/ directory
APP_VAR = APP + '/var'                          # base var directory
ID_REF = APP_VAR + '/id_reference.txt'          # ID reference file (duplicate detection)
DATA_DIR = APP_VAR + '/csv_repository/'         # CSV perf data repository
CONFIG_DIR = APP_VAR + '/config_repository/'    # CSV config repository

# Create any missing directory (APP_VAR first: the repositories live inside it)
for directory in (APP_VAR, DATA_DIR, CONFIG_DIR):
    if not os.path.exists(directory):
        os.mkdir(directory)
- ####################################################################
- ############# Functions ############
- ####################################################################
- # Transposer function, used to transpose data for dynamic sections (eg. sections containing devices)
# Transposer, used to transpose data for dynamic sections (eg. sections containing devices)
class transposer(object):
    """Turn wide device-oriented csv rows into (time, device, value) triples."""

    def _do_loop(self):
        # Walk the input line by line: the '"No."' row carries the device
        # names, every following quoted row carries one timestamped value set.
        for line_number, raw in enumerate(self.fin, start=1):
            line = raw.strip()
            if line.startswith('"No."'):
                # Device names start at the third column
                self.keys = line.split(',')[2:]
            elif line.startswith('"'):
                elts = line.split(',')
                if len(elts) != len(self.keys) + 2:
                    raise Exception("Syntax error line %d expected %d values found %d"
                                    % (line_number, len(self.keys), len(elts) - 2))
                dat = elts[1]
                # Emit one (time, device, value) row per device column
                for key, val in zip(self.keys, elts[2:]):
                    print(dat, key, val, sep=',', file=self.out)

    def transpose(self, ficin, ficout):
        """Read csv from path ficin, write the transposed csv (with header) to path ficout."""
        with open(ficin) as fin:
            with open(ficout, 'w') as fout:
                fout.write('"time","device",value\n')
                self.do_transpose(fin, fout)

    def do_transpose(self, fin, fout):
        """Transpose between two open file objects (no header written)."""
        self.fin = fin
        self.out = fout
        self.keys = []
        self._do_loop()
####################################################################
############# Main Program ############
####################################################################

#################################
# Retrieve NMON data from stdin #
#################################

# Slurp the whole nmon file that Splunk provides on stdin
data = list(sys.stdin)

# Number of lines read
nbr_lines = len(data)

# Show current time and number of lines
print(now)
print("Reading NMON data:", nbr_lines, "lines")
##################################################
# Extract Various data from AAA and BBB sections #
##################################################

# Default serial number; overridden below by the BBB systemid line on AIX hosts
SN = "-1"

for line in data:

    # Set HOSTNAME
    host = re.match(r'^(AAA)\,(host)\,(.+)\n', line)
    if host:
        HOSTNAME = host.group(3)
        print("HOSTNAME:", HOSTNAME)

    # Set VERSION
    version = re.match(r'^(AAA)\,(version)\,(.+)\n', line)
    if version:
        VERSION = version.group(3)
        print("NMON VERSION:", VERSION)

    # Set SN (AIX only: taken from the systemid line of the BBB section)
    sn = re.match(r'^(BBB.+)(systemid.+)(IBM,)(\w+)(.+)\n', line)
    if sn:
        SN = sn.group(4)
        print("SN:", SN)

    # Set DATE
    date = re.match(r'^(AAA)\,(date)\,(.+)\n', line)
    if date:
        DATE = date.group(3)
        print("DATE:", DATE)

    # Set date details (day / month / year), accepting '/' or '-' separators
    date_details = re.match(r'(AAA,date,)([0-9]+)[\/|\-]([a-zA-Z-0-9]+)[\/|\-]([0-9]+)', line)
    if date_details:
        day = date_details.group(2)
        month = date_details.group(3)
        year = date_details.group(4)

    # Set TIME
    # BUGFIX: this match object was previously bound to the name "time",
    # shadowing the imported time module; renamed to time_match
    time_match = re.match(r'^(AAA)\,(time)\,(.+)\n', line)
    if time_match:
        TIME = time_match.group(3)
        print("TIME:", TIME)

    # Set TIME DETAILS (hour / minute / second)
    time_details = re.match(r'(AAA,time,)([0-9]+).([0-9]+).([0-9]+)', line)
    if time_details:
        hour = time_details.group(2)
        minute = time_details.group(3)
        second = time_details.group(4)

    # Set INTERVAL
    interval = re.match(r'^(AAA)\,(interval)\,(.+)\n', line)
    if interval:
        INTERVAL = interval.group(3)
        print("INTERVAL:", INTERVAL)

    # Set SNAPSHOTS
    snapshots = re.match(r'^(AAA)\,(snapshots)\,(.+)\n', line)
    if snapshots:
        SNAPSHOTS = snapshots.group(3)
        print("SNAPSHOTS:", SNAPSHOTS)

# If SN could not be defined, this is not an AIX host: use SN == HOSTNAME
# NOTE(review): if the nmon data carries no "AAA,host" line, HOSTNAME is
# undefined here and the script stops with a NameError — assumed well-formed input
if SN == '-1':
    SN = HOSTNAME
###############
# ID Check #
###############

# This section prevents Splunk from generating duplicated data for the same Nmon file
# While using the archive mode, Splunk may open several times the same file
# If the Nmon file id is already present in our reference file, then we have
# already processed this Nmon file and nothing more has to be done

# NMON file id (concatenation of ids)
# Renamed from "id" to avoid shadowing the id() builtin
nmon_id = DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN
print("NMON ID: ", nmon_id)

# Open reference file for reading, if it exists already
if os.path.isfile(ID_REF):
    with open(ID_REF, "r") as ref:
        for line in ref:
            # Search for this ID
            # BUGFIX: the previous code used re.match(id, line), which treated
            # the id (containing '.', ',' regex metacharacters) as a pattern;
            # a literal prefix comparison is what is intended here
            if line.startswith(nmon_id):
                print("NMON data previously proceeded, nothing more to do")
                sys.exit(0)

# If we are here, then this file has not been previously processed
# NOTE(review): mode "w" truncates the reference file, so only the most recent
# id is remembered between runs — confirm whether append ("a") was intended
with open(ID_REF, "w") as ref:
    # write id
    ref.write(nmon_id + '\n')
###############################
# NMON Structure Verification #
###############################

# Achieve some structure verification of the Nmon file to prevent data inconsistency

# Months numbers to months names mapping, for compatibility with later Nmon
# versions (datetime is avoided here because of locale names of months)
month_numbers = {'01': 'JAN', '02': 'FEB', '03': 'MAR', '04': 'APR', '05': 'MAY', '06': 'JUN', '07': 'JUL', '08': 'AUG', '09': 'SEP', '10': 'OCT', '11': 'NOV', '12': 'DEC'}

for line in data:

    # A ZZZZ pattern found anywhere but at the beginning of a line means the
    # nmon data is bad and buggy: converting it would generate inconsistent data
    if re.match(r'.+ZZZZ,', line):
        print('ERROR:' + 'Detected Bad Nmon structure, found ZZZZ lines truncated !')
        print('ZZZZ lines contains the event timestamp and should always begin the line.')
        print('Please check how this nmon file is being generated, and upgrade nmon to a working version if required.')
        print('Ignoring nmon data.')
        sys.exit(1)

    # Search for old time format (eg. Nmon version V9 and prior, dd/mm/yy)
    time_oldformat = re.match(r'(AAA,date,)([0-9]+)\/([0-9]+)\/([0-9]+)', line)
    if time_oldformat:
        print('INFO:' + 'Detected old Nmon version using old Date format (dd/mm/yy)')
        day, month, year = time_oldformat.group(2, 3, 4)

        # Convert %y to %Y
        year = datetime.datetime.strptime(year, '%y').strftime('%Y')

        # Convert the month number to a month name
        for k, v in month_numbers.items():
            month = month.replace(k, v)

        DATE = day + '-' + month + '-' + year
        print('INFO:' + 'Date converted to: ' + DATE)
# End for
####################
# Write CONFIG csv #
####################

# Extraction of the AAA and BBB sections with a supplementary header to allow
# Splunk identifying the host and timestamp

# Set section
section = "CONFIG"

# Set output file
config_output = CONFIG_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '.config.csv'

# Open config output for writing
with open(config_output, "w") as config:
    # Supplementary header: section, timestamp, hostname, serial number
    config.write('CONFIG' + ',' + DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN + '\n')
    for line in data:
        # Extract AAA and BBB sections, and write to config output
        # BUGFIX: the previous pattern r'^[AAA|BBB].+' was a character class
        # matching any line starting with 'A', 'B' or '|', not the intended
        # AAA / BBB prefix alternation
        AAABBB = re.match(r'^(AAA|BBB).+', line)
        if AAABBB:
            config.write(line)

# Open config output for reading and show number of lines we extracted
with open(config_output, "r") as config:
    num_lines = sum(1 for line in config)

print("CONFIG section: Wrote", num_lines, "lines")
##########################
# Write PERFORMANCE DATA #
##########################

###################
# Static Sections : Header is dynamic but no devices notion (disks, interfaces...) and there is no needs to transpose data
###################

static_section = ["LPAR", "CPU_ALL", "FILE", "MEM", "PAGE", "MEMNEW", "MEMUSE", "PROC", "PROCSOL", "VM"]

for section in static_section:

    # Set output file (one csv per section, named after host and nmon start time)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        # Robustness: fallback timestamp (global AAA date/time) used if a data
        # row ever appeared before the first ZZZZ line (previously a NameError)
        ZZZZ_timestamp = DATE + ' ' + TIME

        for line in data:

            # Extract section lines and timestamp (ZZZZ) lines
            # BUGFIX: the previous pattern (r'^' + section + '|ZZZZ.+') used a
            # bare prefix, so section "MEM" also matched MEMNEW / MEMUSE lines
            # and "PROC" matched PROCSOL lines (those lines produced no output,
            # only wasted work); requiring the trailing comma makes the match exact
            myregex = r'^' + section + ',|^ZZZZ,'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)

                # Extract header excluding data that always has Txxxx for timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    # Drop the first field (section title), keep the column names
                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)
                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')

                # Extract timestamp

                # Nmon V9 and prior do not have date in ZZZZ
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and more
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less
                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract Data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')
    # End for

    # Open output for reading and show number of line we extracted
    with open(currsection_output, "r") as currsection:
        num_lines = sum(1 for line in currsection)

    print(section + " section: Wrote", num_lines, "lines")

# End for
###################
# TOP section: has a specific structure with uncommon fields, needs to be treated separately
# Notably, it has a Time field (containing the ZZZZ ref ID) we don't need to keep
###################

static_section = ["TOP"]

for section in static_section:

    # Set output file (one csv per section, named after host and nmon start time)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        # Robustness: fallback timestamp (global AAA date/time) used if a data
        # row appears before the first ZZZZ line
        ZZZZ_timestamp = DATE + ' ' + TIME

        for line in data:

            # Header lines (eg. "TOP,+PID,Time,%CPU,...")
            if re.match(r'^TOP,.PID', line):

                # Replace some symbols (only the header needs sanitizing)
                line = re.sub("%", 'pct', line)
                line = re.sub(" ", '_', line)
                line = re.sub("\+", '', line)

                # Extract header excluding data that always has Txxxx for timestamp reference
                fullheader_match = re.search('(' + section + ')\,([^T].+)', line)
                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    # Drop the second field (Time: it only repeats the ZZZZ ref ID)
                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)
                    if header_match:
                        header = header_match.group(1) + header_match.group(3)

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')

            # Timestamp lines
            # Nmon V9 and prior do not have date in ZZZZ: if unavailable,
            # we'll use the global date (AAA,date)
            elif line.startswith('ZZZZ,'):

                # For Nmon V10 and more: ZZZZ,Txxxx,HH:MM:SS,DD-MMM-YYYY
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME
                else:
                    # For Nmon V9 and less: ZZZZ,Txxxx,HH:MM:SS
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = DATE + ' ' + ZZZZ_TIME

            else:
                # Performance data lines: TOP,<pid>,Txxxx,<values...>
                # BUGFIX: these lines never passed the previous section filter
                # (r'^TOP,.PID|ZZZZ.+'), so the data-writing branch was dead
                # code and the csv contained only header lines
                perfdata_match = re.match(r'^TOP\,([0-9]+)\,(T\d+)\,(.+)\n', line)
                if perfdata_match:
                    # Drop the Txxxx field, consistently with the header
                    # BUGFIX: the pid and the values were previously
                    # concatenated without a separating comma
                    perfdata = perfdata_match.group(1) + ',' + perfdata_match.group(3)

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')
    # End for

    # Open output for reading and show number of line we extracted
    with open(currsection_output, "r") as currsection:
        num_lines = sum(1 for line in currsection)

    print(section + " section: Wrote", num_lines, "lines")

# End for
###################
# Dynamic Sections : data requires to be transposed to be exploitable within Splunk
###################
# NOTE(review): despite the comment above, this loop writes rows as-is and
# never invokes the transposer class defined earlier in this file — confirm
# whether the transposition step was intended to happen here

dynamic_section = ["DISKBUSY"]

for section in dynamic_section:

    # Set output file (one csv per section, named after host and nmon start time)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            # The [0-9]* suffix also lets continuation sections pass this
            # filter (DISKBUSY1, DISKBUSY2, ... on hosts with many disks)
            myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
            find_section = re.match( myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line=re.sub("%",'_PCT',line)
                line=re.sub(" ",'_',line)

                # Extract header excluding data that always has Txxxx for timestamp reference
                # NOTE(review): this pattern requires a comma right after the bare
                # section name, so lines of DISKBUSY1, DISKBUSY2, ... never match
                # here nor in the data extraction below and their content is
                # silently dropped — confirm whether this is intended
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search( myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    # Drop the first field, keep the device column names
                    header_match = re.match( r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('time' + ',' + header + '\n'),

                # Extract timestamp

                # Nmon V9 and prior do not have date in ZZZZ
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and more: ZZZZ,Txxxx,HH:MM:SS,DD-MMM-YYYY
                timestamp_match = re.match( r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less: ZZZZ,Txxxx,HH:MM:SS
                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match( r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract Data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match( myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    # NOTE(review): ZZZZ_timestamp raises a NameError if a data
                    # row appears before the first ZZZZ line — assumed
                    # well-formed input
                    currsection.write(ZZZZ_timestamp + ',' + perfdata + '\n'),
    # End for

    # Open output for reading and show number of line we extracted
    with open(currsection_output, "r") as currsection:
        num_lines = sum(1 for line in currsection)

    print (section + " section: Wrote", num_lines, "lines")

# End for
Advertisement
Add Comment
Please, Sign In to add comment