#!/usr/bin/python

# Program name: nmon2csv.py
# Compatibility: Python 2.x
# Purpose - convert nmon files into csv data for the Splunk Nmon App, see https://apps.splunk.com/app/1753
# Author - Guilhem Marchand
# Disclaimer: This script has been designed to be used by the Splunk Archive Processor in the context of the Nmon Splunk App, see above
# Date - July 2014

# Release Notes:

# - 07/27/2014, Guilhem Marchand: Initial version


# Version: 1.0.0

# Load libs

from __future__ import print_function

import sys
import re
import os
import time
import datetime


#################################################
## Variables ##
#################################################

# Current date
now = time.strftime("%c")

# Verify the SPLUNK_HOME environment variable is available
try:
    os.environ["SPLUNK_HOME"]
except KeyError:
    print(now + ', ' + 'ERROR:' + ' Please set the environment variable SPLUNK_HOME')
    sys.exit(1)

# SPLUNK_HOME environment variable
SPLUNK_HOME = os.environ['SPLUNK_HOME']

# APP directories for standard nmon, TA-nmon, PA-nmon

NMON_APP = SPLUNK_HOME + '/etc/apps/nmon'
TA_NMON_APP = SPLUNK_HOME + '/etc/apps/TA-nmon'
PA_NMON_APP = SPLUNK_HOME + '/etc/slave-apps/PA-nmon'
APP = ''

# Verify that the APP directory exists
if os.path.exists(NMON_APP):
    APP = NMON_APP
elif os.path.exists(TA_NMON_APP):
    APP = TA_NMON_APP
elif os.path.exists(PA_NMON_APP):
    APP = PA_NMON_APP
else:
    print(now + ', ' + 'ERROR:' + ' The main APP directory could not be verified, is nmon / TA-nmon / PA-nmon installed ?')
    sys.exit(1)

# APP_VAR directory
APP_VAR = APP + '/var'
if not os.path.exists(APP_VAR):
    os.mkdir(APP_VAR)

# ID reference file
ID_REF = APP_VAR + '/id_reference.txt'

# CSV perf data repository
DATA_DIR = APP_VAR + '/csv_repository/'
if not os.path.exists(DATA_DIR):
    os.mkdir(DATA_DIR)

# Config data repository
CONFIG_DIR = APP_VAR + '/config_repository/'
if not os.path.exists(CONFIG_DIR):
    os.mkdir(CONFIG_DIR)


####################################################################
############# Functions ############
####################################################################

# Transposer class, used to transpose data for dynamic sections (e.g. sections containing devices)

class transposer(object):

    def _do_loop(self):
        line_number = 0
        for line in self.fin:
            line_number += 1
            line = line.strip()
            if line.startswith('"No."'):
                self.keys = line.split(',')[2:]
            elif line.startswith('"'):
                elts = line.split(',')
                if len(elts) == (len(self.keys) + 2):
                    dat = elts[1]
                    ix = 0
                    for val in elts[2:]:
                        print(dat, self.keys[ix], val, sep=',', file=self.out)
                        ix += 1
                else:
                    raise Exception("Syntax error line %d expected %d values found %d"
                                    % (line_number, len(self.keys), len(elts) - 2))

    def transpose(self, ficin, ficout):
        with open(ficin) as fin:
            with open(ficout, 'w') as fout:
                fout.write('"time","device",value\n')
                self.do_transpose(fin, fout)

    def do_transpose(self, fin, fout):
        self.fin = fin
        self.out = fout
        self.keys = []
        self._do_loop()
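
# Illustration (hypothetical input, not taken from a real nmon file): given a csv file containing
#   "No.",T0001,hdisk0,hdisk1
#   "T0001",00:00:00,12.3,4.5
# transposer().transpose(ficin, ficout) would write:
#   "time","device",value
#   00:00:00,hdisk0,12.3
#   00:00:00,hdisk1,4.5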


####################################################################
############# Main Program ############
####################################################################

#################################
# Retrieve NMON data from stdin #
#################################

# Read nmon data from stdin

data = sys.stdin.readlines()

# Number of lines read
nbr_lines = len(data)

# Show current time and number of lines
print(now)
print("Reading NMON data:", nbr_lines, "lines")

##################################################
# Extract various data from AAA and BBB sections #
##################################################

# Set some default values
SN = "-1"

for line in data:

    # Set HOSTNAME
    host = re.match(r'^(AAA)\,(host)\,(.+)\n', line)
    if host:
        HOSTNAME = host.group(3)
        print("HOSTNAME:", HOSTNAME)

    # Set VERSION
    version = re.match(r'^(AAA)\,(version)\,(.+)\n', line)
    if version:
        VERSION = version.group(3)
        print("NMON VERSION:", VERSION)

    # Set SN
    sn = re.match(r'^(BBB.+)(systemid.+)(IBM,)(\w+)(.+)\n', line)
    if sn:
        SN = sn.group(4)
        print("SN:", SN)

    # Set DATE
    date = re.match(r'^(AAA)\,(date)\,(.+)\n', line)
    if date:
        DATE = date.group(3)
        print("DATE:", DATE)

    # Set date details
    date_details = re.match(r'(AAA,date,)([0-9]+)[\/|\-]([a-zA-Z-0-9]+)[\/|\-]([0-9]+)', line)
    if date_details:
        day = date_details.group(2)
        month = date_details.group(3)
        year = date_details.group(4)

    # Set TIME
    time = re.match(r'^(AAA)\,(time)\,(.+)\n', line)
    if time:
        TIME = time.group(3)
        print("TIME:", TIME)

    # Set time details
    time_details = re.match(r'(AAA,time,)([0-9]+).([0-9]+).([0-9]+)', line)
    if time_details:
        hour = time_details.group(2)
        minute = time_details.group(3)
        second = time_details.group(4)

    # Set INTERVAL
    interval = re.match(r'^(AAA)\,(interval)\,(.+)\n', line)
    if interval:
        INTERVAL = interval.group(3)
        print("INTERVAL:", INTERVAL)

    # Set SNAPSHOTS
    snapshots = re.match(r'^(AAA)\,(snapshots)\,(.+)\n', line)
    if snapshots:
        SNAPSHOTS = snapshots.group(3)
        print("SNAPSHOTS:", SNAPSHOTS)

# If SN could not be defined (not an AIX host), use the HOSTNAME as SN
if SN == '-1':
    SN = HOSTNAME

###############
# ID Check #
###############

# This section prevents Splunk from generating duplicated data for the same nmon file
# While using the archive mode, Splunk may open the same file several times
# If the nmon file id is already present in our reference file, then we have already processed this nmon file and nothing more has to be done

# NMON file id (concatenation of ids)
id = DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN

print("NMON ID: ", id)
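
# Illustration (hypothetical values): with DATE = '01-JAN-2014', TIME = '00:00:00',
# HOSTNAME = 'myhost' and SN = 'myhost', the id is '01-JAN-2014:00:00:00,myhost,myhost'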

# Open the reference file for reading, if it already exists
if os.path.isfile(ID_REF):

    with open(ID_REF, "r") as ref:

        for line in ref:

            # Search for this ID
            idmatch = re.match(id, line)
            if idmatch:
                print("NMON data previously processed, nothing more to do")
                sys.exit(0)

# If we reach this point, this file has not been processed before

# Open the reference file for writing
with open(ID_REF, "w") as ref:
    # write id
    ref.write(id + '\n')


###############################
# NMON Structure Verification #
###############################

# The purpose of this section is to perform some structure verification of the nmon file
# to prevent data inconsistency

for line in data:

    # Verify we do not have any line that contains ZZZZ without beginning the line with ZZZZ
    # In such a case, the nmon data is bad and buggy, and converting it would generate inconsistent data

    # Search for ZZZZ truncated lines (e.g. lines containing the ZZZZ pattern BUT not beginning the line)

    ZZZZ_truncated = re.match(r'.+ZZZZ,', line)
    if ZZZZ_truncated:

        print('ERROR:' + ' Detected bad nmon structure, found truncated ZZZZ lines !')
        print('ZZZZ lines contain the event timestamp and should always begin the line.')
        print('Please check how this nmon file is being generated, and upgrade nmon to a working version if required.')
        print('Ignoring nmon data.')
        sys.exit(1)

    # Search for the old time format (e.g. nmon version V9 and prior)

    time_oldformat = re.match(r'(AAA,date,)([0-9]+)\/([0-9]+)\/([0-9]+)', line)
    if time_oldformat:

        print('INFO:' + ' Detected old nmon version using the old date format (dd/mm/yy)')

        day = time_oldformat.group(2)
        month = time_oldformat.group(3)
        year = time_oldformat.group(4)

        # Convert %y to %Y
        year = datetime.datetime.strptime(year, '%y').strftime('%Y')

        # Convert month numbers to month names for compatibility with later nmon versions
        # Note: we won't use datetime here to avoid issues with locale names of months

        month_numbers = {'01': 'JAN', '02': 'FEB', '03': 'MAR', '04': 'APR', '05': 'MAY', '06': 'JUN', '07': 'JUL', '08': 'AUG', '09': 'SEP', '10': 'OCT', '11': 'NOV', '12': 'DEC'}

        for k, v in month_numbers.items():
            month = month.replace(k, v)

        DATE = day + '-' + month + '-' + year

        print('INFO:' + ' Date converted to: ' + DATE)

# End for
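
# Illustration of the conversion above (hypothetical values): an 'AAA,date,25/05/14' line
# yields DATE = '25-MAY-2014'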


####################
# Write CONFIG csv #
####################

# Extraction of the AAA and BBB sections with a supplementary header to allow Splunk to identify the host and timestamp

# Set section
section = "CONFIG"

# Set output file
config_output = CONFIG_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '.config.csv'

# Open config output for writing
with open(config_output, "w") as config:

    config.write('CONFIG' + ',' + DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN + '\n')

    for line in data:

        # Extract AAA and BBB sections, and write to config output
        AAABBB = re.match(r'^(AAA|BBB).+', line)
        if AAABBB:
            config.write(line)

# Open config output for reading and show the number of lines we extracted
with open(config_output, "r") as config:

    num_lines = sum(1 for line in config)
    print("CONFIG section: Wrote", num_lines, "lines")
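
# The resulting config csv therefore starts with a 'CONFIG,<date>:<time>,<hostname>,<serialnum>'
# header line, followed by the raw AAA and BBB lines of the nmon file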


##########################
# Write PERFORMANCE DATA #
##########################

###################
# Static sections: the header is dynamic, but there is no notion of devices (disks, interfaces...) and no need to transpose the data
###################

static_section = ["LPAR", "CPU_ALL", "FILE", "MEM", "PAGE", "MEMNEW", "MEMUSE", "PROC", "PROCSOL", "VM"]

for section in static_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + section + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')
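
                # Illustration of the header handling above (hypothetical CPU_ALL header line):
                #   'CPU_ALL,CPU Total,User%,Sys%,Wait%,Idle%,Busy,CPUs'
                # becomes, once symbols are replaced and the first field is dropped:
                #   'type,serialnum,hostname,time,User_PCT,Sys_PCT,Wait_PCT,Idle_PCT,Busy,CPUs'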

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME
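
                # Illustration (hypothetical V10+ line): 'ZZZZ,T0001,00:00:00,01-JAN-2014'
                # yields ZZZZ_timestamp = '01-JAN-2014 00:00:00'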

                # Extract data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for


###################
# TOP section: has a specific structure with uncommon fields and needs to be treated separately
# Notably, it has a Time field (containing the ZZZZ ref ID) that we don't need to keep
###################

static_section = ["TOP"]

for section in static_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + 'TOP,.PID' + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", 'pct', line)
                line = re.sub(" ", '_', line)
                line = re.sub("\+", '', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    #currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + fullheader + '\n')

                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header_part1 = header_match.group(1)
                        header_part2 = header_match.group(3)
                        header = header_part1 + header_part2

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract data (rejoin the PID and the fields that follow the Txxxx reference,
                # keeping the separating comma)
                perfdata_match = re.match('^TOP\,([0-9]+)\,(T\d+)\,(.+)\n', line)
                if perfdata_match:
                    perfdata_part1 = perfdata_match.group(1)
                    perfdata_part2 = perfdata_match.group(3)
                    perfdata = perfdata_part1 + ',' + perfdata_part2
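
                    # Illustration (hypothetical line): 'TOP,1234567,T0003,12.3,...' gives
                    # perfdata = '1234567,12.3,...', the Txxxx field being replaced by the resolved timestamp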

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for


###################
# Dynamic sections: the data needs to be transposed to be exploitable within Splunk
###################

dynamic_section = ["DISKBUSY"]
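
# Note: the transposer class defined above is designed for these per-device sections
# (it produces 'time,device,value' records); in this 1.0.0 version the csv below is written as-is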

for section in dynamic_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('time' + ',' + header + '\n')

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    currsection.write(ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for