#!/usr/bin/python

# Program name: nmon2csv.py
# Compatibility: Python 2.x
# Purpose - convert nmon files into csv data for the Splunk Nmon App, see https://apps.splunk.com/app/1753
# Author - Guilhem Marchand
# Disclaimer: This script has been designed to be used by the Splunk Archive Processor in the context of the Nmon Splunk App, see above
# Date - July 2014

# Release Notes:

# - 07/27/2014, Guilhem Marchand: Initial version


# Version: 1.0.0

# Load libs

from __future__ import print_function

import sys
import re
import os
import time
import datetime


#################################################
## Variables ##
#################################################

# Current date
now = time.strftime("%c")

# Verify the SPLUNK_HOME environment variable is available
try:
    os.environ["SPLUNK_HOME"]
except KeyError:
    print(now + ', ' + 'ERROR:' + ' Please set the environment variable SPLUNK_HOME')
    sys.exit(1)

# SPLUNK_HOME environment variable
SPLUNK_HOME = os.environ['SPLUNK_HOME']

# APP directories for standard nmon, TA-nmon, PA-nmon

NMON_APP = SPLUNK_HOME + '/etc/apps/nmon'
TA_NMON_APP = SPLUNK_HOME + '/etc/apps/TA-nmon'
PA_NMON_APP = SPLUNK_HOME + '/etc/slave-apps/PA-nmon'
APP = ''

# Verify that the APP directory exists
if os.path.exists(NMON_APP):
    APP = NMON_APP
elif os.path.exists(TA_NMON_APP):
    APP = TA_NMON_APP
elif os.path.exists(PA_NMON_APP):
    APP = PA_NMON_APP
else:
    print(now + ', ' + 'ERROR:' + ' The main APP directory could not be verified, is nmon / TA-nmon / PA-nmon installed ?')
    sys.exit(1)

# APP_VAR directory
APP_VAR = APP + '/var'
if not os.path.exists(APP_VAR):
    os.mkdir(APP_VAR)

# ID reference file
ID_REF = APP_VAR + '/id_reference.txt'

# CSV perf data repository
DATA_DIR = APP_VAR + '/csv_repository/'
if not os.path.exists(DATA_DIR):
    os.mkdir(DATA_DIR)

# Config data repository
CONFIG_DIR = APP_VAR + '/config_repository/'
if not os.path.exists(CONFIG_DIR):
    os.mkdir(CONFIG_DIR)


####################################################################
############# Functions ############
####################################################################

# Transposer class, used to transpose data for dynamic sections (e.g. sections containing devices)

class transposer(object):

    def _do_loop(self):
        line_number = 0
        for line in self.fin:
            line_number += 1
            line = line.strip()
            if line.startswith('"No."'):
                self.keys = line.split(',')[2:]
            elif line.startswith('"'):
                elts = line.split(',')
                if len(elts) == (len(self.keys) + 2):
                    dat = elts[1]
                    ix = 0
                    for val in elts[2:]:
                        print(dat, self.keys[ix], val, sep=',', file=self.out)
                        ix += 1
                else:
                    raise Exception("Syntax error line %d expected %d values found %d"
                                    % (line_number, len(self.keys), len(elts) - 2))

    def transpose(self, ficin, ficout):
        with open(ficin) as fin:
            with open(ficout, 'w') as fout:
                fout.write('"time","device",value\n')
                self.do_transpose(fin, fout)

    def do_transpose(self, fin, fout):
        self.fin = fin
        self.out = fout
        self.keys = []
        self._do_loop()
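
# Illustration (hypothetical input, not taken from a real nmon file): given a csv file containing
#   "No.",T0001,hdisk0,hdisk1
#   "T0001",00:00:00,12.3,4.5
# transposer().transpose(ficin, ficout) would write:
#   "time","device",value
#   00:00:00,hdisk0,12.3
#   00:00:00,hdisk1,4.5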


####################################################################
############# Main Program ############
####################################################################

#################################
# Retrieve NMON data from stdin #
#################################

# Read nmon data from stdin

data = sys.stdin.readlines()

# Number of lines read
nbr_lines = len(data)

# Show current time and number of lines
print(now)
print("Reading NMON data:", nbr_lines, "lines")

##################################################
# Extract various data from AAA and BBB sections #
##################################################

# Set some default values
SN = "-1"

for line in data:

    # Set HOSTNAME
    host = re.match(r'^(AAA)\,(host)\,(.+)\n', line)
    if host:
        HOSTNAME = host.group(3)
        print("HOSTNAME:", HOSTNAME)

    # Set VERSION
    version = re.match(r'^(AAA)\,(version)\,(.+)\n', line)
    if version:
        VERSION = version.group(3)
        print("NMON VERSION:", VERSION)

    # Set SN
    sn = re.match(r'^(BBB.+)(systemid.+)(IBM,)(\w+)(.+)\n', line)
    if sn:
        SN = sn.group(4)
        print("SN:", SN)

    # Set DATE
    date = re.match(r'^(AAA)\,(date)\,(.+)\n', line)
    if date:
        DATE = date.group(3)
        print("DATE:", DATE)

    # Set date details
    date_details = re.match(r'(AAA,date,)([0-9]+)[\/|\-]([a-zA-Z-0-9]+)[\/|\-]([0-9]+)', line)
    if date_details:
        day = date_details.group(2)
        month = date_details.group(3)
        year = date_details.group(4)

    # Set TIME
    time = re.match(r'^(AAA)\,(time)\,(.+)\n', line)
    if time:
        TIME = time.group(3)
        print("TIME:", TIME)

    # Set time details
    time_details = re.match(r'(AAA,time,)([0-9]+).([0-9]+).([0-9]+)', line)
    if time_details:
        hour = time_details.group(2)
        minute = time_details.group(3)
        second = time_details.group(4)

    # Set INTERVAL
    interval = re.match(r'^(AAA)\,(interval)\,(.+)\n', line)
    if interval:
        INTERVAL = interval.group(3)
        print("INTERVAL:", INTERVAL)

    # Set SNAPSHOTS
    snapshots = re.match(r'^(AAA)\,(snapshots)\,(.+)\n', line)
    if snapshots:
        SNAPSHOTS = snapshots.group(3)
        print("SNAPSHOTS:", SNAPSHOTS)

# If SN could not be defined (not an AIX host), use the HOSTNAME as SN
if SN == '-1':
    SN = HOSTNAME

###############
# ID Check #
###############

# This section prevents Splunk from generating duplicated data for the same nmon file
# While using the archive mode, Splunk may open the same file several times
# If the nmon file id is already present in our reference file, then we have already processed this nmon file and nothing more has to be done

# NMON file id (concatenation of ids)
id = DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN

print("NMON ID: ", id)
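
# Illustration (hypothetical values): with DATE = '01-JAN-2014', TIME = '00:00:00',
# HOSTNAME = 'myhost' and SN = 'myhost', the id is '01-JAN-2014:00:00:00,myhost,myhost'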

# Open the reference file for reading, if it already exists
if os.path.isfile(ID_REF):

    with open(ID_REF, "r") as ref:

        for line in ref:

            # Search for this ID
            idmatch = re.match(id, line)
            if idmatch:
                print("NMON data previously processed, nothing more to do")
                sys.exit(0)

# If we reach this point, this file has not been processed before

# Open the reference file for writing
with open(ID_REF, "w") as ref:
    # write id
    ref.write(id + '\n')


###############################
# NMON Structure Verification #
###############################

# The purpose of this section is to perform some structure verification of the nmon file
# to prevent data inconsistency

for line in data:

    # Verify we do not have any line that contains ZZZZ without beginning the line with ZZZZ
    # In such a case, the nmon data is bad and buggy, and converting it would generate inconsistent data

    # Search for ZZZZ truncated lines (e.g. lines containing the ZZZZ pattern BUT not beginning the line)

    ZZZZ_truncated = re.match(r'.+ZZZZ,', line)
    if ZZZZ_truncated:

        print('ERROR:' + ' Detected bad nmon structure, found truncated ZZZZ lines !')
        print('ZZZZ lines contain the event timestamp and should always begin the line.')
        print('Please check how this nmon file is being generated, and upgrade nmon to a working version if required.')
        print('Ignoring nmon data.')
        sys.exit(1)

    # Search for the old time format (e.g. nmon version V9 and prior)

    time_oldformat = re.match(r'(AAA,date,)([0-9]+)\/([0-9]+)\/([0-9]+)', line)
    if time_oldformat:

        print('INFO:' + ' Detected old nmon version using the old date format (dd/mm/yy)')

        day = time_oldformat.group(2)
        month = time_oldformat.group(3)
        year = time_oldformat.group(4)

        # Convert %y to %Y
        year = datetime.datetime.strptime(year, '%y').strftime('%Y')

        # Convert month numbers to month names for compatibility with later nmon versions
        # Note: we won't use datetime here to avoid issues with locale names of months

        month_numbers = {'01': 'JAN', '02': 'FEB', '03': 'MAR', '04': 'APR', '05': 'MAY', '06': 'JUN', '07': 'JUL', '08': 'AUG', '09': 'SEP', '10': 'OCT', '11': 'NOV', '12': 'DEC'}

        for k, v in month_numbers.items():
            month = month.replace(k, v)

        DATE = day + '-' + month + '-' + year

        print('INFO:' + ' Date converted to: ' + DATE)

# End for
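
# Illustration of the conversion above (hypothetical values): an 'AAA,date,25/05/14' line
# yields DATE = '25-MAY-2014'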


####################
# Write CONFIG csv #
####################

# Extraction of the AAA and BBB sections with a supplementary header to allow Splunk to identify the host and timestamp

# Set section
section = "CONFIG"

# Set output file
config_output = CONFIG_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '.config.csv'

# Open config output for writing
with open(config_output, "w") as config:

    config.write('CONFIG' + ',' + DATE + ':' + TIME + ',' + HOSTNAME + ',' + SN + '\n')

    for line in data:

        # Extract AAA and BBB sections, and write to config output
        AAABBB = re.match(r'^(AAA|BBB).+', line)
        if AAABBB:
            config.write(line)

# Open config output for reading and show the number of lines we extracted
with open(config_output, "r") as config:

    num_lines = sum(1 for line in config)
    print("CONFIG section: Wrote", num_lines, "lines")
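
# The resulting config csv therefore starts with a 'CONFIG,<date>:<time>,<hostname>,<serialnum>'
# header line, followed by the raw AAA and BBB lines of the nmon file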


##########################
# Write PERFORMANCE DATA #
##########################

###################
# Static sections: the header is dynamic, but there is no notion of devices (disks, interfaces...) and no need to transpose the data
###################

static_section = ["LPAR", "CPU_ALL", "FILE", "MEM", "PAGE", "MEMNEW", "MEMUSE", "PROC", "PROCSOL", "VM"]

for section in static_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + section + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')
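
                # Illustration of the header handling above (hypothetical CPU_ALL header line):
                #   'CPU_ALL,CPU Total,User%,Sys%,Wait%,Idle%,Busy,CPUs'
                # becomes, once symbols are replaced and the first field is dropped:
                #   'type,serialnum,hostname,time,User_PCT,Sys_PCT,Wait_PCT,Idle_PCT,Busy,CPUs'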

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME
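
                # Illustration (hypothetical V10+ line): 'ZZZZ,T0001,00:00:00,01-JAN-2014'
                # yields ZZZZ_timestamp = '01-JAN-2014 00:00:00'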

                # Extract data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for


###################
# TOP section: has a specific structure with uncommon fields and needs to be treated separately
# Notably, it has a Time field (containing the ZZZZ ref ID) that we don't need to keep
###################

static_section = ["TOP"]

for section in static_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + 'TOP,.PID' + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", 'pct', line)
                line = re.sub(" ", '_', line)
                line = re.sub("\+", '', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    #currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + fullheader + '\n')

                    header_match = re.search(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header_part1 = header_match.group(1)
                        header_part2 = header_match.group(3)
                        header = header_part1 + header_part2

                        # Write header
                        currsection.write('type' + ',' + 'serialnum' + ',' + 'hostname' + ',' + 'time' + ',' + header + '\n')

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract data (rejoin the PID and the fields that follow the Txxxx reference,
                # keeping the separating comma)
                perfdata_match = re.match('^TOP\,([0-9]+)\,(T\d+)\,(.+)\n', line)
                if perfdata_match:
                    perfdata_part1 = perfdata_match.group(1)
                    perfdata_part2 = perfdata_match.group(3)
                    perfdata = perfdata_part1 + ',' + perfdata_part2
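
                    # Illustration (hypothetical line): 'TOP,1234567,T0003,12.3,...' gives
                    # perfdata = '1234567,12.3,...', the Txxxx field being replaced by the resolved timestamp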

                    # Write perf data
                    currsection.write(section + ',' + SN + ',' + HOSTNAME + ',' + ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for


###################
# Dynamic sections: the data needs to be transposed to be exploitable within Splunk
###################

dynamic_section = ["DISKBUSY"]
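
# Note: the transposer class defined above is designed for these per-device sections
# (it produces 'time,device,value' records); in this 1.0.0 version the csv below is written as-is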

for section in dynamic_section:

    # Set output file
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.csv'

    # Open output for writing
    with open(currsection_output, "w") as currsection:

        for line in data:

            # Extract sections, and write to output
            myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
            find_section = re.match(myregex, line)
            if find_section:

                # csv header

                # Replace some symbols
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)

                # Extract the header, excluding data lines that always have a Txxxx timestamp reference
                myregex = '(' + section + ')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        currsection.write('time' + ',' + header + '\n')

                # Extract timestamp

                # Nmon V9 and prior do not have the date in ZZZZ lines
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and above

                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less

                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract data
                myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    currsection.write(ZZZZ_timestamp + ',' + perfdata + '\n')

    # End for

    # Open output for reading and show the number of lines we extracted
    with open(currsection_output, "r") as currsection:

        num_lines = sum(1 for line in currsection)
        print(section + " section: Wrote", num_lines, "lines")

# End for