Guest User

sa2pyrrd.py

a guest
Dec 12th, 2013
1,718
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. """ import needed classes from rrd libraries"""
  2. from pyrrd.rrd import RRD, RRA, DS
  3. from pyrrd.graph import DEF, CDEF, VDEF
  4. from pyrrd.graph import LINE, AREA, GPRINT
  5. from pyrrd.graph import ColorAttributes, Graph
  6. """ import subprocess to launch external commands like sar"""
  7. import subprocess
  8. """ import os.path to test if files exist"""
  9. import os.path
  10. """ import sys to handle error exits"""
  11. import  sys
  12. """ import re to parse sar output"""
  13. import re
  14. """ import time to convert time to unix stamps"""
  15. import time
  16. """ import platform to be able to get hostname"""
  17. import platform
  18. """ import optparse to parse command-line arguments"""
  19. import optparse
  20.  
  21. # one day in seconds
  22. day_secs = 60*60*24
  23. # get hostname
  24. hostname = platform.node()
  25. # default path to sa files
  26. logs_path = "/var/log/sa"
  27. # name for rrd database file with CPU-load data
  28. filename_load = ('sar_load_%s.rrd' % (hostname,))
  29. # name for rrf database file with memory usage data
  30. filename_mem = ('sar_mem_%s.rrd' % (hostname,))
  31. # right range for one month period
  32. right_range = range(1,32)
  33.  
  34. def get_sar_output(command):
  35.     """ This function gets sar output, replaces all spaces between values in "|" and returns list of strings.
  36.        For CPU load:
  37.        ...
  38.        09:45:01|PM|all|0.25|0.00|0.29|0.01|0.00|99.46
  39.        09:50:01|PM|all|0.25|0.00|0.29|0.01|0.00|99.45
  40.        09:55:01|PM|all|0.25|0.10|0.30|0.01|0.00|99.34
  41.        ...
  42.        And for memory usage:
  43.        ...
  44.        11:10:01|PM|10522976|5908216|35.96|336636|4275532|18481144|0|0.00|0
  45.        11:15:01|PM|10522480|5908712|35.96|336636|4275652|18481144|0|0.00|0
  46.        11:20:01|PM|10522480|5908712|35.96|336636|4275780|18481144|0|0.00|0
  47.        ...
  48.    """
  49.     # get sar output
  50.     sar_process = subprocess.Popen(command.split(" "), stdout=subprocess.PIPE)
  51.     sar_out_raw, sar_err = sar_process.communicate()
  52.     #repalce spaces with "|"
  53.     sar_out = re.sub('\ +','|',sar_out_raw)
  54.     # some cleanup
  55.     sar_out = re.sub('\t','',sar_out)
  56.     return sar_out.split("\n")
  57.  
  58. def get_archive_date(sar_raw):
  59.     # first element in list of sar strings contains the date of archievw
  60.     return sar_raw.split("|")[3]
  61.  
  62. def prepare_output_list(sar_cmd):
  63.     """ This  parses sar strings and returns list of lists:
  64.        ...
  65.        [1393334101, 0.25]
  66.        [1393334401, 0.25]
  67.        [1393334701, 0.25]
  68.        ...
  69.        These items contain pairs of values - unix timestamp and value of load - rrd-ready data to update rrd databases
  70.    """
  71.     # get sar output
  72.     data = get_sar_output(sar_cmd)
  73.     res = []
  74.     # determine right length of sar string
  75.     right_len = int(len(data[4].split("|")))
  76.     # get date of sa file
  77.     date_ = get_archive_date (data[0])
  78.     # convert date to unix timestamp, starting time for rrd database
  79.     time_obj = time.strptime(date_, "%m/%d/%Y")
  80.     unix_stamp_start = int(time.mktime(time_obj))
  81.     for sar_record in data:
  82.         sar_split = sar_record.split("|")
  83.         if len (sar_split) == right_len: # if the list has right length
  84.             # convert time of record to unix timestamp
  85.             date_str = ("%s %s%s" % (date_, sar_split[0], sar_split[1]))
  86.             time_obj = time.strptime(date_str, "%m/%d/%Y %I:%M:%S%p")
  87.             unix_stamp = int(time.mktime(time_obj))
  88.             try:
  89.                 if sar_cmd.split(" ")[-1] != "-r": # if sar command line doesn't end with "-r", for CPU load
  90.                     # third element in splitted string is value of CPU load
  91.                     res.append([unix_stamp, float(sar_split[3])])
  92.                 else:
  93.                     # 4th element in splitted string is value of memory usage
  94.                     res.append([unix_stamp, float(sar_split[4])])
  95.             except ValueError:
  96.                 # sar output contains extra strings with column names
  97.                 # if ValueError is raised - value cannot be converted to float, we are passing this value
  98.                 pass
  99.     return res
  100.  
  101. def dump_sar_to_rrd(sar_data, rrd_obj):
  102.     """ This function fills RRD database with records
  103.        row[0] is unix timestamp
  104.        row[1] is value of CPU load or memory usage, in %"""
  105.     for row in sar_data:
  106.         rrd_obj.bufferValue(row[0], row[1])
  107.     rrd_obj.update()
  108.  
  109. def prepare_rrds(unix_start_stamp):
  110.     """ This function initializes Data Sources, Round-Robin Archives and createa RRD database files.
  111.        In examples below heartbeat for DSs is set to 310 seconds, but we are awaiting updates every 300 seconds.
  112.        Actualy, heartbeat in DS is maximum heartbeat. Sometimes, due to highload, sar cron scripts can dump
  113.        data to binary sar files with some latency, so these 10 seconds is a reserve for such cases.
  114.  
  115.        In this example we will have 2 RRD databases (files) - for CPU load and memory usage.
  116.        Every database will have 2 RRA - for last 24 hours and last month.
  117.        All the parameters are explained in article."""
  118.     # create DS for CPU load values
  119.     dss1 = []
  120.     ds1 = DS(dsName='load', dsType='GAUGE', heartbeat=310)
  121.     dss1.append(ds1)
  122.  
  123.     # create DS for memory usage values
  124.     dss2 = []
  125.     ds2 = DS(dsName='mem', dsType='GAUGE', heartbeat=310)
  126.     dss2.append(ds2)
  127.     # creation of RRAs. RRAs have similar configurtion for CPU load and memory usage
  128.     # so the will be used for creation of both databases
  129.     rras = []
  130.     rra1 = RRA(cf='AVERAGE', xff=0.5, steps=1, rows=288)
  131.     rra2 = RRA(cf='AVERAGE', xff=0.5, steps=12, rows=744)
  132.     rras.extend([rra1, rra2])
  133.     # creating RRD database files
  134.     myRRD_load = RRD(filename_load, ds=dss1, rra=rras, start=unix_start_stamp)
  135.     myRRD_load.create()
  136.     myRRD_mem = RRD(filename_mem, ds=dss2, rra=rras, start=unix_start_stamp)
  137.     myRRD_mem.create()
  138.     return myRRD_load, myRRD_mem
  139.  
  140. def draw_file(rrd_load, rrd_mem, unix_stamp_start, unix_stamp_end, graphfile, graph_width, graph_height, graph_title):
  141.     """This function draws graph. Many parameters can be customized accordingly to your needs.
  142.        """
  143.     def1 = DEF(rrdfile=rrd_load.filename, vname='load', dsName='load')
  144.     def2 = DEF(rrdfile=rrd_mem.filename, vname='mem', dsName='mem')
  145.     cline1 = LINE(defObj=def1, color='#FFcc00', legend='Load Level')
  146.     cline2 = LINE(defObj=def2, color='#00ccff', legend='Memory Usage')
  147.     vdef2 = VDEF(vname='myavg', rpn='%s,AVERAGE' % def1.vname)
  148.     constline1 = LINE(value=100, color='#990000', legend='Max')
  149.     gprint1 = GPRINT(vdef2, '%6.2lf percent')
  150.  
  151.     ca = ColorAttributes()
  152.     ca.back = '#333333'
  153.     ca.canvas = '#333333'
  154.     ca.shadea = '#000000'
  155.     ca.shadeb = '#111111'
  156.     ca.mgrid = '#CCCCCC'
  157.     ca.axis = '#FFFFFF'
  158.     ca.frame = '#AAAAAA'
  159.     ca.font = '#FFFFFF'
  160.     ca.arrow = '#FFFFFF'
  161.  
  162.     g = Graph(graphfile, start=unix_stamp_start, end=unix_stamp_end, vertical_label='load', color=ca)
  163.     g.data.extend([def1,def2, cline2, vdef2, cline1, constline1, gprint1])
  164.     g.title = graph_title
  165.     g.width = graph_width
  166.     g.height = graph_height
  167.     g.write()
  168.     print ("Graph file: %s" % (graphfile,))
  169. # ----------------
  170.  
  171. def main():
  172.     parser = optparse.OptionParser()
  173.     parser.add_option('-d', '--day', action="store", dest="day", help="day for statistics", default="1")
  174.     options, args = parser.parse_args()
  175.  
  176.     try:
  177.         day = int(options.day)
  178.     except ValueError:
  179.         print "Wrong input!"
  180.         sys.exit(1)
  181.  
  182.     right_range = range(1,32)
  183.     if day not in right_range:
  184.         print "Wrong day of month! Please check the boundaries."
  185.         sys.exit(1)
  186.     # aff zero before the day of number if the day < 10
  187.     if day in range(1,10): day = ("0%s" % (day,))
  188.  
  189.     sa_file = ("%s/sa%s" % (logs_path, day))
  190.     if not os.path.exists(sa_file):
  191.         print ("No sar data for %s day (%s). Aborting..." % (day, sa_file))
  192.         sys.exit(1)
  193.  
  194.     print "Processing %s..." % sa_file
  195.     sar_cpu_load_cmd = ("sar -f %s" % (sa_file,))
  196.     sar_mem_used_cmd = ("sar -f %s -r" % (sa_file,))
  197.     graph_width = 800
  198.     graph_height = 400
  199.     graphfile = ('cpu_mem_usage_%s_%s.png' % (hostname, day))
  200.     graph_title = ('\"%s - statistics for %s\"' % (hostname, day))
  201.  
  202.  
  203.     cpu_load_list = prepare_output_list(sar_cpu_load_cmd)
  204.     mem_used_list = prepare_output_list(sar_mem_used_cmd)
  205.  
  206.     #first element in cpu_load_list contains start time for graph
  207.     unix_stamp_start = cpu_load_list[0][0] - 300
  208.     unix_stamp_end = unix_stamp_start + day_secs
  209.  
  210.     myRRD_load, myRRD_mem = prepare_rrds(unix_stamp_start)
  211.  
  212.     dump_sar_to_rrd(cpu_load_list, myRRD_load)
  213.     dump_sar_to_rrd(mem_used_list, myRRD_mem)
  214.  
  215.     draw_file(myRRD_load, myRRD_mem, unix_stamp_start, unix_stamp_end, graphfile, graph_width, graph_height, graph_title)
  216.  
  217.     print "Done."
  218.  
  219. if __name__ == "__main__":
  220.         main()
RAW Paste Data