Advertisement
phoenixdigital

Splunk Eventgen - eventgensamples.py

Aug 9th, 2015
434
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 69.98 KB | None | 0 0
  1. # -*-  indent-tabs-mode:nil;  -*-
  2. from __future__ import division, with_statement
  3. import os, sys
  4. import logging
  5. import pprint
  6. import random
  7. import datetime
  8. import re
  9. import csv
  10. import json
  11. import copy
  12. from eventgenoutput import Output
  13. from timeparser import timeParser, timeDelta2secs
  14. import httplib2, urllib
  15. from xml.dom import minidom
  16. from xml.parsers.expat import ExpatError
  17. import uuid
  18. from collections import deque
  19.  
  20. class CircularBuffer(deque):
  21.     def __init__(self):
  22.         self.capacity = 100
  23.         super(CircularBuffer, self).__init__(maxlen=self.capacity)
  24.     def __deepcopy__(self, memo):
  25.         cls = self.__class__
  26.         result = cls.__new__(cls)
  27.         memo[id(self)] = result
  28.         for k, v in self.__dict__.items():
  29.             setattr(result, k, copy.deepcopy(v, memo))
  30.         return result
  31.  
  32. class Sample:
  33.     # Required fields for Sample
  34.     name = None
  35.     app = None
  36.     filePath = None
  37.    
  38.     # Options which are all valid for a sample
  39.     disabled = None
  40.     spoolDir = None
  41.     spoolFile = None
  42.     breaker = None
  43.     sampletype = None
  44.     mode = None
  45.     interval = None
  46.     delay = None
  47.     count = None
  48.     bundlelines = None
  49.     earliest = None
  50.     latest = None
  51.     hourOfDayRate = None
  52.     dayOfWeekRate = None
  53.     randomizeEvents = None
  54.     randomizeCount = None
  55.     outputMode = None
  56.     fileName = None
  57.     fileMaxBytes = None
  58.     fileBackupFiles = None
  59.     splunkHost = None
  60.     splunkPort = None
  61.     splunkMethod = None
  62.     splunkUser = None
  63.     splunkPass = None
  64.     index = None
  65.     source = None
  66.     sourcetype = None
  67.     host = None
  68.     hostRegex = None
  69.     hostToken = None
  70.     tokens = None
  71.     projectID = None
  72.     accessToken = None
  73.     backfill = None
  74.     backfillSearch = None
  75.     backfillSearchUrl = None
  76.     minuteOfHourRate = None
  77.     timeMultiple = None
  78.     debug = None
  79.     timezone = datetime.timedelta(days=1)
  80.     dayOfMonthRate = None
  81.     monthOfYearRate = None
  82.     timeField = None
  83.    
  84.     # Internal fields
  85.     _c = None
  86.     _out = None
  87.     _sampleLines = None
  88.     _sampleDict = None
  89.     _lockedSettings = None
  90.     _priority = None
  91.     _origName = None
  92.     _lastts = None
  93.     _backfillts = None
  94.     _timeSinceSleep = None
  95.     _earliestParsed = None
  96.     _latestParsed = None
  97.    
  98.     def __init__(self, name):
  99.         # Logger already setup by config, just get an instance
  100.         logger = logging.getLogger('eventgen')
  101.         globals()['logger'] = logger
  102.        
  103.         self.name = name
  104.         self.tokens = [ ]
  105.         self._lockedSettings = [ ]
  106.  
  107.         self._currentevent = 0
  108.         self._rpevents = None
  109.         self._backfilldone = False
  110.         self._timeSinceSleep = datetime.timedelta()
  111.         self._eventIntervals = CircularBuffer()
  112.  
  113.        
  114.         # Import config
  115.         from eventgenconfig import Config
  116.         self._c = Config()
  117.        
  118.     def __str__(self):
  119.         """Only used for debugging, outputs a pretty printed representation of this sample"""
  120.         # Eliminate recursive going back to parent
  121.         temp = dict([ (key, value) for (key, value) in self.__dict__.items() if key != '_c' ])
  122.         return pprint.pformat(temp)
  123.        
  124.     def __repr__(self):
  125.         return self.__str__()
  126.    
  127.     def gen(self):
  128.         logger.debug("Generating sample '%s' in app '%s'" % (self.name, self.app))
  129.         startTime = datetime.datetime.now()
  130.        
  131.         # If this is the first time we're generating, setup out
  132.         if self._out == None:
  133.             logger.debug("Setting up Output class for sample '%s' in app '%s'" % (self.name, self.app))
  134.             self._out = Output(self)
  135.             if self.backfillSearchUrl == None:
  136.                 self.backfillSearchUrl = self._out._splunkUrl
  137.  
  138.         # Setup initial backfillts
  139.         if self._backfillts == None and self.backfill != None and not self._backfilldone:
  140.             try:
  141.                 self._backfillts = timeParser(self.backfill, timezone=self.timezone)
  142.                 logger.info("Setting up backfill of %s (%s)" % (self.backfill,self._backfillts))
  143.             except Exception as ex:
  144.                 logger.error("Failed to parse backfill '%s': %s" % (self.backfill, ex))
  145.                 raise
  146.  
  147.             if self._out._outputMode == "splunkstream" and self.backfillSearch != None:
  148.                 if not self.backfillSearch.startswith('search'):
  149.                     self.backfillSearch = 'search ' + self.backfillSearch
  150.                 self.backfillSearch += '| head 1 | table _time'
  151.  
  152.                 logger.debug("Searching Splunk URL '%s/services/search/jobs' with search '%s' with sessionKey '%s'" % (self.backfillSearchUrl, self.backfillSearch, self._out._c.sessionKey))
  153.  
  154.                 results = httplib2.Http(disable_ssl_certificate_validation=True).request(\
  155.                             self.backfillSearchUrl + '/services/search/jobs',
  156.                             'POST', headers={'Authorization': 'Splunk %s' % self._out._c.sessionKey}, \
  157.                             body=urllib.urlencode({'search': self.backfillSearch,
  158.                                                     'earliest_time': self.backfill,
  159.                                                     'exec_mode': 'oneshot'}))[1]
  160.                 try:
  161.                     temptime = minidom.parseString(results).getElementsByTagName('text')[0].childNodes[0].nodeValue
  162.                     # logger.debug("Time returned from backfill search: %s" % temptime)
  163.                     # Results returned look like: 2013-01-16T10:59:15.411-08:00
  164.                     # But the offset in time can also be +, so make sure we strip that out first
  165.                     if len(temptime) > 0:
  166.                         if temptime.find('+') > 0:
  167.                             temptime = temptime.split('+')[0]
  168.                         temptime = '-'.join(temptime.split('-')[0:3])
  169.                     self._backfillts = datetime.datetime.strptime(temptime, '%Y-%m-%dT%H:%M:%S.%f')
  170.                     logger.info("Backfill search results: '%s' value: '%s' time: '%s'" % (pprint.pformat(results), temptime, self._backfillts))
  171.                 except (ExpatError, IndexError):
  172.                     pass
  173.  
  174.         # Override earliest and latest during backfill until we're at current time
  175.         if self.backfill != None and not self._backfilldone:
  176.             if self._backfillts >= self.now(realnow=True):
  177.                 logger.info("Backfill complete")
  178.                 # exit(1)  # Added for perf test, REMOVE LATER
  179.                 self._backfilldone = True
  180.             else:
  181.                 logger.info("Still backfilling for sample '%s'.  Currently at %s" % (self.name, self._backfillts))
  182.                 # if not self.mode == 'replay':
  183.                 #     self._backfillts += datetime.timedelta(seconds=self.interval)
  184.  
  185.        
  186.         logger.debug("Opening sample '%s' in app '%s'" % (self.name, self.app) )
  187.         sampleFH = open(self.filePath, 'rU')
  188.         if self.sampletype == 'raw':
  189.             # 5/27/12 CS Added caching of the sample file
  190.             if self._sampleLines == None:
  191.                 logger.debug("Reading raw sample '%s' in app '%s'" % (self.name, self.app))
  192.                 sampleLines = sampleFH.readlines()
  193.                 self._sampleLines = sampleLines
  194.                 sampleDict = [ ]
  195.             else:
  196.                 sampleLines = self._sampleLines
  197.         elif self.sampletype == 'csv':
  198.             logger.debug("Reading csv sample '%s' in app '%s'" % (self.name, self.app))
  199.             if self._sampleLines == None:
  200.                 logger.debug("Reading csv sample '%s' in app '%s'" % (self.name, self.app))
  201.                 sampleDict = [ ]
  202.                 sampleLines = [ ]
  203.                 # Fix to load large csv files, work with python 2.5 onwards
  204.                 csv.field_size_limit(sys.maxint)
  205.                 csvReader = csv.DictReader(sampleFH)
  206.                 for line in csvReader:
  207.                     sampleDict.append(line)
  208.                     try:
  209.                         tempstr = line['_raw'].decode('string_escape')
  210.                         if self.bundlelines:
  211.                             tempstr = tempstr.replace('\n', 'NEWLINEREPLACEDHERE!!!')
  212.                         sampleLines.append(tempstr)
  213.                     except ValueError:
  214.                         logger.error("Error in sample at line '%d' in sample '%s' in app '%s' - did you quote your backslashes?" % (csvReader.line_num, self.name, self.app))
  215.                     except AttributeError:
  216.                         logger.error("Missing _raw at line '%d' in sample '%s' in app '%s'" % (csvReader.line_num, self.name, self.app))
  217.                 self._sampleDict = copy.deepcopy(sampleDict)
  218.                 self._sampleLines = copy.deepcopy(sampleLines)
  219.                 logger.debug('Finished creating sampleDict & sampleLines.  Len samplesLines: %d Len sampleDict: %d' % (len(sampleLines), len(sampleDict)))
  220.             else:
  221.                 # If we're set to bundlelines, we'll modify sampleLines regularly.
  222.                 # Since lists in python are referenced rather than copied, we
  223.                 # need to make a fresh copy every time if we're bundlelines.
  224.                 # If not, just used the cached copy, we won't mess with it.
  225.                 if not self.bundlelines:
  226.                     sampleDict = self._sampleDict
  227.                     sampleLines = self._sampleLines
  228.                 else:
  229.                     sampleDict = copy.deepcopy(self._sampleDict)
  230.                     sampleLines = copy.deepcopy(self._sampleLines)
  231.  
  232.  
  233.         # Check to see if this is the first time we've run, or if we're at the end of the file
  234.         # and we're running replay.  If so, we need to parse the whole file and/or setup our counters
  235.         if self._rpevents == None and self.mode == 'replay':
  236.             if self.sampletype == 'csv':
  237.                 self._rpevents = sampleDict
  238.             else:
  239.                 if self.breaker != self._c.breaker:
  240.                     self._rpevents = []
  241.                     lines = '\n'.join(sampleLines)
  242.                     breaker = re.search(self.breaker, lines)
  243.                     currentchar = 0
  244.                     while breaker:
  245.                         self._rpevents.append(lines[currentchar:breaker.start(0)])
  246.                         lines = lines[breaker.end(0):]
  247.                         currentchar += breaker.start(0)
  248.                         breaker = re.search(self.breaker, lines)
  249.                 else:
  250.                     self._rpevents = sampleLines
  251.             self._currentevent = 0
  252.        
  253.         # If we are replaying then we need to set the current sampleLines to the event
  254.         # we're currently on
  255.         if self.mode == 'replay':
  256.             if self.sampletype == 'csv':
  257.                 sampleDict = [ self._rpevents[self._currentevent] ]
  258.                 sampleLines = [ self._rpevents[self._currentevent]['_raw'].decode('string_escape') ]
  259.             else:
  260.                 sampleLines = [ self._rpevents[self._currentevent] ]
  261.             self._currentevent += 1
  262.             # If we roll over the max number of lines, roll over the counter and start over
  263.             if self._currentevent >= len(self._rpevents):
  264.                 logger.debug("At end of the sample file, starting replay from the top")
  265.                 self._currentevent = 0
  266.                 self._lastts = None
  267.                 if len(self._eventIntervals) > 0:
  268.                     sleep_time = self._eventIntervals[-1]
  269.                     logger.info("Sleeping for %f seconds" % sleep_time)
  270.                     self._timeSinceSleep = datetime.timedelta()
  271.                     return sleep_time
  272.  
  273.         # Ensure all lines have a newline
  274.         for i in xrange(0, len(sampleLines)):
  275.             if sampleLines[i][-1] != '\n':
  276.                 sampleLines[i] += '\n'
  277.  
  278.         # If we've set bundlelines, then we want count copies of all of the lines in the file
  279.         # And we'll set breaker to be a weird delimiter so that we'll end up with an events
  280.         # array that can be rated by the hour of day and day of week rates
  281.         # This is only for weird outside use cases like when we want to include a CSV file as the source
  282.         # so we can't set breaker properly
  283.         if self.bundlelines:
  284.             logger.debug("Bundlelines set.  Creating %s copies of original sample lines and setting breaker." % (self.count-1))
  285.             self.breaker = '\n------\n'
  286.             origSampleLines = copy.deepcopy(sampleLines)
  287.             origSampleDict = copy.deepcopy(sampleDict)
  288.             sampleLines.append(self.breaker)
  289.             for i in range(0, self.count-1):
  290.                 sampleLines.extend(origSampleLines)
  291.                 sampleLines.append(self.breaker)
  292.            
  293.  
  294.         if len(sampleLines) > 0:
  295.             count = self.count
  296.             if self.count == 0 and self.mode == 'sample':
  297.                 logger.debug("Count %s specified as default for sample '%s' in app '%s'; adjusting count to sample length %s; using default breaker" \
  298.                                 % (self.count, self.name, self.app, len(sampleLines)) )
  299.                 count = len(sampleLines)
  300.                 self.breaker = self._c.breaker
  301.             elif self.count > 0 or self.mode == 'replay':
  302.                
  303.                 # 5/8/12 CS We've requested not the whole file, so we should adjust count based on
  304.                 # hourOfDay, dayOfWeek and randomizeCount configs
  305.                 rateFactor = 1.0
  306.                 if self.randomizeCount != 0 and self.randomizeCount != None:
  307.                     try:
  308.                         logger.debug("randomizeCount for sample '%s' in app '%s' is %s" \
  309.                                         % (self.name, self.app, self.randomizeCount))
  310.                         # If we say we're going to be 20% variable, then that means we
  311.                         # can be .1% high or .1% low.  Math below does that.
  312.                         randBound = round(self.randomizeCount * 1000, 0)
  313.                         rand = random.randint(0, randBound)
  314.                         randFactor = 1+((-((randBound / 2) - rand)) / 1000)
  315.                         logger.debug("randFactor for sample '%s' in app '%s' is %s" \
  316.                                         % (self.name, self.app, randFactor))
  317.                         rateFactor *= randFactor
  318.                     except:
  319.                         import traceback
  320.                         stack =  traceback.format_exc()
  321.                         logger.error("Randomize count failed.  Stacktrace %s" % stack)
  322.                 if type(self.hourOfDayRate) == dict:
  323.                     try:
  324.                         rate = self.hourOfDayRate[str(self.now().hour)]
  325.                         logger.debug("hourOfDayRate for sample '%s' in app '%s' is %s" % (self.name, self.app, rate))
  326.                         rateFactor *= rate
  327.                     except KeyError:
  328.                         import traceback
  329.                         stack =  traceback.format_exc()
  330.                         logger.error("Hour of day rate failed.  Stacktrace %s" % stack)
  331.                 if type(self.dayOfWeekRate) == dict:
  332.                     try:
  333.                         weekday = datetime.date.weekday(self.now())
  334.                         if weekday == 6:
  335.                             weekday = 0
  336.                         else:
  337.                             weekday += 1
  338.                         rate = self.dayOfWeekRate[str(weekday)]
  339.                         logger.debug("dayOfWeekRate for sample '%s' in app '%s' is %s" % (self.name, self.app, rate))
  340.                         rateFactor *= rate
  341.                     except KeyError:
  342.                         import traceback
  343.                         stack =  traceback.format_exc()
  344.                         logger.error("Hour of day rate failed for sample '%s'.  Stacktrace %s" % (self.name, stack))
  345.                 if type(self.minuteOfHourRate) == dict:
  346.                     try:
  347.                         rate = self.minuteOfHourRate[str(self.now().minute)]
  348.                         logger.debug("minuteOfHourRate for sample '%s' in app '%s' is %s" % (self.name, self.app, rate))
  349.                         rateFactor *= rate
  350.                     except KeyError:
  351.                         import traceback
  352.                         stack =  traceback.format_exc()
  353.                         logger.error("Minute of hour rate failed for sample '%s'.  Stacktrace %s" % (self.name, stack))
  354.                 if type(self.dayOfMonthRate) == dict:
  355.                     try:
  356.                         rate = self.dayOfMonthRate[str(self.now().day)]
  357.                         logger.debug("dayOfMonthRate for sample '%s' in app '%s' is %s" % (self.name, self.app, rate))
  358.                         rateFactor *= rate
  359.                     except KeyError:
  360.                         import traceback
  361.                         stack =  traceback.format_exc()
  362.                         logger.error("Day of Month rate for sample '%s' failed.  Stacktrace %s" % (self.name, stack))
  363.                 if type(self.monthOfYearRate) == dict:
  364.                     try:
  365.                         rate = self.monthOfYearRate[str(self.now().month)]
  366.                         logger.debug("monthOfYearRate for sample '%s' in app '%s' is %s" % (self.name, self.app, rate))
  367.                         rateFactor *= rate
  368.                     except KeyError:
  369.                         import traceback
  370.                         stack =  traceback.format_exc()
  371.                         logger.error("Month Of Year rate failed for sample '%s'.  Stacktrace %s" % (self.name, stack))
  372.                 count = int(round(count * rateFactor, 0))
  373.                 if rateFactor != 1.0:
  374.                     logger.info("Original count: %s Rated count: %s Rate factor: %s" % (self.count, count, rateFactor))
  375.  
  376.             try:
  377.                 breakerRE = re.compile(self.breaker)
  378.             except:
  379.                 logger.error("Line breaker '%s' for sample '%s' in app '%s' could not be compiled; using default breaker" \
  380.                             % (self.breaker, self.name, self.app) )
  381.                 self.breaker = self._c.breaker
  382.  
  383.             events = []
  384.             # 9/7/13 CS If we're sampleType CSV and we do an events fill that's greater than the count
  385.             # we don't have entries in sampleDict to match what index/host/source/sourcetype they are
  386.             # so creating a new dict to track that metadata
  387.             eventsDict = []
  388.             event = ''
  389.  
  390.             if self.breaker == self._c.breaker:
  391.                 logger.debug("Default breaker detected for sample '%s' in app '%s'; using simple event fill" \
  392.                                 % (self.name, self.app) )
  393.                 logger.debug("Filling events array for sample '%s' in app '%s'; count=%s, sampleLines=%s" \
  394.                                 % (self.name, self.app, count, len(sampleLines)) )
  395.  
  396.                 # 5/8/12 CS Added randomizeEvents config to randomize items from the file
  397.                 # 5/27/12 CS Don't randomize unless we're raw
  398.                 try:
  399.                     # 7/30/12 CS Can't remember why I wouldn't allow randomize Events for CSV so commenting
  400.                     # this out and seeing what breaks
  401.                     #if self.randomizeEvents and self.sampletype == 'raw':
  402.                     if self.randomizeEvents:
  403.                         logger.debug("Shuffling events for sample '%s' in app '%s'" \
  404.                                         % (self.name, self.app))
  405.                         random.shuffle(sampleLines)
  406.                 except:
  407.                     logger.error("randomizeEvents for sample '%s' in app '%s' unparseable." \
  408.                                     % (self.name, self.app))
  409.                
  410.                 if count >= len(sampleLines):
  411.                     events = sampleLines
  412.                     if self.sampletype == 'csv':
  413.                         eventsDict = sampleDict[:]
  414.                 else:
  415.                     events = sampleLines[0:count]
  416.                     if self.sampletype == 'csv':
  417.                         eventsDict = sampleDict[0:count]
  418.             else:
  419.                 logger.debug("Non-default breaker '%s' detected for sample '%s' in app '%s'; using advanced event fill" \
  420.                                 % (self.breaker, self.name, self.app) )
  421.  
  422.                 ## Fill events array from breaker and sampleLines
  423.                 breakersFound = 0
  424.                 x = 0
  425.  
  426.                 logger.debug("Filling events array for sample '%s' in app '%s'; count=%s, sampleLines=%s" \
  427.                                 % (self.name, self.app, count, len(sampleLines)) )
  428.                 while len(events) < count and x < len(sampleLines):
  429.                     #logger.debug("Attempting to match regular expression '%s' with line '%s' for sample '%s' in app '%s'" % (breaker, sampleLines[x], sample, app) )
  430.                     breakerMatch = breakerRE.search(sampleLines[x])
  431.  
  432.                     if breakerMatch:
  433.                         #logger.debug("Match found for regular expression '%s' and line '%s' for sample '%s' in app '%s'" % (breaker, sampleLines[x], sample, app) )
  434.                         ## If not first
  435.                         # 5/28/12 CS This may cause a regression defect, but I can't figure out why
  436.                         # you'd want to ignore the first breaker you find.  It's certainly breaking
  437.                         # my current use case.
  438.  
  439.                         # 6/25/12 CS Definitely caused a regression defect.  I'm going to add
  440.                         # a check for bundlelines which is where I need this to work every time
  441.                         if breakersFound != 0 or self.bundlelines:
  442.                             events.append(event)
  443.                             event = ''
  444.  
  445.                         breakersFound += 1
  446.                     # else:
  447.                     #     logger.debug("Match not found for regular expression '%s' and line '%s' for sample '%s' in app '%s'" % (breaker, sampleLines[x], sample, app) )
  448.  
  449.                     # If we've inserted the breaker with bundlelines, don't insert the line, otherwise insert
  450.                     if not (self.bundlelines and breakerMatch):
  451.                         event += sampleLines[x]
  452.                     x += 1
  453.  
  454.                 ## If events < count append remaining data in samples
  455.                 if len(events) < count:
  456.                     events.append(event + '\n')
  457.  
  458.                 if self.bundlelines:
  459.                     eventsDict = sampleDict[:]
  460.  
  461.                 ## If breaker wasn't found in sample
  462.                 ## events = sample
  463.                 if breakersFound == 0:
  464.                     logger.warn("Breaker '%s' not found for sample '%s' in app '%s'; using default breaker" % (self.breaker, self.name, self.app) )
  465.  
  466.                     if count >= len(sampleLines):
  467.                         events = sampleLines
  468.                     else:
  469.                         events = sampleLines[0:count]
  470.                 else:
  471.                     logger.debug("Found '%s' breakers for sample '%s' in app '%s'" % (breakersFound, self.name, self.app) )
  472.  
  473.             ## Continue to fill events array until len(events) == count
  474.             if len(events) > 0 and len(events) < count:
  475.                 logger.debug("Events fill for sample '%s' in app '%s' less than count (%s vs. %s); continuing fill" % (self.name, self.app, len(events), count) )
  476.                 tempEvents = events[:]
  477.                 if self.sampletype == 'csv':
  478.                     tempEventsDict = eventsDict[:]
  479.                 while len(events) < count:
  480.                     y = 0
  481.                     while len(events) < count and y < len(tempEvents):
  482.                         events.append(tempEvents[y])
  483.                         if self.sampletype == 'csv':
  484.                             eventsDict.append(tempEventsDict[y])
  485.                         y += 1
  486.  
  487.             # logger.debug("events: %s" % pprint.pformat(events))
  488.             logger.debug("Replacing %s tokens in %s events for sample '%s' in app '%s'" % (len(self.tokens), len(events), self.name, self.app))
  489.  
  490.             if self.sampletype == 'csv' and len(eventsDict) > 0:
  491.                 self.index = eventsDict[0]['index']
  492.                 self.host = eventsDict[0]['host']
  493.                 self.source = eventsDict[0]['source']
  494.                 self.sourcetype = eventsDict[0]['sourcetype']
  495.                 logger.debug("Sampletype CSV.  Setting self._out to CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'" \
  496.                             % (self.index, self.host, self.source, self.sourcetype))
  497.                 self._out.refreshconfig(self)
  498.                
  499.             # Find interval before we muck with the event but after we've done event breaking
  500.             if self.mode == 'replay':
  501.                 logger.debug("Finding timestamp to compute interval for events")
  502.                 if self._lastts == None:
  503.                     if self.sampletype == 'csv':
  504.                         self._lastts = self._getTSFromEvent(self._rpevents[self._currentevent][self.timeField])
  505.                     else:
  506.                         self._lastts = self._getTSFromEvent(self._rpevents[self._currentevent])
  507.                 if (self._currentevent+1) < len(self._rpevents):
  508.                     if self.sampletype == 'csv':
  509.                         nextts = self._getTSFromEvent(self._rpevents[self._currentevent+1][self.timeField])
  510.                     else:
  511.                         nextts = self._getTSFromEvent(self._rpevents[self._currentevent+1])
  512.                 else:
  513.                     logger.debug("At end of _rpevents")
  514.                     return 0
  515.  
  516.                 logger.debug('Computing timeDiff nextts: "%s" lastts: "%s"' % (nextts, self._lastts))
  517.  
  518.                 timeDiff = nextts - self._lastts
  519.                 if timeDiff.days >= 0 and timeDiff.seconds >= 0 and timeDiff.microseconds >= 0:
  520.                     partialInterval = float("%d.%06d" % (timeDiff.seconds, timeDiff.microseconds))
  521.                 else:
  522.                     partialInterval = 0
  523.  
  524.                 if self.timeMultiple > 0:
  525.                     partialInterval *= self.timeMultiple
  526.  
  527.                 logger.debug("Setting partialInterval for replay mode with timeMultiple %s: %s %s" % (self.timeMultiple, timeDiff, partialInterval))
  528.                 self._lastts = nextts
  529.  
  530.             ## Iterate events
  531.             for x in range(0, len(events)):
  532.                 event = events[x]
  533.  
  534.                 # Maintain state for every token in a given event
  535.                 # Hash contains keys for each file name which is assigned a list of values
  536.                 # picked from a random line in that file
  537.                 mvhash = { }
  538.  
  539.                 ## Iterate tokens
  540.                 for token in self.tokens:
  541.                     token.mvhash = mvhash
  542.                     event = token.replace(event)
  543.                 if(self.hostToken):
  544.                     # clear the host mvhash every time, because we need to re-randomize it
  545.                     self.hostToken.mvhash =  {}
  546.  
  547.                 # Hack for bundle lines to work with sampletype csv
  548.                 # Basically, bundlelines allows us to create copies of a bundled set of
  549.                 # of events as one event, and this splits those back out so that we properly
  550.                 # send each line with the proper sourcetype and source if we're we're sampletype csv
  551.                 if self.bundlelines and self.sampletype == 'csv':
  552.                     # Trim last newline so we don't end up with blank at end of the array
  553.                     if event[-1] == '\n':
  554.                         event = event[:-1]
  555.                     lines = event.split('\n')
  556.                     logger.debug("Bundlelines set and sampletype csv, breaking event back apart.  %d lines %d eventsDict." % (len(lines), len(eventsDict)))
  557.                     for lineno in range(0, len(lines)):
  558.                         if self.sampletype == 'csv' and (eventsDict[lineno]['index'] != self.index or \
  559.                                                          eventsDict[lineno]['host'] != self.host or \
  560.                                                          eventsDict[lineno]['source'] != self.source or \
  561.                                                          eventsDict[lineno]['sourcetype'] != self.sourcetype):
  562.                             # Flush events before we change all the various parameters
  563.                             logger.debug("Sampletype CSV with bundlelines, parameters changed at event %s.  Flushing output." % lineno)
  564.                             self._out.flush()
  565.                             self.index = eventsDict[lineno]['index']
  566.                             self.host = eventsDict[lineno]['host']
  567.                             # Allow randomizing the host:
  568.                             if(self.hostToken):
  569.                                 self.host = self.hostToken.replace(self.host)
  570.  
  571.                             self.source = eventsDict[lineno]['source']
  572.                             self.sourcetype = eventsDict[lineno]['sourcetype']
  573.                             logger.debug("Sampletype CSV.  Setting self._out to CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'" \
  574.                                          % (self.index, self.host, self.source, self.sourcetype))
  575.                             self._out.refreshconfig(self)
  576.                         self._out.send(lines[lineno].replace('NEWLINEREPLACEDHERE!!!', '\n'))
  577.                     logger.debug("Completed bundlelines event.  Flushing.")
  578.                     self._out.flush()
  579.                 else:
  580.                     # logger.debug("Sample Index: %s Host: %s Source: %s Sourcetype: %s" % (self.index, self.host, self.source, self.sourcetype))
  581.                     # logger.debug("Event Index: %s Host: %s Source: %s Sourcetype: %s" % (sampleDict[x]['index'], sampleDict[x]['host'], sampleDict[x]['source'], sampleDict[x]['sourcetype']))
  582.                     if self.sampletype == 'csv' and (eventsDict[x]['index'] != self.index or \
  583.                                                     eventsDict[x]['host'] != self.host or \
  584.                                                     eventsDict[x]['source'] != self.source or \
  585.                                                     eventsDict[x]['sourcetype'] != self.sourcetype):
  586.                         # Flush events before we change all the various parameters
  587.                         logger.debug("Sampletype CSV, parameters changed at event %s.  Flushing output." % x)
  588.                         self._out.flush()
  589.                         self.index = sampleDict[x]['index']
  590.                         self.host = sampleDict[x]['host']
  591.                         # Allow randomizing the host:
  592.                         if(self.hostToken):
  593.                             self.host = self.hostToken.replace(self.host)
  594.  
  595.                         self.source = sampleDict[x]['source']
  596.                         self.sourcetype = sampleDict[x]['sourcetype']
  597.                         logger.debug("Sampletype CSV.  Setting self._out to CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'" \
  598.                                     % (self.index, self.host, self.source, self.sourcetype))
  599.                         self._out.refreshconfig(self)
  600.                     self._out.send(event)
  601.  
  602.             ## Close file handles
  603.             self._out.flush()
  604.             sampleFH.close()
  605.  
  606.             endTime = datetime.datetime.now()
  607.             timeDiff = endTime - startTime
  608.  
  609.             if self.mode == 'sample':
  610.                 # timeDiffSecs = timeDelta2secs(timeDiff)
  611.                 timeDiffSecs = float("%d.%06d" % (timeDiff.seconds, timeDiff.microseconds))
  612.                 wholeIntervals = timeDiffSecs / self.interval
  613.                 partialInterval = timeDiffSecs % self.interval
  614.                 if partialInterval > 0:
  615.                     self._eventIntervals.append(partialInterval)
  616.  
  617.                 if wholeIntervals > 1:
  618.                     logger.warn("Generation of sample '%s' in app '%s' took longer than interval (%s seconds vs. %s seconds); consider adjusting interval" \
  619.                                 % (self.name, self.app, timeDiff, self.interval) )
  620.  
  621.                 partialInterval = self.interval - partialInterval
  622.            
  623.             # No rest for the wicked!  Or while we're doing backfill
  624.             if self.backfill != None and not self._backfilldone:
  625.                 # Since we would be sleeping, increment the timestamp by the amount of time we're sleeping
  626.                 incsecs = round(partialInterval / 1, 0)
  627.                 incmicrosecs = partialInterval % 1
  628.                 self._backfillts += datetime.timedelta(seconds=incsecs, microseconds=incmicrosecs)
  629.                 partialInterval = 0
  630.  
  631.             self._timeSinceSleep += timeDiff
  632.             if partialInterval > 0:
  633.                 timeDiffFrac = "%d.%06d" % (self._timeSinceSleep.seconds, self._timeSinceSleep.microseconds)
  634.                 logger.info("Generation of sample '%s' in app '%s' completed in %s seconds.  Sleeping for %f seconds" \
  635.                             % (self.name, self.app, timeDiffFrac, partialInterval) )
  636.                 self._timeSinceSleep = datetime.timedelta()
  637.             return partialInterval
  638.         else:
  639.             logger.warn("Sample '%s' in app '%s' contains no data" % (self.name, self.app) )
  640.        
  641.     ## Replaces $SPLUNK_HOME w/ correct pathing
  642.     def pathParser(self, path):
  643.         greatgreatgrandparentdir = os.path.dirname(os.path.dirname(self._c.grandparentdir))
  644.         sharedStorage = ['$SPLUNK_HOME/etc/apps', '$SPLUNK_HOME/etc/users/', '$SPLUNK_HOME/var/run/splunk']
  645.  
  646.         ## Replace windows os.sep w/ nix os.sep
  647.         path = path.replace('\\', '/')
  648.         ## Normalize path to os.sep
  649.         path = os.path.normpath(path)
  650.  
  651.         ## Iterate special paths
  652.         for x in range(0, len(sharedStorage)):
  653.             sharedPath = os.path.normpath(sharedStorage[x])
  654.  
  655.             if path.startswith(sharedPath):
  656.                 path.replace('$SPLUNK_HOME', greatgreatgrandparentdir)
  657.                 break
  658.  
  659.         ## Split path
  660.         path = path.split(os.sep)
  661.  
  662.         ## Iterate path segments
  663.         for x in range(0, len(path)):
  664.             segment = path[x].lstrip('$')
  665.             ## If segement is an environment variable then replace
  666.             if os.environ.has_key(segment):
  667.                 path[x] = os.environ[segment]
  668.  
  669.         ## Join path
  670.         path = os.sep.join(path)
  671.  
  672.         return path
  673.  
  674.     def _getTSFromEvent(self, event):
  675.         currentTime = None
  676.         formats = [ ]
  677.         # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
  678.         # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
  679.         for token in self.tokens:
  680.             try:
  681.                 formats.append(token.token)
  682.                 # logger.debug("Searching for token '%s' in event '%s'" % (token.token, event))
  683.                 results = token._search(event)
  684.                 if results:
  685.                     timeFormat = token.replacement
  686.                     group = 0 if len(results.groups()) == 0 else 1
  687.                     timeString = results.group(group)
  688.                     # logger.debug("Testing '%s' as a time string against '%s'" % (timeString, timeFormat))
  689.                     if timeFormat == "%s":
  690.                         ts = float(timeString) if len(timeString) < 10 else float(timeString) / (10**(len(timeString)-10))
  691.                         currentTime = datetime.datetime.fromtimestamp(ts)
  692.                     else:
  693.                         currentTime = datetime.datetime.strptime(timeString, timeFormat)
  694.                     logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
  695.                     if type(currentTime) == datetime.datetime:
  696.                         break
  697.             except ValueError:
  698.                 logger.debug("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" % (timeString, timeFormat, event))
  699.         if type(currentTime) != datetime.datetime:
  700.             # Total fail
  701.             logger.error("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
  702.             raise ValueError("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
  703.         # Check to make sure we parsed a year
  704.         if currentTime.year == 1900:
  705.             currentTime = currentTime.replace(year=self.now().year)
  706.         return currentTime
  707.    
  708.     def saveState(self):
  709.         """Saves state of all integer IDs of this sample to a file so when we restart we'll pick them up"""
  710.         for token in self.tokens:
  711.             if token.replacementType == 'integerid':
  712.                 stateFile = open(os.path.join(self._c.sampleDir, 'state.'+urllib.pathname2url(token.token)), 'w')
  713.                 stateFile.write(token.replacement)
  714.                 stateFile.close()
  715.  
  716.     def now(self, utcnow=False, realnow=False):
  717.         # logger.info("Getting time (timezone %s)" % (self.timezone))
  718.         if not self._backfilldone and not self._backfillts == None and not realnow:
  719.             return self._backfillts
  720.         elif self.timezone.days > 0:
  721.             return datetime.datetime.now()
  722.         else:
  723.             return datetime.datetime.utcnow() + self.timezone
  724.  
  725.     def utcnow(self):
  726.         return self.now(utcnow=True)
  727.  
  728.        
class Token:
    """Contains data and methods for replacing a token in a given sample"""
    # Regular expression source; compiled lazily into _tokenre on first use
    token = None
    # Selects the replacement strategy; values compared against in this class
    # include 'static', 'timestamp', 'replaytimestamp', 'random', 'rated',
    # 'randomRated' and 'integerid'
    replacementType = None
    # Replacement specification: returned as-is for 'static', used as a
    # strftime format for timestamps, or a spec such as 'ipv4' / 'integer[a:b]'
    replacement = None
    replacementIncrementAmount = 100
    replacementIncrementAmountRandomness = 0.1
    hourOfDayMultiplier = None
    dayOfWeekMultiplier = None
    # Owning sample; assigned in __init__
    sample = None
    # Class-level default shared by all instances until reassigned; the
    # sample assigns a fresh per-event dict before calling replace()
    mvhash = { }

    # Replay bookkeeping used for 'replaytimestamp'; both are reset to None
    # at the end of replace()
    _replaytd = None
    _lastts = None
    # Cache for the compiled form of `token`
    _tokenre = None
    _tokenfile = None
    _tokents = None
    # Both are initialised to (None, None) in __init__
    _earliestTime = None
    _latestTime = None
    _replacementFile = None
    _replacementColumn = None
    # Cached results of matching `replacement` against the integer[...],
    # float[...], hex(...), string(...) and list[...] spec patterns
    _integerMatch = None
    _floatMatch = None
    _hexMatch = None
    _stringMatch = None
    _listMatch = None
  755.    
  756.     def __init__(self, sample):
  757.         self.sample = sample
  758.        
  759.         # Logger already setup by config, just get an instance
  760.         logger = logging.getLogger('eventgen')
  761.         globals()['logger'] = logger
  762.        
  763.         self._earliestTime = (None, None)
  764.         self._latestTime = (None, None)
  765.        
  766.     def __str__(self):
  767.         """Only used for debugging, outputs a pretty printed representation of this token"""
  768.         # Eliminate recursive going back to parent
  769.         temp = dict([ (key, value) for (key, value) in self.__dict__.items() if key != 'sample' ])
  770.         return pprint.pformat(temp)
  771.  
  772.     def __repr__(self):
  773.         return self.__str__()
  774.    
  775.     def _match(self, event):
  776.         """Executes regular expression match and returns the re.Match object"""
  777.         if self._tokenre == None:
  778.             self._tokenre = re.compile(self.token)
  779.         return self._tokenre.match(event)
  780.        
  781.     def _search(self, event):
  782.         """Executes regular expression search and returns the re.Match object"""
  783.         if self._tokenre == None:
  784.             self._tokenre = re.compile(self.token)
  785.         return self._tokenre.search(event)
  786.        
  787.     def _finditer(self, event):
  788.         """Executes regular expression finditer and returns the re.Match object"""
  789.         if self._tokenre == None:
  790.             self._tokenre = re.compile(self.token)
  791.         return self._tokenre.finditer(event)
  792.  
  793.     def _findall(self, event):
  794.         """Executes regular expression finditer and returns the re.Match object"""
  795.         if self._tokenre == None:
  796.             self._tokenre = re.compile(self.token)
  797.         return self._tokenre.findall(event)
  798.        
  799.     def replace(self, event):
  800.         """Replaces all instances of this token in provided event and returns event"""
  801.         offset = 0
  802.         tokenMatch = list(self._finditer(event))
  803.         # logger.debug("Checking for match for token: '%s'" % (self.token))
  804.  
  805.         if len(tokenMatch) > 0:
  806.             # 9/7/13  Trying to determine the logic for doing two regex
  807.             # searches, one to find the list of potential replacements and
  808.             # another to find the actual string to replace, so commenting
  809.             # out and recoding... may cause regressions.
  810.  
  811.             # # 5/28/12 Changing logic to account for needing old to match
  812.             # # the right token we're actually replacing
  813.             # # This will call getReplacement for every match which is more
  814.             # # expensive, but necessary.
  815.            
  816.             # # Find old in case of error
  817.             # oldMatch = self._search(event)
  818.  
  819.             # if oldMatch:
  820.             #     # old = event[oldMatch.start(group):oldMatch.end(group)]
  821.             #     group = 0 if len(oldMatch.groups()) == 0 else 1
  822.             #     old = oldMatch.group(group)
  823.             # else:
  824.             #     old = ""
  825.            
  826.             # logger.debug("Got match for token: '%s'" % (self.token))
  827.             # replacement = self._getReplacement(old)
  828.  
  829.             replacement = self._getReplacement(event[tokenMatch[0].start(0):tokenMatch[0].end(0)])
  830.            
  831.             if replacement is not None:
  832.                 # logger.debug("Replacement: '%s'" % replacement)
  833.                 ## Iterate matches
  834.                 for match in tokenMatch:
  835.                     # logger.debug("Match: %s" % (match))
  836.                     try:
  837.                         matchStart = match.start(1) + offset
  838.                         matchEnd = match.end(1) + offset
  839.                         startEvent = event[:matchStart]
  840.                         endEvent = event[matchEnd:]
  841.                         # In order to not break legacy which might replace the same timestamp
  842.                         # with the same value in multiple matches, here we'll include
  843.                         # ones that need to be replaced for every match
  844.                         if self.replacementType in ('replaytimestamp'):
  845.                             replacement = self._getReplacement(event[matchStart:matchEnd])
  846.                         offset += len(replacement) - len(match.group(1))
  847.                     except:
  848.                         matchStart = match.start(0) + offset
  849.                         matchEnd = match.end(0) + offset
  850.                         startEvent = event[:matchStart]
  851.                         endEvent = event[matchEnd:]
  852.                         # In order to not break legacy which might replace the same timestamp
  853.                         # with the same value in multiple matches, here we'll include
  854.                         # ones that need to be replaced for every match
  855.                         if self.replacementType in ('replaytimestamp'):
  856.                             replacement = self._getReplacement(event[matchStart:matchEnd])
  857.                         offset += len(replacement) - len(match.group(0))
  858.                     # logger.debug("matchStart %d matchEnd %d offset %d" % (matchStart, matchEnd, offset))
  859.                     event = startEvent + replacement + endEvent
  860.                
  861.                 # Reset replay internal variables for this token
  862.                 self._replaytd = None
  863.                 self._lastts = None
  864.         return event
  865.                    
  866.     def _getReplacement(self, old=None):
  867.         if self.replacementType == 'static':
  868.             return self.replacement
  869.         elif self.replacementType in ('timestamp', 'replaytimestamp'):
  870.             if self.sample.earliest and self.sample.latest:
  871.                 # First optimization, we need only store earliest and latest
  872.                 # as an offset of now if they're relative times
  873.                 if self.sample._earliestParsed != None:
  874.                     earliestTime = self.sample.now() - self.sample._earliestParsed
  875.                     logger.debug("Using cached earliest time: %s" % earliestTime)
  876.                 else:
  877.                     if self.sample.earliest.strip()[0:1] == '+' or \
  878.                             self.sample.earliest.strip()[0:1] == '-' or \
  879.                             self.sample.earliest == 'now':
  880.                         tempearliest = timeParser(self.sample.earliest, timezone=self.sample.timezone)
  881.                         temptd = self.sample.now(realnow=True) - tempearliest
  882.                         self.sample._earliestParsed = datetime.timedelta(days=temptd.days, seconds=temptd.seconds)
  883.                         earliestTime = self.sample.now() - self.sample._earliestParsed
  884.                         logger.debug("Calulating earliestParsed as '%s' with earliestTime as '%s' and self.sample.earliest as '%s'" % (self.sample._earliestParsed, earliestTime, tempearliest))
  885.                     else:
  886.                         earliestTime = timeParser(self.sample.earliest, timezone=self.sample.timezone)
  887.                         logger.debug("earliestTime as absolute time '%s'" % earliestTime)
  888.  
  889.                 if self.sample._latestParsed != None:
  890.                     latestTime = self.sample.now() - self.sample._latestParsed
  891.                     logger.debug("Using cached latestTime: %s" % latestTime)
  892.                 else:
  893.                     if self.sample.latest.strip()[0:1] == '+' or \
  894.                             self.sample.latest.strip()[0:1] == '-' or \
  895.                             self.sample.latest == 'now':
  896.                         templatest = timeParser(self.sample.latest, timezone=self.sample.timezone)
  897.                         temptd = self.sample.now(realnow=True) - templatest
  898.                         self.sample._latestParsed = datetime.timedelta(days=temptd.days, seconds=temptd.seconds)
  899.                         latestTime = self.sample.now() - self.sample._latestParsed
  900.                         logger.debug("Calulating latestParsed as '%s' with latestTime as '%s' and self.sample.latest as '%s'" % (self.sample._latestParsed, latestTime, templatest))
  901.                     else:
  902.                         latestTime = timeParser(self.sample.latest, timezone=self.sample.timezone)
  903.                         logger.debug("latstTime as absolute time '%s'" % latestTime)
  904.  
  905.                 if earliestTime and latestTime:
  906.                     if latestTime>=earliestTime:
  907.                         minDelta = 0
  908.  
  909.                         ## Compute timeDelta as total_seconds
  910.                         td = latestTime - earliestTime
  911.                         maxDelta = timeDelta2secs(td)
  912.  
  913.                         ## Get random timeDelta
  914.                         randomDelta = datetime.timedelta(seconds=random.randint(minDelta, maxDelta))
  915.  
  916.                         ## Compute replacmentTime
  917.                         replacementTime = latestTime - randomDelta
  918.  
  919.                         # logger.debug("Generating timestamp for sample '%s' with randomDelta %s, minDelta %s, maxDelta %s, earliestTime %s, latestTime %s, earliest: %s, latest: %s" % (self.sample.name, randomDelta, minDelta, maxDelta, earliestTime, latestTime, self.sample.earliest, self.sample.latest))
  920.                        
  921.                         if self.replacementType == 'replaytimestamp':
  922.                             if old != None and len(old) > 0:
  923.                                 # Determine type of timestamp to use for this token
  924.                                 # We can either be a string with one strptime format
  925.                                 # or we can be a json formatted list of strptime formats
  926.                                 currentts = None
  927.                                 try:
  928.                                     strptimelist = json.loads(self.replacement)  
  929.                                     for currentformat in strptimelist:
  930.                                         try:
  931.                                             timeformat = currentformat
  932.                                             if timeformat == "%s":
  933.                                                 ts = float(old) if  len(old) < 10 else float(old) / (10**(len(old)-10))
  934.                                                 currentts = datetime.datetime.fromtimestamp(ts)
  935.                                             else:
  936.                                                 currentts = datetime.datetime.strptime(old, timeformat)
  937.                                             # logger.debug("Old '%s' Timeformat '%s' currentts '%s'" % (old, timeformat, currentts))
  938.                                             if type(currentts) == datetime.datetime:
  939.                                                 break
  940.                                         except ValueError:
  941.                                             pass
  942.                                     if type(currentts) != datetime.datetime:
  943.                                         # Total fail
  944.                                         logger.error("Can't find strptime format for this timestamp '%s' in the list of formats.  Returning original value" % old)
  945.                                         return old
  946.                                 except ValueError:
  947.                                     # Not JSON, try to read as text
  948.                                     timeformat = self.replacement
  949.                                     try:
  950.                                         if timeformat == "%s":
  951.                                             ts = float(old) if  len(old) < 10 else float(old) / (10**(len(old)-10))
  952.                                             currentts = datetime.datetime.fromtimestamp(ts)
  953.                                         else:
  954.                                             currentts = datetime.datetime.strptime(old, timeformat)
  955.                                         # logger.debug("Timeformat '%s' currentts '%s'" % (timeformat, currentts))
  956.                                     except ValueError:
  957.                                         # Total fail
  958.                                         logger.error("Can't match strptime format ('%s') to this timestamp '%s'.  Returning original value" % (timeformat, old))
  959.                                         return old
  960.                                    
  961.                                     # Can't parse as strptime, try JSON
  962.                                
  963.                                 # Check to make sure we parsed a year
  964.                                 if currentts.year == 1900:
  965.                                     currentts = currentts.replace(year=self.sample.now().year)
  966.                                 # We should now know the timeformat and currentts associated with this event
  967.                                 # If we're the first, save those values        
  968.                                 if self._replaytd == None:
  969.                                     self._replaytd = replacementTime - currentts
  970.                                
  971.                                 # logger.debug("replaytd %s" % self._replaytd)
  972.                                 replacementTime = currentts + self._replaytd
  973.                                
  974.                                 # Randomize time a bit between last event and this one
  975.                                 # Note that we'll always end up shortening the time between
  976.                                 # events because we don't know when the next timestamp is going to be
  977.                                 if self.sample.bundlelines:
  978.                                     if self._lastts == None:
  979.                                         self._lastts = replacementTime
  980.                                     oldtd = replacementTime - self._lastts
  981.                                     randomsecs = random.randint(0, oldtd.seconds)
  982.                                     if oldtd.seconds > 0:
  983.                                         randommicrosecs = random.randint(0, 1000000)
  984.                                     else:
  985.                                         randommicrosecs = random.randint(0, oldtd.microseconds)
  986.                                     randomtd = datetime.timedelta(seconds=randomsecs, microseconds=randommicrosecs)
  987.                                     replacementTime -= randomtd
  988.                                 else:
  989.                                     randomtd = datetime.timedelta()
  990.                                 self._lastts = replacementTime
  991.                                 replacementTime = replacementTime.strftime(timeformat)
  992.                                 # logger.debug("Old '%s' Timeformat '%s' currentts '%s' replacementTime '%s' replaytd '%s' randomtd '%s'" \
  993.                                 #             % (old, timeformat, currentts, replacementTime, self._replaytd, randomtd))
  994.                             else:
  995.                                 logger.error("Could not find old value, needed for replaytimestamp")
  996.                                 return old
  997.                         else:
  998.                             replacementTime = replacementTime.strftime(self.replacement)
  999.                         ## replacementTime == replacement for invalid strptime specifiers
  1000.                         if replacementTime != self.replacement.replace('%', ''):
  1001.                             return replacementTime
  1002.                         else:
  1003.                             logger.error("Invalid strptime specifier '%s' detected; will not replace" \
  1004.                                         % (self.replacement) )
  1005.                             return old
  1006.                     ## earliestTime/latestTime not proper
  1007.                     else:
  1008.                         logger.error("Earliest specifier '%s', value '%s' is greater than latest specifier '%s', value '%s' for sample '%s'; will not replace" \
  1009.                                     % (self.sample.earliest, earliestTime, self.sample.latest, latestTime, self.sample.name) )
  1010.                         return old
  1011.             ## earliest/latest not proper
  1012.             else:
  1013.                 logger.error('Earliest or latest specifier were not set; will not replace')
  1014.                 return old
  1015.         elif self.replacementType in ('random', 'rated', 'randomRated'):
  1016.             ## Validations:
  1017.             if self._integerMatch != None:
  1018.                 integerMatch = self._integerMatch
  1019.             else:
  1020.                 integerRE = re.compile('integer\[([-]?\d+):([-]?\d+)\]', re.I)
  1021.                 integerMatch = integerRE.match(self.replacement)
  1022.                 self._integerMatch = integerMatch
  1023.            
  1024.             if self._floatMatch != None:
  1025.                 floatMatch = self._floatMatch
  1026.             else:
  1027.                 floatRE = re.compile('float\[(\d+)\.(\d+):(\d+)\.(\d+)\]', re.I)
  1028.                 floatMatch = floatRE.match(self.replacement)
  1029.                 self._floatMatch = floatMatch
  1030.  
  1031.             if self._stringMatch != None:
  1032.                 stringMatch = self._stringMatch
  1033.             else:
  1034.                 stringRE = re.compile('string\((\d+)\)', re.I)
  1035.                 stringMatch = stringRE.match(self.replacement)
  1036.                 self._stringMatch = stringMatch
  1037.  
  1038.             if self._hexMatch != None:
  1039.                 hexMatch = self._hexMatch
  1040.             else:      
  1041.                 hexRE = re.compile('hex\((\d+)\)', re.I)
  1042.                 hexMatch = hexRE.match(self.replacement)
  1043.                 self._hexMatch = hexMatch
  1044.  
  1045.             if self._listMatch != None:
  1046.                 listMatch = self._listMatch
  1047.             else:
  1048.                 listRE = re.compile('list(\[[^\]]+\])', re.I)
  1049.                 listMatch = listRE.match(self.replacement)
  1050.                 self._listMatch = listMatch
  1051.  
  1052.             ## Valid replacements: ipv4 | ipv6 | integer[<start>:<end>] | string(<i>)
  1053.             if self.replacement.lower() == 'ipv4':
  1054.                 x = 0
  1055.                 replacement = ''
  1056.  
  1057.                 while x < 4:
  1058.                     replacement += str(random.randint(0, 255)) + '.'
  1059.                     x += 1
  1060.  
  1061.                 replacement = replacement.strip('.')
  1062.                 return replacement
  1063.             elif self.replacement.lower() == 'ipv6':
  1064.                 x = 0
  1065.                 replacement = ''
  1066.  
  1067.                 while x < 8:
  1068.                     replacement += hex(random.randint(0, 65535))[2:] + ':'
  1069.                     x += 1
  1070.  
  1071.                 replacement = replacement.strip(':')
  1072.                 return replacement
  1073.             elif self.replacement.lower() == 'mac':
  1074.                 x = 0
  1075.                 replacement = ''
  1076.  
  1077.                 ## Give me 6 blocks of 2 hex
  1078.                 while x < 6:
  1079.                     y = 0
  1080.                     while y < 2:
  1081.                         replacement += hex(random.randint(0, 15))[2:]
  1082.                         y += 1
  1083.                     replacement += ':'
  1084.                     x += 1
  1085.  
  1086.                 replacement = replacement.strip(':')
  1087.                 return replacement
  1088.             elif self.replacement.lower() == 'guid':
  1089.                 return str(uuid.uuid4())
  1090.             elif integerMatch:
  1091.                 startInt = int(integerMatch.group(1))
  1092.                 endInt = int(integerMatch.group(2))
  1093.  
  1094.                 if endInt >= startInt:
  1095.                     replacementInt = random.randint(startInt, endInt)
  1096.                     if self.replacementType == 'rated':
  1097.                         rateFactor = 1.0
  1098.                         if type(self.sample.hourOfDayRate) == dict:
  1099.                             try:
  1100.                                 rateFactor *= self.sample.hourOfDayRate[str(self.sample.now())]
  1101.                             except KeyError:
  1102.                                 import traceback
  1103.                                 stack =  traceback.format_exc()
  1104.                                 logger.error("Hour of day rate failed for token %s.  Stacktrace %s" % stack)
  1105.                         if type(self.sample.dayOfWeekRate) == dict:
  1106.                             try:
  1107.                                 weekday = datetime.date.weekday(self.sample.now())
  1108.                                 if weekday == 6:
  1109.                                     weekday = 0
  1110.                                 else:
  1111.                                     weekday += 1
  1112.                                 rateFactor *= self.sample.dayOfWeekRate[str(weekday)]
  1113.                             except KeyError:
  1114.                                 import traceback
  1115.                                 stack =  traceback.format_exc()
  1116.                                 logger.error("Day of week rate failed.  Stacktrace %s" % stack)
  1117.                         replacementInt = int(round(replacementInt * rateFactor, 0))
  1118.                     if self.replacementType == 'randomRated':
  1119.                         multiplier = 1.0
  1120.                         now = self.sample.now()
  1121.                         if type(self.hourOfDayMultiplier) == dict:
  1122.                             try:
  1123.                                 multiplier *= self.hourOfDayMultiplier[str(now.hour)]
  1124.                             except KeyError:
  1125.                                 import traceback
  1126.                                 stack =  traceback.format_exc()
  1127.                                 logger.error("randomRated: Hour of day rate failed for token %s.  Stacktrace %s" % (self.token, stack))
  1128.                         if type(self.dayOfWeekMultiplier) == dict:
  1129.                             try:
  1130.                                 weekday = datetime.date.weekday(self.sample.now())
  1131.                                 if weekday == 6:
  1132.                                     weekday = 0
  1133.                                 else:
  1134.                                     weekday += 1
  1135.                                 multiplier *= self.dayOfWeekMultiplier[str(weekday)]
  1136.                             except KeyError:
  1137.                                 import traceback
  1138.                                 stack =  traceback.format_exc()
  1139.                                 logger.error("randomRated: Day of week rate failed.  Stacktrace %s" % stack)
  1140.                         replacementInt = int(round(replacementInt * multiplier, 0))
  1141.                     replacement = str(replacementInt)
  1142.                     return replacement
  1143.                 else:
  1144.                     logger.error("Start integer %s greater than end integer %s; will not replace" % (startInt, endInt) )
  1145.                     return old
  1146.             elif floatMatch:
  1147.                 try:
  1148.                     startFloat = float(floatMatch.group(1)+'.'+floatMatch.group(2))
  1149.                     endFloat = float(floatMatch.group(3)+'.'+floatMatch.group(4))
  1150.                    
  1151.                     if endFloat >= startFloat:
  1152.                         floatret = round(random.uniform(startFloat,endFloat), len(floatMatch.group(2)))
  1153.                         if self.replacementType == 'rated':
  1154.                             rateFactor = 1.0
  1155.                             now = self.sample.now()
  1156.                             if type(self.sample.hourOfDayRate) == dict:
  1157.                                 try:
  1158.                                     rateFactor *= self.sample.hourOfDayRate[str(now.hour)]
  1159.                                 except KeyError:
  1160.                                     import traceback
  1161.                                     stack =  traceback.format_exc()
  1162.                                     logger.error("Hour of day rate failed for token %s.  Stacktrace %s" % stack)
  1163.                             if type(self.sample.dayOfWeekRate) == dict:
  1164.                                 try:
  1165.                                     weekday = datetime.date.weekday(now)
  1166.                                     if weekday == 6:
  1167.                                         weekday = 0
  1168.                                     else:
  1169.                                         weekday += 1
  1170.                                     rateFactor *= self.sample.dayOfWeekRate[str(weekday)]
  1171.                                 except KeyError:
  1172.                                     import traceback
  1173.                                     stack =  traceback.format_exc()
  1174.                                     logger.error("Day of week rate failed.  Stacktrace %s" % stack)
  1175.                             floatret = round(floatret * rateFactor, len(floatMatch.group(2)))
  1176.                         if self.replacementType == 'randomRated':
  1177.                             multiplier = 1.0
  1178.                             now = self.sample.now()
  1179.                             if type(self.hourOfDayMultiplier) == dict:
  1180.                                 try:
  1181.                                     multiplier *= self.hourOfDayMultiplier[str(now.hour)]
  1182.                                 except KeyError:
  1183.                                     import traceback
  1184.                                     stack =  traceback.format_exc()
  1185.                                     logger.error("randomRated: Hour of day rate failed for token %s.  Stacktrace %s" % (self.token, stack))
  1186.                             if type(self.dayOfWeekMultiplier) == dict:
  1187.                                 try:
  1188.                                     weekday = datetime.date.weekday(self.sample.now())
  1189.                                     if weekday == 6:
  1190.                                         weekday = 0
  1191.                                     else:
  1192.                                         weekday += 1
  1193.                                     multiplier *= self.dayOfWeekMultiplier[str(weekday)]
  1194.                                 except KeyError:
  1195.                                     import traceback
  1196.                                     stack =  traceback.format_exc()
  1197.                                     logger.error("randomRated: Day of week rate failed.  Stacktrace %s" % stack)
  1198.                             floatret = round(floatret * multiplier, len(floatMatch.group(2)))
  1199.                         floatret = str(floatret)
  1200.                         return floatret
  1201.                     else:
  1202.                         logger.error("Start float %s greater than end float %s; will not replace" % (startFloat, endFloat))
  1203.                         return old
  1204.                 except ValueError:
  1205.                     logger.error("Could not parse float[%s.%s:%s.%s]" % (floatMatch.group(1), floatMatch.group(2), \
  1206.                                 floatMatch.group(3), floatMatch.group(4)))
  1207.                     return old
  1208.             elif stringMatch:
  1209.                 strLength = int(stringMatch.group(1))
  1210.                 if strLength == 0:
  1211.                     return ''
  1212.                 elif strLength > 0:
  1213.                     replacement = ''
  1214.                     while len(replacement) < strLength:
  1215.                         ## Generate a random ASCII between dec 33->126
  1216.                         replacement += chr(random.randint(33, 126))
  1217.                         ## Practice safe strings
  1218.                         replacement = re.sub('%[0-9a-fA-F]+', '', urllib.quote(replacement))
  1219.                    
  1220.                     return replacement
  1221.                 else:
  1222.                     logger.error("Length specifier %s for string replacement must be greater than 0; will not replace" % (strLength) )
  1223.                     return old
  1224.             elif hexMatch:
  1225.                 strLength = int(hexMatch.group(1))
  1226.  
  1227.                 replacement = ''
  1228.                 hexList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
  1229.                 while len(replacement) < strLength:
  1230.                     replacement += hexList[random.randint(0, 15)]
  1231.  
  1232.                 return replacement
  1233.             elif listMatch:
  1234.                 try:
  1235.                     value = json.loads(listMatch.group(1))
  1236.                 except:
  1237.                     logger.error("Could not parse json for '%s' in sample '%s'" % (listMatch.group(1), self.sample.name))
  1238.                     return old
  1239.                 return random.choice(value)
  1240.  
  1241.             else:
  1242.                 logger.error("Unknown replacement value '%s' for replacementType '%s'; will not replace" % (self.replacement, self.replacementType) )
  1243.                 return old
  1244.         elif self.replacementType in ('file', 'mvfile'):
  1245.             if self._replacementFile != None:
  1246.                 replacementFile = self._replacementFile
  1247.                 replacementColumn = self._replacementColumn
  1248.             else:
  1249.                 try:
  1250.                     paths = self.replacement.split(':')
  1251.                     if(len(paths) == 1):
  1252.                         replacementColumn = 0
  1253.                     else:
  1254.                         try: # When it's not a mvfile, there's no number on the end:
  1255.                             replacementColumn = int(paths[-1])
  1256.                         except (ValueError):
  1257.                             replacementColumn = 0
  1258.                     if(replacementColumn > 0):
  1259.                         # This supports having a drive-letter colon
  1260.                         replacementFile = self.sample.pathParser(":".join(paths[0:-1]))
  1261.                     else:
  1262.                         replacementFile = self.sample.pathParser(self.replacement)
  1263.                 except ValueError, e:
  1264.                     logger.error("Replacement string '%s' improperly formatted.  Should be /path/to/file or /path/to/file:column" % (self.replacement))
  1265.                     return old
  1266.                 self._replacementFile = replacementFile
  1267.                 self._replacementColumn = replacementColumn
  1268.  
  1269.             # If we've seen this file before, simply return already read results
  1270.             # This applies only if we're looking at a multivalue file and we want to
  1271.             # return the same random pick on every iteration
  1272.             if replacementColumn > 0 and replacementFile in self.mvhash:
  1273.                 if replacementColumn > len(self.mvhash[replacementFile]):
  1274.                     logger.error("Index for column '%s' in replacement file '%s' is out of bounds" % (replacementColumn, replacementFile))
  1275.                     return old
  1276.                 else:
  1277.                     # logger.debug("Returning mvhash: %s" % self.mvhash[replacementFile][replacementColumn-1])
  1278.                     return self.mvhash[replacementFile][replacementColumn-1]
  1279.             else:
  1280.                 # Adding caching of the token file to avoid reading it every iteration
  1281.                 if self._tokenfile != None:
  1282.                     replacementLines = self._tokenfile
  1283.                 ## Otherwise, lets read the file and build our cached results, pick a result and return it
  1284.                 else:
  1285.                     # logger.debug("replacementFile: %s replacementColumn: %s" % (replacementFile, replacementColumn))
  1286.                     if os.path.exists(replacementFile) and os.path.isfile(replacementFile):
  1287.                         replacementFH = open(replacementFile, 'rU')
  1288.                         replacementLines = replacementFH.readlines()
  1289.                         replacementFH.close()
  1290.  
  1291.                         if len(replacementLines) == 0:
  1292.                             logger.error("Replacement file '%s' is empty; will not replace" % (replacementFile) )
  1293.                             return old
  1294.                         else:
  1295.                             self._tokenfile = replacementLines
  1296.                     else:
  1297.                         logger.error("File '%s' does not exist" % (replacementFile))
  1298.                         return old
  1299.  
  1300.                 replacement = replacementLines[random.randint(0, len(replacementLines)-1)].strip()
  1301.  
  1302.                 if replacementColumn > 0:
  1303.                     self.mvhash[replacementFile] = replacement.split(',')
  1304.  
  1305.                     if replacementColumn > len(self.mvhash[replacementFile]):
  1306.                         logger.error("Index for column '%s' in replacement file '%s' is out of bounds" % (replacementColumn, replacementFile))
  1307.                         return old
  1308.                     else:
  1309.                         return self.mvhash[replacementFile][replacementColumn-1]
  1310.                 else:
  1311.                     return replacement
  1312.         elif self.replacementType == 'integerid':
  1313.             temp = self.replacement
  1314.             if self.replacementIncrementAmountRandomness > 0:
  1315.                 incrementAmount = int(self.replacementIncrementAmount)
  1316.                 incrementAmountRandomness = float(self.replacementIncrementAmountRandomness)
  1317.                 lowerRange = int(incrementAmount - (incrementAmount * incrementAmountRandomness/2))
  1318.                 upperRange = int(incrementAmount + (incrementAmount * incrementAmountRandomness/2))
  1319.                 incrementAmount = random.randint(lowerRange, upperRange)
  1320.             else:
  1321.                 incrementAmount = self.replacementIncrementAmount
  1322.             self.replacement = str(int(self.replacement) + int(incrementAmount))
  1323.             return temp
  1324.         else:
  1325.             logger.error("Unknown replacementType '%s'; will not replace" % (self.replacementType) )
  1326.             return old
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement