# Untitled

#!/usr/bin/python
import time, requests, json, csv, sys
from datetime import datetime
import argparse
import splunk.Intersplunk
from requests.auth import HTTPBasicAuth

# TODO:
#   - filtering of results
#   - end the scroll once the search is finished
#   - error handling
#   - authentication support (encrypted credential storage?)
#   - support for generating commands, for partial results (ticket?)

# Format of the '@timestamp' field in the Elasticsearch documents.
timeFormat = '%Y-%m-%dT%H:%M:%S.%fZ'

# NOTE(review): these credentials are hard-coded in source and sent with every
# request; they should be moved to a secure credential store.
ES_AUTH = ('splunk-read', 'iefaiZ8ael')

# Request bodies are JSON; declare it so servers that check the content type
# accept the request.
JSON_HEADERS = {'Content-Type': 'application/json'}

# Initial search body: newest first, restricted to the search time window.
# The first placeholder receives the (optional) user supplied bool clause.
SEARCH_BODY = (
    '{ "sort": {"@timestamp": "desc"}, "query": { "bool": { %s "filter": '
    '{ "range": { "@timestamp": { "gte": "%s", "lte": "%s" } } } } } }'
)

# Column order of the CSV written in --onlyraw mode.
RAW_COLUMNS = ['_time', 'index', 'host', 'sourcetype', 'source', '_raw']

# Values of --onlyraw that switch the option off (compared case-insensitively).
FALSE_WORDS = frozenset(['', '0', 'false', 'f', 'no', 'n', 'off'])

UNIX_EPOCH = datetime(1970, 1, 1)


def read_search_info(lines):
    """Return the first row of the CSV file named by an 'infoPath:' line.

    *lines* is an iterable of text lines (the script passes ``sys.stdin``).
    Every line is consumed.  For each line starting with ``infoPath:`` the
    rest of the line is opened as a CSV file and its first row is loaded; if
    several such lines occur, the last loaded row wins.

    Returns the row as a dict, or an empty dict when nothing was loaded.
    """
    info = {}
    for line in lines:
        if not line.startswith("infoPath:"):
            continue
        with open(line[9:].rstrip()) as handle:
            first_row = next(csv.DictReader(handle), None)
        if first_row is not None:
            info = first_row
    return info


def format_es_time(epoch):
    """Format UTC epoch seconds as 'YYYY-MM-DDTHH:MM:SS.ffffffZ'.

    The fraction is always rendered as six digits, so very small fractions
    are never printed in scientific notation.
    """
    whole = int(epoch // 1)
    micros = int(round((epoch - whole) * 1000000))
    if micros == 1000000:
        # Rounding carried over into the next full second.
        whole, micros = whole + 1, 0
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(whole))
    return '%s.%06dZ' % (stamp, micros)


def search_window(info):
    """Return the (earliest, latest) timestamps of the search as strings.

    Reads '_search_et' and '_search_lt' from *info*.  Missing or empty values
    fall back to 0 and 2000000000 respectively, i.e. effectively 'All Time'.
    """
    earliest = float(info.get('_search_et') or 0)
    latest = float(info.get('_search_lt') or 2000000000)
    return format_es_time(earliest), format_es_time(latest)


def parse_bool(text):
    """Interpret a command line value as a boolean.

    Returns False for the usual spellings of "off" (see FALSE_WORDS) and True
    for anything else, so values that used to enable the option still do.
    """
    return text.strip().lower() not in FALSE_WORDS


def build_parser():
    """Build the argument parser for the parameters given in the SPL command.

    Only --index and the query itself are mandatory.
    """
    parser = argparse.ArgumentParser(description='Query Elasticsearch.')
    parser.add_argument('--server', dest='server', default='http://127.0.0.1:9200', required=False,
        help='URI to the Elasticsearch server, defaults to http://127.0.0.1:9200')
    parser.add_argument('--index', dest='index', required=True,
        help='index to query the data from (required)')
    parser.add_argument('--sourcetype', dest='stype', default='', required=False,
        help='sourcetype to query the data from, defaults to all')
    parser.add_argument('--onlyraw', dest='onlyraw', default=False, required=False, type=parse_bool,
        help='do not get field extractions (true/false), defaults to False')
    parser.add_argument('query', nargs=1,
        help='search terms (must be enclosed in quotation marks)')
    return parser


def query_clause(raw):
    """Turn the user's search terms into a fragment of the bool query.

    Single quotes are replaced by double quotes.  A non-blank fragment gets a
    trailing comma because it is placed in front of the "filter" clause of
    SEARCH_BODY; a blank one yields an empty string.
    """
    clause = raw.replace("'", '"').strip()
    if clause:
        clause += ","
    return clause


def search_url(server, index, stype):
    """Return the URL of the initial scrolling search.

    An empty *stype* is left out instead of producing an empty path segment.
    """
    path = '/'.join(part for part in (index, stype) if part)
    return '{}/{}/_search?size=10000&scroll=1h'.format(server, path)


def event_epoch(source):
    """Return the '@timestamp' of a document as UTC epoch seconds.

    The value is parsed with ``timeFormat`` and keeps its fractional part.
    Returns None when the field is missing or cannot be parsed.
    """
    try:
        parsed = datetime.strptime(source['@timestamp'], timeFormat)
    except (KeyError, TypeError, ValueError):
        return None
    return (parsed - UNIX_EPOCH).total_seconds()


def build_event(row, onlyraw):
    """Convert one Elasticsearch hit into a result dict.

    The dict always has '_raw' (the JSON-encoded '_source'), 'sourcetype'
    (fixed to "_json") and 'index'.  '_time', 'host' and 'source' are taken
    from the document when available.  In *onlyraw* mode missing values are
    filled with defaults so that every CSV column exists; otherwise all
    fields of '_source' are copied into the result as well.

    Raises KeyError when the hit has no '_source'.
    """
    source = row['_source']
    event = {
        '_raw': json.dumps(source),
        'sourcetype': "_json",
        'index': row.get('_index', ''),
    }
    stamp = event_epoch(source)
    if stamp is not None:
        event['_time'] = stamp
    elif onlyraw:
        event['_time'] = ''
    for field, fallback in (('host', ''), ('source', 'ELASTIC')):
        try:
            event[field] = source[field]
        except (KeyError, TypeError):
            if onlyraw:
                event[field] = fallback
    if not onlyraw:
        # Expose every document field as a field of the result.
        event.update(source)
    return event


def dump_debug(path, response):
    """Write the raw body of *response* to *path*.

    The bytes are written unchanged, which avoids encoding errors on
    non-ASCII responses, and the file is closed right away.
    """
    # NOTE(review): debugging aid writing to a fixed path under /tmp.
    with open(path, "wb") as handle:
        handle.write(response.content)


def fetch_pages(args, clause, earliest, latest):
    """Yield the hits of the search, one list per response.

    The first request starts a scrolling search; later requests continue it
    with the scroll id of the previous response.  Iteration ends at the first
    response without hits, or - after a message on stderr - at a response
    that is not valid JSON or lacks the expected keys.
    """
    scroll_id = None
    while True:
        if not scroll_id:
            url = search_url(args.server, args.index, args.stype)
            body = SEARCH_BODY % (clause, earliest, latest)
            debug_path = "/tmp/DEBUG1"
        else:
            url = '{}/_search/scroll'.format(args.server)
            body = json.dumps({"scroll": "15m", "scroll_id": scroll_id})
            debug_path = "/tmp/DEBUG2"
        response = requests.post(url, body, headers=JSON_HEADERS, auth=ES_AUTH)
        dump_debug(debug_path, response)
        try:
            payload = json.loads(response.text)
            scroll_id = payload['_scroll_id']
            hits = payload['hits']['hits']
        except (ValueError, KeyError, TypeError):
            sys.stderr.write('Unexpected Elasticsearch response (HTTP %s), stopping\n'
                             % response.status_code)
            return
        if not hits:
            return
        yield hits


def main():
    """Run the search and hand the results to Splunk.

    In --onlyraw mode the events are written to stdout as CSV while they
    arrive (flushed after every response); otherwise they are collected and
    passed to ``splunk.Intersplunk.outputResults`` at the end.
    """
    info = read_search_info(sys.stdin)
    earliest, latest = search_window(info)
    args = build_parser().parse_args()
    clause = query_clause(args.query[0])

    writer = None
    results = []
    for hits in fetch_pages(args, clause, earliest, latest):
        for row in hits:
            event = build_event(row, args.onlyraw)
            if not args.onlyraw:
                results.append(event)
                continue
            if writer is None:
                # The header is written lazily, together with the first event.
                writer = csv.writer(sys.stdout)
                writer.writerow(RAW_COLUMNS)
            writer.writerow([event[column] for column in RAW_COLUMNS])
        if args.onlyraw:
            sys.stdout.flush()

    if not args.onlyraw:
        splunk.Intersplunk.outputResults(results)


if __name__ == "__main__":
    main()