Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import glob
- import gzip
- import logging
- import os
- import pymysql
- import subprocess
- # path to gz directory
- GZ_DIR = "gz"
- # Database Infomation
- DB_HOST = 'locahost'
- DB_USER = 'dbuser'
- DB_PASS = 'password'
- DB_NAME = 'dbname'
- LOGFILE = "exception.log"
- BULKSQLROW=1000
- SQL={
- "url":"INSERT INTO PADIAGDB.url ("
- "Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
- "Config_Version,Generate_Time,Source_address,Destination_address,"
- "NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
- "Destination_User,Application,Virtual_System,Source_Zone,"
- "Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
- "Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
- "NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
- "URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
- "Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
- "cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
- "filetype,xff,referer,sender,subject,recipient,reportid,"
- "dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
- "Virtual_System_Name,Device_Name,file_url )"
- ""
- "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
- "); ",
- "threat":"INSERT INTO PADIAGDB.threat ("
- "Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
- "Config_Version,Generate_Time,Source_address,Destination_address,"
- "NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
- "Destination_User,Application,Virtual_System,Source_Zone,"
- "Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
- "Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
- "NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
- "URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
- "Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
- "cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
- "filetype,xff,referer,sender,subject,recipient,reportid,"
- "dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
- "Virtual_System_Name,Device_Name,file_url )"
- ""
- "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
- "); ",
- "traffic":"INSERT INTO PADIAGDB.traffic ("
- "Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
- "Generate_Time,Source_address,Destination_address,NAT_Source_IP,"
- "NAT_Destination_IP,Rule,Source_User,Destination_User,Application,"
- "Virtual_System,Source_Zone,Destination_Zone,Inbound_Interface,"
- "Outbound_Interface,Log_Action,Time_Logged,Session_ID,Repeat_Count,"
- "Source_Port,Destination_Port,NAT_Source_Port,NAT_Destination_Port,"
- "Flags,IP_Protocol,Action,Bytes,Bytes_Sent,Bytes_Received,Packets,"
- "Start_Time,Elapsed_Time,Category,Padding,Sequence_Number,Action_Flags,"
- "Source_Country,Destination_Country,cpadding,pkts_sent,pkts_received,"
- "session_end_reason,dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,"
- "dg_hier_level_4,Virtual_System_Name,Device_Name,action_source )"
- ""
- "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
- "); ",
- "system":"INSERT INTO PADIAGDB.system ("
- "Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
- "Generate_Time,Virtual_System,Event_ID,Object,fmt,id,module,Severity,"
- "Description,Sequence_Number,Action_Flags,dg_hier_level_1,"
- "dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,Virtual_System_Name,"
- "Device_Name )"
- ""
- "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
- "%s,%s,%s );"
- }
- def csv_reader(file, header=False):
- with open(file, "r") as f:
- reader = csv.reader(f)
- if header:
- next(reader)
- for row in reader:
- yield row
- # function to connect sql
- def import_sql(filename, dbHostName, dbUser, dbPassword, databaseName):
- rowcount=0
- row=""
- # Open database connection
- db = pymysql.connect(host=dbHostName,
- user=dbUser,
- password=dbPassword,
- db=databaseName,
- charset='utf8')
- print "importing %s ..." % filename
- for row in csv_reader(filename, False):
- rowcount+=1
- if row[3] == "THREAT" and row[4] == "url":
- try:
- row_url.append(row)
- except NameError:
- row_url=[row]
- elif row[3] == "THREAT":
- try:
- row_threat.append(row)
- except NameError:
- row_threat=[row]
- elif row[3] == "TRAFFIC":
- try:
- row_traffic.append(row)
- except NameError:
- row_traffic=[row]
- elif row[3] == "SYSTEM":
- try:
- row_system.append(row)
- except NameError:
- row_system=[row]
- else:
- continue
- # prepare a cursor object using cursor() method
- with db.cursor() as cursor:
- if rowcount % BULKSQLROW == 0 :
- try:
- if 'row_url' in locals() :
- r = cursor.executemany(SQL["url"], row_url)
- del row_url
- if 'row_threat' in locals() :
- r = cursor.executemany(SQL["threat"], row_threat)
- del row_threat
- if 'row_traffic' in locals() :
- r = cursor.executemany(SQL["traffic"], row_traffic)
- del row_traffic
- if 'row_system' in locals() :
- r = cursor.executemany(SQL["system"], row_system)
- del row_system
- except Exception as e:
- logging.exception(e)
- if BULKSQLROW == 1 :
- logging.exception("---------------- SQL start ----------------")
- logging.exception(cursor._executed)
- logging.exception("---------------- SQL end ----------------")
- db.rollback()
- # disconnect from server
- db.commit()
- db.close()
- print "import complete. ( %s imported)" % rowcount
- #####################################################################
- # Main
- #####################################################################
- # List of all gz files
- gz_files = (gz for gz in glob.glob(os.path.join(GZ_DIR, '*.gz')))
- # Loop through all gz files
- for gz_file in gz_files:
- sql_file = gz_file[:-3]
- sql_file = sql_file[:-4] + '.csv'
- print "decomplessing %s ..." % gz_file
- with open(sql_file, 'wb') as out_file:
- with gzip.open(gz_file, 'rb') as in_file:
- while True:
- chunk = in_file.read(1024)
- if not chunk:
- break
- out_file.write(chunk)
- out_file.close()
- # Step 2: import to sql
- import_sql(sql_file, DB_HOST, DB_USER, DB_PASS, DB_NAME)
- # Step 3: remove uncompresed file
- os.remove(sql_file)
- # Step 4: in loop, back to step 1 automatically for another gz files
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement