Advertisement
Guest User

Untitled

a guest
May 8th, 2018
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.90 KB | None | 0 0
  1. import csv
  2. import glob
  3. import gzip
  4. import logging
  5. import os
  6. import pymysql
  7. import subprocess
  8.  
  9. # path to gz directory
  10. GZ_DIR = "gz"
  11. # Database Infomation
  12. DB_HOST = 'locahost'
  13. DB_USER = 'dbuser'
  14. DB_PASS = 'password'
  15. DB_NAME = 'dbname'
  16. LOGFILE = "exception.log"
  17. BULKSQLROW=1000
  18. SQL={
  19.     "url":"INSERT INTO PADIAGDB.url ("
  20.         "Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
  21.         "Config_Version,Generate_Time,Source_address,Destination_address,"
  22.         "NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
  23.         "Destination_User,Application,Virtual_System,Source_Zone,"
  24.         "Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
  25.         "Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
  26.         "NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
  27.         "URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
  28.         "Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
  29.         "cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
  30.         "filetype,xff,referer,sender,subject,recipient,reportid,"
  31.         "dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
  32.         "Virtual_System_Name,Device_Name,file_url )"
  33.         ""
  34.         "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  35.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  36.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
  37.         "); ",
  38.  
  39.     "threat":"INSERT INTO PADIAGDB.threat ("
  40.         "Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
  41.         "Config_Version,Generate_Time,Source_address,Destination_address,"
  42.         "NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
  43.         "Destination_User,Application,Virtual_System,Source_Zone,"
  44.         "Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
  45.         "Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
  46.         "NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
  47.         "URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
  48.         "Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
  49.         "cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
  50.         "filetype,xff,referer,sender,subject,recipient,reportid,"
  51.         "dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
  52.         "Virtual_System_Name,Device_Name,file_url )"
  53.         ""
  54.         "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  55.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  56.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
  57.         "); ",
  58.  
  59.     "traffic":"INSERT INTO PADIAGDB.traffic ("
  60.         "Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
  61.         "Generate_Time,Source_address,Destination_address,NAT_Source_IP,"
  62.         "NAT_Destination_IP,Rule,Source_User,Destination_User,Application,"
  63.         "Virtual_System,Source_Zone,Destination_Zone,Inbound_Interface,"
  64.         "Outbound_Interface,Log_Action,Time_Logged,Session_ID,Repeat_Count,"
  65.         "Source_Port,Destination_Port,NAT_Source_Port,NAT_Destination_Port,"
  66.         "Flags,IP_Protocol,Action,Bytes,Bytes_Sent,Bytes_Received,Packets,"
  67.         "Start_Time,Elapsed_Time,Category,Padding,Sequence_Number,Action_Flags,"
  68.         "Source_Country,Destination_Country,cpadding,pkts_sent,pkts_received,"
  69.         "session_end_reason,dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,"
  70.         "dg_hier_level_4,Virtual_System_Name,Device_Name,action_source )"
  71.         ""
  72.         "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  73.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  74.         "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
  75.         "); ",
  76.     "system":"INSERT INTO PADIAGDB.system ("
  77.         "Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
  78.         "Generate_Time,Virtual_System,Event_ID,Object,fmt,id,module,Severity,"
  79.         "Description,Sequence_Number,Action_Flags,dg_hier_level_1,"
  80.         "dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,Virtual_System_Name,"
  81.         "Device_Name )"
  82.         ""
  83.         "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
  84.         "%s,%s,%s );"
  85. }
  86.  
  87. def csv_reader(file, header=False):
  88.     with open(file, "r") as f:
  89.         reader = csv.reader(f)
  90.  
  91.         if header:
  92.             next(reader)
  93.  
  94.         for row in reader:
  95.             yield row
  96.  
  97.  
  98. # function to connect sql
  99. def import_sql(filename, dbHostName, dbUser, dbPassword, databaseName):
  100.     rowcount=0
  101.     row=""
  102.  
  103.     # Open database connection
  104.     db = pymysql.connect(host=dbHostName,
  105.                          user=dbUser,
  106.                          password=dbPassword,
  107.                          db=databaseName,
  108.                          charset='utf8')
  109.  
  110.     print "importing %s ..." % filename
  111.     for row in csv_reader(filename, False):
  112.         rowcount+=1
  113.         if row[3] == "THREAT" and row[4] == "url":
  114.             try:
  115.                 row_url.append(row)
  116.             except NameError:
  117.                 row_url=[row]
  118.  
  119.         elif row[3] == "THREAT":
  120.             try:
  121.                 row_threat.append(row)
  122.             except NameError:
  123.                 row_threat=[row]
  124.  
  125.         elif row[3] == "TRAFFIC":
  126.             try:
  127.                 row_traffic.append(row)
  128.             except NameError:
  129.                 row_traffic=[row]
  130.                
  131.         elif row[3] == "SYSTEM":
  132.             try:
  133.                 row_system.append(row)
  134.             except NameError:
  135.                 row_system=[row]
  136.         else:
  137.             continue
  138.  
  139.         # prepare a cursor object using cursor() method
  140.         with db.cursor() as cursor:
  141.             if rowcount % BULKSQLROW == 0 :
  142.                 try:
  143.                     if 'row_url' in locals() :
  144.                         r = cursor.executemany(SQL["url"], row_url)
  145.                         del row_url
  146.  
  147.                     if 'row_threat' in locals() :
  148.                         r = cursor.executemany(SQL["threat"], row_threat)
  149.                         del row_threat
  150.  
  151.                     if 'row_traffic' in locals() :
  152.                         r = cursor.executemany(SQL["traffic"], row_traffic)
  153.                         del row_traffic
  154.  
  155.                     if 'row_system' in locals() :
  156.                         r = cursor.executemany(SQL["system"], row_system)
  157.                         del row_system
  158.                
  159.                 except Exception as e:
  160.                     logging.exception(e)
  161.                     if BULKSQLROW == 1 :
  162.                         logging.exception("---------------- SQL start ----------------")
  163.                         logging.exception(cursor._executed)
  164.                         logging.exception("---------------- SQL  end  ----------------")
  165.                     db.rollback()
  166.                    
  167.     # disconnect from server
  168.     db.commit()
  169.     db.close()
  170.  
  171.     print "import complete. ( %s imported)" % rowcount
  172.  
  173. #####################################################################
  174. # Main
  175. #####################################################################
  176.  
  177. # List of all gz files
  178. gz_files = (gz for gz in glob.glob(os.path.join(GZ_DIR, '*.gz')))
  179.  
  180. # Loop through all gz files
  181. for gz_file in gz_files:
  182.     sql_file = gz_file[:-3]
  183.     sql_file = sql_file[:-4] + '.csv'
  184.  
  185.     print "decomplessing %s ..." % gz_file
  186.     with open(sql_file, 'wb') as out_file:
  187.         with gzip.open(gz_file, 'rb') as in_file:
  188.             while True:
  189.                 chunk = in_file.read(1024)
  190.                 if not chunk:
  191.                     break
  192.                 out_file.write(chunk)
  193.         out_file.close()
  194.  
  195.     # Step 2: import to sql
  196.     import_sql(sql_file, DB_HOST, DB_USER, DB_PASS, DB_NAME)
  197.  
  198.     # Step 3: remove uncompresed file
  199.     os.remove(sql_file)
  200.  
  201.     # Step 4: in loop, back to step 1 automatically for another gz files
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement