Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pickle
- import time
- from datetime import datetime, timedelta
- import ftplib
- import MySQLdb
- import sys
- from sys import argv
- import os
- import urllib
- import subprocess
- from ftplib import FTP
- from time import sleep
- import requests
- from rets import Session
- import re
# Destination MySQL connection; force utf8 at every layer (connection,
# names, character set) so multi-byte listing data round-trips intact.
db2 = MySQLdb.connect(host="127.0.0.1",  # your host, usually localhost
                      charset='utf8',
                      user="",           # your username
                      passwd="",         # your password
                      db="")             # name of the data base
cur2 = db2.cursor()
for charset_stmt in ('SET NAMES utf8;',
                     'SET CHARACTER SET utf8;',
                     'SET character_set_connection=utf8;'):
    cur2.execute(charset_stmt)

# Resume point: the last recorded media update, pulled back 15 minutes so
# records written while the previous run was in flight are not missed.
cur2.execute("SELECT MAX(lastupdate) FROM updates where name = 'media'")
max_time_string = str(cur2.fetchone()[0])
max_datetime = datetime.strptime(max_time_string, "%Y-%m-%d %H:%M:%S")
search_datetime = max_datetime - timedelta(minutes=15)
# RETS DMQL wants an ISO-8601 'T' between date and time.
search_time_string = search_datetime.strftime("%Y-%m-%dT%H:%M:%S")
# RETS session: authenticate against the MLS server.
# NOTE(review): credentials are blank here — presumably filled in at deploy
# time; confirm before running.
login_url = ''
username = ''
password = ''
rets_client = Session(login_url, username, password)
rets_client.login()

# Metadata fetches; results are kept but otherwise unused (the original
# evaluated them as bare expressions, likely leftover REPL exploration).
system_data = rets_client.get_system_metadata()
resources = rets_client.get_resource_metadata(resource='Agent')

# Retry the search until the server answers; transient RETS faults are
# common, so wait 15s between attempts.  The original bare `except:`
# swallowed every error invisibly and forever (including auth/config
# mistakes) — catch Exception and log it so operators can see why the
# script is spinning.
while True:
    try:
        # change search query here
        search_results = rets_client.search(
            resource='media',
            resource_class='media',
            dmql_query='(Modified=' + search_time_string + '+)')
        print("Inserting %d records now" % len(search_results))
        break
    except Exception as exc:
        print("RETS search failed (%s); retrying in 15s" % exc)
        time.sleep(15)
# Split the result set into batches for executemany(); 40 000 rows per
# batch keeps each INSERT statement within sane packet limits.
chunk_size = 40000
chunk_size = max(1, chunk_size)  # guard against a non-positive size
chunks = [search_results[start:start + chunk_size]
          for start in range(0, len(search_results), chunk_size)]
# RETS returns timestamps as 'YYYY-MM-DDTHH:MM:SS'; MySQL DATETIME wants a
# space separator.  Compiled once (raw string — the original used a plain
# string, which raises DeprecationWarning for \d escapes on modern Python).
_DATETIME_RE = re.compile(r"(\d{4}-[01]\d-[0-3]\d)T([0-2]\d:[0-5]\d:[0-5]\d)")

def _mysql_value(val):
    """Return *val* with a leading ISO-8601 timestamp rewritten to MySQL
    DATETIME form; any other value passes through unchanged."""
    m = _DATETIME_RE.match(val)
    if m:
        return m.group(1) + " " + m.group(2)
    return val

def _upsert_sql(record, ignore=True):
    """Build an upsert statement whose column list matches this record's
    keys.  `ignore` selects INSERT IGNORE (batch path) vs plain INSERT
    (row-at-a-time fallback), matching the original's two SQL shapes."""
    columns = "`%s`" % '`,`'.join(record.keys())
    placeholders = ', '.join(['%s'] * len(record))
    update_string = ','.join("`%s`=VALUES(`%s`)" % (key, key) for key in record)
    verb = "INSERT IGNORE INTO" if ignore else "INSERT INTO"
    # change table here
    return "%s media ( %s ) VALUES ( %s ) ON DUPLICATE KEY UPDATE %s" % (
        verb, columns, placeholders, update_string)

for chunk in chunks:
    # Column list is taken from the first record; assumes every record in
    # the chunk shares the same keys — TODO confirm against the RETS feed.
    sql = _upsert_sql(chunk[0])
    chunk_values = [tuple(_mysql_value(v) for v in result.values())
                    for result in chunk]
    success = False
    try:
        cur2.executemany(sql, chunk_values)
        db2.commit()
        print("Records inserted")
        success = True
    except Exception as exc:
        # One bad row poisons the whole batch.  Roll back and log (the
        # original bare `except:` hid the reason entirely), then fall
        # through to row-at-a-time inserts so the good rows still land.
        db2.rollback()
        print("Batch insert failed (%s); retrying row by row" % exc)
    if not success:
        for result in chunk:
            # Rebuild SQL per row: fallback rows may differ in key sets.
            sql = _upsert_sql(result, ignore=False)
            values_fixed = tuple(_mysql_value(v) for v in result.values())
            print(sql)
            # Deliberately unguarded: a row that still fails aborts the
            # run with the offending SQL already printed for debugging.
            cur2.execute(sql, values_fixed)
            print("Record inserted")
            db2.commit()
# Record this run so the next invocation resumes from the new high-water
# mark.  (The original printed the summary line twice — an accidental
# duplicate, removed here.)
print("Processed %d results, quitting." % len(search_results))
# NOTE(review): only name and count are inserted; the `lastupdate` column
# presumably defaults to CURRENT_TIMESTAMP — confirm the table schema.
cur2.execute(
    "INSERT INTO updates (name, lastupdatecount) VALUES (%s, %s)",
    ("media", len(search_results)))
db2.commit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement