Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
"""Incremental RETS media sync (Python 2).

Reads the last successful sync time from the local `updates` table,
pulls all `media` records modified since then from a RETS server, and
upserts them into the MySQL `media` table in bulk chunks, falling back
to row-by-row inserts when a bulk chunk fails. Finally records this
run's timestamp and record count back into `updates`.
"""
import datetime
import pickle
import time
from datetime import timedelta
import ftplib
import MySQLdb
import sys
from sys import argv
import os
import urllib
import subprocess
from ftplib import FTP
from time import sleep
import requests
from rets import Session

# Timestamp of this run, stored in the `updates` bookkeeping table.
# BUG FIX: minutes is %M, not %m (%m is the month) — the old format
# wrote the month into the minutes position.
a = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')

db2 = MySQLdb.connect(host="127.0.0.1",  # your host, usually localhost
                      charset='utf8',
                      user="",    # your username
                      passwd="",  # your password
                      db="")      # name of the database
cur2 = db2.cursor()
cur2.execute('SET NAMES utf8;')
cur2.execute('SET CHARACTER SET utf8;')
cur2.execute('SET character_set_connection=utf8;')

# Resume point: most recent successful sync time recorded for 'media'.
cur2.execute("SELECT MAX(lastupdate) FROM updates where name = 'media'")
max_time_string = str(cur2.fetchone()[0])
# BUG FIX: strptime/strftime live on the datetime *class*; `import
# datetime` binds the module, so the unqualified calls raised
# AttributeError. Qualify them fully.
max_datetime = datetime.datetime.strptime(max_time_string, "%Y-%m-%d %H:%M:%S")
# Overlap window (currently 0 minutes) subtracted from the resume point
# so boundary records are not missed; widen if the server clock drifts.
search_datetime = max_datetime - timedelta(minutes=0)
search_time_string = datetime.datetime.strftime(search_datetime, "%Y-%m-%dT%H:%M:%S")

# RETS credentials — fill in before running.
login_url = ''
username = ''
password = ''
rets_client = Session(login_url, username, password)
rets_client.login()
system_data = rets_client.get_system_metadata()
print(system_data)
resources = rets_client.get_resource_metadata(resource='Agent')

print("Getting records since %s" % max_time_string)
search_results = rets_client.search(
    resource='media', resource_class='media',
    dmql_query='(Modified=' + search_time_string + '+)')  # change search query here
print("Inserting %d records now" % len(search_results))


def _upsert_sql(row, ignore=False):
    """Build an INSERT ... ON DUPLICATE KEY UPDATE statement for `media`.

    The column list comes from `row`'s keys; values are left as %s
    placeholders for the driver to escape. With ignore=True the verb is
    INSERT IGNORE (used for the bulk path).
    """
    placeholders = ', '.join(['%s'] * len(row))
    columns = "`%s`" % '`,`'.join(row.keys())
    update_string = ','.join("`%s`=VALUES(`%s`)" % (k, k) for k in row)
    verb = "INSERT IGNORE" if ignore else "INSERT"
    return "%s INTO media ( %s ) VALUES ( %s ) ON DUPLICATE KEY UPDATE %s" % (
        verb, columns, placeholders, update_string)  # change table here


# Split the result set into executemany()-sized chunks.
chunk_size = max(1, 40000)
chunks = [search_results[i:i + chunk_size]
          for i in range(0, len(search_results), chunk_size)]

for chunk in chunks:
    # NOTE(review): the bulk statement's column list is taken from the
    # chunk's first row — assumes every row in a chunk has the same keys.
    sql = _upsert_sql(chunk[0], ignore=True)
    chunk_values = [tuple(result.values()) for result in chunk]
    try:
        cur2.executemany(sql, chunk_values)
        db2.commit()
        print("Records inserted")
    except MySQLdb.Error:  # BUG FIX: was a bare except
        # Bulk insert failed (e.g. one malformed row): roll back, then
        # retry the chunk one record at a time so the good rows still land.
        db2.rollback()
        for result in chunk:
            sql = _upsert_sql(result)
            print(sql)
            cur2.execute(sql, tuple(result.values()))
        print("%d records inserted!" % len(search_results))
        db2.commit()

print("Processed %d results, quitting." % len(search_results))
# BUG FIX: the value tuple was swapped against the column list — the
# timestamp belongs in `lastupdate` (it is read back by MAX(lastupdate)
# and parsed with strptime above) and the count in `lastupdatecount`.
# Also un-quote the placeholder: MySQLdb quotes values itself.
cur2.execute(
    "INSERT INTO updates (name, lastupdate, lastupdatecount) VALUES (%s, %s, %s)",
    ("media", a, len(search_results)))
db2.commit()
sys.exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement