Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import dbUtils as db
- import pandas as pd
- import re
- from datetime import datetime
- from exchangelib import DELEGATE, Account, Credentials, Message, Mailbox, HTMLBody
- processName = 'OPIS Denaturant Wire Scraper'
- subProcessName = 'Scraping Raw Emails'
- #Check to see if this job has already run, if so then quit
- if db.checkForSuccess(processName, True) == True:
- raise SystemExit
- try:
- credentials = Credentials(username='GPRE.local\\' + db.getCreds()['Username'], password = db.getCreds()['Password'])
- account = Account(primary_smtp_address = 'someone@someplace.com', credentials = credentials, autodiscover = True, access_type = DELEGATE)
- folder = account.inbox / 'brokerWires'
- for email in folder.filter(subject__icontains = 'opis lp report'): #Find all OPIS emails
- try:
- with open('FILEPATH/lp_' + str(email.datetime_sent.year) + '-' + str(email.datetime_sent.month) + '-' + str(email.datetime_sent.day) + '.txt', 'wb') as f:
- f.write(email.attachments[0].content)
- rawfile = open('FILEPATH/lp_' + str(email.datetime_sent.year) + '-' + str(email.datetime_sent.month) + '-' + str(email.datetime_sent.day) + '.txt') #Open file just saved
- rawtextdata = rawfile.read() #Read raw file as string
- splitrawtextdata = rawtextdata.split('\n') #Split string line by line
- currentproduct = ''
- currentprice = ''
- outputdataframe = []
- for line in splitrawtextdata: #For each line
- if re.findall('opis lp prices', line, re.I): #Found product name line
- productdata = [line.strip()]
- elif re.findall('\d', line, re.I) and not re.findall('Avg', line, re.I): #Found current price
- splitline = line.split()
- productdata.append(datetime.strptime(splitline[0] + ' ' + splitline[1] + ' ' + splitline[2], '%B %d, %Y'))
- productdata.append(float(splitline[-3]))
- productdata.append(float(splitline[-2]))
- productdata.append(float(splitline[-1]))
- elif re.findall('\d', line, re.I) and re.findall('Avg', line, re.I): #Found average price
- splitline = line.split()
- productdata.append(float(splitline[-3]))
- productdata.append(float(splitline[-2]))
- productdata.append(float(splitline[-1]))
- outputdataframe.append(productdata)
- outputdataframe = pd.DataFrame(outputdataframe, columns = ['Product', 'Price_Date', 'Daily_Low', 'Daily_High', 'Daily_Avg', 'Monthly_Low', 'Monthly_High', 'Monthly_Avg'])
- outputdataframe = outputdataframe[['Price_Date', 'Product', 'Daily_Low', 'Daily_High', 'Daily_Avg', 'Monthly_Low', 'Monthly_High', 'Monthly_Avg']]
- datesuploaded = db.sqlToDataFrame('RiskSandbox', 'SELECT distinct pricedate FROM [DATABASE]') #Get distinct price dates already uploaded to SQL table
- if outputdataframe['Price_Date'].iloc[0].to_datetime() not in [uploadeddate.to_datetime() for uploadeddate in datesuploaded['pricedate']]: #This data was not already uploaded
- print 'Uploading data for: ' + str(outputdataframe['Price_Date'].iloc[0])
- db.dataFrameToSQL('ODBCCONN', outputdataframe, 'opisDenaturant', "//FILEPATH/opisPrices.csv", True) #Insert new data to SQL table
- else: #This data was already uploaded
- print 'Already uploaded data for: ' + str(outputdataframe['Price_Date'].iloc[0])
- except:
- pass
- subProcessName = 'Job Complete'
- db.processLog(processName, subProcessName, None)
- except Exception, e:
- db.processLog(processName, subProcessName, errorLog=str(e))
Add Comment
Please, Sign In to add comment