Advertisement
Guest User

Untitled

a guest
Jan 14th, 2016
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.24 KB | None | 0 0
  1. import email
  2. import getpass, imaplib
  3. import os
  4. import sys
  5. import re
  6. import sqlite3
  7.  
  8. from datetime import datetime
  9.  
  10. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  11. from pdfminer.converter import TextConverter
  12. from pdfminer.layout import LAParams
  13. from pdfminer.pdfpage import PDFPage
  14. from cStringIO import StringIO
  15.  
  16. sqlite_db = 'thames_flow.sqlite'
  17.  
  18. detach_dir = '.'
  19. db_dir = '.'
  20.  
  21. #userName = raw_input('Enter your GMail username:')
  22. userName = 'flowdata@richmondcanoeclub.com'
  23. #passwd = getpass.getpass('Enter your password: ')
  24. passwd = ''
  25.  
  26. def download_attachments():
  27. imapSession = imaplib.IMAP4_SSL('mail.richmondcanoeclub.com')
  28. typ, accountDetails = imapSession.login(userName, passwd)
  29. if typ != 'OK':
  30. print 'Not able to sign in!'
  31. raise
  32.  
  33. imapSession.select('INBOX')
  34. #typ, data = imapSession.search(None, 'ALL')
  35. typ, data = imapSession.search(None, '(UNSEEN)')
  36. if typ != 'OK':
  37. print 'Error searching Inbox.'
  38. raise
  39.  
  40. # Iterating over all emails
  41. for msgNum in data[0].split():
  42. typ, messageParts = imapSession.fetch(msgNum, '(RFC822)')
  43. if typ != 'OK':
  44. print 'Error fetching mail.'
  45. raise
  46.  
  47. emailBody = messageParts[0][1]
  48. mail = email.message_from_string(emailBody)
  49. #print '--- Message %s ---' % (msgNum)
  50. #for (k, v) in mail.items():
  51. # print '%s: %s' % (k, v)
  52. msgId = mail.get('Message-ID').strip('<>')
  53. for part in mail.walk():
  54. if part.get_content_maintype() == 'multipart':
  55. # print part.as_string()
  56. continue
  57. if part.get('Content-Disposition') is None:
  58. # print part.as_string()
  59. continue
  60. fileName = part.get_filename()
  61.  
  62. if bool(fileName) and re.match('Report \\w+.pdf', fileName):
  63. dirPath = os.path.join(detach_dir, 'attachments', msgId)
  64. if not os.path.isdir(dirPath) :
  65. os.mkdir(dirPath)
  66. filePath = os.path.join(detach_dir, 'attachments', msgId, fileName)
  67. if not os.path.isfile(filePath) :
  68. print '%s/%s' % (msgId, fileName)
  69. fp = open(filePath, 'wb')
  70. fp.write(part.get_payload(decode=True))
  71. fp.close()
  72. imapSession.close()
  73. imapSession.logout()
  74.  
  75. def convert_pdf_to_txt(path):
  76. rsrcmgr = PDFResourceManager()
  77. retstr = StringIO()
  78. codec = 'utf-8'
  79. laparams = LAParams()
  80. device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
  81. fp = file(path, 'rb')
  82. interpreter = PDFPageInterpreter(rsrcmgr, device)
  83. password = ""
  84. maxpages = 0
  85. caching = True
  86. pagenos=set()
  87. for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
  88. interpreter.process_page(page)
  89. fp.close()
  90. device.close()
  91. str = retstr.getvalue()
  92. retstr.close()
  93. return str
  94.  
  95. def get_pdf_lines(path):
  96. lines = []
  97. for line in convert_pdf_to_txt(path).splitlines():
  98. if len(line.strip()) > 0:
  99. lines.append(line.strip())
  100. return lines
  101.  
  102. def parse_flowdata(lines):
  103. stations = []
  104. values = []
  105. time = ''
  106. date = ''
  107. for line in lines:
  108. if line.endswith(' Flow'):
  109. stations.append(line)
  110. elif line.endswith(' m3/s'):
  111. values.append(float(line.replace(' m3/s', '')))
  112. elif re.match('\\d{2}/\\d{2}/\\d{4}', line):
  113. date = line
  114. elif re.match('\\d{2}:\\d{2}:\\d{2}', line):
  115. time = line
  116. return [datetime.strptime('%s %s' % (date, time), '%d/%m/%Y %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S'), zip(stations, values)]
  117.  
  118. def parse_temperature(lines):
  119. stations = []
  120. values = []
  121. times = []
  122. for line in lines:
  123. if line.endswith(' Lock'):
  124. stations.append(line)
  125. elif line.endswith(' deg C'):
  126. values.append(float(line.replace(' deg C', '')))
  127. elif re.match('\\d{2}/\\d{2}/\\d{4} +\\d{1}:\\d{2}', line):
  128. times.append(datetime.strptime(re.sub(' +', ' ', re.sub(' (\\d:)', ' 0\\1', line)), '%m/%d/%Y %H:%M').strftime('%Y-%m-%d %H:%M:%S'))
  129. return zip(times, stations, values)
  130.  
  131. def store_flowdata(data):
  132. time = data[0]
  133. items = data[1]
  134. conn = sqlite3.connect(os.path.join(db_dir, sqlite_db))
  135. c = conn.cursor()
  136. c.execute('CREATE TABLE IF NOT EXISTS flowrate (measured_at TEXT NOT NULL, station_name TEXT NOT NULL, value REAL NOT NULL, PRIMARY KEY (measured_at, station_name))')
  137. print [[time, d[0], d[1]] for d in items]
  138. c.executemany('INSERT INTO flowrate VALUES (?,?,?)', [[time, d[0], d[1]] for d in items])
  139. conn.commit()
  140. conn.close()
  141.  
  142. def store_temperature(data):
  143. conn = sqlite3.connect(os.path.join(db_dir, sqlite_db))
  144. c = conn.cursor()
  145. c.execute('CREATE TABLE IF NOT EXISTS temperature (measured_at TEXT NOT NULL, station_name TEXT NOT NULL, value REAL NOT NULL, PRIMARY KEY (measured_at, station_name))')
  146. print data
  147. c.executemany('INSERT INTO temperature VALUES (?,?,?)', data)
  148. conn.commit()
  149. conn.close()
  150.  
  151. def process_attachments():
  152. for d in os.listdir(os.path.join(detach_dir, 'attachments')):
  153. dirPath = os.path.join(detach_dir, 'attachments', d)
  154. if os.path.isdir(dirPath):
  155. for f in os.listdir(dirPath):
  156. if f.endswith('.pdf'):
  157. #print d
  158. filePath = os.path.join(detach_dir, 'attachments', d, f)
  159. lines = get_pdf_lines(filePath)
  160. if len(lines) > 0 and lines[0] == 'River Thames Flows':
  161. print '---------------------'
  162. data = parse_flowdata(lines)
  163. store_flowdata(data)
  164. elif len(lines) > 1 and lines[1] == 'Water Temperature':
  165. print '---------------------'
  166. data = parse_temperature(lines)
  167. store_temperature(data)
  168. # Remove the file now we are done with it
  169. os.remove(filePath)
  170. if len(os.listdir(dirPath)) == 0: # Remove the parent directory if all attachments have been removed
  171. os.rmdir(dirPath)
  172.  
  173. download_attachments()
  174. process_attachments()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement