Advertisement
iama_alpaca

Write_FatWallet_Database.py

Oct 6th, 2017
332
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.54 KB | None | 0 0
  1. #! /usr/bin/env python3
  2. import sqlite3
  3. import os
  4. import re
  5.  
  6. conn = sqlite3.connect("Threads.db")
  7. c = conn.cursor()
  8. c.execute('''CREATE TABLE IF NOT EXISTS threads(ID INTEGER, Title TEXT, Author TEXT, Date TEXT)''')
  9.  
  10. for i in os.listdir('Threads'):
  11.     print('Adding file: Threads/{}'.format(i))
  12.     file = open('Threads/'+i, 'r').read()
  13.  
  14.     findauthor = re.findall(r"Posted By:</b>[\s\S]*?\w+", file)
  15.     author = findauthor[0].replace("Posted By:</b>","").replace(' ','')
  16.     # print(author)
  17.  
  18.     findtitle = re.findall(r'<font color="navy">[\s\S]*?</font>', file)
  19.     title = findtitle[0].replace('<font color="navy">','').replace('</font>','')
  20.     # print(title)
  21.  
  22.     finddate = re.findall(r'Date Posted:</b>[\s\S]*?M<br>', file)
  23.     date = finddate[0].replace('Date Posted:</b>','').replace('                     ','').replace('<br>','').replace('\n','')
  24.     date_split = date.split('/')
  25.     month = date_split[0]
  26.     day = int(date_split[1])
  27.     year = int(date_split[2].split(' ')[0])
  28.     time = ' '.join(date_split[2].split(' ')[1:])
  29.     hours = int(time.split(':')[0])
  30.     minutes = int(''.join(time.split(':')[1:]).split(' ')[0])
  31.     am_pm = ''.join(time.split(':')[1:]).split(' ')[1]
  32.     months={
  33.         'Jan': 1,
  34.         'Feb': 2,
  35.         'Mar': 3,
  36.         'Apr': 4,
  37.         'May': 5,
  38.         'Jun': 6,
  39.         'Jul': 7,
  40.         'Aug': 8,
  41.         'Sep': 9,
  42.         'Oct': 10,
  43.         'Nov': 11,
  44.         'Dec': 12
  45.     }
  46.  
  47.     month = months[month]
  48.     date_final = '{}-{:02d}-{:02d} {:02d}:{:02d} {}'.format(year, month, day, hours, minutes, am_pm)
  49.  
  50.     c.execute("INSERT INTO threads VALUES (?, ?, ?, ?)", (int(i.replace('.html','')), title, author, date_final))
  51. conn.commit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement