Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import praw
- import datetime
- import sqlite3
- import time
- #run on python 2 or 3
- try:
- import urllib.request as urllib2
- except ImportError:
- import urllib2
- import re
# Local SQLite store of legal notices that have already been scraped.
# One row per notice: when it was recorded, its relative URL, and its text.
colsql = sqlite3.connect('collegalnotices233.db')
# Hand TEXT columns back as plain `str` objects.
colsql.text_factory = str
colcur = colsql.cursor()
# Using the connection as a context manager commits once the statement
# has run successfully.
with colsql:
    colcur.execute(
        'CREATE TABLE IF NOT EXISTS notices(date TEXT, url TEXT, content TEXT)'
    )
# --- Scrape The Columbian's legal-notice listings ---------------------------
# Builds `colpostcontent`, the body of the reddit post, from every notice on
# the listing page that is not yet recorded in the `notices` table.
_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:42.0) '
               'Gecko/20100101 Firefox/42.0')
_CLASSIFIEDS_ROOT = 'http://classifieds.columbian.com/'


def _fetch_soup(url):
    """Download *url* with a browser User-Agent and return it parsed by bs4."""
    request = urllib2.Request(url)
    request.add_header('User-Agent', _USER_AGENT)
    response = urllib2.build_opener().open(request)
    try:
        html = response.read()
    finally:
        response.close()
    # NOTE(review): no parser is named here, so bs4 picks among whichever
    # parsers are installed; consider pinning one for reproducible output.
    return BeautifulSoup(html)


colulegal = _fetch_soup(_CLASSIFIEDS_ROOT + 'legals/')

# Load the known URLs once and compare exactly. Matching against str() of
# the row list was a substring test and could report false hits.
known_urls = set(row[0] for row in colcur.execute("SELECT url FROM notices"))

colpostcontent = 'Recent Clark County Legal Notices:\n\n\n\n\n'
for link in colulegal.find_all("a", class_="view_post"):
    href = str(link['href'])
    print(href)
    if href in known_urls:
        print("\n \n in database")
        continue

    # Only notices that are not stored yet are downloaded.
    container = _fetch_soup(_CLASSIFIEDS_ROOT + href).find(
        class_="ad-content-container")
    if container is None:
        # Page layout differs from what is expected; skip instead of crashing.
        print("\n \n no ad content found, skipping")
        continue

    notice_text = container.get_text(' ', strip=True)
    if not isinstance(notice_text, str):
        # Python 2: get_text() yields unicode. Keep working with UTF-8 byte
        # strings there, as before; on Python 3 the text is already `str`.
        notice_text = notice_text.encode('utf8')

    date = time.strftime("%Y %m %d %H:%M:%S %A")
    colcur.execute('INSERT INTO notices VALUES(?, ?, ?)',
                   [date, href, notice_text])
    colsql.commit()
    known_urls.add(href)
    print("\n \n added to database")
    # NOTE(review): the pasted source lost its indentation; this assumes only
    # newly added notices are appended to the post body - confirm.
    colpostcontent += '\n \n' + notice_text + '\n\n\n\n\n'

print(colpostcontent)
# --- Posting configuration --------------------------------------------------
PTITLE = "Clark County Legal Notices for " + time.strftime("%A, %B %d %Y")
SUBREDDIT = "legalnoticesPDX"
# Seconds to sleep between checks; only referenced by the disabled
# continuous-run lines at the bottom of the script.
WAITS = 40000
# Scheduled time of day for the post, as "HH:MM".
PTIME = "05:00"

#reddit part
# APIKEY, APIKEYGOESHERE, USERNAME and PASSWORD are placeholders that are not
# defined in this file; they must be provided before the script can run.
r = praw.Reddit(
    client_id=APIKEY,
    client_secret=APIKEYGOESHERE,
    redirect_uri='http://localhost:8080',
    user_agent='/r/legalnoticesPDX poster',
    username=USERNAME,
    password=PASSWORD,
)

# Convert PTIME to minutes since midnight.
ptime = PTIME.split(':')
ptime = int(ptime[0]) * 60 + int(ptime[1])

# Record of submissions that have been made, one row per reddit post.
sql = sqlite3.connect('collegalpostsfinal1.db')
sql.text_factory = str
cur = sql.cursor()
# The connection context manager commits after the statement succeeds.
with sql:
    cur.execute(
        'CREATE TABLE IF NOT EXISTS posts(ID TEXT, STAMP TEXT, CREATED TEXT, POST TEXT)'
    )
def dailypost():
    """Make today's post unless the `posts` table already has today's stamp.

    Prints the current time and the scheduled time (both as minutes since
    midnight and as HH:MM), then the offset between them. The offset line
    is informational only: the post is made whether or not the scheduled
    time has passed.
    """
    current = datetime.datetime.now()
    daystamp = current.strftime("%d%b%Y")
    cur.execute('SELECT * FROM posts WHERE STAMP=?', [daystamp])

    minutes_now = current.hour * 60 + current.minute
    print('Now: ' + str(minutes_now) + ' ' + current.strftime("%H:%M"))
    print('Pst: ' + str(ptime) + ' ' + PTIME)

    if cur.fetchone():
        print("Already made today's post")
        return

    offset = minutes_now - ptime
    if offset > 0:
        print('t+ ' + str(abs(offset)) + ' minutes')
    else:
        print('t- ' + str(offset) + ' minutes')
    makepost(current, daystamp)
def makepost(now, daystamp):
    """Submit the collected notices to SUBREDDIT and record the submission.

    Args:
        now: Current datetime. Accepted for call compatibility; not used.
        daystamp: Day key for this post, stored in the STAMP column so that
            dailypost() can detect that today's post already exists.
    """
    print('Making post...')
    newpost = r.subreddit(SUBREDDIT).submit(
        title=PTITLE, selftext=colpostcontent, send_replies=True)
    print('Success')
    # Name the columns explicitly. The table is declared as
    # (ID, STAMP, CREATED, POST); the earlier positional insert stored the
    # day stamp under ID and the reddit id under STAMP, so dailypost()'s
    # "WHERE STAMP=?" lookup could never find today's row.
    cur.execute(
        'INSERT INTO posts(ID, STAMP, CREATED, POST) VALUES(?, ?, ?, ?)',
        [newpost.id, daystamp, newpost.created_utc, str(colpostcontent)],
    )
    sql.commit()
# Single-shot run: attempt today's post once, report any error, pause
# briefly, then exit. (The script previously wrapped this in an endless loop
# that always exited during its first pass; to run continuously, loop and
# sleep WAITS seconds instead of quitting.)
try:
    dailypost()
except Exception as e:
    print("ERROR:", e)
time.sleep(5)
print ('quitting...')
quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement