Advertisement
Guest User

Pull-A858

a guest
Dec 14th, 2015
545
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.43 KB | None | 0 0
  1. #/bin/python
  2. from bs4 import BeautifulSoup
  3. import urllib.request
  4. import time
  5.  
  6. currMain = "./currMain.html"
  7. redditURL = "https://www.reddit.com/"
  8. posts = open('./posts/allPosts', 'a+') #read, append, byte
  9. postsRead = open('./posts/allPosts', 'r')
  10. postsRead = postsRead.readlines()
  11.  
  12. soup = BeautifulSoup(open(currMain))
  13. # f = urllib.request.urlopen(redditURL)
  14.  
  15. # print(f.read().decode('utf-8'))
  16.  
  17. things = soup.findAll("div", class_="thing")
  18.  
  19.  
  20. i=0
  21. for thing in soup.find("div", class_="thing").next_siblings:
  22.     titleSoup = thing.find("p", class_="title")
  23.     if titleSoup:
  24.         title = list(titleSoup.children)[0].string
  25.         url = redditURL[:-1] + list(titleSoup)[0]['href']
  26.     else:
  27.         continue
  28.  
  29.     i=i+1
  30.     print(str(i) + ": Waiting for next pull")
  31.     if (url+"\n") in postsRead:
  32.         print("Already done")
  33.         continue
  34.     posts.write(url + "\n")
  35.     f = urllib.request.urlopen(url)
  36.     soup = BeautifulSoup(f.read().decode('utf-8'))
  37.     content = soup.find("div", class_="md")
  38.     content = content.find("p").string
  39.    
  40.     curPost = open('./posts/' + title, 'w+')
  41.  
  42.     curPost.write("========== URL ==========\n")
  43.     curPost.write(url+"\n")
  44.     curPost.write("========== URL ==========\n")
  45.     curPost.write("========== TITLE ==========\n")
  46.     curPost.write(title+"\n")
  47.     curPost.write("========== TITLE ==========\n")
  48.     curPost.write("========== CONTENT ==========\n")
  49.     curPost.write(content+"\n")
  50.     curPost.write("========== CONTENT ==========\n")
  51.  
  52.     time.sleep(5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement