SHARE
TWEET

Untitled

a guest Jan 11th, 2019 85 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from bs4 import BeautifulSoup
  2. import requests
  3.  
  4. def get_stories(url):
  5.     r = requests.get(url)
  6.     html = r.text
  7.     soup = BeautifulSoup(html)
  8.     title_rows = soup.findAll(attrs={'class':'title'})
  9.     stories = []
  10.  
  11.     for title_row in title_rows:
  12.         links = title_row.findChildren('a')
  13.        
  14.         article_href = None
  15.         title = None
  16.         if links:
  17.             link = links[0]
  18.             title = link.contents[0]
  19.             if title == 'More':
  20.                 continue
  21.            
  22.             article_href = link['href']
  23.             if article_href[0:4] == "item":
  24.                 article_href = 'https://news.ycombinator.com/' + article_href
  25.                
  26.         byline = title_row.findParent().findNextSibling()
  27.         links = byline.findChildren('a')
  28.         comment_href = None
  29.         for link in links:
  30.             if "comment" in link.contents[0]:
  31.                 comment_href =  'https://news.ycombinator.com/' + link['href']
  32.                
  33.         if title and (article_href or comment_href):
  34.             stories.append((title, article_href, comment_href))
  35.    
  36.     return stories
  37.  
  38. def write_stories(stories):
  39.     fh = open('o.txt', 'w')
  40.     for segment in stories:
  41.         for i, story in enumerate(segment):
  42.             fh.write("%2d. %s\n    %s\n"
  43.                      % (i, story[0].encode('ascii', 'ignore'),
  44.                         story[1].encode('ascii', 'ignore')))
  45.             if story[2] != None and story[1] != story[2]:
  46.                 fh.write("    %s\n"
  47.                          % (story[2].encode('ascii', 'ignore')))
  48.         fh.write('\n')
  49.     fh.close()
  50.  
  51. if __name__ == "__main__":
  52.     stories = []
  53.    
  54.     stories.append(get_stories('http://news.ycombinator.com'))
  55.     stories.append(get_stories('http://news.ycombinator.com/ask'))
  56.     stories.append(get_stories('http://news.ycombinator.com/newest'))
  57.     stories.append(get_stories('https://news.ycombinator.com/show'))
  58.    
  59.     write_stories(stories)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top