Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
def get_stories(url):
    """Scrape one Hacker News listing page and return the stories found on it.

    The page at *url* is downloaded and every element whose class is
    ``title`` is treated as a candidate story row. The first link in the row
    supplies the title and article address; the element following the row's
    parent is searched for a link whose text contains "comment".

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A list of ``(title, article_href, comment_href)`` tuples. Either
        href may be ``None``, but a row is only kept when it has a title and
        at least one of the two hrefs.
    """
    base_url = 'https://news.ycombinator.com/'

    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    stories = []
    for title_row in soup.find_all(attrs={'class': 'title'}):
        links = title_row.find_all('a')
        article_href = None
        title = None
        if links:
            link = links[0]
            # An empty <a></a> has no children; treat it as untitled instead
            # of failing on contents[0].
            title = link.contents[0] if link.contents else None
            if title == 'More':
                # The pagination row is styled like a story; skip it.
                continue
            article_href = link.get('href')
            # Self posts use a site-relative "item?id=..." address.
            if article_href and article_href.startswith('item'):
                article_href = base_url + article_href

        comment_href = None
        parent = title_row.find_parent()
        # The last row of the page may have nothing after it.
        byline = parent.find_next_sibling() if parent is not None else None
        if byline is not None:
            for link in byline.find_all('a'):
                if not link.contents or not link.get('href'):
                    continue
                if "comment" in link.contents[0]:
                    comment_href = base_url + link['href']

        if title and (article_href or comment_href):
            stories.append((title, article_href, comment_href))
    return stories
def _ascii_only(text):
    """Return *text* as text with every non-ASCII character dropped."""
    # Decode back after encoding: formatting the raw bytes with %s would
    # write their repr (b'...') into the file on Python 3.
    return text.encode('ascii', 'ignore').decode('ascii')


def write_stories(stories):
    """Write the scraped stories to ``o.txt`` in the current directory.

    Args:
        stories: A sequence of segments, one per scraped page. Each segment
            is a sequence of ``(title, article_href, comment_href)`` tuples;
            ``comment_href`` may be ``None``.

    Each story is written as a numbered title line followed by its article
    address. The comment address is added on its own line only when it is
    present and differs from the article address. Numbering restarts at 0
    for every segment. The file is overwritten on each call.
    """
    # The context manager closes the file even if a write raises.
    with open('o.txt', 'w') as fh:
        for segment in stories:
            for i, story in enumerate(segment):
                fh.write("%2d. %s\n %s\n"
                         % (i, _ascii_only(story[0]), _ascii_only(story[1])))
                if story[2] is not None and story[1] != story[2]:
                    fh.write(" %s\n" % _ascii_only(story[2]))
                # NOTE(review): the source lost its indentation; the blank
                # separator is assumed to follow every story - confirm.
                fh.write('\n')
if __name__ == "__main__":
    # Listing pages to scrape, in the order their segments are written out.
    # All use https so every request goes to the same scheme that
    # get_stories uses when it builds absolute links.
    listing_urls = (
        'https://news.ycombinator.com',
        'https://news.ycombinator.com/ask',
        'https://news.ycombinator.com/newest',
        'https://news.ycombinator.com/show',
    )
    # One segment (list of stories) per page.
    stories = [get_stories(listing_url) for listing_url in listing_urls]
    write_stories(stories)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement