Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- import os, re
- import sys, traceback
# Path to a plain-text file that lists the links to every post of the
# LiveJournal blog. The file looks like this (sample lines quoted verbatim):
#   November 15th, 2006 - Волков Сергей Владимирович
#   (no subject)
#   http://salery.livejournal.com/339.html
#   November 19th, 2006 - Волков Сергей Владимирович
#   Впечатления от комментариев
#   http://salery.livejournal.com/767.html
# ...and so on to the end of the journal.
#
# Raw string on purpose: in a normal literal "\U" starts a \UXXXXXXXX escape
# (a SyntaxError in Python 3) and "\t" would silently become a TAB character.
LINKS = r'C:\Users\...\Desktop\to.txt'
def main():
    """Fetch each post listed in the LINKS file and print its text.

    Reads LINKS line by line. Lines that do not start with ``http`` (dates,
    post titles) are ignored. For every URL line the page is downloaded,
    parsed with BeautifulSoup, and the text of the first ``<td colspan="2">``
    element is printed together with the URL. After each post, and once more
    after the whole file has been processed, the function waits for Enter.

    A ValueError raised while handling a post is reported as a one-entry
    traceback excerpt followed by a pause, and processing continues with the
    next line. UnicodeEncodeError from ``print`` is a ValueError subclass, so
    console encoding failures are covered by that handler.
    """
    # `with` guarantees the links file is closed even if a request fails.
    with open(LINKS, 'r') as links_file:
        for line in links_file:
            # Same test as re.match('http', line): only a prefix match counts.
            if not line.startswith('http'):
                continue
            # Drop the trailing newline so it is not sent as part of the URL.
            url = line.strip()
            try:
                # No second positional argument: in requests.get() that slot
                # is `params`, so 'html.parser' ended up in the query string.
                response = requests.get(url)
                soup = BeautifulSoup(response.content, 'html.parser')
                cell = soup.find('td', colspan='2')
                if cell is None:
                    # find() returns None when nothing matches; report and
                    # move on instead of dying with AttributeError.
                    print('no <td colspan="2"> found in', url)
                    continue
                # NOTE(review): 'n' is printed literally; it may have been
                # meant as a newline separator - confirm before changing.
                print(cell.text, 'n', url)
                input()  # pause until Enter before fetching the next post
            except ValueError:
                # Index 1 of the formatted exception is the first stack
                # entry (file, line number and offending source line).
                print(traceback.format_exception(*sys.exc_info())[1])
                input()
    # Final pause after every link has been processed.
    input()
- File "C:Users....DesktopVolkovposts.py", line 22, in main
- print(t, 'n', i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement