Advertisement
Guest User

Untitled

a guest
Mar 28th, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.16 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import os, re
  4. import sys, traceback
  5.  
  6.  
  7. #это тхт файл со списками линков на все посты жж
  8. #он имеет вид:
  9. #November 15th, 2006 - Волков Сергей Владимирович
  10. #(no subject)
  11. #http://salery.livejournal.com/339.html
  12.  
  13. #November 19th, 2006 - Волков Сергей Владимирович
  14. #Впечатления от комментариев
  15. #http://salery.livejournal.com/767.html и т.д. до конца жж
  16.  
  17. LINKS = 'C:\Users\...\Desktop\to.txt'
  18.  
  19.  
  20. def main():
  21. f = open(LINKS, 'r')
  22.  
  23. for i in f:
  24.  
  25. try:
  26. match = re.match('http', i)
  27.  
  28. if match:
  29. r = requests.get(i, 'html.parser')
  30. soup = BeautifulSoup(r.content, 'html.parser')
  31. t = (soup.find('td', colspan='2').text)
  32. print(t, 'n', i)
  33. input()
  34.  
  35. else:
  36. pass
  37.  
  38. except ValueError:
  39. print(traceback.format_exception(*sys.exc_info())[1])
  40. input()
  41.  
  42. input()
  43.  
  44. File "C:Users....DesktopVolkovposts.py", line 22, in main
  45. print(t, 'n', i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement