Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.79 KB | None | 0 0
  1. import re
  2. from Post import Post
  3. import urllib.request as urllib
  4. from bs4 import BeautifulSoup
  5.  
  6.  
  7.  
  8. def getHTML(url):
  9. try:
  10. f = urllib.urlopen(url)
  11. return f.read()
  12. f.close()
  13. except urllib.error.HTTPError as e:
  14. print("Ocurrió un error")
  15. print(e.code)
  16. return ""
  17. except urllib.error.URLError as e:
  18. print("Ocurrió un error")
  19. print(e.reason)
  20. return ""
  21.  
  22. link1 = "https://foros.derecho.com/foro/20-Derecho-Civil-General"
  23. soup = BeautifulSoup(getHTML(link1), 'html.parser')
  24. ol = soup.find(id="threads")
  25. li_list = ol.find_all("li", {"class": "threadbit"})
  26.  
  27. titles = []
  28. links = []
  29. authors = []
  30. pubdates = []
  31. responses = []
  32. views = []
  33. for li in li_list:
  34. titulo = ""
  35. linkd = ""
  36. fecha = ""
  37. autor = ""
  38. visitas = ""
  39. respuestas = ""
  40. h3 = li.find("h3", {"class": "threadtitle"})
  41. a = h3.find('a')
  42. titulo = a.string
  43.  
  44.  
  45. h3 = li.find("h3", {"class": "threadtitle"})
  46. a = h3.find('a')
  47. linkd = "https://foros.derecho.com/"+a['href']
  48.  
  49.  
  50.  
  51. div_threadmeat = li.find("div", {"class": "threadmeta"})
  52. label = div_threadmeat.find("span", {"class": "label"})
  53. a = label.find('a')
  54. meta = a['title']
  55. authorYdate = re.match("Iniciado por (.+), el (.+)", meta)
  56. autor = authorYdate.group(1)
  57. fecha = authorYdate.group(2)
  58.  
  59.  
  60. ul_stats = li.find("ul", {"class": "threadstats td alt"})
  61. i = 0
  62. for li2 in ul_stats.find_all('li'):
  63. if(i == 0):
  64. respuestas = li2.find('a').string
  65. i += 1
  66. elif(i == 1):
  67. visitas = re.match("Visitas: (.+)", li2.string).group(1)
  68. i += 1
  69. else:
  70. i = 0
  71. post = Post(titulo, linkd, fecha, autor, respuestas, visitas)
  72. print(post)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement