SHARE
TWEET

Untitled

a guest Oct 21st, 2019 61 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import re
  2. from Post import Post
  3. import urllib.request as urllib
  4. from bs4 import BeautifulSoup
  5.  
  6.  
  7.  
  8. def getHTML(url):
  9.     try:
  10.         f = urllib.urlopen(url)
  11.         return f.read()
  12.         f.close()
  13.     except urllib.error.HTTPError as e:
  14.         print("OcurriĆ³ un error")
  15.         print(e.code)
  16.         return ""
  17.     except urllib.error.URLError as e:
  18.         print("OcurriĆ³ un error")
  19.         print(e.reason)
  20.         return ""
  21.  
  22. link1 = "https://foros.derecho.com/foro/20-Derecho-Civil-General"
  23. soup = BeautifulSoup(getHTML(link1), 'html.parser')
  24. ol = soup.find(id="threads")
  25. li_list = ol.find_all("li", {"class": "threadbit"})
  26.  
  27. titles = []
  28. links = []
  29. authors = []
  30. pubdates = []
  31. responses = []
  32. views = []
  33. for li in li_list:
  34.     titulo = ""
  35.     linkd = ""
  36.     fecha = ""
  37.     autor = ""
  38.     visitas = ""
  39.     respuestas = ""
  40.     h3 = li.find("h3", {"class": "threadtitle"})
  41.     a = h3.find('a')
  42.     titulo = a.string
  43.  
  44.  
  45.     h3 = li.find("h3", {"class": "threadtitle"})
  46.     a = h3.find('a')
  47.     linkd = "https://foros.derecho.com/"+a['href']
  48.  
  49.  
  50.  
  51.     div_threadmeat = li.find("div", {"class": "threadmeta"})
  52.     label = div_threadmeat.find("span", {"class": "label"})
  53.     a = label.find('a')
  54.     meta = a['title']
  55.     authorYdate = re.match("Iniciado por (.+), el (.+)", meta)
  56.     autor = authorYdate.group(1)
  57.     fecha = authorYdate.group(2)
  58.  
  59.  
  60.     ul_stats = li.find("ul", {"class": "threadstats td alt"})
  61.     i = 0
  62.     for li2 in ul_stats.find_all('li'):
  63.         if(i == 0):
  64.             respuestas = li2.find('a').string
  65.             i += 1
  66.         elif(i == 1):
  67.             visitas = re.match("Visitas: (.+)", li2.string).group(1)
  68.             i += 1
  69.         else:
  70.             i = 0
  71.     post = Post(titulo, linkd, fecha, autor, respuestas, visitas)
  72.     print(post)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top