Advertisement
Guest User

Untitled

a guest
May 19th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.22 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import urllib.request
  3. import re
  4.  
  5. import requests
  6.  
  7. id = 250817
  8. url = "http://fasteignir.visir.is/property/"
  9.  
  10.  
  11.  
  12. for i in range(100):
  13. try:
  14. print("ID IS:", id - i)
  15.  
  16. r = requests.get(url + str(id - i))
  17. data = r.text
  18. soup = BeautifulSoup(data)
  19.  
  20. def cleanhtml(raw_html):
  21. cleanr = re.compile('<.*?>')
  22. cleantext = re.sub(cleanr, '', raw_html)
  23. return cleantext
  24.  
  25. ## size, built year, bedrooms, bathrooms
  26. feature_div = soup.findAll("ul", {"class": "features"})
  27. list_of_li = feature_div[0].findAll("li")
  28.  
  29. ##gets the size
  30. size = str(list_of_li[0])[4:].split(" m")[0].split(",")[0]
  31. size = int(size)
  32. ##gets the year
  33. year_built = int(str(list_of_li[1])[4:].split(" ")[1][:4])
  34. ##bedrooms
  35. beds_number = int(str(list_of_li[2])[4:].split(" ")[0])
  36. #bathrooms
  37. baths_number = int(str(list_of_li[3])[4:].split(" ")[0])
  38. #bull fasteignanúmer
  39. property_registration = "15"
  40.  
  41. address_span = soup.findAll("span", {"class": "address"})
  42. address = str(address_span[-1]).split(">")[1].split("<")[0]
  43. ## city ogo zip!!
  44. zip = int(address.split(" ")[0])
  45. city = address.split(" ")[1]
  46.  
  47. #type
  48. type_span = soup.findAll("div", {"class": "col-description"})[0]
  49. type = str(type_span.findAll("h2")).split(", ")[1]
  50.  
  51. #description
  52. description = str(type_span.findAll("div", {"class": "description"})[0]).replace("\n", "")
  53. 3#description
  54. description = cleanhtml(description)
  55.  
  56. #price
  57. price_span = soup.findAll("span", {"class": "price"})[0]
  58. price = int(str(price_span).split(" kr")[0].split(">")[1].replace(".", ""))
  59.  
  60. feature_div = soup.findAll("div", {"class": "image-tiles"})[0]
  61. imgs = feature_div.findAll("img")
  62.  
  63. for i in range(len(imgs)):
  64. imgs[i] = imgs[i].get("data-src")
  65.  
  66. print(size, year_built, beds_number, baths_number, zip, city, type, price, imgs[0])
  67.  
  68.  
  69. except Exception as e:
  70. print(e)
  71. print("villa")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement