Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import urllib.request
- import re
- import requests
# Scrape a run of consecutive property listings from fasteignir.visir.is and
# print the key facts of each one.

# Highest listing id to fetch; the script walks downwards from here.
START_ID = 250817
# Number of consecutive listing ids to try.
LISTING_COUNT = 100
BASE_URL = "http://fasteignir.visir.is/property/"
# Seconds to wait for the server before giving up on a single listing.
REQUEST_TIMEOUT = 10

# Compiled once instead of on every loop iteration.
_TAG_RE = re.compile(r"<.*?>")


def cleanhtml(raw_html: str) -> str:
    """Return *raw_html* with every ``<...>`` span removed.

    The match is non-greedy, so each tag is stripped individually and the
    text between tags is kept.
    """
    return _TAG_RE.sub("", raw_html)


def _parse_listing(soup) -> dict:
    """Extract the fields of one listing page from its parsed HTML.

    The extraction works on the serialized markup of each element and
    relies on fixed positions and separators, so it is tightly coupled to
    the page layout.

    Args:
        soup: The ``BeautifulSoup`` document of a single listing page.

    Returns:
        A dict with the keys ``size``, ``year_built``, ``bedrooms``,
        ``bathrooms``, ``zip_code``, ``city``, ``property_type``,
        ``description``, ``price`` and ``images``.

    Raises:
        IndexError: An expected element is missing from the page.
        ValueError: A numeric field does not contain a parsable integer.
    """
    # Feature list, read by position: size, build year, bedrooms, bathrooms.
    feature_items = soup.find_all("ul", {"class": "features"})[0].find_all("li")
    # [4:] drops the opening tag; assumes each item serializes with a bare
    # "<li>" (4 characters) -- TODO confirm against the live markup.
    size_text, year_text, beds_text, baths_text = (
        str(item)[4:] for item in feature_items[:4]
    )

    # Size: keep the integer part in front of the decimal comma / " m" unit.
    size = int(size_text.split(" m")[0].split(",")[0])
    # Build year: first four characters of the second space-separated word.
    year_built = int(year_text.split(" ")[1][:4])
    bedrooms = int(beds_text.split(" ")[0])
    bathrooms = int(baths_text.split(" ")[0])

    # Address text of the last "address" span: "<zip> <city> ...".
    address_html = str(soup.find_all("span", {"class": "address"})[-1])
    address = address_html.split(">")[1].split("<")[0]
    address_words = address.split(" ")
    zip_code = int(address_words[0])
    city = address_words[1]

    # Property type: taken from the text after the first ", " in the
    # serialized list of <h2> headings.
    # NOTE(review): the value may still carry trailing markup -- confirm.
    details = soup.find_all("div", {"class": "col-description"})[0]
    property_type = str(details.find_all("h2")).split(", ")[1]

    # Description: flatten to one line, then strip the tags.
    description_html = str(details.find_all("div", {"class": "description"})[0])
    description = cleanhtml(description_html.replace("\n", ""))

    # Price: text before " kr" with the "." separators removed.
    price_html = str(soup.find_all("span", {"class": "price"})[0])
    price = int(price_html.split(" kr")[0].split(">")[1].replace(".", ""))

    # Image URLs are read from the "data-src" attribute of each <img>.
    tiles = soup.find_all("div", {"class": "image-tiles"})[0]
    images = [img.get("data-src") for img in tiles.find_all("img")]

    return {
        "size": size,
        "year_built": year_built,
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "zip_code": zip_code,
        "city": city,
        "property_type": property_type,
        "description": description,
        "price": price,
        "images": images,
    }


def main() -> None:
    """Fetch ``LISTING_COUNT`` listings counting down from ``START_ID``.

    Each listing is handled independently: any failure is printed and the
    loop moves on to the next id.
    """
    for offset in range(LISTING_COUNT):
        listing_id = START_ID - offset
        print("ID IS:", listing_id)
        try:
            response = requests.get(
                BASE_URL + str(listing_id), timeout=REQUEST_TIMEOUT
            )
            # Report HTTP errors directly instead of as a parsing failure.
            response.raise_for_status()
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(response.text, "html.parser")
            listing = _parse_listing(soup)
            print(
                listing["size"],
                listing["year_built"],
                listing["bedrooms"],
                listing["bathrooms"],
                listing["zip_code"],
                listing["city"],
                listing["property_type"],
                listing["price"],
                # A listing without images raises IndexError here and is
                # reported as a failure like any other missing field.
                listing["images"][0],
            )
        except Exception as e:
            # Deliberately broad: this is the per-listing boundary, and one
            # missing or malformed page must not abort the whole run.
            print(e)
            # "villa" is Icelandic for "error".
            print("villa")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement