Advertisement
Guest User

Python Error

a guest
Sep 26th, 2018
144
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.76 KB | None | 0 0
  1. import requests
  2. import csv
  3. from tqdm import tqdm
  4. from bs4 import BeautifulSoup
  5.  
  6. BASE_URL = "https://freelansim.ru/"
  7.  
  8.  
  9. def get_html(url):
  10.     response = requests.get(url)
  11.     return response.text
  12.  
  13.  
  14. def get_pages(html):
  15.     soup = BeautifulSoup(html, "lxml")
  16.     page = soup.find("div", class_="pagination").find_all("a")[-2].get("href")
  17.     total_page = page.split("=")[1]
  18.     return int(total_page)
  19.  
  20.  
  21. def write_csv(dictOfData):
  22.     with open("weblancer.csv", "w") as csv_file:
  23.         writer = csv.writer(csv_file)
  24.         writer.writerow(('Название', 'Цена', 'Ссылка'))
  25.         for data in dictOfData:
  26.             writer.writerows((data["title"],
  27.                               data["price"],
  28.                               data["url"]))
  29.  
  30.  
  31. def get_data(html):
  32.     soup = BeautifulSoup(html, "lxml")
  33.     uls = soup.find("ul", class_="content-list")
  34.     header = uls.find("header", class_="task__header")
  35.     li = header.find_all("li")
  36.     dictOfData = []
  37.     for tasks in li:
  38.             title = tasks.find("div", class_="task__title").find("a").text.strip()
  39.             price = tasks.find("div", class_="task__price").find("span", class_="count").text.strip()
  40.             url = "https://freelansim.ru" + tasks.find("div", class_="task__title").find("a").get("href")
  41.             dictOfData.append({"title": title,
  42.                               "price": price,
  43.                                 "url": url})
  44.     return dictOfData
  45.  
  46.  
  47. def main():
  48.     links = []
  49.     total_page = get_pages(get_html(BASE_URL))
  50.     for i in tqdm(range(1, total_page + 1)):
  51.         links.extend(get_data(get_html(BASE_URL + "tasks?page=1".format(str(i)))))
  52.  
  53.     for i in links:
  54.         print(i)
  55.     write_csv(links)
  56.  
  57.  
  58. if __name__ == "__main__":  # run the scraper only when executed as a script, not on import
  59.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement