Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import requests
- import csv
- from tqdm import tqdm
- from bs4 import BeautifulSoup
# Root URL of the freelansim.ru job board; page URLs are built by
# appending "tasks?page=N", and task links are resolved against it.
BASE_URL = "https://freelansim.ru/"
def get_html(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server before aborting.
            New, backward-compatible parameter: the original call had no
            timeout, so a stalled server would hang the scraper forever.

    Returns:
        The decoded response body (``response.text``).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def get_pages(html):
    """Return the total number of listing pages found on *html*.

    The pagination widget's second-to-last ``<a>`` points at the final
    page as ``...?page=N``; N is pulled out of that query string.
    Raises AttributeError/IndexError if the pagination markup is absent.
    """
    soup = BeautifulSoup(html, "lxml")
    pagination = soup.find("div", class_="pagination")
    last_page_href = pagination.find_all("a")[-2].get("href")
    # Everything after the first "=" is the page number.
    page_number = last_page_href.split("=")[1]
    return int(page_number)
def write_csv(dictOfData):
    """Write the scraped task records to ``weblancer.csv``.

    Args:
        dictOfData: Iterable of dicts with keys ``"title"``, ``"price"``
            and ``"url"`` (as produced by ``get_data``).

    The file gets a Russian header row followed by one row per record.
    """
    # newline="" is required by the csv module to avoid blank rows on
    # Windows; utf-8 is explicit because the header is Cyrillic.
    with open("weblancer.csv", "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(('Название', 'Цена', 'Ссылка'))
        for data in dictOfData:
            # BUG FIX: the original called writerows() on a tuple of
            # strings, which treats each STRING as a row and writes it
            # one character per cell.  writerow() writes one record.
            writer.writerow((data["title"],
                             data["price"],
                             data["url"]))
def get_data(html):
    """Extract task records from one listing page.

    Args:
        html: HTML of a ``tasks?page=N`` listing page.

    Returns:
        List of dicts with keys ``"title"``, ``"price"`` and ``"url"``
        (url is absolute).
    """
    soup = BeautifulSoup(html, "lxml")
    content_list = soup.find("ul", class_="content-list")
    records = []
    # BUG FIX: the original took only the FIRST <header class="task__header">
    # and then looped over the <li> elements inside it, so at most one
    # task per page was ever collected.  Each task on the page has its
    # own header; iterate over all of them instead.
    # NOTE(review): assumes the title and price divs live inside each
    # header, as the original lookups implied — confirm against the
    # live markup.
    for header in content_list.find_all("header", class_="task__header"):
        title_link = header.find("div", class_="task__title").find("a")
        title = title_link.text.strip()
        price = header.find("div", class_="task__price").find(
            "span", class_="count").text.strip()
        url = "https://freelansim.ru" + title_link.get("href")
        records.append({"title": title,
                        "price": price,
                        "url": url})
    return records
def main():
    """Crawl every listing page, print each task and save all to CSV."""
    tasks = []
    total_page = get_pages(get_html(BASE_URL))
    for page in tqdm(range(1, total_page + 1)):
        # BUG FIX: the original used "tasks?page=1".format(str(i)) — the
        # template had no {} placeholder, so every iteration re-fetched
        # page 1.  Interpolate the page number properly.
        tasks.extend(get_data(get_html(BASE_URL + "tasks?page={}".format(page))))
    for task in tasks:
        print(task)
    write_csv(tasks)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement