Not a member of Pastebin yet?
Sign Up — it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- import csv
def getHtml(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: If the server does not respond within
            *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an error status instead of handing an error page to the
    # HTML parser further down the pipeline.
    response.raise_for_status()
    return response.text
def _findChain(node, *steps):
    """Follow successive ``find`` lookups starting at *node*.

    Each step is a ``(tag_name, css_class)`` pair; ``css_class`` may be None
    to match on the tag name alone. Returns the final tag, or None as soon as
    *node* or any intermediate lookup is missing.
    """
    for name, cssClass in steps:
        if node is None:
            return None
        if cssClass is None:
            node = node.find(name)
        else:
            node = node.find(name, class_=cssClass)
    return node


def pageData(html):
    """Extract every article teaser from *html* and append it to the CSV.

    For each ``article.post-list-container-item`` inside the
    ``div.post-list-container`` element, the title, link, timestamp text and
    image source are collected into a dict and passed to ``writeCsv``.

    Articles without a linked title are skipped, and a missing timestamp or
    image is recorded as an empty string, so one incomplete teaser does not
    abort the whole run with ``AttributeError``.

    Args:
        html: Markup of the listing page.
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', class_='post-list-container')
    if container is None:
        # Page layout differs from what is expected: nothing to export.
        return

    for art in container.find_all('article', class_='post-list-container-item'):
        # Shared ancestor of the title, the link and the timestamp.
        textBlock = art.find('div', class_='post-list-container-item-text')
        heading = _findChain(textBlock, ('h2', None))
        link = _findChain(heading, ('a', None))
        href = link.get('href') if link is not None else None
        if href is None:
            # Without a link the row cannot point anywhere useful.
            continue

        stamp = _findChain(
            textBlock,
            ('div', 'post-list-container-item-text-info'),
            ('span', None),
            ('time', None),
        )
        image = _findChain(art, ('figure', None), ('img', None))

        writeCsv({
            'title': heading.text.strip(),
            'url': 'https://point.md/ru/' + href,
            'time': stamp.text.strip() if stamp is not None else '',
            'photo': image.get('src') if image is not None else '',
        })
def writeCsv(data):
    """Append one article record to ``pointm.csv``.

    Args:
        data: Mapping with the keys ``'title'``, ``'time'``, ``'photo'`` and
            ``'url'``; the values are written as one row in that column
            order.
    """
    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows); explicit UTF-8 keeps non-ASCII titles writable
    # regardless of the platform's default encoding.
    with open('pointm.csv', 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(
            (data['title'], data['time'], data['photo'], data['url'])
        )
def main():
    """Fetch the point.md Russian front page and hand its HTML to ``pageData``."""
    startPage = 'https://point.md/ru/'
    pageData(getHtml(startPage))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Add Comment
Please sign in to add a comment.