Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import requests
- import json
- import sqlite3
- from time import gmtime, strftime, sleep
def request(page, timeout=30):
    """Download one page of the Pikabu feed and return the raw response body.

    Sends a GET request to ``https://pikabu.ru`` with an
    ``Accept: application/json`` header; the returned text is what ``parse``
    decodes.

    Args:
        page: Value sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole scraper.

    Returns:
        The response body as a string.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'https://pikabu.ru'
    # NOTE(review): 'twitmode' and 'of' are sent verbatim; presumably they
    # select the JSON flavour of the feed -- confirm against the site.
    params = {
        'twitmode': 1,
        'of': 'v2',
        'page': page,
    }
    # The CSRF token and session cookie carry the same fixed placeholder
    # value; presumably the endpoint only checks that they are present.
    headers = {
        'X-CSRF-Token': 'Krasavchik',
        'Accept': 'application/json',
        'User-Agent': 'Safari/642',
    }
    cookies = {
        'PHPSESS': 'Krasavchik',
    }
    response = requests.get(
        url,
        headers=headers,
        params=params,
        cookies=cookies,
        timeout=timeout,
    )
    return response.text
def parse(text):
    """Extract ``{story_id: rating}`` from a JSON feed response.

    The payload is expected to hold a list of stories under
    ``data -> stories``; each story has an ``id`` and an ``html`` fragment
    in which the rating appears as a ``data-rating`` attribute.

    Args:
        text: JSON document as returned by ``request``.

    Returns:
        A dict mapping integer story ids to integer ratings. Negative
        ratings keep their sign. Stories whose HTML carries no numeric
        ``data-rating`` attribute are omitted.

    Raises:
        json.JSONDecodeError: If ``text`` is not valid JSON.
        KeyError: If the expected ``data``/``stories``/``id``/``html`` keys
            are missing.
    """
    # Local import: the file's import block does not provide ``re``.
    import re

    # Anchor on the attribute itself and capture an optional minus sign.
    # Keeping only the digit characters of the surrounding whitespace token
    # would drop the sign and could pick up unrelated digits glued to it.
    rating_pattern = re.compile(r'data-rating\s*=\s*["\']?(-?[0-9]+)')

    summary = {}
    for story in json.loads(text)['data']['stories']:
        match = rating_pattern.search(story['html'])
        if match is None:
            # No numeric rating in this story's markup; nothing to record.
            continue
        summary[int(story['id'])] = int(match.group(1))
    return summary
def persist(summary, dbname='pikabu.db'):
    """Append one snapshot of story ratings to a SQLite database.

    Creates the ``rating_stat(fetch_time, story_id, story_rating)`` table if
    it does not exist yet, then inserts one row per entry of ``summary``.
    All rows of a call share the same UTC timestamp, formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        summary: Mapping of story id to rating, as produced by ``parse``.
        dbname: Path of the SQLite database file. Defaults to ``pikabu.db``
            in the current working directory.

    Raises:
        sqlite3.Error: If the database cannot be opened or written. The
            connection is closed in that case as well, and rows inserted by
            the failed call are rolled back.
    """
    timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    rows = [
        (timestamp, story_id, story_rating)
        for story_id, story_rating in summary.items()
    ]

    conn = sqlite3.connect(dbname)
    try:
        # ``with conn`` commits on success and rolls back on error; it does
        # not close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute(
                'create table if not exists rating_stat('
                'fetch_time text, story_id integer, story_rating integer)'
            )
            # Placeholders let SQLite bind the values instead of splicing
            # them into the statement text.
            conn.executemany('insert into rating_stat values (?, ?, ?)', rows)
    finally:
        conn.close()
def fetch(max_page):
    """Scrape pages 1..max_page, merge their ratings and store the result.

    Each page is downloaded with ``request`` and decoded with ``parse``; the
    per-page mapping is echoed to stdout as it arrives. Pages are merged in
    ascending order, so when a story id appears on several pages the rating
    from the highest-numbered page wins. Once every page has been processed
    the merged mapping is printed and handed to ``persist``.

    Args:
        max_page: Number of the last page to fetch (inclusive). For values
            below 1 no page is fetched and an empty mapping is persisted.
    """
    merged = {}
    for page_no in range(1, max_page + 1):
        body = request(page_no)
        page_ratings = parse(body)
        merged.update(page_ratings)

        # Progress report: page number, that page's ratings, blank separator.
        print('iteration:', page_no)
        print(page_ratings, end='\n\n')

    print('summary:', merged)
    persist(merged)
if __name__ == '__main__':
    # Number of feed pages scraped per run; edit this value to fetch more
    # or fewer pages.
    page_count = 5
    fetch(page_count)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement