Advertisement
Guest User

Untitled

a guest
Apr 25th, 2019
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.82 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import requests
  4. import json
  5. import sqlite3
  6. from time import gmtime, strftime, sleep
  7.  
  8. def request(page):
  9.     url = 'https://pikabu.ru'
  10.     params = {
  11.         'twitmode': 1,
  12.         'of': 'v2',
  13.         'page': page
  14.     }
  15.     headers = {
  16.         'X-CSRF-Token': 'Krasavchik',
  17.         'Accept': 'application/json',
  18.         'User-Agent': 'Safari/642'
  19.     }
  20.     cookies = {
  21.         'PHPSESS': 'Krasavchik'
  22.     }
  23.     response = requests.get(url, headers=headers, params=params, cookies=cookies)
  24.     return response.text
  25.  
  26. def parse(text):
  27.     parsed = json.loads(text)
  28.     stories = parsed['data']['stories']
  29.     summary = {}
  30.     for story in stories:
  31.         story_id = int(story['id'])
  32.         raw_story_rating = list(filter(lambda x: 'data-rating' in x, story['html'].split()))[0]
  33.         story_rating = int(''.join(list(filter(lambda c: '0' <= c and c <= '9', raw_story_rating))))
  34.         summary[story_id] = story_rating
  35.     return summary
  36.  
  37. def persist(summary):
  38.     dbname = 'pikabu.db'
  39.     conn = sqlite3.connect(dbname)
  40.     c = conn.cursor()
  41.     c.execute('create table if not exists rating_stat(fetch_time text, story_id integer, story_rating integer)')
  42.  
  43.     timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
  44.     for story_id, story_rating in summary.items():
  45.         query = "insert into rating_stat values ('{}', {}, {})".format(timestamp, story_id, story_rating)
  46.         c.execute(query)
  47.     conn.commit()
  48.     conn.close()
  49.  
  50. def fetch(max_page):
  51.     summary = {}
  52.     for page in range(1, max_page + 1):
  53.         curr = parse(request(page))
  54.         summary.update(curr)
  55.  
  56.         print('iteration:', page)
  57.         print(curr)
  58.         print()
  59.  
  60.     print('summary:', summary)
  61.     persist(summary)
  62.  
  63. if __name__ == '__main__':
  64.     # change amount of pages here
  65.     fetch(5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement