SHARE
TWEET

Untitled

a guest Apr 25th, 2019 81 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2.  
  3. import requests
  4. import json
  5. import sqlite3
  6. from time import gmtime, strftime, sleep
  7.  
  8. def request(page):
  9.     url = 'https://pikabu.ru'
  10.     params = {
  11.         'twitmode': 1,
  12.         'of': 'v2',
  13.         'page': page
  14.     }
  15.     headers = {
  16.         'X-CSRF-Token': 'Krasavchik',
  17.         'Accept': 'application/json',
  18.         'User-Agent': 'Safari/642'
  19.     }
  20.     cookies = {
  21.         'PHPSESS': 'Krasavchik'
  22.     }
  23.     response = requests.get(url, headers=headers, params=params, cookies=cookies)
  24.     return response.text
  25.  
  26. def parse(text):
  27.     parsed = json.loads(text)
  28.     stories = parsed['data']['stories']
  29.     summary = {}
  30.     for story in stories:
  31.         story_id = int(story['id'])
  32.         raw_story_rating = list(filter(lambda x: 'data-rating' in x, story['html'].split()))[0]
  33.         story_rating = int(''.join(list(filter(lambda c: '0' <= c and c <= '9', raw_story_rating))))
  34.         summary[story_id] = story_rating
  35.     return summary
  36.  
  37. def persist(summary):
  38.     dbname = 'pikabu.db'
  39.     conn = sqlite3.connect(dbname)
  40.     c = conn.cursor()
  41.     c.execute('create table if not exists rating_stat(fetch_time text, story_id integer, story_rating integer)')
  42.  
  43.     timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
  44.     for story_id, story_rating in summary.items():
  45.         query = "insert into rating_stat values ('{}', {}, {})".format(timestamp, story_id, story_rating)
  46.         c.execute(query)
  47.     conn.commit()
  48.     conn.close()
  49.  
  50. def fetch(max_page):
  51.     summary = {}
  52.     for page in range(1, max_page + 1):
  53.         curr = parse(request(page))
  54.         summary.update(curr)
  55.  
  56.         print('iteration:', page)
  57.         print(curr)
  58.         print()
  59.  
  60.     print('summary:', summary)
  61.     persist(summary)
  62.  
  63. if __name__ == '__main__':
  64.     # change amount of pages here
  65.     fetch(5)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top