Orleon

Untitled

Sep 22nd, 2021 (edited)
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.31 KB | None | 0 0
  1. import scrapy
  2. import re
  3. from scrapy.http import HtmlResponse
  4. from urllib.parse import urlencode
  5. from instaparser.items import InstaparserItem
  6. from copy import deepcopy
  7. import json
  8.  
  9.  
  10. class InstaSpider(scrapy.Spider):
  11.     name = 'insta'
  12.     allowed_domains = ['instagram.com']
  13.     start_urls = ['https://www.instagram.com/']
  14.  
  15.     reg_url = 'https://www.instagram.com/accounts/login/ajax/'
  16.  
  17.     user_name = 'desqip'
  18.     user_pass = '#PWD_INSTAGRAM_BROWSER:10:1630833865:AV5QAJM2bG/IoognU1Wwq3LJORHcT13nEGkfSFNsuB33WFViDZhopvay4L3gXAhC/OygqauHEo9/6YfmoccEozTE5Ti5wiCPGYMgFGpQE9xzcrg+tYmcZDRfmCWgX935ZPoKuVbyh1DuuhkX5RQHmA=='
  19.  
  20.     query = '8c2a529969ee035a5063f2fc8602a0fd'
  21.     headers = {'User-Agent': 'Instagram 155.0.0.37.107'}
  22.     api_url = 'https://i.instagram.com/api/v1/friendships/'
  23.     users = ['onliskill_udm', 'olga780328']
  24.  
  25.     def parse(self, response:HtmlResponse):
  26.         csrf = self.token(response.text)
  27.         yield scrapy.FormRequest(self.reg_url,
  28.                            method='POST',
  29.                            callback=self.login,
  30.                            formdata={'username': self.user_name, 'enc_password': self.user_pass},
  31.                            headers={'x-csrftoken': csrf})
  32.  
  33.     def login(self, response:HtmlResponse):
  34.         j_data = response.json()
  35.         if j_data['authenticated']:
  36.             for user in self.users:
  37.                 yield response.follow(f"/{user}",
  38.                                       callback=self.read_subscribes,
  39.                                       cb_kwargs={'user': user})
  40.  
  41.  
  42.     def token(self, text):
  43.         search = re.search('\"csrf_token\":\"\w+\"', text).group()
  44.         return search.split(':')[1].replace('"', '')
  45.  
  46.  
  47.  
  48.     def read_subscribes(self, response:HtmlResponse, user):
  49.         # id = re.search('\"id\":\"\w+\"', response.text).group().split(':')[1].replace('"', '')
  50.         id = self.fetch_user_id(response.text, user)
  51.  
  52.         yield response.follow(f'https://i.instagram.com/api/v1/friendships/{id}/followers/?count=12',
  53.                               callback=self.my_subscribe, cb_kwargs={'user': deepcopy(user), 'id_user': deepcopy(id)}, headers=self.headers)
  54.  
  55.     def my_subscribe(self, response: HtmlResponse, user, id_user):
  56.         J_data = response.json()
  57.         if J_data.get('next_max_id'):
  58.             next = J_data['next_max_id']
  59.             url_follow = f'{self.api_url}{id_user}/followers/?count=12&max_id={next}'
  60.             yield response.follow(url_follow,
  61.                                   callback=self.my_subscribe,
  62.                                   cb_kwargs={'user': user,
  63.                                              'id_user': id_user},
  64.                                   headers={'User-Agent': 'Instagram 155.0.0.37.107'})
  65.         for i in J_data['users']:
  66.             status = 'in'
  67.             main_user = user
  68.             main_id = id_user
  69.             user_name = i['username']
  70.             id = i['pk']
  71.             photo = i['profile_pic_url']
  72.             item = InstaparserItem(status=status, main_user=main_user, main_id=main_id, user_name=user_name, id=id, photo=photo)
  73.             yield item
  74.  
  75.     def fetch_user_id(self, text, username):
  76.         matched = re.search(
  77.             '{\"id\":\"\\d+\",\"username\":\"%s\"}' % username, text
  78.         ).group()
  79.         return json.loads(matched).get('id')
Add Comment
Please, Sign In to add comment