Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- import re
- from scrapy.http import HtmlResponse
- from urllib.parse import urlencode
- from instaparser.items import InstaparserItem
- from copy import deepcopy
- import json
class InstaSpider(scrapy.Spider):
    """Spider that logs in to Instagram and yields followers of given users.

    Flow: ``parse`` posts the login form, ``login`` opens each profile in
    ``users``, ``read_subscribes`` resolves the numeric user id and requests
    the first followers page, and ``my_subscribe`` walks the paginated
    followers API, yielding one ``InstaparserItem`` per follower.
    """

    name = 'insta'
    allowed_domains = ['instagram.com']
    start_urls = ['https://www.instagram.com/']
    reg_url = 'https://www.instagram.com/accounts/login/ajax/'
    # NOTE(review): credentials are hard-coded in source; they should be moved
    # to settings / environment variables and rotated. Left unchanged here so
    # that existing behaviour and attribute names are preserved.
    user_name = 'desqip'
    user_pass = '#PWD_INSTAGRAM_BROWSER:10:1630833865:AV5QAJM2bG/IoognU1Wwq3LJORHcT13nEGkfSFNsuB33WFViDZhopvay4L3gXAhC/OygqauHEo9/6YfmoccEozTE5Ti5wiCPGYMgFGpQE9xzcrg+tYmcZDRfmCWgX935ZPoKuVbyh1DuuhkX5RQHmA=='
    # Not referenced by any method of this class; kept for compatibility.
    query = '8c2a529969ee035a5063f2fc8602a0fd'
    # Headers sent with every followers-API request.
    headers = {'User-Agent': 'Instagram 155.0.0.37.107'}
    api_url = 'https://i.instagram.com/api/v1/friendships/'
    # Profiles whose followers are collected.
    users = ['onliskill_udm', 'olga780328']
    # Value of the ``count`` query parameter on followers requests.
    followers_page_size = 12

    def parse(self, response: HtmlResponse):
        """Extract the CSRF token from the start page and submit the login form."""
        csrf = self.token(response.text)
        yield scrapy.FormRequest(
            self.reg_url,
            method='POST',
            callback=self.login,
            formdata={
                'username': self.user_name,
                'enc_password': self.user_pass,
            },
            headers={'x-csrftoken': csrf},
        )

    def login(self, response: HtmlResponse):
        """Check the login response and request every profile in ``users``.

        Nothing is yielded when the JSON body lacks a truthy
        ``authenticated`` field; the failure is logged instead of being
        silently ignored (or crashing with ``KeyError`` on an error payload).
        """
        j_data = response.json()
        if not j_data.get('authenticated'):
            self.logger.error('Instagram login failed for user %s', self.user_name)
            return
        for user in self.users:
            yield response.follow(
                f"/{user}",
                callback=self.read_subscribes,
                cb_kwargs={'user': user},
            )

    def token(self, text):
        """Return the value of the ``"csrf_token":"..."`` pair found in *text*.

        Raises:
            ValueError: if *text* contains no such pair.
        """
        match = re.search(r'"csrf_token":"(\w+)"', text)
        if match is None:
            raise ValueError('csrf_token not found in page source')
        return match.group(1)

    def read_subscribes(self, response: HtmlResponse, user):
        """Resolve *user*'s id from the profile page and request its followers."""
        user_id = self.fetch_user_id(response.text, user)
        yield response.follow(
            self._followers_url(user_id),
            callback=self.my_subscribe,
            cb_kwargs={'user': user, 'id_user': user_id},
            headers=self.headers,
        )

    def my_subscribe(self, response: HtmlResponse, user, id_user):
        """Handle one followers page: schedule the next page, yield the items.

        Args:
            response: JSON response of the followers endpoint.
            user: username of the profile being processed.
            id_user: id of that profile, as returned by ``fetch_user_id``.
        """
        j_data = response.json()
        next_max_id = j_data.get('next_max_id')
        if next_max_id:
            # A truthy ``next_max_id`` is the cursor for the following page.
            yield response.follow(
                self._followers_url(id_user, next_max_id),
                callback=self.my_subscribe,
                cb_kwargs={'user': user, 'id_user': id_user},
                headers=self.headers,
            )
        # Tolerate a payload without a ``users`` list instead of raising.
        for follower in j_data.get('users', []):
            yield InstaparserItem(
                status='in',
                main_user=user,
                main_id=id_user,
                user_name=follower['username'],
                id=follower['pk'],
                photo=follower['profile_pic_url'],
            )

    def fetch_user_id(self, text, username):
        """Return the id paired with *username* in *text*.

        Looks for the literal fragment ``{"id":"<digits>","username":"<username>"}``
        and returns the digits as a string. The username is escaped so that
        characters such as ``.`` are matched literally rather than as regex
        metacharacters.

        Raises:
            ValueError: if no such fragment is present.
        """
        pattern = r'\{"id":"(\d+)","username":"%s"\}' % re.escape(username)
        match = re.search(pattern, text)
        if match is None:
            raise ValueError(f'user id for {username!r} not found in page source')
        return match.group(1)

    def _followers_url(self, user_id, max_id=None):
        """Build the followers endpoint URL, optionally with a page cursor."""
        params = {'count': self.followers_page_size}
        if max_id:
            params['max_id'] = max_id
        # urlencode keeps the cursor safe if it contains reserved characters.
        return f'{self.api_url}{user_id}/followers/?{urlencode(params)}'
Add Comment
Please, Sign In to add comment