Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from content import iFunnyContent
- import urllib
- import json
- import re
class Media():
    """Find an Instagram user or hashtag and page through its media.

    On construction the top search hit for ``query`` is resolved and stored
    in ``self.item_id``; ``next_page`` then fetches successive pages of
    posts, and ``parse_media`` reduces raw posts to ``(url, description)``
    tuples.

    Attributes:
        valid_context: The search contexts this class accepts.
        after: Pagination cursor of the last fetched page (``None`` before
            the first page has been fetched).
        context: The context used by the most recent lookup/listing.
        item_id: User ``pk`` or hashtag ``name`` of the top search hit.
        variables: JSON text of the variables sent by the last
            ``list_media`` call.
    """

    valid_context = ["user", "hashtag"]
    after = None

    # Matches every character that is not a word character.
    _non_word = re.compile(r"\W")

    def __init__(self, query, context="user"):
        """Resolve ``query`` to its top search hit in ``context``."""
        self.item_id = self.findtop(query, context)

    @staticmethod
    def _quote_plus(text):
        """Percent-encode ``text`` for use inside a URL query string."""
        # quote_plus lives in urllib.parse on Python 3 and in urllib on
        # Python 2; import locally so the class works on either.
        try:
            from urllib.parse import quote_plus
        except ImportError:
            from urllib import quote_plus
        return quote_plus(text)

    @staticmethod
    def _cut_before(text, marker):
        """Return ``text`` truncated before ``marker`` and stripped.

        The match is case-insensitive for ``text``; ``marker`` must already
        be lower case. When ``marker`` does not occur, the whole text is
        kept (only stripped).
        """
        position = text.lower().find(marker)
        # str.find returns -1 when there is no match; slicing with -1 would
        # silently drop the last character, so only slice on a real hit.
        if position != -1:
            text = text[:position]
        return text.strip()

    def findtop(self, query, context="user"):
        """Return the identifier of the top search result for ``query``.

        Args:
            query: Free-text search term.
            context: ``"user"`` or ``"hashtag"``.

        Returns:
            The ``pk`` of the first user, or the ``name`` of the first
            hashtag, in the search response.

        Raises:
            ValueError: If ``context`` is not in ``valid_context``.
            LookupError: If the search returned no entries for ``context``.
        """
        if context not in self.valid_context:
            raise ValueError("not a valid context")
        self.context = context
        query_url = "https://www.instagram.com/web/search/topsearch?context={}&query={}".format(
            context, self._quote_plus(query))
        search_data = requests.get(query_url).json()
        # Results are keyed by the plural of the context name.
        category = search_data[context + "s"]
        if not category:
            raise LookupError("no results for {} in {}s".format(query, context))
        id_field = "pk" if context == "user" else "name"
        return category[0][context][id_field]

    def list_media(self, item_id, context="user", per_page=50, after=None):
        """Fetch one page of media for a user or hashtag.

        Args:
            item_id: User ``pk`` or hashtag name, as returned by ``findtop``.
            context: ``"user"`` or ``"hashtag"``.
            per_page: Number of posts to request.
            after: Pagination cursor, or ``None`` for the first page.

        Returns:
            The decoded JSON response.

        Raises:
            ValueError: If ``context`` is not in ``valid_context``.
        """
        if context not in self.valid_context:
            raise ValueError("not a valid context")
        self.context = context
        if context == "hashtag":
            self.variables = json.dumps({"tag_name": item_id, "first": per_page, "after": after})
            endpoint = "https://www.instagram.com/graphql/query/?query_hash=3e7706b09c6184d5eafd8b032dbcf487&variables="
        else:
            self.variables = json.dumps({"id": item_id, "first": per_page, "after": after})
            endpoint = "https://www.instagram.com/graphql/query/?query_id=17888483320059182&variables="
        # The JSON text contains quotes, braces and possibly '&' or '=' in
        # the cursor, so it must be percent-encoded before being embedded.
        media_url = endpoint + self._quote_plus(self.variables)
        print(media_url)
        return requests.get(media_url).json()

    def next_page(self, per_page=50):
        """Fetch the next page of posts and advance the pagination cursor.

        Args:
            per_page: Number of posts to request.

        Returns:
            The list of post edges of the fetched page.
        """
        status = "fail"
        # NOTE(review): retries immediately and without limit for as long as
        # the response reports status "fail" -- consider a bounded back-off.
        while status == "fail":
            media_data = self.list_media(self.item_id, self.context, per_page=per_page, after=self.after)
            status = media_data["status"]
        if self.context == "hashtag":
            edge = "edge_hashtag_to_media"
        else:
            edge = "edge_owner_to_timeline_media"
        page = media_data["data"][self.context][edge]
        self.after = page["page_info"]["end_cursor"]
        return page["edges"]

    def parse_media(self, media_list):
        """Replace each raw post in ``media_list`` with ``(url, description)``.

        The description is the first caption with newlines turned into
        spaces, cut before the first "via" (case-insensitive) and before the
        first "#", and with non-word characters removed from every word.
        Posts without a caption get an empty description. The URL is the
        post page for videos and the display image otherwise.

        Args:
            media_list: List of post edges as returned by ``next_page``.

        Returns:
            The same list object, modified in place.
        """
        for count, post in enumerate(media_list):
            node = post["node"]
            captions = node["edge_media_to_caption"]["edges"]
            # Posts may carry no caption at all.
            description = captions[0]["node"]["text"] if captions else ""
            description = description.replace("\n", " ")
            description = self._cut_before(description, "via")
            description = self._cut_before(description, "#")
            description = " ".join(
                [self._non_word.sub("", word) for word in description.split(" ")])
            if node["is_video"]:
                post_url = "https://www.instagram.com/p/" + node["shortcode"]
            else:
                post_url = node["display_url"]
            media_list[count] = (post_url, description)
        return media_list
# Demo driver: resolve the top hit, then fetch, parse and print one page.
# The second argument selects the context: pick either "user" or "hashtag".
media = Media("meme", "hashtag")
# NOTE(review): the original wrapped this in `while True: ... break`, read
# here as a single pass over the first page -- confirm against the author's
# intended indentation.
media_list = media.parse_media(media.next_page(per_page=50))
for i in media_list:
    print(i[0], i[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement