Advertisement
Guest User

Instagram content finder

a guest
Jun 15th, 2019
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.27 KB | None | 0 0
  1. import requests
  2. from content import iFunnyContent
  3. import urllib
  4. import json
  5. import re
  6.  
  7.  
  8. class Media():
  9.  
  10.     valid_context = ["user", "hashtag"]
  11.     after = None
  12.  
  13.     def __init__(self, query, context="user"):
  14.         self.item_id = self.findtop(query, context)
  15.  
  16.        
  17.     def findtop(self, query, context="user"):
  18.  
  19.         if context not in self.valid_context:
  20.             raise Exception("not a valid context")
  21.         self.context = context
  22.         query_url = "https://www.instagram.com/web/search/topsearch?context={}&query={}".format(context, urllib.quote_plus(query))
  23.         search_data = requests.get(query_url).json()
  24.         category = search_data[context+"s"]
  25.         if not category:
  26.             raise Exception("no results for {} in {}s".format(query, context))
  27.         if context == "user":
  28.             topitem_id = category[0][context]["pk"]
  29.         else:
  30.             topitem_id = category[0][context]["name"]
  31.         return topitem_id
  32.    
  33.  
  34.     def list_media(self, item_id, context="user", per_page=50, after=None):
  35.  
  36.         if context not in self.valid_context:
  37.             raise Exception("not a valid context")
  38.         self.context = context
  39.         self.variables = json.dumps({"id":item_id,"first":per_page,"after":after})
  40.         media_url = "https://www.instagram.com/graphql/query/?query_id=17888483320059182&variables="+self.variables
  41.         if self.context == "hashtag":
  42.             self.variables = json.dumps({"tag_name":item_id,"first":per_page,"after":after})
  43.             media_url = "https://www.instagram.com/graphql/query/?query_hash=3e7706b09c6184d5eafd8b032dbcf487&variables="+self.variables
  44.             print(media_url)
  45.         media_data = requests.get(media_url).json()
  46.         return media_data
  47.    
  48.    
  49.     def next_page(self, per_page=50):
  50.  
  51.         status = "fail"
  52.         while status == "fail":
  53.             media_data = self.list_media(self.item_id, self.context, per_page=per_page, after=self.after)
  54.             status = media_data["status"]
  55.         edge = "edge_owner_to_timeline_media"
  56.         if self.context == "hashtag":
  57.             edge = "edge_hashtag_to_media"
  58.         self.after = media_data["data"][self.context][edge]["page_info"]["end_cursor"]
  59.         return media_data["data"][self.context][edge]["edges"]
  60.  
  61.     def parse_media(self, media_list):
  62.  
  63.         for count, post in enumerate(media_list):
  64.             node = post["node"]
  65.             description = node["edge_media_to_caption"]["edges"][0]["node"]["text"]
  66.             description = description.replace("\n", " ")
  67.             description = description[:description.lower().find("via")].strip()
  68.             description = description[:description.lower().find("#")].strip()
  69.             description = " ".join([re.sub("\W", "", word) for word in description.split(" ")])
  70.             post_url = "https://www.instagram.com/p/"+node["shortcode"]
  71.             if not node["is_video"]:
  72.                 post_url = node["display_url"]
  73.             media_list[count] = (post_url, description)
  74.         return media_list
  75.  
  76.  
  77.  
  78. media = Media("meme", "hashtag")#pick either "user" or "hashtag"
  79.  
  80. while True:
  81.     media_list = media.next_page(per_page=50)
  82.     media_list = media.parse_media(media_list)
  83.  
  84.     for i in media_list:
  85.         print(i[0], i[1])
  86.  
  87.     break
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement