Advertisement
Guest User

Untitled

a guest
Dec 15th, 2017
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.16 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: Blacklistede
  4. """
  5. import requests
  6. import random
  7. import time
  8. import argparse
  9.  
  10.  
  class HashTagResearch:
      """Small HTTP client for looking up hashtag data on www.instagram.com.

      The instance owns one ``requests.Session`` (``self.s``) plus three
      header sets and the endpoint URLs used by the methods below.  Typical
      use is ``setup_session()`` -> ``login()`` -> ``explore_hashtags()`` /
      ``get_hashtag_info()``.
      """

      def __init__(self):
          """Create the session and define header sets and endpoint URLs."""
          self.s = requests.Session()

          chrome_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36')

          # Headers installed on the session itself (plain page loads).
          self.browser_headers = {
              'User-Agent': chrome_agent,
              'Host': 'www.instagram.com',
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
              'Upgrade-Insecure-Requests': '1',
          }

          # Headers sent with the login POST.  'X-CSRFToken' holds a
          # placeholder until setup_session() replaces it with the cookie value.
          self.request_headers = {
              'User-Agent': chrome_agent,
              'X-Instagram-AJAX': '1',
              'X-CSRFToken': 'csrftoken',
              'Origin': 'https://www.instagram.com',
              'Referer': 'https://www.instagram.com',
          }

          # Headers sent with the hashtag search request.
          self.explore_headers = {
              'Host': 'www.instagram.com',
              'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
              'Accept': '*/*',
              'Accept-Language': 'en-US;q=0.7,en;q=0.3',
              'Accept-Encoding': 'gzip, deflate, br',
              'X-Requested-With': 'XMLHttpRequest',
              'Referer': 'https://www.instagram.com/',
              'Connection': 'keep-alive',
          }

          self.s.headers = self.browser_headers
          self.base_url = 'https://www.instagram.com/'
          self.query_url = self.base_url + 'query/'
          self.login_url = self.base_url + 'accounts/login/ajax/'
          self.search_url = self.base_url + 'web/search/topsearch/'
          self.explore_url = self.base_url + 'explore/tags/'

      def post_request(self, url, data, **kwargs):
          """POST ``data`` to ``url`` through the session and validate the reply.

          Extra keyword arguments are forwarded to ``Session.post``.
          Returns the response; raises ``requests.HTTPError`` on a non-200 status.
          """
          response = self.s.post(url, data=data, **kwargs)
          return self.analyze_request(response)

      def get_request(self, url, params=None, **kwargs):
          """GET ``url`` through the session and validate the reply.

          Extra keyword arguments are forwarded to ``Session.get``.
          Returns the response; raises ``requests.HTTPError`` on a non-200 status.
          """
          response = self.s.get(url, params=params, **kwargs)
          return self.analyze_request(response)

      def analyze_request(self, request):
          """Return ``request`` (a response object) if its status code is 200.

          Raises:
              requests.HTTPError: for any other status code.  The message is
                  the status code as a string and the response is attached to
                  the exception so callers can inspect it.
          """
          if request.status_code == 200:
              return request
          raise requests.HTTPError(str(request.status_code), response=request)

      def setup_session(self):
          """Load the base URL and copy the ``csrftoken`` cookie into the login headers.

          Raises ``KeyError`` if the session holds no ``csrftoken`` cookie
          after the request.
          """
          self.get_request(self.base_url)
          self.request_headers['X-CSRFToken'] = self.s.cookies.get_dict()['csrftoken']

      def login(self, username, password):
          """Post the credentials to the login endpoint.

          Returns ``True`` when the JSON reply has ``status == 'ok'`` and a
          truthy ``authenticated`` field.  Otherwise the reply is printed and
          ``Exception('Failed to login')`` is raised; missing fields count as
          a failed login rather than surfacing as ``KeyError``.
          """
          payload = {'username': username, 'password': password}
          response = self.post_request(self.login_url, data=payload,
                                       headers=self.request_headers).json()
          if response.get('status') == 'ok' and response.get('authenticated'):
              return True
          print(response)
          raise Exception('Failed to login')

      @staticmethod
      def _filter_tags(tag_list, min_posts=None, max_posts=None):
          """Return the names of the search results whose media count is in range.

          Each entry is read as ``entry['hashtag']['name']`` and
          ``entry['hashtag']['media_count']``.  A bound of ``None`` disables
          that side of the range; ``0`` is a real bound.
          """
          names = []
          for entry in tag_list:
              details = entry['hashtag']
              count = details['media_count']
              if min_posts is not None and count < min_posts:
                  continue
              if max_posts is not None and count > max_posts:
                  continue
              names.append(details['name'])
          return names

      @staticmethod
      def _summarize_top_posts(top_posts):
          """Return the min/max like and comment counts over ``top_posts``.

          Each post is read as ``post['likes']['count']`` and
          ``post['comments']['count']``.  With no posts every value is 0, so
          no placeholder number ever reaches the caller.
          """
          likes = [post['likes']['count'] for post in top_posts]
          comments = [post['comments']['count'] for post in top_posts]
          return {'min_likes': min(likes, default=0),
                  'max_likes': max(likes, default=0),
                  'min_comments': min(comments, default=0),
                  'max_comments': max(comments, default=0)}

      def explore_hashtags(self, hashtag, min_posts=None, max_posts=None):
          """Search for hashtags related to ``hashtag`` and return their names.

          Args:
              hashtag: the tag to search for; a leading '#' is added if absent.
              min_posts: skip results with fewer media than this (None = no limit).
              max_posts: skip results with more media than this (None = no limit).

          Returns:
              A list of hashtag names, in the order the search endpoint
              returned them.

          Raises:
              ValueError: if ``hashtag`` is empty.
              requests.HTTPError: if the search request does not return 200.
          """
          if not hashtag:
              raise ValueError('hashtag must not be empty')
          if not hashtag.startswith('#'):
              hashtag = '#' + hashtag
          params = {'context': 'blended',
                    'query': hashtag,
                    'rank_token': random.uniform(0, 1)}
          response = self.get_request(self.search_url, params=params,
                                      headers=self.explore_headers)
          return self._filter_tags(response.json()['hashtags'], min_posts, max_posts)

      def trim_hashtags(self, hashtags, amount):
          """Return a list holding at most the first ``amount`` items of ``hashtags``."""
          return hashtags[:amount]

      def get_hashtag_info(self, hashtag):
          """Fetch the post count and top-post like/comment ranges for ``hashtag``.

          Returns:
              A dict with the keys ``name``, ``post_amount``, ``min_likes``,
              ``max_likes``, ``min_comments`` and ``max_comments``.

          Raises:
              requests.HTTPError: if the request does not return 200.
          """
          url = self.explore_url + str(hashtag) + '/'
          response = self.get_request(url, params={'__a': 1}).json()
          tag_data = response['tag']
          hashtag_info = {'name': hashtag,
                          'post_amount': tag_data['media']['count']}
          hashtag_info.update(self._summarize_top_posts(tag_data['top_posts']['nodes']))
          return hashtag_info
  128.  
  129.  
  def main(username, password, hashtags, max_hashtags=None, min_posts=None, max_posts=None, suggestions=True, file=False):
      """Log in, collect hashtags, fetch their statistics and write ``hashtaginfo.csv``.

      Args:
          username: account name passed to ``HashTagResearch.login``.
          password: account password passed to ``HashTagResearch.login``.
          hashtags: list of seed hashtags (ignored when ``file`` is given).
          max_hashtags: if truthy, keep at most this many suggestions per seed.
          min_posts: forwarded to ``explore_hashtags`` as the lower media bound.
          max_posts: forwarded to ``explore_hashtags`` as the upper media bound.
          suggestions: when true, analyze the suggested tags of each seed;
              when false, analyze the seeds themselves.
          file: path of a text file with one hashtag per line, or a falsy
              value to use ``hashtags``.

      Failures for a single hashtag are printed and do not stop the run; a
      failed lookup is recorded in the CSV with ``ERROR`` in every data column.
      """
      htr = HashTagResearch()
      htr.setup_session()
      htr.login(username, password)

      if file:
          # One hashtag per line; surrounding whitespace and blank lines are
          # dropped so they do not turn into bogus lookups.
          with open(file) as source:
              stripped_lines = [line.strip() for line in source.read().splitlines()]
          hashtags = [line for line in stripped_lines if line]

      if suggestions:
          hashtag_list = []
          for tag in hashtags:
              # str(bytes) keeps the message printable on consoles that
              # cannot encode the tag's characters.
              safe_tag = str(str(tag).encode())
              print('Getting suggested hashtags for ' + safe_tag)
              try:
                  new_tags = htr.explore_hashtags(tag, min_posts=min_posts, max_posts=max_posts)
                  if max_hashtags:
                      new_tags = htr.trim_hashtags(new_tags, max_hashtags)
                  hashtag_list.extend(new_tags)
              except Exception as ex:
                  # Best effort: report the failure and move on to the next seed.
                  print(ex)
                  print('Error fetching recommended hashtags for ' + safe_tag)
              # Pause between requests.
              time.sleep(1.5)
      else:
          hashtag_list = hashtags

      # Remove duplicates while keeping first-seen order, so the lookup order
      # and the CSV row order are the same on every run.
      hashtag_list = list(dict.fromkeys(hashtag_list))

      hashtag_infos = []
      for tag in hashtag_list:
          safe_tag = str(str(tag).encode())
          print('Getting infos for ' + safe_tag)
          try:
              tag_info = htr.get_hashtag_info(tag)
          except Exception as ex:
              # Best effort: keep a row for the tag so the failure is visible
              # in the output file.
              print(ex)
              print('Failed to get informations for ' + safe_tag)
              error = 'ERROR'
              # The CSV is UTF-8, so the plain tag is stored, exactly as
              # successful rows do.
              tag_info = {'name': tag,
                          'post_amount': error,
                          'min_likes': error,
                          'max_likes': error,
                          'min_comments': error,
                          'max_comments': error}
          hashtag_infos.append(tag_info)
          # Pause between requests.
          time.sleep(1.5)

      columns = ('name', 'post_amount', 'min_likes', 'max_likes', 'min_comments', 'max_comments')
      with open('hashtaginfo.csv', 'w', encoding='utf-8') as out:
          print('Writing to file...')
          out.write('HASHTAG,POST AMOUNT,MIN LIKES, MAX LIKES, MIN COMMENTS, MAX COMMENTS\n')
          out.writelines(','.join(str(info[column]) for column in columns) + '\n'
                         for info in hashtag_infos)
      print('Scraped ' + str(len(hashtag_infos)) + ' tags!')
      print('DONE!')
  193.  
  194.  
  if __name__ == '__main__':
      # Command-line entry point: parse the arguments and hand them to main().
      parser = argparse.ArgumentParser()
      parser.add_argument('username', help='Your IG username')
      parser.add_argument('password', help='Your IG password')
      parser.add_argument('hashtags', help='The hashtags you want to analyze. Seperate by comma. Example: car,boat,plane')
      parser.add_argument('--max_tags', help='The maximum of suggested tags to fetch per hashtag', type=int)
      parser.add_argument('--min_posts', help='Check only tags with minimum posts', type=int)
      parser.add_argument('--max_posts', help='Check only tags with maximum posts', type=int)
      parser.add_argument('--nosuggestions', help='Don\'t get suggestions for the inputted hashtags', dest='suggestions', action='store_false')
      parser.add_argument('--file', help='You need this flag if you are using a .txt file to import hashtags.', dest='file', action='store_true')
      parser.set_defaults(suggestions=True, file=False)
      args = parser.parse_args()

      if args.file:
          # With --file the positional 'hashtags' argument is the path of the
          # text file; main() reads the tags from it.
          hashtags = []
          hashtag_file = args.hashtags
      else:
          # Split the comma-separated list, trimming whitespace around each
          # tag ("car, boat") and dropping empty elements ("car,,boat").
          trimmed = [tag.strip() for tag in args.hashtags.split(',')]
          hashtags = [tag for tag in trimmed if tag]
          hashtag_file = False

      main(args.username, args.password, hashtags, args.max_tags, args.min_posts, args.max_posts, args.suggestions, hashtag_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement