Guest User

like util py cleve

a guest
Feb 13th, 2018
144
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 20.93 KB | None | 0 0
  1. import re
  2. import random
  3.  
  4. """Module that handles the like features"""
  5. from math import ceil
  6. from re import findall
  7. from selenium.webdriver.common.keys import Keys
  8.  
  9. from .time_util import sleep
  10. from .util import update_activity
  11. from .util import add_user_to_blacklist
  12. from .util import click_element
  13.  
  14.  
  15. def get_links_from_feed(browser, amount, num_of_search, logger):
  16. """Fetches random number of links from feed and returns a list of links"""
  17.  
  18. browser.get('https://www.instagram.com')
  19. # update server calls
  20. update_activity()
  21. sleep(2)
  22.  
  23. for i in range(num_of_search + 1):
  24. browser.execute_script(
  25. "window.scrollTo(0, document.body.scrollHeight);")
  26. sleep(2)
  27.  
  28. # get links
  29. link_elems = browser.find_elements_by_xpath(
  30. "//article/div[2]/div[2]/a")
  31.  
  32. total_links = len(link_elems)
  33. logger.info("Total of links feched for analysis: {}".format(total_links))
  34. links = []
  35. try:
  36. if link_elems:
  37. links = [link_elem.get_attribute('href') for link_elem in link_elems]
  38. logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~")
  39. for i, link in enumerate(links):
  40. print(i, link)
  41. logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~")
  42.  
  43. except BaseException as e:
  44. logger.error("link_elems error {}".format(str(e)))
  45.  
  46. return links
  47.  
  48.  
  49. def get_links_for_location(browser,
  50. location,
  51. amount,
  52. logger,
  53. media=None,
  54. skip_top_posts=True):
  55.  
  56. """Fetches the number of links specified
  57. by amount and returns a list of links"""
  58. if media is None:
  59. # All known media types
  60. media = ['', 'Post', 'Video']
  61. elif media == 'Photo':
  62. # Include posts with multiple images in it
  63. media = ['', 'Post']
  64. else:
  65. # Make it an array to use it in the following part
  66. media = [media]
  67.  
  68. browser.get('https://www.instagram.com/explore/locations/' + location)
  69. # update server calls
  70. update_activity()
  71. sleep(2)
  72.  
  73. # clicking load more
  74. body_elem = browser.find_element_by_tag_name('body')
  75. sleep(2)
  76.  
  77. abort = True
  78. try:
  79. load_button = body_elem.find_element_by_xpath(
  80. '//a[contains(@class, "_1cr2e _epyes")]')
  81. except:
  82. try:
  83. # scroll down to load posts
  84. for i in range(int(ceil(amount/12))):
  85. browser.execute_script(
  86. "window.scrollTo(0, document.body.scrollHeight);")
  87. sleep(2)
  88. except:
  89. logger.warning(
  90. 'Load button not found, working with current images!')
  91. else:
  92. abort = False
  93. body_elem.send_keys(Keys.END)
  94. sleep(2)
  95. # update server calls
  96. update_activity()
  97. else:
  98. abort = False
  99. body_elem.send_keys(Keys.END)
  100. sleep(2)
  101. click_element(browser, load_button) # load_button.click()
  102. # update server calls
  103. update_activity()
  104.  
  105. body_elem.send_keys(Keys.HOME)
  106. sleep(1)
  107.  
  108. # Get links
  109. if skip_top_posts:
  110. main_elem = browser.find_element_by_xpath('//main/article/div[2]')
  111. else:
  112. main_elem = browser.find_element_by_tag_name('main')
  113.  
  114. link_elems = main_elem.find_elements_by_tag_name('a')
  115. total_links = len(link_elems)
  116. links = [link_elem.get_attribute('href') for link_elem in link_elems
  117. if link_elem.text in media]
  118. filtered_links = len(links)
  119.  
  120. while (filtered_links < amount) and not abort:
  121. amount_left = amount - filtered_links
  122. # Average items of the right media per page loaded
  123. new_per_page = ceil(12 * filtered_links / total_links)
  124. if new_per_page == 0:
  125. # Avoid division by zero
  126. new_per_page = 1. / 12.
  127. # Number of page load needed
  128. new_needed = int(ceil(amount_left / new_per_page))
  129.  
  130. if new_needed > 12:
  131. # Don't go bananas trying to get all of instagram!
  132. new_needed = 12
  133.  
  134. for i in range(new_needed): # add images x * 12
  135. # Keep the latest window active while loading more posts
  136. before_load = total_links
  137. body_elem.send_keys(Keys.END)
  138. # update server calls
  139. update_activity()
  140. sleep(1)
  141. body_elem.send_keys(Keys.HOME)
  142. sleep(1)
  143. link_elems = main_elem.find_elements_by_tag_name('a')
  144. total_links = len(link_elems)
  145. abort = (before_load == total_links)
  146. if abort:
  147. break
  148.  
  149. links = [link_elem.get_attribute('href') for link_elem in link_elems
  150. if link_elem.text in media]
  151. filtered_links = len(links)
  152.  
  153. return links[:amount]
  154.  
  155.  
  156. def get_links_for_tag(browser,
  157. tag,
  158. amount,
  159. logger,
  160. media=None,
  161. skip_top_posts=True):
  162. """Fetches the number of links specified
  163. by amount and returns a list of links"""
  164. if media is None:
  165. # All known media types
  166. media = ['', 'Post', 'Video']
  167. elif media == 'Photo':
  168. # Include posts with multiple images in it
  169. media = ['', 'Post']
  170. else:
  171. # Make it an array to use it in the following part
  172. media = [media]
  173.  
  174. browser.get('https://www.instagram.com/explore/tags/'
  175. + (tag[1:] if tag[:1] == '#' else tag))
  176. # update server calls
  177. update_activity()
  178. sleep(2)
  179.  
  180. # clicking load more
  181. body_elem = browser.find_element_by_tag_name('body')
  182. sleep(2)
  183.  
  184. abort = True
  185.  
  186. # Get links
  187. if skip_top_posts:
  188. main_elem = browser.find_element_by_xpath('//main/article/div[2]')
  189. else:
  190. main_elem = browser.find_element_by_tag_name('main')
  191. total_links = 0
  192. links = []
  193. filtered_links = 0
  194. try_again = 0
  195. default_load = 21 if not skip_top_posts else 12
  196.  
  197. while filtered_links < amount:
  198. if amount >= default_load:
  199. if filtered_links >= default_load:
  200. for i in range(3):
  201. browser.execute_script(
  202. "window.scrollTo(0, document.body.scrollHeight);")
  203. update_activity()
  204. sleep(1.5)
  205. link_elems = main_elem.find_elements_by_tag_name('a')
  206. if not link_elems:
  207. main_elem2 = browser.find_element_by_xpath('//main/article/div[1]')
  208. link_elems = main_elem2.find_elements_by_tag_name('a')
  209. total_links += len(link_elems)
  210.  
  211. try:
  212. if link_elems:
  213. new_links = [link_elem.get_attribute('href') for link_elem in link_elems
  214. if link_elem and link_elem.text in media]
  215. for new_link in new_links:
  216. links.append(new_link)
  217.  
  218. links_all = links
  219. s = set()
  220. links = []
  221. for i in links_all:
  222. if i not in s:
  223. s.add(i)
  224. links.append(i)
  225.  
  226. if len(links) == filtered_links:
  227. try_again += 1
  228. if try_again > 1 :
  229. logger.info("This tag has less pictures than intended..")
  230. break
  231. else:
  232. filtered_links = len(links)
  233. try_again = 0
  234. if filtered_links < default_load and amount > filtered_links:
  235. logger.info("This tag has so less pictures than expected...")
  236. break
  237. else:
  238. logger.warning("This tag does not contain a picture")
  239. break
  240.  
  241. except BaseException as e:
  242. logger.error("link_elems error {}".format(str(e)))
  243. break
  244.  
  245.  
  246. while (filtered_links < amount) and not abort:
  247. amount_left = amount - filtered_links
  248. # Average items of the right media per page loaded
  249. new_per_page = ceil(12 * filtered_links / total_links)
  250. if new_per_page == 0:
  251. # Avoid division by zero
  252. new_per_page = 1. / 12.
  253. # Number of page load needed
  254. new_needed = int(ceil(amount_left / new_per_page))
  255.  
  256. if new_needed > 12:
  257. # Don't go bananas trying to get all of instagram!
  258. new_needed = 12
  259.  
  260. for i in range(new_needed): # add images x * 12
  261. # Keep the latest window active while loading more posts
  262. before_load = total_links
  263. body_elem.send_keys(Keys.END)
  264. # update server calls
  265. update_activity()
  266. sleep(1)
  267. body_elem.send_keys(Keys.HOME)
  268. sleep(1)
  269. link_elems = main_elem.find_elements_by_tag_name('a')
  270. total_links = len(link_elems)
  271. abort = (before_load == total_links)
  272. if abort:
  273. break
  274.  
  275. links = [link_elem.get_attribute('href') for link_elem in link_elems
  276. if link_elem.text in media]
  277. filtered_links = len(links)
  278.  
  279. return links[:amount]
  280.  
  281. def get_links_for_username(browser,
  282. username,
  283. amount,
  284. logger,
  285. randomize=False,
  286. media=None):
  287.  
  288. """Fetches the number of links specified
  289. by amount and returns a list of links"""
  290. if media is None:
  291. # All known media types
  292. media = ['', 'Post', 'Video']
  293. elif media == 'Photo':
  294. # Include posts with multiple images in it
  295. media = ['', 'Post']
  296. else:
  297. # Make it an array to use it in the following part
  298. media = [media]
  299.  
  300. logger.info('Getting {} image list...'.format(username))
  301.  
  302. # Get user profile page
  303. browser.get('https://www.instagram.com/' + username)
  304. # update server calls
  305. update_activity()
  306.  
  307. body_elem = browser.find_element_by_tag_name('body')
  308.  
  309. try:
  310. is_private = body_elem.find_element_by_xpath(
  311. '//h2[@class="_kcrwx"]')
  312. except:
  313. logger.info('Interaction begin...')
  314. else:
  315. if is_private:
  316. logger.warning('This user is private...')
  317. return False
  318.  
  319. abort = True
  320.  
  321. try:
  322. load_button = body_elem.find_element_by_xpath(
  323. '//a[contains(@class, "_1cr2e _epyes")]')
  324. except:
  325. try:
  326. # scroll down to load posts
  327. for i in range(int(ceil(amount/12))):
  328. browser.execute_script(
  329. "window.scrollTo(0, document.body.scrollHeight);")
  330. sleep(2)
  331. except:
  332. logger.warning(
  333. 'Load button not found, working with current images!')
  334. else:
  335. abort = False
  336. body_elem.send_keys(Keys.END)
  337. sleep(2)
  338. # update server calls
  339. update_activity()
  340. else:
  341. abort = False
  342. body_elem.send_keys(Keys.END)
  343. sleep(2)
  344. click_element(browser, load_button) # load_button.click()
  345. # update server calls
  346. update_activity()
  347.  
  348. body_elem.send_keys(Keys.HOME)
  349. sleep(2)
  350.  
  351. # Get Links
  352. main_elem = browser.find_element_by_tag_name('main')
  353. link_elems = main_elem.find_elements_by_tag_name('a')
  354. total_links = len(link_elems)
  355. links = []
  356. filtered_links = 0
  357. try:
  358. if link_elems:
  359. links = [link_elem.get_attribute('href') for link_elem in link_elems
  360. if link_elem and link_elem.text in media]
  361. filtered_links = len(links)
  362.  
  363. except BaseException as e:
  364. logger.error("link_elems error {}}".format(str(e)))
  365.  
  366. if randomize:
  367. # Expanding the pooulation for better random distribution
  368. amount = amount * 5
  369.  
  370. while (filtered_links < amount) and not abort:
  371. amount_left = amount - filtered_links
  372. # Average items of the right media per page loaded
  373. new_per_page = ceil(12 * filtered_links / total_links)
  374. if new_per_page == 0:
  375. # Avoid division by zero
  376. new_per_page = 1. / 12.
  377. # Number of page load needed
  378. new_needed = int(ceil(amount_left / new_per_page))
  379.  
  380. if new_needed > 12:
  381. # Don't go bananas trying to get all of instagram!
  382. new_needed = 12
  383.  
  384. for i in range(new_needed): # add images x * 12
  385. # Keep the latest window active while loading more posts
  386. before_load = total_links
  387. body_elem.send_keys(Keys.END)
  388. # update server calls
  389. update_activity()
  390. sleep(1)
  391. body_elem.send_keys(Keys.HOME)
  392. sleep(1)
  393. link_elems = main_elem.find_elements_by_tag_name('a')
  394. total_links = len(link_elems)
  395. abort = (before_load == total_links)
  396. if abort:
  397. break
  398.  
  399. links = [link_elem.get_attribute('href') for link_elem in link_elems
  400. if link_elem.text in media]
  401. filtered_links = len(links)
  402.  
  403. if randomize:
  404. # Shuffle the population index
  405. links = random.sample(links, filtered_links)
  406.  
  407. return links[:amount]
  408.  
  409.  
  410. def check_link(browser,
  411. link,
  412. dont_like,
  413. ignore_if_contains,
  414. ignore_users,
  415. username,
  416. like_by_followers_upper_limit,
  417. like_by_followers_lower_limit,
  418. logger):
  419.  
  420. browser.get(link)
  421. # update server calls
  422. update_activity()
  423. sleep(2)
  424.  
  425. """Check if the Post is Valid/Exists"""
  426. post_page = browser.execute_script(
  427. "return window._sharedData.entry_data.PostPage")
  428. if post_page is None:
  429. logger.warning('Unavailable Page: {}'.format(link.encode('utf-8')))
  430. return True, None, None, 'Unavailable Page'
  431.  
  432. """Gets the description of the link and checks for the dont_like tags"""
  433. graphql = 'graphql' in post_page[0]
  434. if graphql:
  435. media = post_page[0]['graphql']['shortcode_media']
  436. is_video = media['is_video']
  437. user_name = media['owner']['username']
  438. image_text = media['edge_media_to_caption']['edges']
  439. image_text = image_text[0]['node']['text'] if image_text else None
  440. owner_comments = browser.execute_script('''
  441. latest_comments = window._sharedData.entry_data.PostPage[0].graphql.shortcode_media.edge_media_to_comment.edges;
  442. if (latest_comments === undefined) latest_comments = Array();
  443. owner_comments = latest_comments
  444. .filter(item => item.node.owner.username == '{}')
  445. .map(item => item.node.text)
  446. .reduce((item, total) => item + '\\n' + total, '');
  447. return owner_comments;
  448. '''.format(user_name))
  449. else:
  450. media = post_page[0]['media']
  451. is_video = media['is_video']
  452. user_name = media['owner']['username']
  453. image_text = media['caption']
  454. owner_comments = browser.execute_script('''
  455. latest_comments = window._sharedData.entry_data.PostPage[0].media.comments.nodes;
  456. if (latest_comments === undefined) latest_comments = Array();
  457. owner_comments = latest_comments
  458. .filter(item => item.user.username == '{}')
  459. .map(item => item.text)
  460. .reduce((item, total) => item + '\\n' + total, '');
  461. return owner_comments;
  462. '''.format(user_name))
  463.  
  464. if owner_comments == '':
  465. owner_comments = None
  466.  
  467. """Append owner comments to description as it might contain further tags"""
  468. if image_text is None:
  469. image_text = owner_comments
  470. elif owner_comments:
  471. image_text = image_text + '\n' + owner_comments
  472.  
  473. """If the image still has no description gets the first comment"""
  474. if image_text is None:
  475. if graphql:
  476. image_text = media['edge_media_to_comment']['edges']
  477. image_text = image_text[0]['node']['text'] if image_text else None
  478. else:
  479. image_text = media['comments']['nodes']
  480. image_text = image_text[0]['text'] if image_text else None
  481. if image_text is None:
  482. image_text = "No description"
  483.  
  484. logger.info('Image from: {}'.format(user_name.encode('utf-8')))
  485.  
  486. """Find the number of followes the user has"""
  487. if like_by_followers_upper_limit or like_by_followers_lower_limit:
  488. userlink = 'https://www.instagram.com/' + user_name
  489. browser.get(userlink)
  490. # update server calls
  491. update_activity()
  492. sleep(1)
  493. num_followers = browser.execute_script(
  494. "return window._sharedData.entry_data."
  495. "ProfilePage[0].user.followed_by.count")
  496. browser.get(link)
  497. # update server calls
  498. update_activity()
  499. sleep(1)
  500. logger.info('Number of Followers: {}'.format(num_followers))
  501.  
  502. if like_by_followers_upper_limit and \
  503. num_followers > like_by_followers_upper_limit:
  504. return True, user_name, is_video, \
  505. 'Number of followers exceeds limit'
  506.  
  507. if like_by_followers_lower_limit and \
  508. num_followers < like_by_followers_lower_limit:
  509. return True, user_name, is_video, \
  510. 'Number of followers does not reach minimum'
  511.  
  512. logger.info('Link: {}'.format(link.encode('utf-8')))
  513. logger.info('Description: {}'.format(image_text.encode('utf-8')))
  514.  
  515. """Check if the user_name is in the ignore_users list"""
  516. if (user_name in ignore_users) or (user_name == username):
  517. return True, user_name, is_video, 'Username'
  518.  
  519. if any((word in image_text for word in ignore_if_contains)):
  520. return True, user_name, is_video, 'None'
  521.  
  522. dont_like_regex = []
  523.  
  524. for dont_likes in dont_like:
  525. if dont_likes.startswith("#"):
  526. dont_like_regex.append(dont_likes + "([^\d\w]|$)")
  527. elif dont_likes.startswith("["):
  528. dont_like_regex.append("#" + dont_likes[1:] + "[\d\w]+([^\d\w]|$)")
  529. elif dont_likes.startswith("]"):
  530. dont_like_regex.append("#[\d\w]+" + dont_likes[1:] + "([^\d\w]|$)")
  531. else:
  532. dont_like_regex.append(
  533. "#[\d\w]*" + dont_likes + "[\d\w]*([^\d\w]|$)")
  534.  
  535. for dont_likes_regex in dont_like_regex:
  536. quash = re.search(dont_likes_regex, image_text, re.IGNORECASE)
  537. if quash:
  538. quashed = (quash.group(0)).split('#')[1]
  539. iffy = (re.split(r'\W+', dont_likes_regex))[3]
  540. inapp_unit = ('Inappropriate! ~ contains \'{}\''.format(quashed) if quashed == iffy else
  541. 'Inappropriate! ~ contains \'{}\' in \'{}\''.format(iffy, quashed))
  542. return True, user_name, is_video, inapp_unit
  543.  
  544. return False, user_name, is_video, 'None'
  545.  
  546.  
  547. def like_image(browser, username, blacklist, logger):
  548. """Likes the browser opened image"""
  549. like_elem = browser.find_elements_by_xpath(
  550. "//a[@role='button']/span[text()='Like']/..")
  551. liked_elem = browser.find_elements_by_xpath(
  552. "//a[@role='button']/span[text()='Unlike']")
  553.  
  554. if len(like_elem) == 1:
  555. # sleep real quick right before clicking the element
  556. sleep(2)
  557. click_element(browser, like_elem[0])
  558.  
  559. logger.info('--> Image Liked!')
  560. update_activity('likes')
  561. if blacklist['enabled'] is True:
  562. action = 'liked'
  563. add_user_to_blacklist(
  564. browser, username, blacklist['campaign'], action, logger
  565. )
  566. sleep(2)
  567. return True
  568. elif len(liked_elem) == 1:
  569. logger.info('--> Already Liked!')
  570. return False
  571. else:
  572. logger.info('--> Invalid Like Element!')
  573. return False
  574.  
  575.  
  576. def get_tags(browser, url):
  577. """Gets all the tags of the given description in the url"""
  578. browser.get(url)
  579. # update server calls
  580. update_activity()
  581. sleep(1)
  582.  
  583. graphql = browser.execute_script(
  584. "return ('graphql' in window._sharedData.entry_data.PostPage[0])")
  585. if graphql:
  586. image_text = browser.execute_script(
  587. "return window._sharedData.entry_data.PostPage[0].graphql."
  588. "shortcode_media.edge_media_to_caption.edges[0].node.text")
  589. else:
  590. image_text = browser.execute_script(
  591. "return window._sharedData.entry_data."
  592. "PostPage[0].media.caption.text")
  593.  
  594. tags = findall(r'#\w*', image_text)
  595. return tags
Advertisement
Add Comment
Please, Sign In to add comment