Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import random
- """Module that handles the like features"""
- from math import ceil
- from re import findall
- from selenium.webdriver.common.keys import Keys
- from .time_util import sleep
- from .util import update_activity
- from .util import add_user_to_blacklist
- from .util import click_element
- def get_links_from_feed(browser, amount, num_of_search, logger):
- """Fetches random number of links from feed and returns a list of links"""
- browser.get('https://www.instagram.com')
- # update server calls
- update_activity()
- sleep(2)
- for i in range(num_of_search + 1):
- browser.execute_script(
- "window.scrollTo(0, document.body.scrollHeight);")
- sleep(2)
- # get links
- link_elems = browser.find_elements_by_xpath(
- "//article/div[2]/div[2]/a")
- total_links = len(link_elems)
- logger.info("Total of links feched for analysis: {}".format(total_links))
- links = []
- try:
- if link_elems:
- links = [link_elem.get_attribute('href') for link_elem in link_elems]
- logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- for i, link in enumerate(links):
- print(i, link)
- logger.info("~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- except BaseException as e:
- logger.error("link_elems error {}".format(str(e)))
- return links
- def get_links_for_location(browser,
- location,
- amount,
- logger,
- media=None,
- skip_top_posts=True):
- """Fetches the number of links specified
- by amount and returns a list of links"""
- if media is None:
- # All known media types
- media = ['', 'Post', 'Video']
- elif media == 'Photo':
- # Include posts with multiple images in it
- media = ['', 'Post']
- else:
- # Make it an array to use it in the following part
- media = [media]
- browser.get('https://www.instagram.com/explore/locations/' + location)
- # update server calls
- update_activity()
- sleep(2)
- # clicking load more
- body_elem = browser.find_element_by_tag_name('body')
- sleep(2)
- abort = True
- try:
- load_button = body_elem.find_element_by_xpath(
- '//a[contains(@class, "_1cr2e _epyes")]')
- except:
- try:
- # scroll down to load posts
- for i in range(int(ceil(amount/12))):
- browser.execute_script(
- "window.scrollTo(0, document.body.scrollHeight);")
- sleep(2)
- except:
- logger.warning(
- 'Load button not found, working with current images!')
- else:
- abort = False
- body_elem.send_keys(Keys.END)
- sleep(2)
- # update server calls
- update_activity()
- else:
- abort = False
- body_elem.send_keys(Keys.END)
- sleep(2)
- click_element(browser, load_button) # load_button.click()
- # update server calls
- update_activity()
- body_elem.send_keys(Keys.HOME)
- sleep(1)
- # Get links
- if skip_top_posts:
- main_elem = browser.find_element_by_xpath('//main/article/div[2]')
- else:
- main_elem = browser.find_element_by_tag_name('main')
- link_elems = main_elem.find_elements_by_tag_name('a')
- total_links = len(link_elems)
- links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem.text in media]
- filtered_links = len(links)
- while (filtered_links < amount) and not abort:
- amount_left = amount - filtered_links
- # Average items of the right media per page loaded
- new_per_page = ceil(12 * filtered_links / total_links)
- if new_per_page == 0:
- # Avoid division by zero
- new_per_page = 1. / 12.
- # Number of page load needed
- new_needed = int(ceil(amount_left / new_per_page))
- if new_needed > 12:
- # Don't go bananas trying to get all of instagram!
- new_needed = 12
- for i in range(new_needed): # add images x * 12
- # Keep the latest window active while loading more posts
- before_load = total_links
- body_elem.send_keys(Keys.END)
- # update server calls
- update_activity()
- sleep(1)
- body_elem.send_keys(Keys.HOME)
- sleep(1)
- link_elems = main_elem.find_elements_by_tag_name('a')
- total_links = len(link_elems)
- abort = (before_load == total_links)
- if abort:
- break
- links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem.text in media]
- filtered_links = len(links)
- return links[:amount]
- def get_links_for_tag(browser,
- tag,
- amount,
- logger,
- media=None,
- skip_top_posts=True):
- """Fetches the number of links specified
- by amount and returns a list of links"""
- if media is None:
- # All known media types
- media = ['', 'Post', 'Video']
- elif media == 'Photo':
- # Include posts with multiple images in it
- media = ['', 'Post']
- else:
- # Make it an array to use it in the following part
- media = [media]
- browser.get('https://www.instagram.com/explore/tags/'
- + (tag[1:] if tag[:1] == '#' else tag))
- # update server calls
- update_activity()
- sleep(2)
- # clicking load more
- body_elem = browser.find_element_by_tag_name('body')
- sleep(2)
- abort = True
- # Get links
- if skip_top_posts:
- main_elem = browser.find_element_by_xpath('//main/article/div[2]')
- else:
- main_elem = browser.find_element_by_tag_name('main')
- total_links = 0
- links = []
- filtered_links = 0
- try_again = 0
- default_load = 21 if not skip_top_posts else 12
- while filtered_links < amount:
- if amount >= default_load:
- if filtered_links >= default_load:
- for i in range(3):
- browser.execute_script(
- "window.scrollTo(0, document.body.scrollHeight);")
- update_activity()
- sleep(1.5)
- link_elems = main_elem.find_elements_by_tag_name('a')
- if not link_elems:
- main_elem2 = browser.find_element_by_xpath('//main/article/div[1]')
- link_elems = main_elem2.find_elements_by_tag_name('a')
- total_links += len(link_elems)
- try:
- if link_elems:
- new_links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem and link_elem.text in media]
- for new_link in new_links:
- links.append(new_link)
- links_all = links
- s = set()
- links = []
- for i in links_all:
- if i not in s:
- s.add(i)
- links.append(i)
- if len(links) == filtered_links:
- try_again += 1
- if try_again > 1 :
- logger.info("This tag has less pictures than intended..")
- break
- else:
- filtered_links = len(links)
- try_again = 0
- if filtered_links < default_load and amount > filtered_links:
- logger.info("This tag has so less pictures than expected...")
- break
- else:
- logger.warning("This tag does not contain a picture")
- break
- except BaseException as e:
- logger.error("link_elems error {}".format(str(e)))
- break
- while (filtered_links < amount) and not abort:
- amount_left = amount - filtered_links
- # Average items of the right media per page loaded
- new_per_page = ceil(12 * filtered_links / total_links)
- if new_per_page == 0:
- # Avoid division by zero
- new_per_page = 1. / 12.
- # Number of page load needed
- new_needed = int(ceil(amount_left / new_per_page))
- if new_needed > 12:
- # Don't go bananas trying to get all of instagram!
- new_needed = 12
- for i in range(new_needed): # add images x * 12
- # Keep the latest window active while loading more posts
- before_load = total_links
- body_elem.send_keys(Keys.END)
- # update server calls
- update_activity()
- sleep(1)
- body_elem.send_keys(Keys.HOME)
- sleep(1)
- link_elems = main_elem.find_elements_by_tag_name('a')
- total_links = len(link_elems)
- abort = (before_load == total_links)
- if abort:
- break
- links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem.text in media]
- filtered_links = len(links)
- return links[:amount]
- def get_links_for_username(browser,
- username,
- amount,
- logger,
- randomize=False,
- media=None):
- """Fetches the number of links specified
- by amount and returns a list of links"""
- if media is None:
- # All known media types
- media = ['', 'Post', 'Video']
- elif media == 'Photo':
- # Include posts with multiple images in it
- media = ['', 'Post']
- else:
- # Make it an array to use it in the following part
- media = [media]
- logger.info('Getting {} image list...'.format(username))
- # Get user profile page
- browser.get('https://www.instagram.com/' + username)
- # update server calls
- update_activity()
- body_elem = browser.find_element_by_tag_name('body')
- try:
- is_private = body_elem.find_element_by_xpath(
- '//h2[@class="_kcrwx"]')
- except:
- logger.info('Interaction begin...')
- else:
- if is_private:
- logger.warning('This user is private...')
- return False
- abort = True
- try:
- load_button = body_elem.find_element_by_xpath(
- '//a[contains(@class, "_1cr2e _epyes")]')
- except:
- try:
- # scroll down to load posts
- for i in range(int(ceil(amount/12))):
- browser.execute_script(
- "window.scrollTo(0, document.body.scrollHeight);")
- sleep(2)
- except:
- logger.warning(
- 'Load button not found, working with current images!')
- else:
- abort = False
- body_elem.send_keys(Keys.END)
- sleep(2)
- # update server calls
- update_activity()
- else:
- abort = False
- body_elem.send_keys(Keys.END)
- sleep(2)
- click_element(browser, load_button) # load_button.click()
- # update server calls
- update_activity()
- body_elem.send_keys(Keys.HOME)
- sleep(2)
- # Get Links
- main_elem = browser.find_element_by_tag_name('main')
- link_elems = main_elem.find_elements_by_tag_name('a')
- total_links = len(link_elems)
- links = []
- filtered_links = 0
- try:
- if link_elems:
- links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem and link_elem.text in media]
- filtered_links = len(links)
- except BaseException as e:
- logger.error("link_elems error {}}".format(str(e)))
- if randomize:
- # Expanding the pooulation for better random distribution
- amount = amount * 5
- while (filtered_links < amount) and not abort:
- amount_left = amount - filtered_links
- # Average items of the right media per page loaded
- new_per_page = ceil(12 * filtered_links / total_links)
- if new_per_page == 0:
- # Avoid division by zero
- new_per_page = 1. / 12.
- # Number of page load needed
- new_needed = int(ceil(amount_left / new_per_page))
- if new_needed > 12:
- # Don't go bananas trying to get all of instagram!
- new_needed = 12
- for i in range(new_needed): # add images x * 12
- # Keep the latest window active while loading more posts
- before_load = total_links
- body_elem.send_keys(Keys.END)
- # update server calls
- update_activity()
- sleep(1)
- body_elem.send_keys(Keys.HOME)
- sleep(1)
- link_elems = main_elem.find_elements_by_tag_name('a')
- total_links = len(link_elems)
- abort = (before_load == total_links)
- if abort:
- break
- links = [link_elem.get_attribute('href') for link_elem in link_elems
- if link_elem.text in media]
- filtered_links = len(links)
- if randomize:
- # Shuffle the population index
- links = random.sample(links, filtered_links)
- return links[:amount]
- def check_link(browser,
- link,
- dont_like,
- ignore_if_contains,
- ignore_users,
- username,
- like_by_followers_upper_limit,
- like_by_followers_lower_limit,
- logger):
- browser.get(link)
- # update server calls
- update_activity()
- sleep(2)
- """Check if the Post is Valid/Exists"""
- post_page = browser.execute_script(
- "return window._sharedData.entry_data.PostPage")
- if post_page is None:
- logger.warning('Unavailable Page: {}'.format(link.encode('utf-8')))
- return True, None, None, 'Unavailable Page'
- """Gets the description of the link and checks for the dont_like tags"""
- graphql = 'graphql' in post_page[0]
- if graphql:
- media = post_page[0]['graphql']['shortcode_media']
- is_video = media['is_video']
- user_name = media['owner']['username']
- image_text = media['edge_media_to_caption']['edges']
- image_text = image_text[0]['node']['text'] if image_text else None
- owner_comments = browser.execute_script('''
- latest_comments = window._sharedData.entry_data.PostPage[0].graphql.shortcode_media.edge_media_to_comment.edges;
- if (latest_comments === undefined) latest_comments = Array();
- owner_comments = latest_comments
- .filter(item => item.node.owner.username == '{}')
- .map(item => item.node.text)
- .reduce((item, total) => item + '\\n' + total, '');
- return owner_comments;
- '''.format(user_name))
- else:
- media = post_page[0]['media']
- is_video = media['is_video']
- user_name = media['owner']['username']
- image_text = media['caption']
- owner_comments = browser.execute_script('''
- latest_comments = window._sharedData.entry_data.PostPage[0].media.comments.nodes;
- if (latest_comments === undefined) latest_comments = Array();
- owner_comments = latest_comments
- .filter(item => item.user.username == '{}')
- .map(item => item.text)
- .reduce((item, total) => item + '\\n' + total, '');
- return owner_comments;
- '''.format(user_name))
- if owner_comments == '':
- owner_comments = None
- """Append owner comments to description as it might contain further tags"""
- if image_text is None:
- image_text = owner_comments
- elif owner_comments:
- image_text = image_text + '\n' + owner_comments
- """If the image still has no description gets the first comment"""
- if image_text is None:
- if graphql:
- image_text = media['edge_media_to_comment']['edges']
- image_text = image_text[0]['node']['text'] if image_text else None
- else:
- image_text = media['comments']['nodes']
- image_text = image_text[0]['text'] if image_text else None
- if image_text is None:
- image_text = "No description"
- logger.info('Image from: {}'.format(user_name.encode('utf-8')))
- """Find the number of followes the user has"""
- if like_by_followers_upper_limit or like_by_followers_lower_limit:
- userlink = 'https://www.instagram.com/' + user_name
- browser.get(userlink)
- # update server calls
- update_activity()
- sleep(1)
- num_followers = browser.execute_script(
- "return window._sharedData.entry_data."
- "ProfilePage[0].user.followed_by.count")
- browser.get(link)
- # update server calls
- update_activity()
- sleep(1)
- logger.info('Number of Followers: {}'.format(num_followers))
- if like_by_followers_upper_limit and \
- num_followers > like_by_followers_upper_limit:
- return True, user_name, is_video, \
- 'Number of followers exceeds limit'
- if like_by_followers_lower_limit and \
- num_followers < like_by_followers_lower_limit:
- return True, user_name, is_video, \
- 'Number of followers does not reach minimum'
- logger.info('Link: {}'.format(link.encode('utf-8')))
- logger.info('Description: {}'.format(image_text.encode('utf-8')))
- """Check if the user_name is in the ignore_users list"""
- if (user_name in ignore_users) or (user_name == username):
- return True, user_name, is_video, 'Username'
- if any((word in image_text for word in ignore_if_contains)):
- return True, user_name, is_video, 'None'
- dont_like_regex = []
- for dont_likes in dont_like:
- if dont_likes.startswith("#"):
- dont_like_regex.append(dont_likes + "([^\d\w]|$)")
- elif dont_likes.startswith("["):
- dont_like_regex.append("#" + dont_likes[1:] + "[\d\w]+([^\d\w]|$)")
- elif dont_likes.startswith("]"):
- dont_like_regex.append("#[\d\w]+" + dont_likes[1:] + "([^\d\w]|$)")
- else:
- dont_like_regex.append(
- "#[\d\w]*" + dont_likes + "[\d\w]*([^\d\w]|$)")
- for dont_likes_regex in dont_like_regex:
- quash = re.search(dont_likes_regex, image_text, re.IGNORECASE)
- if quash:
- quashed = (quash.group(0)).split('#')[1]
- iffy = (re.split(r'\W+', dont_likes_regex))[3]
- inapp_unit = ('Inappropriate! ~ contains \'{}\''.format(quashed) if quashed == iffy else
- 'Inappropriate! ~ contains \'{}\' in \'{}\''.format(iffy, quashed))
- return True, user_name, is_video, inapp_unit
- return False, user_name, is_video, 'None'
- def like_image(browser, username, blacklist, logger):
- """Likes the browser opened image"""
- like_elem = browser.find_elements_by_xpath(
- "//a[@role='button']/span[text()='Like']/..")
- liked_elem = browser.find_elements_by_xpath(
- "//a[@role='button']/span[text()='Unlike']")
- if len(like_elem) == 1:
- # sleep real quick right before clicking the element
- sleep(2)
- click_element(browser, like_elem[0])
- logger.info('--> Image Liked!')
- update_activity('likes')
- if blacklist['enabled'] is True:
- action = 'liked'
- add_user_to_blacklist(
- browser, username, blacklist['campaign'], action, logger
- )
- sleep(2)
- return True
- elif len(liked_elem) == 1:
- logger.info('--> Already Liked!')
- return False
- else:
- logger.info('--> Invalid Like Element!')
- return False
- def get_tags(browser, url):
- """Gets all the tags of the given description in the url"""
- browser.get(url)
- # update server calls
- update_activity()
- sleep(1)
- graphql = browser.execute_script(
- "return ('graphql' in window._sharedData.entry_data.PostPage[0])")
- if graphql:
- image_text = browser.execute_script(
- "return window._sharedData.entry_data.PostPage[0].graphql."
- "shortcode_media.edge_media_to_caption.edges[0].node.text")
- else:
- image_text = browser.execute_script(
- "return window._sharedData.entry_data."
- "PostPage[0].media.caption.text")
- tags = findall(r'#\w*', image_text)
- return tags
Advertisement
Add Comment
Please, Sign In to add comment