Advertisement
Guest User

Instagram scraper

a guest
Jun 17th, 2017
1,642
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.23 KB | None | 0 0
  1. import bs4, requests
  2. import os
  3. import selenium.webdriver as webdriver
  4. import time
  5. from selenium.common.exceptions import NoSuchElementException
  6. from selenium.webdriver.common.action_chains import ActionChains
  7.  
  8. def checkAmount():
  9.     data = input("Around how many Pictures do u want? e.g: 25,50,100,150,...(Minimum is 25!)")
  10.     try:
  11.         number = int(data)    
  12.     except ValueError:
  13.         print("U need to enter a number!")      
  14.         return checkAmount()
  15.     if number < 25:
  16.         print("U need to enter a number greater or equal to 25!")
  17.         return checkAmount()
  18.     else:    
  19.         picAmount = round((number - 25) / 12)
  20.         return picAmount
  21.  
  22. def welcomeMessage():
  23.     print("This program requires firefox and is going to open the browser during the downloading progress.\nPlease do not close it.\n\nAuthor: .. 2016")
  24.     time.sleep(5)
  25.     os.system('cls')
  26.  
  27. def displayUserInfo(soup):
  28.     messages = soup.select('._e8fkl')
  29.     followers = soup.select('._pr3wx')
  30.     following = soup.select('._bgvpv')
  31.     if not messages:
  32.         print('\nUser does not exist!')
  33.         time.sleep(2)
  34.         main()
  35.     else:
  36.         messagesValue = messages[0].string
  37.         followersValue = followers[0].string
  38.         followingValue = following[0].string
  39.         print("\nUserinfo:\nMessages: " + messagesValue + "    Followers: " + followersValue + "    Following: " + followingValue + "\n")
  40.  
  41. def askUser():
  42.     user = input("Please insert the username of the user u want to download from: ")
  43.     if user == "":
  44.         user = "programmers_life"
  45.         print("No input detected, default user will be used.")
  46.        
  47.     url = 'https://www.instagram.com/' + user + '/' # default url
  48.     return url
  49.  
  50. def nameFolder(state):
  51.     while state == False:
  52.         folder = input("Name of folder u want to download them to?")
  53.         state = True
  54.         if folder == "":
  55.             folder = "Instagram"      
  56.         if os.path.isdir(folder):
  57.             print('Folder exists already!')
  58.             decision = input("Do u want to overwrite the images in this folder? yes or no    ").lower()
  59.             if decision == "yes":
  60.                 state = True
  61.             else:
  62.                 state = False
  63.         if state == True:
  64.             os.makedirs(folder, exist_ok=True)
  65.             return folder
  66.  
  67. def mostLikedPicture(soup, driver):
  68.     pictureElem = soup.select('img')
  69.  
  70.     ActionChains(driver).move_to_element(pictureElem[1])
  71.  
  72.     time.sleep(8)
  73.                
  74. def main():
  75.     #WelcomeMessage
  76.     welcomeMessage()
  77.  
  78.     #Url Input
  79.     url = askUser()
  80.  
  81.     #Amount Input
  82.     picAmount = checkAmount()
  83.  
  84.     #Name of folder
  85.     state = False
  86.     folder = nameFolder(state)
  87.  
  88.     # Download the page.
  89.     print('Downloading images from %s...' % url)
  90.     driver = webdriver.Firefox()
  91.     driver.maximize_window()
  92.     driver.get(url)
  93.  
  94.    
  95.  
  96.    
  97.     #Click Load More
  98.     try:
  99.         linkElem = driver.find_element_by_class_name('_oidfu')
  100.         linkElem.click()
  101.     except NoSuchElementException:
  102.         print("User has less than 25 photo\'s or is private\n")
  103.  
  104.    
  105.  
  106.     #Scroll
  107.     i = 0
  108.     while i < picAmount:
  109.         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  110.         time.sleep(1)
  111.         i += 1
  112.  
  113.     #Put in html
  114.     soup = bs4.BeautifulSoup(driver.page_source, "html.parser")
  115.     mostLikedPicture(soup, driver)
  116.     driver.quit()
  117.     #UserInfo
  118.    # displayUserInfo(soup)
  119.  
  120.     # Find the URL of image.
  121.     pictureElem = soup.select('img')
  122.     if pictureElem == []:
  123.         print('Could not find any instagram image.')
  124.     else:
  125.         for i in range(len(pictureElem)):
  126.                 pictureUrl = pictureElem[i].get('src')
  127.                 # Download the image.
  128.                 print('Downloading image number: ' + str(i+1))
  129.                 res = requests.get(pictureUrl)
  130.                 res.raise_for_status()
  131.  
  132.                 # Save the image
  133.                 imageFile = open(os.path.join(folder, os.path.basename(str(i+1)) + ".jpg"), 'wb')
  134.                 for chunk in res.iter_content(100000):
  135.                     imageFile.write(chunk)
  136.                 imageFile.close()
  137.     print('\nAll images downloaded.\n')
  138.  
  139. if __name__ == "__main__":
  140.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement