Advertisement
Guest User

scraper

a guest
Dec 28th, 2018
180
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.76 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. from selenium import webdriver
  3. import lxml.html
  4. import time
  5. import mysql.connector
  6.  
  7. mydb = mysql.connector.connect(
  8.     host = "localhost",
  9.     user = "root",
  10.     password = "",
  11.     database = "lowesdb"
  12. )
  13.  
  14. mycursor = mydb.cursor()
  15.  
  16. def make_soup():
  17.     response = driver.execute_script('return document.documentElement.outerHTML')
  18.     soup = BeautifulSoup(response, 'html.parser')
  19.     return soup
  20.  
  21.  
  22. USERNAME = 'Perisicdoors'
  23. PASSWORD = 'Lowesperisic2015'
  24.  
  25. driver = webdriver.Firefox()
  26. driver.get('https://scs-lowes.microsoftcrmportals.com/Payments/')
  27.  
  28. usr_field = driver.find_element_by_id('Username')
  29. usr_field.send_keys(USERNAME)
  30.  
  31. pwd_field = driver.find_element_by_id('Password')
  32. pwd_field.send_keys(PASSWORD)
  33.  
  34. singin_btn = driver.find_element_by_id('submit-signin-local')
  35. singin_btn.submit()
  36.  
  37. ok = 0
  38. next_page = 2
  39. while (1<2):
  40.     time.sleep(12)
  41.     soup = make_soup()
  42.     for link in soup.findAll('a', {'class': 'details-link has-tooltip'}):
  43.         href = 'https://scs-lowes.microsoftcrmportals.com' + link.get('href')
  44.         driver.get(href)
  45.         query = "SELECT ID FROM paymentinfo WHERE link=%s"
  46.         mycursor.execute(query, (href, ))
  47.         res = str(mycursor.fetchone())
  48.         if (res != 'None'):
  49.             ok = 1
  50.             break
  51.         parsed_page = lxml.html.fromstring(driver.page_source)
  52.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymentinvoicenumber")]') #payment num
  53.         payment_num = doc[0].get('value')
  54.         doc = parsed_page.xpath('//input[contains(@id, "scs_vendor_name")]') # instaler
  55.         instaler = doc[0].get('value')
  56.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymentinitiator_name")]') # initiator
  57.         initiator = doc[0].get('value')
  58.         doc = parsed_page.xpath('//input[contains(@id, "scs_invoicenumber")]')  # invoice num
  59.         invoice_num = doc[0].get('value')
  60.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymentamount")]')  # req_amount
  61.         req_amount = doc[0].get('value')
  62.         doc = parsed_page.xpath('//input[contains(@id, "scs_purchaseorder_name")]')  # pur_order
  63.         pur_order = doc[0].get('value')
  64.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymentreason")]')  # description
  65.         description = doc[0].get('value')
  66.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymentinitiateddate")]') #date
  67.         date = doc[0].get('value')
  68.         doc = parsed_page.xpath('//input[contains(@id, "scs_laborcategory_name")]')  # labor_ctg
  69.         labor_ctg = doc[0].get('value')
  70.         doc = parsed_page.xpath('//input[contains(@id, "scs_storeid_name")]')  # store
  71.         store = doc[0].get('value')
  72.         doc = parsed_page.xpath('//input[contains(@id, "scs_paymenttype")]')  # type
  73.         type = doc[0].get('value')
  74.         doc = parsed_page.xpath('//input[contains(@id, "scs_customer_name")]') #customer
  75.         customer = doc[0].get('value')
  76.         paid = ""
  77.         doc = parsed_page.xpath('//input[contains(@id, "scs_checknumber")]')  # check_num
  78.         check_num = doc[0].get('value')
  79.         doc = parsed_page.xpath('//input[contains(@id, "scs_checkdate")]')  # check_date
  80.         check_date = doc[0].get('value')
  81.         doc = parsed_page.xpath('//input[contains(@id, "scs_checkamount")]')  # check_amount
  82.         check_amount = doc[0].get('value')
  83.         if check_amount == req_amount:
  84.             paid = "yes"
  85.         else:
  86.             paid = "no"
  87.         info = (href, payment_num, instaler, initiator, invoice_num, req_amount, pur_order, description, date, labor_ctg, store, type, customer, paid, check_num, check_date, check_amount)
  88.  
  89.  
  90.         sql_formula = "INSERT INTO paymentinfo (ID, link, payment_num, instaler, initiator, invoice_num, " \
  91.                       "req_amount, pur_order, description, date, labor_ctg, store, type, customer, " \
  92.                       "paid, check_num, check_date, check_amount) " \
  93.                       "VALUES (NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
  94.  
  95.         mycursor.execute(sql_formula, info)
  96.         mydb.commit()
  97.     if ok == 1:
  98.         break
  99.     driver.get('https://scs-lowes.microsoftcrmportals.com/Payments/')
  100.     time.sleep(12)
  101.     btn_next = driver.find_element_by_link_text(str(next_page))
  102.     btn_next.click()
  103.     next_page = next_page + 1
  104.  
  105. check_query = "SELECT link FROM paymentinfo WHERE paid='no'"
  106. mycursor.execute(check_query)
  107. links_nt_pd = mycursor.fetchall()
  108. for link in links_nt_pd:
  109.     href = str(link)
  110.     href = href[2:-3]
  111.     driver.get(href)
  112.     parsed_page = lxml.html.fromstring(driver.page_source)
  113.     doc = parsed_page.xpath('//input[contains(@id, "scs_paymentamount")]')  # req_amount
  114.     req_amount = doc[0].get('value')
  115.     doc = parsed_page.xpath('//input[contains(@id, "scs_checkamount")]')  # check_amount
  116.     check_amount = doc[0].get('value')
  117.     if (req_amount == check_amount):
  118.         sql_update = "UPDATE paymentinfo SET paid='yes' WHERE link=%s"
  119.         mycursor.execute(sql_update, (href, ))
  120.         mydb.commit()
  121.         doc = parsed_page.xpath('//input[contains(@id, "scs_checknumber")]')  # check_num
  122.         check_num = doc[0].get('value')
  123.         sql_update = "UPDATE paymentinfo SET check_num=%s WHERE link=%s"
  124.         mycursor.execute(sql_update, (check_num, href))
  125.         mydb.commit()
  126.         doc = parsed_page.xpath('//input[contains(@id, "scs_checkdate")]')  # check_date
  127.         check_date = doc[0].get('value')
  128.         sql_update = "UPDATE paymentinfo SET check_date=%s WHERE link=%s"
  129.         mycursor.execute(sql_update, (check_date, href))
  130.         mydb.commit()
  131.         sql_update = "UPDATE paymentinfo SET check_amount=%s WHERE link=%s"
  132.         mycursor.execute(sql_update, (check_amount, href))
  133.         mydb.commit()
  134.  
  135. driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement