Advertisement
Guest User

Untitled

a guest
Mar 19th, 2019
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.16 KB | None | 0 0
  # Load URLs from CSV
  def mycontents(path='global_csv.csv'):
      """Return the URL column of a semicolon-delimited CSV file.

      Args:
          path: CSV file to read. Defaults to ``'global_csv.csv'``, the
              file this function read when the name was hard-coded.

      Returns:
          A list holding the second field (index 1) of every row, in file
          order. Rows with fewer than two fields (for example blank lines)
          are skipped instead of raising ``IndexError``.
      """
      # newline='' leaves line-ending handling to the csv module, as the
      # csv documentation recommends for files passed to csv.reader.
      with open(path, 'r', newline='') as csvf:
          reader = csv.reader(csvf, delimiter=';')
          return [row[1] for row in reader if len(row) > 1]
  # Parse a single item page to get its price and availability
  def parse(url):
      """Fetch one page and return its price and availability as text.

      Args:
          url: Address of the page to download.

      Returns:
          The string ``'<price>;<availability>'``. Both parts are ``'-'``
          when the request raises or the response status is not 200. On a
          200 response, a missing ``.price`` element yields ``'0,00'`` and a
          missing ``span.wholesale-availability`` yields ``'Not Available'``.
      """
      headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'}

      price_text = '-'
      availability_text = '-'

      try:
          # headers must be passed by keyword: the second positional
          # parameter of requests.get() is ``params`` (the query string),
          # so the user-agent was previously never sent as a header.
          r = requests.get(url, headers=headers, timeout=10)
      except requests.RequestException as exc:
          # One unreachable URL must not abort the whole batch; report it
          # and fall back to the same placeholders used for non-200 replies.
          print('Request failed..' + url + ' (' + str(exc) + ')')
          return ';'.join([price_text, availability_text])
      sleep(3)  # pause after every request

      if r.status_code == 200:
          print('Processing..'+ url)
          soup = BeautifulSoup(r.text, 'html.parser')
          # NOTE(review): parsing is synchronous, so this delay appears to
          # serve only as extra throttling -- kept to preserve the crawl rate.
          time.sleep(4)

          # select() returns a list (never None, and it has no .text);
          # select_one() returns the first match or None.
          price = soup.select_one('.price')
          price_text = price.text.strip() if price is not None else '0,00'
          print(price_text)

          availability = soup.find('span', attrs={'class': 'wholesale-availability'})
          if availability is not None:
              availability_text = availability.text.strip()
          else:
              availability_text = 'Not Available'
          print(availability_text)

      return ';'.join([price_text, availability_text])
  50.  
  # URLs to scrape, read from the input CSV
  web_links = mycontents()

  # Insert first row (column headers), replacing any previous output file.
  # NOTE(review): parse() returns two values per row (price, availability)
  # while three columns are named here -- confirm whether the SKU/URL is
  # meant to be written as well.
  fields = ['SKU', 'price', 'availability']
  # newline='' is what the csv documentation requires for files handed to
  # csv.writer; without it Windows produces '\r\r\n' line endings.
  with open('output_global.csv', 'w', newline='') as f:
      # ';' matches the separator of the data rows later appended to this
      # file (and of the input CSV); the scraped values use ',' as a
      # decimal mark, e.g. '0,00'.
      writer = csv.writer(f, delimiter=';')
      writer.writerow(fields)
  59.  
  if __name__ == "__main__":
      # Log in through a Chrome window, scrape every URL in web_links with a
      # pool of 4 worker processes, then append the results to the output CSV.
      # NOTE(review): parse() downloads pages with requests and is not handed
      # anything from this browser session -- confirm the login performed
      # here is actually visible to those requests.

      # Load Webdriver (raw string: '\c' is not a valid escape sequence)
      browser = webdriver.Chrome(r'C:\chromedriver.exe')
      try:
          browser.get('TheLoginPage')
          # Find Username Field
          username = browser.find_element_by_id('email')
          username.send_keys('myusername')
          # Find Password Field
          password = browser.find_element_by_id('pass')
          time.sleep(2)
          password.send_keys('mypassword')
          # Find Connect Button
          sign_in = browser.find_element_by_xpath('//*[@id="send2"]')
          sign_in.click()

          # Start MultiProcess
          with Pool(4) as p:
              records = p.map(parse, web_links)
      finally:
          # Close the browser and its driver process even if scraping fails.
          browser.quit()

      if records:
          with open('output_global.csv', 'a') as f:
              # One record per line. The separator was the literal letter
              # 'n', which glued every record into a single line.
              f.write('\n'.join(records) + '\n')
  81.  
  82. if __name__ == "__main__":
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement