Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load URLs from CSV
def mycontents():
    """Return the URLs stored in the second column of ``global_csv.csv``.

    The file is read from the current working directory as a
    semicolon-delimited CSV.  Rows without a second column (for example
    blank lines, which ``csv.reader`` yields as empty lists) and rows whose
    second column is empty are skipped instead of raising ``IndexError``.

    Returns:
        list[str]: The URLs in file order, with surrounding whitespace
        removed.

    Raises:
        OSError: If ``global_csv.csv`` cannot be opened.
    """
    # newline='' is the mode the csv module documents for files given to a
    # reader, so quoted fields containing line breaks are parsed correctly.
    with open('global_csv.csv', 'r', newline='') as csvf:
        reader = csv.reader(csvf, delimiter=";")
        return [
            row[1].strip()
            for row in reader
            if len(row) > 1 and row[1].strip()
        ]
# parse a single item to get information
def parse(url):
    """Download one page and extract its price and availability.

    Args:
        url: Address of the page to fetch.

    Returns:
        str: ``"<price>;<availability>"``.  Both fields are ``"-"`` when the
        request fails or the response status is not 200.  On a 200 response
        a missing ``.price`` element yields ``"0,00"`` and a missing
        ``span.wholesale-availability`` element yields ``"Not Available"``.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'}
    availability_text = '-'
    price_text = '-'

    try:
        # headers must be passed by keyword: the second positional argument
        # of requests.get is ``params`` (the query string), not the headers.
        r = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # One unreachable URL should not abort the whole batch; report it
        # and return the placeholder record instead.
        print('Request failed for ' + url + ': ' + str(exc))
        return ';'.join([price_text, availability_text])

    # Pause between requests (presumably to throttle load on the site).
    time.sleep(3)

    if r.status_code == 200:
        print('Processing..' + url)
        soup = BeautifulSoup(r.text, 'html.parser')
        time.sleep(4)

        # select_one returns a single element or None; select() returns a
        # list, which is never None and has no ``.text`` attribute.
        price = soup.select_one(".price")
        price_text = price.text.strip() if price is not None else "0,00"
        print(price_text)

        availability = soup.find('span', attrs={'class': 'wholesale-availability'})
        if availability is not None:
            availability_text = availability.text.strip()
        else:
            availability_text = "Not Available"
        print(availability_text)

    return ';'.join([price_text, availability_text])
# Header row of the output file.
# NOTE(review): the header is written comma-delimited with three columns,
# while parse() returns two fields joined with ';' -- confirm the intended
# output layout.
fields = ['SKU', 'price', 'availability']


def main():
    """Log in through Chrome, scrape every URL from the CSV and save the results.

    Everything with side effects lives here, behind the ``__main__`` guard,
    so that worker processes which re-import this module (the ``spawn`` start
    method) do not reload the input CSV or truncate the output file again.
    """
    web_links = mycontents()

    # Insert first row.  newline='' stops the csv writer's own line
    # terminator from being translated a second time on Windows.
    with open('output_global.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fields)

    # Load webdriver (raw string so the backslash is not read as an escape)
    # NOTE(review): the browser is never closed, and the logged-in session is
    # not handed to the requests calls made in parse() -- confirm the intent.
    browser = webdriver.Chrome(r'C:\chromedriver.exe')
    browser.get('TheLoginPage')

    # Find username field
    username = browser.find_element_by_id('email')
    username.send_keys('myusername')

    # Find password field
    password = browser.find_element_by_id('pass')
    time.sleep(2)
    password.send_keys('mypassword')

    # Find connect button
    sign_in = browser.find_element_by_xpath('//*[@id="send2"]')
    sign_in.click()

    # Start multiprocess
    with Pool(4) as p:
        records = p.map(parse, web_links)

    if records:
        with open('output_global.csv', 'a') as f:
            # One record per line; the separator must be a real newline,
            # not the letter 'n'.
            f.write('\n'.join(records) + '\n')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement