G2A Many GEOs
SHARE
TWEET

Untitled

a guest Apr 5th, 2020 129 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # import web driver
  2. from selenium import webdriver
  3. from time import sleep
  4. from selenium.webdriver.common.keys import Keys
  5. from parsel import Selector
  6. import json
  7.  
  8. def validate_field(field):
  9.     if not field:
  10.         field = 'No results'
  11.     return field
  12.  
  13.  
  14. driver = webdriver.Chrome('C:/chromedriver_win32/chromedriver')
  15.  
  16. driver.maximize_window()
  17. driver.get('https:www.google.com')
  18. sleep(3)
  19. country = "russian"
  20. potential_title = "developer"
  21. search_query = driver.find_element_by_name('q')
  22. search_query.send_keys('site:doyoubuzz.com "'+potential_title+'" AND "'+country+'"')
  23.  
  24. sleep(0.5)
  25.  
  26. search_query.send_keys(Keys.RETURN)
  27. sleep(10)
  28.  
  29.  
  30. pages=driver.find_elements_by_xpath("//*[@id='nav']/tbody/tr/td/a")
  31. youbuzz_urls = []
  32. for page in pages:
  33.     href = driver.find_elements_by_xpath('//a[starts-with(@href, "https://www.doyoubuzz.com/")]')
  34.     for i in href:
  35.         youbuzz_urls.append(i.get_attribute('href'))
  36.     try:
  37.         driver.find_element_by_xpath("//span[text()='Suivant']").click()
  38.     except:
  39.         pass
  40. sleep(0.5)
  41. for youbuzz_url in youbuzz_urls:
  42.     driver.get(youbuzz_url)
  43.  
  44.     # add a 5 second pause loading each URL
  45.     sleep(5)
  46.  
  47.     sel = Selector(text=driver.page_source)
  48.  
  49.     firstName = sel.xpath('//*[starts-with(@class,"userName__firstName")]/text()').extract_first()
  50.    
  51.     if firstName:
  52.         firstName = firstName.strip()
  53.        
  54.     lastName = sel.xpath('//*[starts-with(@class,"userName__lastName")]/text()').extract_first()
  55.  
  56.     if lastName:
  57.         lastName = lastName.strip()
  58.  
  59.  
  60.     current_title = sel.xpath('//*[@class="cvTitle"]/text()').extract_first()
  61.     if current_title:
  62.         current_title = current_title.strip()
  63.  
  64.  
  65.  
  66.     lives_in = sel.xpath('//*[starts-with(@class,"widgetUserInfo__item widgetUserInfo__item_location")]/text()').extract_first()
  67.     if lives_in:
  68.         lives_in = lives_in.strip()
  69.  
  70.     age = sel.xpath('//*[starts-with(@class,"widgetUserInfo__item widgetUserInfo__item_age")]/text()').extract_first()
  71.     if age:
  72.         age = age.strip()    
  73.    
  74.     youbuzz_url = driver.current_url
  75.  
  76.     firstName = validate_field(firstName)
  77.     lastName = validate_field(lastName)
  78.     current_title = validate_field(current_title)
  79.     lives_in = validate_field(lives_in)
  80.     youbuzz_url = validate_field(youbuzz_url)
  81.  
  82.     if lives_in != 'No Results':
  83.         lives_in = ' '.join(lives_in.split())
  84.     try:
  85.         # printing the output to the terminal
  86.         print('\n')
  87.         print('First Name: ' + firstName)
  88.         print('last Name: ' + lastName)
  89.         print('current_title: ' + current_title)
  90.         print('lives_in: ' + lives_in)
  91.         print('youbuzz_url: ' + youbuzz_url)
  92.         print('\n')
  93.     except:
  94.         pass
  95.     with open('data.json',mode='a', encoding='utf-8') as outfile:
  96.         res = {        
  97.             'currentPosition' : current_title,
  98.             'livesIn' : lives_in,
  99.             'country' : country,
  100.             'profile' : youbuzz_url,
  101.             'firstName': firstName,
  102.             'lastName' : lastName,
  103.             'age' : age
  104.         }
  105.         json.dump(res, outfile, indent=2)
  106.  
  107.  
  108.  
  109. driver.quit()
RAW Paste Data
Ledger Nano X - The secure hardware wallet
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top