Advertisement
Guest User

Untitled

a guest
Apr 25th, 2017
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. import requests
  2. from lxml import html
  3. import base64
  4.  
  5. pageContent=requests.get('https://www.soe.ucsc.edu/people/faculty')
  6. tree = html.fromstring(pageContent.content)
  7. for people in tree.xpath('//*[@id="soe-people-directory-list"]/li'):
  8. try:
  9. name = people.xpath('h3/a[1]/text()')[0]
  10. try:
  11. email = people.xpath('ul/li/script/text()')[0]
  12. email = email[email.find("('")+2:email.find("'))")]
  13. email = email.decode('base64').split(':')[1].split('"')[0]
  14. except:
  15. email = ''
  16. try:
  17. picture = people.xpath('a/img/@src')[0]
  18. picture = 'https://www.soe.ucsc.edu' + picture
  19. except:
  20. picture = ''
  21. try:
  22. discription = people.xpath('ul/li/text()')
  23. except:
  24. print ''
  25. print name,email,picture,discription
  26. except:
  27. pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement