Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from lxml import html
- import base64
# Scrape the UCSC School of Engineering faculty directory and print one line
# per person: name, e-mail address, picture URL and the description text nodes.

BASE_URL = 'https://www.soe.ucsc.edu'
FACULTY_URL = BASE_URL + '/people/faculty'


def decode_email(script_text):
    """Return the e-mail address hidden in an obfuscation script, or ''.

    The script text is expected to contain a base64 payload between the
    markers ``('`` and ``'))``.  Once decoded, the address is taken as the
    text after the first ``:`` and before the next ``"``.  Any input that
    does not match this shape yields an empty string rather than an
    exception.
    """
    start = script_text.find("('")
    end = script_text.find("'))")
    # find() returns -1 when a marker is missing; slicing with -1 would
    # silently produce garbage, so bail out explicitly.
    if start == -1 or end == -1 or end < start + 2:
        return ''
    try:
        raw = base64.b64decode(script_text[start + 2:end])
    except (TypeError, ValueError):
        # Malformed base64 (binascii.Error is a ValueError subclass on
        # Python 3; Python 2 raises TypeError).
        return ''
    markup = raw.decode('utf-8', 'replace')
    try:
        return markup.split(':')[1].split('"')[0]
    except IndexError:
        # Decoded payload has no ':' separator.
        return ''


def extract_person(entry):
    """Return ``(name, email, picture, description)`` for one directory item.

    ``entry`` is one ``<li>`` element of the people list.  Returns ``None``
    when the entry has no name link, so the caller can skip it.  Missing
    e-mail or picture become ``''``; ``description`` is the (possibly empty)
    list of text nodes found under ``ul/li``.
    """
    names = entry.xpath('h3/a[1]/text()')
    if not names:
        return None

    scripts = entry.xpath('ul/li/script/text()')
    email = decode_email(scripts[0]) if scripts else ''

    sources = entry.xpath('a/img/@src')
    picture = BASE_URL + sources[0] if sources else ''

    description = entry.xpath('ul/li/text()')
    return names[0], email, picture, description


def main():
    """Fetch the faculty page and print the details of every listed person."""
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() reports an HTTP error instead of
    # silently parsing an error page and printing nothing.
    response = requests.get(FACULTY_URL, timeout=30)
    response.raise_for_status()
    tree = html.fromstring(response.content)

    for entry in tree.xpath('//*[@id="soe-people-directory-list"]/li'):
        person = extract_person(entry)
        if person is None:
            continue
        name, email, picture, description = person
        # Single-argument print call so the statement parses under both
        # Python 2 and Python 3.
        print(' '.join((name, email, picture, str(description))))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement