Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
'''
author: smilehsu
requirements: Windows 7, Python 3.5.2
date: 2017-09-25 09:26:29
'''
'''
Search criteria: Philippines, Cebu; female, aged 18-30; sorted by last active.
URL format (as of 2017-09-25):
https://www.dateinasia.com/Search.aspx?g=2&af=18&at=30&c=PH&ci=Cebu&s=2
Page 1:
https://www.dateinasia.com/Search.aspx?pg=0&g=2&af=18&at=30&c=PH&ci=Cebu&s=2
Page 2:
https://www.dateinasia.com/Search.aspx?pg=1&g=2&af=18&at=30&c=PH&ci=Cebu&s=2
Page 3:
https://www.dateinasia.com/Search.aspx?pg=2&g=2&af=18&at=30&c=PH&ci=Cebu&s=2
Page-control parameter:
pg=<number> (zero-based: pg=0 is the first page)
Other parameters:
g, af, at, c, ci, s (the search filters shown in the URL above)
'''
- import requests
- from bs4 import BeautifulSoup
# Site root and the fixed filter portion of the search query string
# (everything except the page index).
base_url = 'https://www.dateinasia.com'
searchfor = '&g=2&af=18&at=30&c=PH&ci=Cebu&s=2'

# Declared by the original script but never filled in the visible code;
# kept so the module-level name still exists.
page_link = []

# Highest page index to generate. The index is zero-based, so num + 1
# search URLs are produced (pg=0 .. pg=num).
num = 2

# One search URL per result page. The range value is used directly as the
# page index, so no separately maintained counter is required.
# str.format is used instead of an f-string to stay compatible with the
# Python 3.5 interpreter the script targets.
page_list = [
    '{}/Search.aspx?pg={}{}'.format(base_url, page_index, searchfor)
    for page_index in range(num + 1)
]

# Debug check (disabled):
# for p in page_list:
#     print('第頁')
# print("第一頁:\n" + page_list[0])
# Fetch a search-results page and collect the profile-page link of every
# user listed on it.
# NOTE: only the first generated URL (page_list[0]) is requested here; the
# remaining entries of page_list are not fetched by this code.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as if
# it were a (user-less) result page.
res = requests.get(page_list[0], timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html5lib')

# Each user tile is a <span> carrying exactly this class string.
get_data = soup.find_all("span", {'class': 'responsive-container galleryphoto-responsive'})

# Print each user's name and profile link while collecting the links.
# print(get_data)
User_Page_Link = []
for link in get_data:
    img = link.find('img')
    # Skip tiles without an <img> or without alt text instead of crashing
    # with AttributeError / KeyError.
    if img is None or 'alt' not in img.attrs:
        continue
    # The image's alt text is used as the user name; spaces are replaced
    # with '+' to build the profile URL path.
    user_name = img.attrs['alt'].replace(' ', '+')
    user_link = base_url + '/' + user_name
    User_Page_Link.append(user_link)
    print(user_name)
    print(user_link)

# Number of user links collected from the page.
print("頁面使用者數:", len(User_Page_Link))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement