Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
import sys

import requests
from bs4 import BeautifulSoup
# Listing pages are addressed as LISTING_URL + str(page_index).
# Variant without the ``checkedadmin`` filter:
#   'http://i.intimcity.nl/persons.php?index='
LISTING_URL = 'http://i.intimcity.nl/persons.php?checkedadmin=Y&index='
MAX_PAGES = 999          # upper bound on the page indices that are tried
REQUEST_TIMEOUT = 30     # seconds; without it a stalled connection hangs forever


def fetch_page(index):
    """Download listing page *index* and return it parsed with html.parser."""
    response = requests.get(LISTING_URL + str(index), timeout=REQUEST_TIMEOUT)
    # Pass the already-decoded text: re-encoding to bytes would let
    # BeautifulSoup guess the encoding a second time.
    return BeautifulSoup(response.text, 'html.parser')


def extract_listings(soup):
    """Return ``[href, p_p1_text]`` pairs for one parsed listing page.

    A link qualifies when it has a ``title`` attribute and its ``href``
    contains ``/persons.php?id=``.  The i-th qualifying link is paired with
    the text of the i-th element of class ``p_p1``.
    NOTE(review): this positional pairing is taken over from the original
    script; confirm it matches the page markup.

    Returns:
        A list of ``[href, text]`` pairs, or ``None`` when the page has no
        ``p_p1`` elements at all (the caller treats that as "past the last
        page").

    Raises:
        ValueError: the page has fewer ``p_p1`` elements than qualifying
            links, so the pairing would be incomplete.
    """
    hrefs = [
        link['href']
        for link in soup.find_all('a')
        if link.get('title') is not None
        and '/persons.php?id=' in link.get('href', '')
    ]
    cells = soup.find_all(class_='p_p1')
    if not cells:
        return None
    if len(cells) < len(hrefs):
        raise ValueError('fewer p_p1 elements than listing links')
    # zip stops at the shorter sequence, i.e. at len(hrefs).
    return [[href, cell.get_text()] for href, cell in zip(hrefs, cells)]


def scrape_listings(max_pages=MAX_PAGES):
    """Collect the pairs of every page until the first page without data."""
    listings = []
    for page in range(max_pages):
        page_listings = extract_listings(fetch_page(page))
        if page_listings is None:
            break
        listings.extend(page_listings)
        # Progress indicator, overwritten in place on the same terminal line.
        print(page, end='\r')
    return listings


def build_histogram(listings):
    """Count how often each ``p_p1`` text occurs in *listings*.

    The keys '1000', '1500', ..., '12000' are inserted first so that they
    appear first, in ascending order, in the printed dict.  They start at 0
    so that every count is the real number of occurrences (the earlier
    version seeded them with 1, inflating exactly those buckets by one).
    Any other text is appended as a new key when first seen.
    """
    histogram = {str(step * 500): 0 for step in range(2, 25)}
    for _href, value in listings:
        histogram[value] = histogram.get(value, 0) + 1
    return histogram


def main():
    """Scrape all listing pages, then print the total and the histogram."""
    try:
        listings = scrape_listings()
    except ValueError:
        print('ERROR')
        # Non-zero status so callers can tell the run failed.
        sys.exit(1)
    print('N = ' + str(len(listings)))
    print(build_histogram(listings))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement