Advertisement
Guest User

parser

a guest
Jul 17th, 2018
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.11 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import requests
  3. from bs4 import BeautifulSoup
  4. flist=[]
  5.  
  6. for j in range(999):
  7.     #url = 'http://i.intimcity.nl/persons.php?index=' + str(j)
  8.     url = 'http://i.intimcity.nl/persons.php?checkedadmin=Y&index=' + str(j)
  9.     r = requests.get(url)
  10.     soup = BeautifulSoup(r.text.encode('utf8'), 'html.parser')
  11.  
  12.     a = soup.find_all('a')
  13.     id = []
  14.     for e in a:
  15.         if e.get('title', 'default') != 'default':
  16.             if '/persons.php?id=' in e.get('href', 'default'):
  17.                 id.append(e['href'])  
  18.     p_p1 = soup.find_all(class_='p_p1')
  19.     p_p2 = soup.find_all(class_='p_p2')
  20.     if (len(p_p1) < 1):
  21.         break
  22.     if (len(p_p1) < len(id)):
  23.         print('ERROR')
  24.         exit()
  25.     for i in range(len(id)):
  26.         f = [];
  27.         f.append(id[i])
  28.         f.append(p_p1[i].get_text())
  29.         flist.append(f)
  30.     print(j, end='\r')
  31.  
  32. print('N = ' + str(len(flist)))
  33. s = {}
  34. for i in range(2, 25):
  35.     s.update({str(i*500): 1})
  36. for e in flist:
  37.     p1 = e[1]
  38.     if s.get(p1, -1) == -1:
  39.         s.update({p1: 1})
  40.     else:
  41.         s[p1] = s[p1] + 1
  42. print(s)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement