Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2014
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.57 KB | None | 0 0
  1. import codecs
  2. import urllib2
  3. import re
  4. req = urllib2.Request('http://en.wikipedia.org/wiki/Bach')
  5. page = urllib2.urlopen(req)
  6. page = page.read()
  7.  
  8. def find_pages (x):
  9. y = re.findall('href="/wiki/([^ ]*)"', x)
  10. return list(set (y))
  11. def find_organ (x):
  12. y = re.findall('>[^<^>]*[Oo]+[Rr]+[Gg]+[Aa]+[Nn]+[Ss]*[^<^>]*<', x)
  13. return len(y)
  14.  
  15. for i in find_pages(page):
  16. counter = 0
  17. req2 = urllib2.Request('http://en.wikipedia.org/wiki/' + 'i')
  18. page2 = urllib2.urlopen(req2)
  19. page2 = page2.read()
  20. counter += find_organ (page2)
  21. print counter
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement