Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import operator
import re
from collections import Counter

from bs4 import BeautifulSoup
# Finds the "HH:MM" timestamp in a header and captures everything between it
# and the next "<b" (DOTALL because the header markup may span several lines).
HEADER_RE = re.compile(r'\d\d:\d\d(.*?)<b', re.DOTALL)
# Captures the text between the first ">" and the following "<", i.e. the
# content of the first tag when the name is wrapped in markup.
TAG_TEXT_RE = re.compile(r'>(.*?)<')


def extract_author(header_html):
    """Return the author name found in one header's HTML, or None.

    The name is the text that follows the "HH:MM" timestamp, up to the next
    "<b". If that text starts with a tag, the name is taken from the content
    of that first tag instead.

    Returns None when the header has no timestamp, when nothing follows the
    timestamp, or when the wrapping tag cannot be parsed, so that a single
    malformed header does not abort the whole run.
    """
    match = HEADER_RE.search(header_html)
    if match is None:
        return None
    name = match.group(1).strip()
    if not name:
        return None
    if name.startswith('<'):
        inner = TAG_TEXT_RE.search(name)
        if inner is None:
            return None
        name = inner.group(1)
    return name


def count_authors(headers):
    """Count how many headers belong to each author.

    headers: iterable of header elements; each one is converted with str()
        before being scanned, so both tags and plain strings are accepted.

    Returns a list of (name, count) tuples ordered from most to fewest
    headers. Headers from which no name can be extracted are skipped.
    """
    counts = Counter()
    for header in headers:
        name = extract_author(str(header))
        if name is not None:
            counts[name] += 1
    # Stable ascending sort followed by a reverse: authors with equal counts
    # come out in reverse order of first appearance.
    ranked = sorted(counts.items(), key=operator.itemgetter(1))
    ranked.reverse()
    return ranked


def main(path='diskuze.html'):
    """Print "name: count" for every author found in the HTML file at path."""
    # The context manager closes the file even if reading fails.
    with open(path) as page:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(page.read(), 'html.parser')
    for name, count in count_authors(soup.find_all("div", "ds_hlavicka")):
        print(name + ": " + str(count))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement