Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
import re
import sys
import urllib

import chardet
import requests
# File listing the URLs to scan: one URL per line, blank lines are ignored.
URL_LIST_PATH = '/media/data/rss.csv/wrong_encoding_urls.txt'

# Seconds to wait on a single URL before giving up on it, so one stalled
# server cannot hang the whole run.
REQUEST_TIMEOUT = 30

# First encoding="..." attribute in a response body (presumably the XML
# declaration of a feed -- confirm against the URL list).  This is a bytes
# pattern because the body is inspected undecoded.
ENCODING_RE = re.compile(br'encoding="([^"]+)"')


def urlread(url):
    """Return the raw body fetched from *url*.

    Not used by the report below; kept so the name stays available.
    """
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def declared_encoding(content):
    """Return the first ``encoding="..."`` value found in *content*.

    *content* is the raw (bytes) response body.  Returns ``None`` when the
    body carries no such attribute instead of raising ``IndexError``.
    """
    match = ENCODING_RE.search(content)
    if match is None:
        return None
    # The name is only printed, so undecodable bytes are replaced rather
    # than allowed to abort the report.
    return match.group(1).decode('ascii', 'replace')


def max_byte(content):
    """Return the largest byte value in *content*, or 0 when it is empty."""
    # bytearray yields integers whether *content* is bytes or a Python 2 str.
    return max(bytearray(content)) if content else 0


def main(path=URL_LIST_PATH):
    """Print ``url encoding max_byte`` for every URL whose body is not ASCII.

    Reads URLs from *path*, downloads each one and reports those whose body
    contains a byte above 127.  The encoding column shows ``None`` when the
    body declares no encoding.  URLs that cannot be fetched are reported on
    stderr and skipped so the remaining URLs are still checked.
    """
    with open(path) as url_file:
        for line in url_file:
            url = line.strip()
            if not url:
                continue
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                sys.stderr.write('%s: fetch failed: %s\n' % (url, exc))
                continue
            content = response.content
            highest = max_byte(content)
            if highest > 127:
                print('%s %s %s' % (url, declared_encoding(content), highest))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement