Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Stand-in for a file downloaded from the internet in an unknown encoding:
# write a small sample encoded as cp500 (EBCDIC).
with open('test.txt', 'w', encoding='cp500') as sample:
    sample.write('Hello\n')

# Candidate encodings, tried in the order listed; any codec name known to
# Python can be added here.
encoding = [
    'utf-8',
    'cp500',
    'utf-16',
    'GBK',
    'windows-1251',
    'ASCII',
    'US-ASCII',
    'Big5',
]

# Remains '' when none of the candidates can decode the file.
correct_encoding = ''
for enc in encoding:
    try:
        # The context manager closes the handle after every attempt, so a
        # long candidate list does not leave file descriptors open.
        with open('test.txt', encoding=enc) as fh:
            fh.read()
    except (UnicodeDecodeError, LookupError):
        # UnicodeDecodeError: the bytes are not valid in this codec.
        # LookupError: the codec name is unknown to this interpreter.
        continue
    # NOTE: a clean decode only shows the bytes are *valid* in `enc`; it is
    # the first candidate that works, not proof of the true encoding.
    correct_encoding = enc
    print('Done!')
    break
print(correct_encoding)
from chardet.universaldetector import UniversalDetector

# Detect the encoding statistically: feed the raw bytes to chardet one line
# at a time and stop as soon as the detector reports it is done.
detector = UniversalDetector()
with open('test.txt', 'rb') as fh:
    lines = iter(fh)
    while not detector.done:
        try:
            line = next(lines)
        except StopIteration:
            # Ran out of input before the detector reported it was done.
            break
        detector.feed(line)
# close() must be called after the last feed() and before reading `result`.
detector.close()
print(detector.result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement