tba $ python --version
Python 2.7.8
tba $ xxd ./questionably_encoded.txt
00000000: 2e2e 2e20 302d 310a 0a5b 4576 656e 7420  ... 0-1..[Event 
00000010: 2248 6172 7661 7264 2043 7570 2033 30b4  "Harvard Cup 30.
00000020: 225d 0a2e 2e2e                           "]....
tba $ cat ~/bin/check-encoding
#!/usr/bin/env python

import argparse, codecs

parser = argparse.ArgumentParser()
parser.add_argument('--encoding', '-e', default=None)
parser.add_argument('file')
args = parser.parse_args()

f = codecs.open(args.file, 'r', encoding=args.encoding)
line_number = 1
try:
    for line in f:
        line_number += 1
    print('%s\tvalid %s' % (args.file, args.encoding))
except Exception as e:
    print('error on line %s' % line_number)
    raise e
tba $ check-encoding ./questionably_encoded.txt
./questionably_encoded.txt valid None
tba $ python -c 'import locale; print locale.getpreferredencoding()'
UTF-8
tba $ check-encoding -e UTF-8 ./questionably_encoded.txt
error on line 1
Traceback (most recent call last):
  File "/Users/tba/bin/check-encoding", line 18, in <module>
    raise e
UnicodeDecodeError: 'utf8' codec can't decode byte 0xb4 in position 0: invalid start byte
tba $ python -c 'import sys; print sys.getdefaultencoding()'
ascii
tba $ check-encoding -e ascii ./questionably_encoded.txt
error on line 1
Traceback (most recent call last):
  File "/Users/tba/bin/check-encoding", line 18, in <module>
    raise e
UnicodeDecodeError: 'ascii' codec can't decode byte 0xb4 in position 0: ordinal not in range(128)
tba $ python -c 'import locale; print locale.getpreferredencoding(False)'
US-ASCII
tba $ check-encoding -e US-ASCII ./questionably_encoded.txt
error on line 1
Traceback (most recent call last):
  File "/Users/tba/bin/check-encoding", line 18, in <module>
    raise e
UnicodeDecodeError: 'ascii' codec can't decode byte 0xb4 in position 0: ordinal not in range(128)
tba $