Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import codecs
import collections
import textwrap
import unicodedata
# The byte sequence whose possible interpretations are listed (a single 0xCA).
input_bytes = bytes([202])

# Names of the codecs to try the input against.
codec_names = [
    "ascii", "big5", "big5hkscs", "cp037", "cp273", "cp424",
    "cp437", "cp500", "cp720", "cp737", "cp775", "cp850", "cp852", "cp855",
    "cp856", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863", "cp864",
    "cp865", "cp866", "cp869", "cp874", "cp875", "cp932", "cp949", "cp950",
    "cp1006", "cp1026", "cp1125", "cp1140", "cp1250", "cp1251", "cp1252",
    "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "euc_jp",
    "euc_jis_2004", "euc_jisx0213", "euc_kr", "gb2312", "gbk", "gb18030", "hz",
    "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
    "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr", "latin_1", "iso8859_2",
    "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6", "iso8859_7",
    "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_11", "iso8859_13",
    "iso8859_14", "iso8859_15", "iso8859_16", "johab", "koi8_r", "koi8_t",
    "koi8_u", "kz1048", "mac_cyrillic", "mac_greek", "mac_iceland",
    "mac_latin2", "mac_roman", "mac_turkish", "ptcp154", "shift_jis",
    "shift_jis_2004", "shift_jisx0213", "utf_32", "utf_32_be", "utf_32_le",
    "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8", "utf_8_sig",
]

# Sentinel standing in for "could not be decoded".  It is a 1-tuple holding
# infinity so that it compares greater than any non-empty tuple of code
# points and therefore sorts after every successful decoding.
INVALID = (float('inf'),)
def decode(bytes, codec):
    """Decode *bytes* with *codec* and return the result as code points.

    Args:
        bytes: The byte string to decode.  (The name shadows the builtin; it
            is kept as-is so existing callers are unaffected.)
        codec: Name of the codec to decode with.

    Returns:
        A tuple with the ordinal of every decoded character, or the
        module-level ``INVALID`` sentinel when decoding raises ``ValueError``
        (which includes ``UnicodeDecodeError``).

    Raises:
        LookupError: If *codec* is not a known codec name; this is not
            caught here.
    """
    try:
        text = codecs.decode(bytes, codec)
    except ValueError:
        return INVALID
    # Tuples are hashable and orderable, so results can be dict keys and be
    # sorted by the caller.
    return tuple(ord(char) for char in text)
def character_name(ordinal):
    """Return a readable label for the code point *ordinal*.

    Args:
        ordinal: Integer code point.

    Returns:
        ``"U+XXXX"`` (hexadecimal, at least four digits) followed by a space
        and the Unicode character name when one is available.  If looking up
        the name raises ``ValueError`` (the character has no name, or
        *ordinal* is outside the range ``chr`` accepts), only the ``U+XXXX``
        part is returned.
    """
    label = "U+%04X" % (ordinal,)
    try:
        return label + " " + unicodedata.name(chr(ordinal))
    except ValueError:
        # No name available: fall back to the bare code point label.
        return label
# Group the codec names by what they decode the input to: each key is a tuple
# of code points (or INVALID), each value the set of codecs producing it.
results = collections.defaultdict(set)
for codec_name in codec_names:
    results[decode(input_bytes, codec_name)].add(codec_name)

# repr() yields the same text as str() for a bytes object, without triggering
# BytesWarning when Python runs with -b.
print("Possible decodings for " + repr(input_bytes) + ":\n")

# Sorting the keys lists decodings by code point; INVALID holds infinity, so
# the group of failed decodes comes after every non-empty decoding.
for result in sorted(results):
    if result == INVALID:
        result_string = "(decode error)"
    else:
        result_string = ", ".join(character_name(c) for c in result) \
            + ": \"" + "".join(chr(c) for c in result) + "\""
    result_codecs = sorted(results[result])
    print(result_string)
    # List the codecs that produced this result, wrapped and indented.
    for line in textwrap.wrap(', '.join(result_codecs)):
        print(" "*4 + line)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement