Advertisement
Guest User

Untitled

a guest
Jul 16th, 2021
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.23 KB | None | 0 0
  1. import codecs, unicodedata, collections, textwrap
  2.  
  3. input_bytes = bytes([202])
  4.  
  5. codec_names = ["ascii", "big5", "big5hkscs", "cp037", "cp273", "cp424",
  6.     "cp437", "cp500", "cp720", "cp737", "cp775", "cp850", "cp852", "cp855",
  7.     "cp856", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863", "cp864",
  8.     "cp865", "cp866", "cp869", "cp874", "cp875", "cp932", "cp949", "cp950",
  9.     "cp1006", "cp1026", "cp1125", "cp1140", "cp1250", "cp1251", "cp1252",
  10.     "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "euc_jp",
  11.     "euc_jis_2004", "euc_jisx0213", "euc_kr", "gb2312", "gbk", "gb18030", "hz",
  12.     "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004",
  13.     "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr", "latin_1", "iso8859_2",
  14.     "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6", "iso8859_7",
  15.     "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_11", "iso8859_13",
  16.     "iso8859_14", "iso8859_15", "iso8859_16", "johab", "koi8_r", "koi8_t",
  17.     "koi8_u", "kz1048", "mac_cyrillic", "mac_greek", "mac_iceland",
  18.     "mac_latin2", "mac_roman", "mac_turkish", "ptcp154", "shift_jis",
  19.     "shift_jis_2004", "shift_jisx0213", "utf_32", "utf_32_be", "utf_32_le",
  20.     "utf_16", "utf_16_be", "utf_16_le", "utf_7", "utf_8", "utf_8_sig"]
  21.  
  22. INVALID = (float('inf'),)
  23.  
  24. def decode(bytes, codec):
  25.     try:
  26.         return tuple(ord(c) for c in codecs.decode(bytes, codec))
  27.     except ValueError:
  28.         return INVALID
  29.  
  30. def character_name(ordinal):
  31.     result = "U+%04X" % (ordinal,)
  32.     try:
  33.         name = unicodedata.name(chr(ordinal))
  34.         result += " " + name
  35.     except ValueError:
  36.         pass
  37.     return result
  38.  
  39. results = collections.defaultdict(set)
  40. for codec_name in codec_names:
  41.     results[decode(input_bytes, codec_name)].add(codec_name)
  42.  
  43. print("Possible decodings for " + str(input_bytes) + ":\n")
  44.  
  45. for result in sorted(results):
  46.     if result == INVALID:
  47.         result_string = "(decode error)"
  48.     else:
  49.         result_string = ", ".join(character_name(c) for c in result) \
  50.             + ": \"" + "".join(chr(c) for c in result) + "\""
  51.     result_codecs = sorted(results[result])
  52.  
  53.     print(result_string)
  54.     for line in textwrap.wrap(', '.join(result_codecs)):
  55.         print(" "*4 + line)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement