Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat ./unicode.py
#!/opt/python/bin/python3
"""Print the Unicode name of every character in an ``od -t x1`` hex dump.

The dump is read from standard input, for example:

    $ echo "Hello © ≠" | od -t x1
    0000000 48 65 6c 6c 6f 20 c2 a9 20 e2 89 a0 0a
    0000015
    $ echo "Hello © ≠" | od -t x1 | ./unicode.py

The dumped bytes are decoded as UTF-8 and one numbered line is printed per
character.  Lines consisting only of ``*`` (od's marker for repeated data)
carry no bytes and are not expanded, so run ``od`` with ``-v`` if the input
may contain long runs of identical data.
"""
import sys
import unicodedata


def parse_od_dump(lines) -> bytes:
    """Return the raw bytes described by the lines of an ``od -t x1`` dump.

    Args:
        lines: Iterable of text lines.  On each line the first
            whitespace-separated token (od's offset column) is discarded and
            every remaining token is parsed as one hexadecimal byte value.

    Returns:
        The bytes of all lines, concatenated in order.

    Raises:
        ValueError: If a token is not hexadecimal or does not fit in a byte.
    """
    data = bytearray()
    for line in lines:
        tokens = line.split()
        # Slicing (rather than pop(0)) keeps blank lines and the final
        # offset-only line from raising: both simply contribute no bytes.
        data.extend(int(token, 16) for token in tokens[1:])
    return bytes(data)


def describe_bytes(data) -> list:
    """Return the Unicode name of each character encoded in *data*.

    Args:
        data: UTF-8 encoded bytes.  A single trailing newline byte (as
            appended by ``echo``) is ignored if present.

    Returns:
        A list with one name per decoded character.  Characters that have no
        Unicode name (control characters, for instance) are reported as
        ``<unnamed U+XXXX>``.  Bytes that are not valid UTF-8 decode to
        U+FFFD and are therefore reported as ``REPLACEMENT CHARACTER``.
    """
    # Only strip the newline when it is really there; dropping the last byte
    # unconditionally would lose the final character of `echo -n` input.
    if data.endswith(b"\n"):
        data = data[:-1]
    # Let the codec do the UTF-8 work: it validates lead/continuation bytes
    # and copes with truncated sequences instead of raising IndexError.
    text = data.decode("utf-8", errors="replace")
    return [
        unicodedata.name(char, "<unnamed U+{:04X}>".format(ord(char)))
        for char in text
    ]


def main(stdin=None, stdout=None):
    """Read an od dump from *stdin* and print numbered character names.

    Args:
        stdin: Iterable of dump lines; defaults to ``sys.stdin``.
        stdout: Text stream to print to; defaults to ``sys.stdout``.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    names = describe_bytes(parse_od_dump(stdin))
    for number, name in enumerate(names, start=1):
        # Right-justify the counter to two columns, e.g. " 1. SPACE".
        print("{:>2}. {}".format(number, name), file=stdout)


if __name__ == "__main__":
    main()
- ##################################################################
- $ echo "Hello! © ≠" | od -t x1 | ./unicode.py
- 1. LATIN CAPITAL LETTER H
- 2. LATIN SMALL LETTER E
- 3. LATIN SMALL LETTER L
- 4. LATIN SMALL LETTER L
- 5. LATIN SMALL LETTER O
- 6. EXCLAMATION MARK
- 7. SPACE
- 8. COPYRIGHT SIGN
- 9. SPACE
- 10. NOT EQUAL TO
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement