Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
# Diagnostic script: prints the interpreter and locale configuration, then
# defines the reference letter order used by the collation checks.
from __future__ import print_function

import sys
import locale

# Adopt the user's default locale for every category.  setlocale() raises
# locale.Error when the requested locale is not available; report that and
# carry on under the current locale rather than aborting with a traceback.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error as e:
    print("FAILED: Cannot set locale from environment:", e)

print("Python version:", sys.version_info)

# getlocale() raises TypeError for LC_ALL when the individual categories
# differ (composite locale string); fall back to the raw setting string
# returned by the query form of setlocale().
try:
    lc_all = locale.getlocale(locale.LC_ALL)
except TypeError:
    lc_all = locale.setlocale(locale.LC_ALL)
print("LC_ALL =", lc_all)
print("LC_COLLATE =", locale.getlocale(locale.LC_COLLATE))

encoding = locale.getpreferredencoding()
print("Prefered encoding is:", encoding)
print()

# Reference order the sorting strategies are compared against.
# NOTE(review): "t" is absent between "ś" and "u" - confirm whether that is
# intentional before relying on this as the full alphabet.
alphabet = [
    "a", "ą", "b", "c", "ć",
    "d", "e", "ę", "f", "g",
    "h", "i", "j", "k", "l",
    "ł", "m", "n", "ń", "o",
    "ó", "p", "r", "s", "ś",
    "u", "v", "w", "y", "z",
    "ź", "ż",
]

# Convert to unicode in python 2.x (there the literals above are UTF-8 bytes)
if isinstance(alphabet[0], bytes):
    alphabet = [x.decode('utf-8') for x in alphabet]
def run_test(label, sort_filter, expected=None):
    """Run one sorting strategy and report whether it keeps the expected order.

    Prints ``label``, then the space-joined result of calling
    ``sort_filter`` on the expected sequence.  Prints "FAILED: Bad result."
    when the result differs from the expected sequence, or
    "FAILED: Exception: ..." when the strategy raises.  A blank line is
    always printed last.

    Args:
        label: Heading printed before the result.
        sort_filter: Callable taking the expected sequence and returning a
            list of strings.
        expected: Sequence in its correct order; defaults to the
            module-level ``alphabet``.
    """
    if expected is None:
        expected = alphabet
    print(label)
    try:
        result = sort_filter(expected)
        print(' '.join(result))
        if result != expected:
            print("FAILED: Bad result.")
    except Exception as e:
        # Any failure of the strategy (TypeError included) is a reported
        # outcome of the experiment, not a reason to stop the script.
        print("FAILED: Exception:", e)
    finally:
        print()
def _as_text(s):
    """Return ``s`` as text, decoding UTF-8 bytes (Python 2 literals)."""
    return s.decode('utf-8') if isinstance(s, bytes) else s


def _report(label, compare):
    """Print ``label`` followed by the outcome of the zero-argument ``compare``.

    These spot checks run outside run_test, so an exception raised by the
    comparison is reported here instead of aborting the rest of the script.
    """
    try:
        print(label, compare())
    except Exception as e:
        print(label, "FAILED: Exception:", e)


# the original
run_test("Sorted alphabet", lambda x: x)

# Most efficient would be using strxfrm as key on the original values
#
# This is broken on Windows (locale: Polish_Poland)
# - on 2.6.4 it should fail with UnicodeDecodeError, instead yields bad results
# - on 3.1.1 yields bad results
#
# On Linux (locale pl_PL.UTF8):
# - works in 3.1.1 and trunk
# - yields the expected Exception in 2.6.4
run_test('Key=strxfrm(unicode)', lambda x: sorted(x, key=locale.strxfrm))

# Second option is to use strcoll (you can't in py3k: sorted() has no cmp
# argument there, so run_test reports the resulting exception)
#
# Works in 2.6.4 fine everywhere (why doesn't this yield the UnicodeDecodeError ?!)
#
run_test('Cmp=strcoll(unicode)', lambda x: sorted(x, cmp=locale.strcoll))
_report("A is before Z",
        lambda: locale.strcoll(_as_text('a'), _as_text('z')) < 0)
_report("P is after K",
        lambda: locale.strcoll(_as_text('p'), _as_text('k')) > 0)
_report("Ą is before B",
        lambda: locale.strcoll(_as_text('ą'), _as_text('b')) < 0)
print()


# Next guess is to use strxfrm on strings encoded in the native coding
# Works in 2.6.4 - both Linux and Windows
# 3.1.1-Win: wrong answer
# 3.1.1-Linux: throws exception (as it should)
def encoded_key(x):
    """Sort key: strxfrm of ``x`` encoded in the preferred encoding."""
    return locale.strxfrm(x.encode(encoding))


def encoded_cmp(x, y):
    """Comparison: strcoll of ``x`` and ``y`` encoded in the preferred encoding."""
    return locale.strcoll(x.encode(encoding), y.encode(encoding))


run_test("Key=strxfrm(bytes_using_preferred_encoding)",
         lambda x: sorted(x, key=encoded_key))
run_test("Cmp=strcoll(bytes_using_preferred_encoding)",
         lambda x: sorted(x, cmp=encoded_cmp))

# The literals are normalised to text before encoding: calling .encode() on
# a Python 2 byte string holding non-ASCII data fails with
# UnicodeDecodeError, and Python 3's strcoll rejects bytes with TypeError.
# Both outcomes are now printed rather than ending the script.
_report("A is before Z",
        lambda: locale.strcoll(_as_text('a').encode(encoding),
                               _as_text('z').encode(encoding)) < 0)
_report("P is after K",
        lambda: locale.strcoll(_as_text('p').encode(encoding),
                               _as_text('k').encode(encoding)) > 0)
_report("Ą is before B",
        lambda: locale.strcoll(_as_text('ą').encode(encoding),
                               _as_text('b').encode(encoding)) < 0)
print()
Add Comment
Please, Sign In to add comment