Guest User

Untitled

a guest
Apr 20th, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.70 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. from __future__ import print_function
  4.  
  5. import sys
  6.  
  7. import locale
  8. locale.setlocale(locale.LC_ALL, '')
  9. print("Python version:", sys.version_info)
  10. print("LC_ALL =", locale.getlocale(locale.LC_ALL))
  11. print("LC_COLLATE =", locale.getlocale(locale.LC_COLLATE))
  12.  
  13. encoding = locale.getpreferredencoding()
  14. print("Prefered encoding is:", encoding)
  15. print()
  16.  
  17. alphabet = [
  18. "a", "ą", "b", "c", "ć",
  19. "d", "e", "ę", "f", "g",
  20. "h", "i", "j", "k", "l",
  21. "ł", "m", "n", "ń", "o",
  22. "ó", "p", "r", "s", "ś",
  23. "u", "v", "w", "y", "z",
  24. "ź", "ż"
  25. ]
  26.  
  27. # Convert to unicode in python 2.x
  28. if isinstance(alphabet[0], bytes):
  29. alphabet = [ x.decode('utf-8') for x in alphabet ]
  30.  
  31. def run_test(label, sort_filter):
  32.  
  33. print(label)
  34. try:
  35. result = sort_filter(alphabet)
  36. print(' '.join(result))
  37. if result != alphabet:
  38. print("FAILED: Bad result.")
  39. except TypeError as e:
  40. print("FAILED: Exception:", e)
  41. except Exception as e:
  42. print("FAILED: Exception:", e)
  43. finally:
  44. print()
  45.  
  46.  
  47.  
  48. # the original
  49. run_test("Sorted alphabet", lambda x: x)
  50.  
  51.  
  52. # Most efficient would be using strxfrm as key on the original values
  53. #
  54. # This is broken on Windows (locale: Polish_Poland)
  55. # - on 2.6.4 in should fail with UnicodeDecodeError, instead yields bad results
  56. # - on 3.1.1 yields bad results
  57. #
  58. # On Linux (locale pl_PL.UTF8):
  59. # - works in 3.1.1 and trunk
  60. # - yields the expected Exception in 2.6.4
  61. run_test('Key=strxfrm(unicode)', lambda x: sorted(x, key=locale.strxfrm))
  62.  
  63. # Second option is to use strcoll (you can't in py3k)
  64. #
  65. # Works in 2.6.4 fine everywhere (why doesn't this yield the UnicodeDecodeError ?!)
  66. #
  67. run_test('Cmp=strcoll(unicode)', lambda x: sorted(x, cmp=locale.strcoll))
  68. print("A is before Z", (locale.strcoll('a', 'z') < 0) )
  69. print("P is after K", (locale.strcoll('p', 'k') > 0) )
  70. print("Ą is before B", (locale.strcoll('ą', 'b') < 0) )
  71. print()
  72.  
  73. # Next guess is to use strxfrm on strings encoded in the native coding
  74. # Works in 2.6.4 - both Linux and Windows
  75. # 3.1.1-Win: wrong anwser
  76. # 3.1.1-Linux: throws exception (as it should)
  77. encoded_key = lambda x: locale.strxfrm(x.encode(encoding))
  78. run_test("Key=strxfrm(bytes_using_preferred_encoding)", lambda x: sorted(x, key=encoded_key))
  79.  
  80. encoded_cmp = lambda x, y: locale.strcoll(x.encode(encoding), y.encode(encoding))
  81. run_test("Cmp=strcoll(bytes_using_preferred_encoding)", lambda x: sorted(x, cmp=encoded_cmp))
  82. print("A is before Z", (locale.strcoll('a'.encode(encoding), 'z'.encode(encoding)) < 0) )
  83. print("P is after K", (locale.strcoll('p'.encode(encoding), 'k'.encode(encoding)) > 0) )
  84. print("Ą is before B", (locale.strcoll('ą'.encode(encoding), 'b'.encode(encoding)) < 0) )
  85. print()
Add Comment
Please, Sign In to add comment