Advertisement
Guest User

Untitled

a guest
May 24th, 2016
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.95 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4. import json
  5. import locale
  6.  
  7. print sys.getdefaultencoding()
  8. print sys.getfilesystemencoding()
  9. print locale.getdefaultlocale()
  10.  
  11. #源代码UTF8编码,所以直接写的字符串是UTF8编码
  12. json_text = '{"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}'
  13. print 'json_text is unicode ', isinstance(json_text, unicode)
  14. print 'json_text = ', json_text
  15. print 'json_text len = ', len(json_text), ' , size = ', sys.getsizeof(json_text)
  16. print 'json_text gbk ', json_text.decode('gbk')
  17. print 'json_text utf-8 ', json_text.decode('utf-8')
  18. #正确使用json模块,utf8是单字节变长编码,处理方式跟ASCII差不多
  19. text_1 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}, ensure_ascii=False)
  20. print 'text_1 is unicode ', isinstance(text_1, unicode)
  21. print 'text_1 = ', text_1
  22. print 'text_1 len = ', len(text_1), ' , size = ', sys.getsizeof(text_1)
  23. print 'text_1 gbk ', text_1.decode('gbk')
  24. print 'text_1 utf-8 ', text_1.decode('utf-8')
  25. #错误用法,注意长度,此时打印出来的unicode转义并不是单个字符而是六个字符!
  26. text_2 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]})
  27. print 'text_2 is unicode ', isinstance(text_2, unicode)
  28. print 'text_2 = ', text_2
  29. print 'text_2 len = ', len(text_2), ' , size = ', sys.getsizeof(text_2)
  30. #正确将utf8字符串转成unicode的用法,注意python内部unicode一般默认是utf-16
  31. text_3 = json_text.decode('utf-8')
  32. print 'text_3 is unicode ', isinstance(text_3, unicode)
  33. print 'text_3 = ', text_3
  34. print 'text_3 len = ', len(text_3), ' , size = ', sys.getsizeof(text_3)
  35. #可逆的
  36. text_4 = text_3.encode('utf-8')
  37. print 'text_4 is unicode ', isinstance(text_3, unicode)
  38. print 'text_4 = ', text_4
  39. print 'text_4 len = ', len(text_4), ' , size = ', sys.getsizeof(text_4)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement