Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import sys
- import json
- import locale
- print sys.getdefaultencoding()
- print sys.getfilesystemencoding()
- print locale.getdefaultlocale()
- #源代码UTF8编码,所以直接写的字符串是UTF8编码
- json_text = '{"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}'
- print 'json_text is unicode ', isinstance(json_text, unicode)
- print 'json_text = ', json_text
- print 'json_text len = ', len(json_text), ' , size = ', sys.getsizeof(json_text)
- print 'json_text gbk ', json_text.decode('gbk')
- print 'json_text utf-8 ', json_text.decode('utf-8')
- #正确使用json模块,utf8是单字节变长编码,处理方式跟ASCII差不多
- text_1 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}, ensure_ascii=False)
- print 'text_1 is unicode ', isinstance(text_1, unicode)
- print 'text_1 = ', text_1
- print 'text_1 len = ', len(text_1), ' , size = ', sys.getsizeof(text_1)
- print 'text_1 gbk ', text_1.decode('gbk')
- print 'text_1 utf-8 ', text_1.decode('utf-8')
- #错误用法,注意长度,此时打印出来的unicode转义并不是单个字符而是六个字符!
- text_2 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]})
- print 'text_2 is unicode ', isinstance(text_2, unicode)
- print 'text_2 = ', text_2
- print 'text_2 len = ', len(text_2), ' , size = ', sys.getsizeof(text_2)
- #正确将utf8字符串转成unicode的用法,注意python内部unicode一般默认是utf-16
- text_3 = json_text.decode('utf-8')
- print 'text_3 is unicode ', isinstance(text_3, unicode)
- print 'text_3 = ', text_3
- print 'text_3 len = ', len(text_3), ' , size = ', sys.getsizeof(text_3)
- #可逆的
- text_4 = text_3.encode('utf-8')
- print 'text_4 is unicode ', isinstance(text_3, unicode)
- print 'text_4 = ', text_4
- print 'text_4 len = ', len(text_4), ' , size = ', sys.getsizeof(text_4)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement