#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Untitled
import sys
import json
import locale

print sys.getdefaultencoding()
print sys.getfilesystemencoding()
print locale.getdefaultlocale()

#源代码UTF8编码，所以直接写的字符串是UTF8编码
json_text = '{"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}'
print 'json_text is unicode ', isinstance(json_text, unicode)
print 'json_text = ', json_text
print 'json_text len = ', len(json_text), ' , size = ', sys.getsizeof(json_text)
print 'json_text gbk ', json_text.decode('gbk')
print 'json_text utf-8 ', json_text.decode('utf-8')
#正确使用json模块，utf8是单字节变长编码，处理方式跟ASCII差不多
text_1 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]}, ensure_ascii=False)
print 'text_1 is unicode ', isinstance(text_1, unicode)
print 'text_1 = ', text_1
print 'text_1 len = ', len(text_1), ' , size = ', sys.getsizeof(text_1)
print 'text_1 gbk ', text_1.decode('gbk')
print 'text_1 utf-8 ', text_1.decode('utf-8')
#错误用法，注意长度，此时打印出来的unicode转义并不是单个字符而是六个字符！
text_2 = json.dumps({"count": 2, "list": [{ "id": 1, "name":"乌兰", "gender":"F"}, { "id": 2, "name":"乌兰", "gender":"F"}]})
print 'text_2 is unicode ', isinstance(text_2, unicode)
print 'text_2 = ', text_2
print 'text_2 len = ', len(text_2), ' , size = ', sys.getsizeof(text_2)
#正确将utf8字符串转成unicode的用法，注意python内部unicode一般默认是utf-16
text_3 = json_text.decode('utf-8')
print 'text_3 is unicode ', isinstance(text_3, unicode)
print 'text_3 = ', text_3
print 'text_3 len = ', len(text_3), ' , size = ', sys.getsizeof(text_3)
#可逆的
text_4 = text_3.encode('utf-8')
print 'text_4 is unicode ', isinstance(text_3, unicode)
print 'text_4 = ', text_4
print 'text_4 len = ', len(text_4), ' , size = ', sys.getsizeof(text_4)