Advertisement
Guest User

hunnyb Library

a guest
Sep 18th, 2014
277
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.36 KB | None | 0 0
  1. """
  2. ===================
  3. HunnyB (de|en)coder
  4. ===================
  5.  
  6. Something like "Bencode remixed"
  7. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8.  
  9. HunnyB implements the `bencode`_ encoding/decoding originally
  10. created by `Petru Paler`_ for use in the guts of `BitTorrent`_,
  11. brainchild of `Bram Cohen`_.  
  12.        
  13.        >>> import hunnyb
  14.  
  15. Import of the hunnyb module will register the encode/decode functions
  16. with the standard library `codecs`_ module, meaning strings may be
  17. encoded in one of the following ways:
  18.  
  19.        >>> "foobaz hambones".encode('hunnyb')
  20.        '15:foobaz hambones'
  21.        
  22.        >>> "foobaz hambones".encode('hb')
  23.        '15:foobaz hambones'
  24.        
  25.        >>> "foobaz hambones".encode('bencode')
  26.        '15:foobaz hambones'
  27.        
  28.        >>> "foobaz hambones".encode('b')
  29.        '15:foobaz hambones'
  30.  
  31.  
  32. Likewise, bencoded strings may be decoded through `codecs`_, although the
  33. result is always a string (a requirement of `codecs`_): a decoded value that
  34. is not itself a string comes back as its ``str()`` form and must be ``eval()``-ed.
  35.  
  36.        >>> enc_str = "ForkingHam BIZZYBONE RazzMATAZZ".encode('hb')
  37.  
  38.        >>> print enc_str
  39.        31:ForkingHam BIZZYBONE RazzMATAZZ
  40.  
  41.        >>> enc_str.decode('hb')
  42.        'ForkingHam BIZZYBONE RazzMATAZZ'
  43.  
  44.        >>> enc_dict = hunnyb.encode({'foo': 99000, 0: [99, 8, 'bobob']})
  45.  
  46.        >>> print enc_dict
  47.        d1:0li99ei8e5:bobobe3:fooi99000ee
  48.        
  49.        >>> enc_dict.decode('bencode')
  50.        "{'0': [99, 8, 'bobob'], 'foo': 99000}"
  51.  
  52.  
  53. Alternatively, the ``encode`` and ``decode`` functions available in
  54. ``hunnyb`` may be used directly, with decoding always returning a
  55. given object's Python equivalent.
  56.  
  57.        >>> hunnyb.decode(enc_dict)
  58.        {'0': [99, 8, 'bobob'], 'foo': 99000}
  59.  
  60.  
  61. .. _bencode: http://en.wikipedia.org/wiki/Bencode
  62. .. _Petru Paler: http://petru.paler.net/
  63. .. _BitTorrent: http://www.bittorrent.com/what-is-bittorrent
  64. .. _Bram Cohen: http://en.wikipedia.org/wiki/Bram_Cohen
  65. .. _codecs: http://docs.python.org/lib/module-codecs.html
  66. .. vim:filetype=rst
  67. """
  68. # 2008 Dan Buch daniel.buch@gmail.com - Licensed MIT
  69.        
  70. import codecs as _codecs
  71. from encodings import aliases as _aliases
  72.  
  73.  
  # Name the codec is registered under, plus the aliases that resolve to it.
  HB = 'hb'
  HUNNYB_ENC_NAME = "hunnyb"
  HB_ALIASES = (
      HB,
      'bencode',
      'benc',
      'b',
  )

  # The first character of an encoded value selects its decoder.
  INT_BEGIN, LIST_BEGIN, DICT_BEGIN = 'i', 'l', 'd'
  # An encoded string opens with its decimal length, so any digit may start one.
  (STR_BEGIN0, STR_BEGIN1, STR_BEGIN2, STR_BEGIN3, STR_BEGIN4,
   STR_BEGIN5, STR_BEGIN6, STR_BEGIN7, STR_BEGIN8, STR_BEGIN9) = '0123456789'
  # Terminator of ints/lists/dicts, and the length/payload separator of strings.
  ENC_END, ENC_JOIN = 'e', ':'

  # Dispatch tables; they are populated once all codec functions are defined.
  _DECODE_FUNCS_CACHE = dict()
  _ENCODE_FUNCS_CACHE = dict()

  # Object-kind labels used as the keys of _ENCODE_FUNCS_CACHE.
  INT, LNG, STR, LST, TUP, DCT, BOO = range(7)
  101.  
  102.  
  class HunnyBError(Exception):
      """Base class for every error raised by the hunnyb codec."""
  105.  
  106.  
  class HunnyBDecodingError(HunnyBError):
      """Raised by the low-level decoders for non-canonical numbers.

      Covers leading zeros and negative zero in integers and in string
      length prefixes.
      """
  109.  
  110.  
  def _hunnyb_search_func(name):
      """Codec search hook handed to ``codecs.register``.

      Returns the ``(encoder, decoder, reader, writer)`` tuple when *name*
      is the hunnyb codec name or one of its aliases (no stream reader or
      writer is provided), and ``None`` for any other name.
      """
      known_names = (HUNNYB_ENC_NAME,) + HB_ALIASES
      if name not in known_names:
          return None
      return (_encode, _decode, None, None)
  115.  
  116.  
  def _label_duck(obj):
      """Classify *obj* by duck typing and return the label of its encoder.

      The checks run in this order and the first match wins:

      * ``STR`` -- instances of ``basestring``
      * ``INT`` -- objects whose ``str()`` is a run of digits, optionally
        preceded by a single minus sign
      * ``LST`` -- objects with ``append`` and ``index``
      * ``TUP`` -- objects with ``__iter__`` but neither ``append`` nor ``items``
      * ``DCT`` -- objects with ``items``, ``keys`` and ``values``
      * ``BOO`` -- objects whose ``str()`` is ``'True'`` or ``'False'``

      Raises ``HunnyBError`` when none of the checks match.
      """
      if isinstance(obj, basestring):
          return STR
      text = str(obj)
      # ``str.isdigit`` is False for '-5', so without the explicit sign check
      # negative integers fall through every branch and are rejected as
      # unencodeable even though the integer encoder handles them fine.
      if text.isdigit() or (text[:1] == '-' and text[1:].isdigit()):
          return INT
      elif hasattr(obj, 'append') and hasattr(obj, 'index'):
          return LST
      elif hasattr(obj, '__iter__') and not hasattr(obj, 'append') \
              and not hasattr(obj, 'items'):
          return TUP
      elif hasattr(obj, 'items') and hasattr(obj, 'keys') \
              and hasattr(obj, 'values'):
          return DCT
      elif text in ('True', 'False'):
          return BOO
      else:
          raise HunnyBError("not an encodeable object: " + text)
  134.  
  135.  
  def _decode_int(obj, count):
      """Decode the integer whose ``i`` marker sits at index *count* of *obj*.

      Returns ``(value, next_index)`` where *next_index* is the position
      just past the closing ``e``.  Raises ``HunnyBDecodingError`` for a
      negative number whose first digit is zero and for a zero-prefixed
      number other than a lone ``0``.
      """
      start = count + 1
      end = obj.index(ENC_END, start)
      value = int(obj[start:end])
      leading = obj[start]
      if leading == '-':
          # "-0..." is never a canonical integer
          malformed = obj[start + 1] == STR_BEGIN0
      else:
          # a leading zero is only allowed when it is the whole number
          malformed = leading == STR_BEGIN0 and end != start + 1
      if malformed:
          raise HunnyBDecodingError
      return (value, end + 1)
  146.  
  147.  
  def _decode_string(obj, count):
      """Decode the length-prefixed string starting at index *count* of *obj*.

      Returns ``(text, next_index)`` where *next_index* is the position
      just past the string payload.  Raises ``HunnyBDecodingError`` when
      the length prefix has a leading zero and is longer than one digit.
      """
      separator = obj.index(ENC_JOIN, count)
      size = int(obj[count:separator])
      zero_padded = obj[count] == STR_BEGIN0 and separator != count + 1
      if zero_padded:
          raise HunnyBDecodingError
      start = separator + 1
      stop = start + size
      return (obj[start:stop], stop)
  155.  
  156.  
  def _decode_list(obj, count, dec_funcs=_DECODE_FUNCS_CACHE):
      """Decode the list whose ``l`` marker sits at index *count* of *obj*.

      Each element is decoded by the function that *dec_funcs* maps its
      first character to.  Returns ``(items, next_index)`` where
      *next_index* is the position just past the closing ``e``.
      """
      items = []
      pos = count + 1
      while obj[pos] != ENC_END:
          decoder = dec_funcs[obj[pos]]
          value, pos = decoder(obj, pos)
          items.append(value)
      return (items, pos + 1)
  164.  
  165.  
  def _decode_dict(obj, count, dec_str=_decode_string,
          dec_funcs=_DECODE_FUNCS_CACHE):
      """Decode the dict whose ``d`` marker sits at index *count* of *obj*.

      Keys are always read with *dec_str*; each value is decoded by the
      function that *dec_funcs* maps its first character to.  Returns
      ``(mapping, next_index)`` where *next_index* is the position just
      past the closing ``e``.
      """
      mapping = {}
      pos = count + 1
      while obj[pos] != ENC_END:
          key, pos = dec_str(obj, pos)
          value, pos = dec_funcs[obj[pos]](obj, pos)
          mapping[key] = value
      return (mapping, pos + 1)
  174.  
  175.  
  def _decode(obj, decode_funcs=_DECODE_FUNCS_CACHE, stringify=True):
      """Decode the bencoded string *obj*; this is the codec's decoder.

      The first character of *obj* selects the decoding function from
      *decode_funcs*.  Returns ``(value, length)`` where *length* is the
      number of input characters consumed (always ``len(obj)`` on
      success).  With *stringify* true the value is passed through
      ``str()`` first.

      Raises ``HunnyBError`` when a helper reports a malformed number or
      when the encoded value does not span all of *obj*.  Other malformed
      input still surfaces the helpers' own ``ValueError``, ``KeyError``
      or ``IndexError``.
      """
      try:
          ret, length = decode_funcs[obj[0]](obj, 0)
      except HunnyBDecodingError:
          raise HunnyBError("not a hunnyb-encoded string")
      if length != len(obj):
          raise HunnyBError("not a valid encoded value")
      if stringify:
          ret = str(ret)
      # Report the consumed input length.  Taking ``len()`` of the decoded
      # object instead raises TypeError for any top-level integer ("i5e").
      return (ret, length)
  187.  
  188.  
  def decode(obj, decode_funcs=_DECODE_FUNCS_CACHE, dec=_decode):
      """Decode the bencoded string *obj* and return the Python object.

      Calls *dec* with stringification switched off and returns only the
      decoded value from its result.
      """
      result = dec(obj, decode_funcs, False)
      return result[0]
  192.  
  193.  
  def _encode_int(obj, buf):
      """Append the pieces of the integer encoding ``i<number>e`` to *buf*."""
      digits = str(obj)
      buf.extend(('i', digits, 'e'))
  196.  
  197.  
  def _encode_bool(obj, buf, enc_int=_encode_int):
      """Encode a boolean as the integer 1 (truthy) or 0 (falsy) via *enc_int*."""
      enc_int(int(bool(obj)), buf)
  203.  
  204.        
  def _encode_string(obj, buf):
      """Append the pieces of the string encoding ``<length>:<text>`` to *buf*."""
      length_prefix = str(len(obj))
      buf.extend((length_prefix, ':', obj))
  207.  
  208.  
  def _encode_list(obj, buf, enc_funcs=_ENCODE_FUNCS_CACHE):
      """Append the encoding of the iterable *obj* to *buf* as ``l...e``.

      Each item is labelled with ``_label_duck`` and encoded by the
      function that *enc_funcs* maps that label to.
      """
      buf.append('l')
      for element in obj:
          encoder = enc_funcs[_label_duck(element)]
          encoder(element, buf)
      buf.append('e')
  214.  
  215.  
  def _encode_dict(obj, buf, enc_funcs=_ENCODE_FUNCS_CACHE):
      """Append the encoding of the mapping *obj* to *buf* as ``d...e``.

      Every key is written as a length-prefixed string built from
      ``str(key)``; every value is labelled with ``_label_duck`` and
      encoded by the function that *enc_funcs* maps that label to.
      """
      buf.append('d')
      # Order entries by the string form that is actually emitted.  Sorting
      # the raw items uses native ordering, which puts non-string keys
      # (e.g. 5 before '10', or 2 before 10) out of encoded-string order,
      # and also compares values needlessly.
      ordered = sorted(obj.items(), key=lambda pair: str(pair[0]))
      for key, val in ordered:
          key_text = str(key)
          buf.extend([str(len(key_text)), ':', key_text])
          enc_funcs[_label_duck(val)](val, buf)
      buf.append('e')
  222.  
  223.  
  def _encode(obj, enc_funcs=_ENCODE_FUNCS_CACHE):
      """Bencode *obj*; this is the codec's encoder.

      The object is labelled with ``_label_duck`` and handed to the
      matching function in *enc_funcs*, which fills a list of pieces.
      Returns ``(encoded, len(encoded))``.
      """
      pieces = []
      encoder = enc_funcs[_label_duck(obj)]
      encoder(obj, pieces)
      encoded = ''.join(map(str, pieces))
      return (encoded, len(encoded))
  229.  
  230.  
  def encode(obj, enc_funcs=_ENCODE_FUNCS_CACHE, enc=_encode):
      """Encode *obj* and return the bencoded string.

      Calls *enc* and returns only the encoded text from its result.
      """
      result = enc(obj, enc_funcs)
      return result[0]
  234.  
  235.  
  # Hook the search function into codecs, then point every short alias at
  # the codec's name in the encodings.aliases table.
  _codecs.register(_hunnyb_search_func)
  for __a in HB_ALIASES:
      _aliases.aliases[__a] = HUNNYB_ENC_NAME
  240.  
  # Decoder dispatch: first character of an encoded value -> decoding function.
  _DECODE_FUNCS_CACHE.update({
      INT_BEGIN: _decode_int,
      LIST_BEGIN: _decode_list,
      DICT_BEGIN: _decode_dict,
  })
  # Any digit starts a string's length prefix, so all ten map to _decode_string.
  _DECODE_FUNCS_CACHE.update(dict.fromkeys(
      (STR_BEGIN0, STR_BEGIN1, STR_BEGIN2, STR_BEGIN3, STR_BEGIN4,
       STR_BEGIN5, STR_BEGIN6, STR_BEGIN7, STR_BEGIN8, STR_BEGIN9),
      _decode_string))
  # Encoder dispatch: object-kind label -> encoding function.
  _ENCODE_FUNCS_CACHE.update({
      INT: _encode_int,
      BOO: _encode_bool,
      LNG: _encode_int,
      STR: _encode_string,
      LST: _encode_list,
      TUP: _encode_list,
      DCT: _encode_dict,
  })
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement