Want more features on Pastebin? Sign Up, it's FREE!
Guest

TextIOWrapper callable encoding parameter: pyio diff

By: a guest on Jun 13th, 2012  |  syntax: None  |  size: 4.93 KB  |  views: 21  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Re: TextIOWrapper callable encoding parameter
  2. thread on python-ideas@python.org.
  3.  
  4. This is a diff to _pyio.py in the Python-3.2.3 standard library.
  5.  
  6. It is in no way supposed to be a serious patch.
  7.  
  8. These were the minimal changes I could make in order to
  9. see whether my suggestion to allow a callable encoding parameter
  10. in TextIOWrapper was feasible, and to allow some timing tests.
  11.  
  12. I am quite sure it will not pass Python's tests.
  13.  
  14. It does, I hope, give some idea of the nature and scale of the
  15. code changes needed to implement a callable encoding parameter.
  16.  
  17. --------------------------------
  18.  
  19. --- /usr/lib/python3.2/_pyio.py 2012-04-13 18:26:04.000000000 -0600
  20. +++ _pyio.py    2012-06-10 12:16:58.745853794 -0600
  21. @@ -2,7 +2,7 @@
  22.  Python implementation of the io module.
  23.  """
  24.  
  25. -import os
  26. +import os, pdb
  27.  import abc
  28.  import codecs
  29.  import warnings
  30. @@ -152,7 +152,7 @@
  31.          raise TypeError("invalid mode: %r" % mode)
  32.      if not isinstance(buffering, int):
  33.          raise TypeError("invalid buffering: %r" % buffering)
  34. -    if encoding is not None and not isinstance(encoding, str):
  35. +    if encoding is not None and not isinstance(encoding, str) and not callable(encoding):
  36.          raise TypeError("invalid encoding: %r" % encoding)
  37.      if errors is not None and not isinstance(errors, str):
  38.          raise TypeError("invalid errors: %r" % errors)
  39. @@ -1490,7 +1490,7 @@
  40.                  else:
  41.                      encoding = locale.getpreferredencoding()
  42.  
  43. -        if not isinstance(encoding, str):
  44. +        if not isinstance(encoding, str) and not callable (encoding):
  45.              raise ValueError("invalid encoding: %r" % encoding)
  46.  
  47.          if errors is None:
  48. @@ -1501,7 +1501,12 @@
  49.  
  50.          self._buffer = buffer
  51.          self._line_buffering = line_buffering
  52. -        self._encoding = encoding
  53. +        if callable (encoding):
  54. +            self._decoding_hook = encoding
  55. +            self._encoding = None
  56. +        else:
  57. +            self._decoding_hook = None
  58. +            self._encoding = encoding
  59.          self._errors = errors
  60.          self._readuniversal = not newline
  61.          self._readtranslate = newline is None
  62. @@ -1668,8 +1673,21 @@
  63.          # some of it may remain buffered in the decoder, yet to be
  64.          # converted.
  65.  
  66. -        if self._decoder is None:
  67. -            raise ValueError("no decoder")
  68. +#        if self._decoder is None:
  69. +#            raise ValueError("no decoder")
  70. +
  71. +        # Read a chunk.
  72. +        if self._has_read1:
  73. +            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
  74. +        else:
  75. +            input_chunk = self.buffer.read(self._CHUNK_SIZE)
  76. +        eof = not input_chunk
  77. +
  78. +        # If no encoding known yet, call the decoding hook to get it.
  79. +        if not self._encoding:
  80. +            self._encoding = self._decoding_hook (input_chunk, self._buffer)
  81. +        if not self._decoder: self._decoder = self._get_decoder()
  82. +        if not self._decoder: raise ValueError("no decoder")
  83.  
  84.          if self._telling:
  85.              # To prepare for tell(), we need to snapshot a point in the
  86. @@ -1679,12 +1697,7 @@
  87.              # Given this, we know there was a valid snapshot point
  88.              # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
  89.  
  90. -        # Read a chunk, decode it, and put the result in self._decoded_chars.
  91. -        if self._has_read1:
  92. -            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
  93. -        else:
  94. -            input_chunk = self.buffer.read(self._CHUNK_SIZE)
  95. -        eof = not input_chunk
  96. +        # Decode the data and put the result in self._decoded_chars
  97.          self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
  98.  
  99.          if self._telling:
  100. @@ -1864,15 +1877,20 @@
  101.          self._checkReadable()
  102.          if n is None:
  103.              n = -1
  104. -        decoder = self._decoder or self._get_decoder()
  105. +        if self._encoding:
  106. +            decoder = self._decoder or self._get_decoder()
  107.          try:
  108.              n.__index__
  109.          except AttributeError as err:
  110.              raise TypeError("an integer is required") from err
  111.          if n < 0:
  112.              # Read everything.
  113. +            data = self.buffer.read()
  114. +            if not self._encoding:
  115. +                self._encoding = self._decoding_hook (data, self._buffer)
  116. +                decoder = self._get_decoder()
  117.              result = (self._get_decoded_chars() +
  118. -                      decoder.decode(self.buffer.read(), final=True))
  119. +                      decoder.decode(data, final=True))
  120.              self._set_decoded_chars('')
  121.              self._snapshot = None
  122.              return result
  123. @@ -1907,8 +1925,9 @@
  124.  
  125.          start = 0
  126.          # Make the decoder if it doesn't already exist.
  127. -        if not self._decoder:
  128. -            self._get_decoder()
  129. +        # [but no need because read_chunk will do it.]
  130. +        #if not self._decoder:
  131. +        #    self._get_decoder()
  132.  
  133.          pos = endpos = None
  134.          while True:
clone this paste RAW Paste Data