Re: TextIOWrapper callable encoding parameter thread on python-ideas@python.org. This is a diff to _pyio.py in the Python-3.2.3 standard library. It is in no way supposed to be a serious patch. It was the minimal set of changes I could make in order to see if my suggestion to allow a callable encoding parameter in TextIOWrapper was feasible, and to allow some timing tests. I am quite sure it will not pass Python's test suite. It does, I hope, give some idea of the nature and scale of the code changes needed to implement a callable encoding parameter. -------------------------------- --- /usr/lib/python3.2/_pyio.py 2012-04-13 18:26:04.000000000 -0600 +++ _pyio.py 2012-06-10 12:16:58.745853794 -0600 @@ -2,7 +2,7 @@ Python implementation of the io module. """ -import os +import os, pdb import abc import codecs import warnings @@ -152,7 +152,7 @@ raise TypeError("invalid mode: %r" % mode) if not isinstance(buffering, int): raise TypeError("invalid buffering: %r" % buffering) - if encoding is not None and not isinstance(encoding, str): + if encoding is not None and not isinstance(encoding, str) and not callable(encoding): raise TypeError("invalid encoding: %r" % encoding) if errors is not None and not isinstance(errors, str): raise TypeError("invalid errors: %r" % errors) @@ -1490,7 +1490,7 @@ else: encoding = locale.getpreferredencoding() - if not isinstance(encoding, str): + if not isinstance(encoding, str) and not callable (encoding): raise ValueError("invalid encoding: %r" % encoding) if errors is None: @@ -1501,7 +1501,12 @@ self._buffer = buffer self._line_buffering = line_buffering - self._encoding = encoding + if callable (encoding): + self._decoding_hook = encoding + self._encoding = None + else: + self._decoding_hook = None + self._encoding = encoding self._errors = errors self._readuniversal = not newline self._readtranslate = newline is None @@ -1668,8 +1673,21 @@ # some of it may remain buffered in the decoder, yet to be # converted. 
- if self._decoder is None: - raise ValueError("no decoder") +# if self._decoder is None: +# raise ValueError("no decoder") + + # Read a chunk. + if self._has_read1: + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + else: + input_chunk = self.buffer.read(self._CHUNK_SIZE) + eof = not input_chunk + + # If no encoding known yet, call the decoding hook to get it. + if not self._encoding: + self._encoding = self._decoding_hook (input_chunk, self._buffer) + if not self._decoder: self._decoder = self._get_decoder() + if not self._decoder: raise ValueError("no decoder") if self._telling: # To prepare for tell(), we need to snapshot a point in the @@ -1679,12 +1697,7 @@ # Given this, we know there was a valid snapshot point # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). - # Read a chunk, decode it, and put the result in self._decoded_chars. - if self._has_read1: - input_chunk = self.buffer.read1(self._CHUNK_SIZE) - else: - input_chunk = self.buffer.read(self._CHUNK_SIZE) - eof = not input_chunk + # Decode the data and put the result in self._decoded_chars self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) if self._telling: @@ -1864,15 +1877,20 @@ self._checkReadable() if n is None: n = -1 - decoder = self._decoder or self._get_decoder() + if self._encoding: + decoder = self._decoder or self._get_decoder() try: n.__index__ except AttributeError as err: raise TypeError("an integer is required") from err if n < 0: # Read everything. + data = self.buffer.read() + if not self._encoding: + self._encoding = self._decoding_hook (data, self._buffer) + decoder = self._get_decoder() result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + decoder.decode(data, final=True)) self._set_decoded_chars('') self._snapshot = None return result @@ -1907,8 +1925,9 @@ start = 0 # Make the decoder if it doesn't already exist. - if not self._decoder: - self._get_decoder() + # [but no need because read_chunk will do it.] 
+ #if not self._decoder: + # self._get_decoder() pos = endpos = None while True: