Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Re: TextIOWrapper callable encoding parameter
- thread on python-ideas@python.org.
- This is a diff to _pyio.py in the Python-3.2.3 standard library.
- It is in no way supposed to be a serious patch.
- These were the minimal changes I could make in order to
- see whether my suggestion to allow a callable encoding parameter
- in TextIOWrapper was feasible, and to allow some timing tests.
- I am quite sure it will not pass Python's test suite.
- It does, I hope, give some idea of the nature and scale of the
- code changes needed to implement a callable encoding parameter.
- --------------------------------
- --- /usr/lib/python3.2/_pyio.py 2012-04-13 18:26:04.000000000 -0600
- +++ _pyio.py 2012-06-10 12:16:58.745853794 -0600
- @@ -2,7 +2,7 @@
- Python implementation of the io module.
- """
- -import os
- +import os, pdb
- import abc
- import codecs
- import warnings
- @@ -152,7 +152,7 @@
- raise TypeError("invalid mode: %r" % mode)
- if not isinstance(buffering, int):
- raise TypeError("invalid buffering: %r" % buffering)
- - if encoding is not None and not isinstance(encoding, str):
- + if encoding is not None and not isinstance(encoding, str) and not callable(encoding):
- raise TypeError("invalid encoding: %r" % encoding)
- if errors is not None and not isinstance(errors, str):
- raise TypeError("invalid errors: %r" % errors)
- @@ -1490,7 +1490,7 @@
- else:
- encoding = locale.getpreferredencoding()
- - if not isinstance(encoding, str):
- + if not isinstance(encoding, str) and not callable (encoding):
- raise ValueError("invalid encoding: %r" % encoding)
- if errors is None:
- @@ -1501,7 +1501,12 @@
- self._buffer = buffer
- self._line_buffering = line_buffering
- - self._encoding = encoding
- + if callable (encoding):
- + self._decoding_hook = encoding
- + self._encoding = None
- + else:
- + self._decoding_hook = None
- + self._encoding = encoding
- self._errors = errors
- self._readuniversal = not newline
- self._readtranslate = newline is None
- @@ -1668,8 +1673,21 @@
- # some of it may remain buffered in the decoder, yet to be
- # converted.
- - if self._decoder is None:
- - raise ValueError("no decoder")
- +# if self._decoder is None:
- +# raise ValueError("no decoder")
- +
- + # Read a chunk.
- + if self._has_read1:
- + input_chunk = self.buffer.read1(self._CHUNK_SIZE)
- + else:
- + input_chunk = self.buffer.read(self._CHUNK_SIZE)
- + eof = not input_chunk
- +
- + # If no encoding known yet, call the decoding hook to get it.
- + if not self._encoding:
- + self._encoding = self._decoding_hook (input_chunk, self._buffer)
- + if not self._decoder: self._decoder = self._get_decoder()
- + if not self._decoder: raise ValueError("no decoder")
- if self._telling:
- # To prepare for tell(), we need to snapshot a point in the
- @@ -1679,12 +1697,7 @@
- # Given this, we know there was a valid snapshot point
- # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
- - # Read a chunk, decode it, and put the result in self._decoded_chars.
- - if self._has_read1:
- - input_chunk = self.buffer.read1(self._CHUNK_SIZE)
- - else:
- - input_chunk = self.buffer.read(self._CHUNK_SIZE)
- - eof = not input_chunk
- + # Decode the data and put the result in self._decoded_chars
- self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
- if self._telling:
- @@ -1864,15 +1877,20 @@
- self._checkReadable()
- if n is None:
- n = -1
- - decoder = self._decoder or self._get_decoder()
- + if self._encoding:
- + decoder = self._decoder or self._get_decoder()
- try:
- n.__index__
- except AttributeError as err:
- raise TypeError("an integer is required") from err
- if n < 0:
- # Read everything.
- + data = self.buffer.read()
- + if not self._encoding:
- + self._encoding = self._decoding_hook (data, self._buffer)
- + decoder = self._get_decoder()
- result = (self._get_decoded_chars() +
- - decoder.decode(self.buffer.read(), final=True))
- + decoder.decode(data, final=True))
- self._set_decoded_chars('')
- self._snapshot = None
- return result
- @@ -1907,8 +1925,9 @@
- start = 0
- # Make the decoder if it doesn't already exist.
- - if not self._decoder:
- - self._get_decoder()
- + # [but no need because read_chunk will do it.]
- + #if not self._decoder:
- + # self._get_decoder()
- pos = endpos = None
- while True:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement