Re: TextIOWrapper callable encoding parameter
thread on python-ideas@python.org.
This is a diff to _pyio.py in the Python-3.2.3 standard library.
It is in no way supposed to be a serious patch.
It contains the minimal changes I could make in order to
see if my suggestion to allow a callable encoding parameter
in TextIOWrapper was feasible, and to allow some timing tests.
I am quite sure it will not pass Python's tests.
It does, I hope, give some idea of the nature and scale of the
code changes needed to implement a callable encoding parameter.
--------------------------------
--- /usr/lib/python3.2/_pyio.py 2012-04-13 18:26:04.000000000 -0600
+++ _pyio.py 2012-06-10 12:16:58.745853794 -0600
@@ -2,7 +2,7 @@
Python implementation of the io module.
"""
-import os
+import os, pdb
import abc
import codecs
import warnings
@@ -152,7 +152,7 @@
raise TypeError("invalid mode: %r" % mode)
if not isinstance(buffering, int):
raise TypeError("invalid buffering: %r" % buffering)
- if encoding is not None and not isinstance(encoding, str):
+ if encoding is not None and not isinstance(encoding, str) and not callable(encoding):
raise TypeError("invalid encoding: %r" % encoding)
if errors is not None and not isinstance(errors, str):
raise TypeError("invalid errors: %r" % errors)
@@ -1490,7 +1490,7 @@
else:
encoding = locale.getpreferredencoding()
- if not isinstance(encoding, str):
+ if not isinstance(encoding, str) and not callable (encoding):
raise ValueError("invalid encoding: %r" % encoding)
if errors is None:
@@ -1501,7 +1501,12 @@
self._buffer = buffer
self._line_buffering = line_buffering
- self._encoding = encoding
+ if callable (encoding):
+ self._decoding_hook = encoding
+ self._encoding = None
+ else:
+ self._decoding_hook = None
+ self._encoding = encoding
self._errors = errors
self._readuniversal = not newline
self._readtranslate = newline is None
@@ -1668,8 +1673,21 @@
# some of it may remain buffered in the decoder, yet to be
# converted.
- if self._decoder is None:
- raise ValueError("no decoder")
+# if self._decoder is None:
+# raise ValueError("no decoder")
+
+ # Read a chunk.
+ if self._has_read1:
+ input_chunk = self.buffer.read1(self._CHUNK_SIZE)
+ else:
+ input_chunk = self.buffer.read(self._CHUNK_SIZE)
+ eof = not input_chunk
+
+ # If no encoding known yet, call the decoding hook to get it.
+ if not self._encoding:
+ self._encoding = self._decoding_hook (input_chunk, self._buffer)
+ if not self._decoder: self._decoder = self._get_decoder()
+ if not self._decoder: raise ValueError("no decoder")
if self._telling:
# To prepare for tell(), we need to snapshot a point in the
@@ -1679,12 +1697,7 @@
# Given this, we know there was a valid snapshot point
# len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
- # Read a chunk, decode it, and put the result in self._decoded_chars.
- if self._has_read1:
- input_chunk = self.buffer.read1(self._CHUNK_SIZE)
- else:
- input_chunk = self.buffer.read(self._CHUNK_SIZE)
- eof = not input_chunk
+ # Decode the data and put the result in self._decoded_chars
self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
if self._telling:
@@ -1864,15 +1877,20 @@
self._checkReadable()
if n is None:
n = -1
- decoder = self._decoder or self._get_decoder()
+ if self._encoding:
+ decoder = self._decoder or self._get_decoder()
try:
n.__index__
except AttributeError as err:
raise TypeError("an integer is required") from err
if n < 0:
# Read everything.
+ data = self.buffer.read()
+ if not self._encoding:
+ self._encoding = self._decoding_hook (data, self._buffer)
+ decoder = self._get_decoder()
result = (self._get_decoded_chars() +
- decoder.decode(self.buffer.read(), final=True))
+ decoder.decode(data, final=True))
self._set_decoded_chars('')
self._snapshot = None
return result
@@ -1907,8 +1925,9 @@
start = 0
# Make the decoder if it doesn't already exist.
- if not self._decoder:
- self._get_decoder()
+ # [but no need because read_chunk will do it.]
+ #if not self._decoder:
+ # self._get_decoder()
pos = endpos = None
while True: