Advertisement
Guest User

TextIOWrapper callable encoding parameter: pyio diff

a guest
Jun 13th, 2012
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.93 KB | None | 0 0
  1. Re: TextIOWrapper callable encoding parameter
  2. thread on python-ideas@python.org.
  3.  
  4. This is a diff to _pyio.py in the Python-3.2.3 standard library.
  5.  
  6. It is in no way supposed to be a serious patch.
  7.  
  8. These were the minimal changes I could make in order to
  9. see if my suggestion to allow a callable encoding parameter
  10. in TextIOWrapper was feasible, and to allow some timing tests.
  11.  
  12. I am quite sure it will not pass Python's tests.
  13.  
  14. It does, I hope, give some idea of the nature and scale of the
  15. code changes needed to implement a callable encoding parameter.
  16.  
  17. --------------------------------
  18.  
  19. --- /usr/lib/python3.2/_pyio.py 2012-04-13 18:26:04.000000000 -0600
  20. +++ _pyio.py 2012-06-10 12:16:58.745853794 -0600
  21. @@ -2,7 +2,7 @@
  22. Python implementation of the io module.
  23. """
  24.  
  25. -import os
  26. +import os, pdb
  27. import abc
  28. import codecs
  29. import warnings
  30. @@ -152,7 +152,7 @@
  31. raise TypeError("invalid mode: %r" % mode)
  32. if not isinstance(buffering, int):
  33. raise TypeError("invalid buffering: %r" % buffering)
  34. - if encoding is not None and not isinstance(encoding, str):
  35. + if encoding is not None and not isinstance(encoding, str) and not callable(encoding):
  36. raise TypeError("invalid encoding: %r" % encoding)
  37. if errors is not None and not isinstance(errors, str):
  38. raise TypeError("invalid errors: %r" % errors)
  39. @@ -1490,7 +1490,7 @@
  40. else:
  41. encoding = locale.getpreferredencoding()
  42.  
  43. - if not isinstance(encoding, str):
  44. + if not isinstance(encoding, str) and not callable (encoding):
  45. raise ValueError("invalid encoding: %r" % encoding)
  46.  
  47. if errors is None:
  48. @@ -1501,7 +1501,12 @@
  49.  
  50. self._buffer = buffer
  51. self._line_buffering = line_buffering
  52. - self._encoding = encoding
  53. + if callable (encoding):
  54. + self._decoding_hook = encoding
  55. + self._encoding = None
  56. + else:
  57. + self._decoding_hook = None
  58. + self._encoding = encoding
  59. self._errors = errors
  60. self._readuniversal = not newline
  61. self._readtranslate = newline is None
  62. @@ -1668,8 +1673,21 @@
  63. # some of it may remain buffered in the decoder, yet to be
  64. # converted.
  65.  
  66. - if self._decoder is None:
  67. - raise ValueError("no decoder")
  68. +# if self._decoder is None:
  69. +# raise ValueError("no decoder")
  70. +
  71. + # Read a chunk.
  72. + if self._has_read1:
  73. + input_chunk = self.buffer.read1(self._CHUNK_SIZE)
  74. + else:
  75. + input_chunk = self.buffer.read(self._CHUNK_SIZE)
  76. + eof = not input_chunk
  77. +
  78. + # If no encoding known yet, call the decoding hook to get it.
  79. + if not self._encoding:
  80. + self._encoding = self._decoding_hook (input_chunk, self._buffer)
  81. + if not self._decoder: self._decoder = self._get_decoder()
  82. + if not self._decoder: raise ValueError("no decoder")
  83.  
  84. if self._telling:
  85. # To prepare for tell(), we need to snapshot a point in the
  86. @@ -1679,12 +1697,7 @@
  87. # Given this, we know there was a valid snapshot point
  88. # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
  89.  
  90. - # Read a chunk, decode it, and put the result in self._decoded_chars.
  91. - if self._has_read1:
  92. - input_chunk = self.buffer.read1(self._CHUNK_SIZE)
  93. - else:
  94. - input_chunk = self.buffer.read(self._CHUNK_SIZE)
  95. - eof = not input_chunk
  96. + # Decode the data and put the result in self._decoded_chars
  97. self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
  98.  
  99. if self._telling:
  100. @@ -1864,15 +1877,20 @@
  101. self._checkReadable()
  102. if n is None:
  103. n = -1
  104. - decoder = self._decoder or self._get_decoder()
  105. + if self._encoding:
  106. + decoder = self._decoder or self._get_decoder()
  107. try:
  108. n.__index__
  109. except AttributeError as err:
  110. raise TypeError("an integer is required") from err
  111. if n < 0:
  112. # Read everything.
  113. + data = self.buffer.read()
  114. + if not self._encoding:
  115. + self._encoding = self._decoding_hook (data, self._buffer)
  116. + decoder = self._get_decoder()
  117. result = (self._get_decoded_chars() +
  118. - decoder.decode(self.buffer.read(), final=True))
  119. + decoder.decode(data, final=True))
  120. self._set_decoded_chars('')
  121. self._snapshot = None
  122. return result
  123. @@ -1907,8 +1925,9 @@
  124.  
  125. start = 0
  126. # Make the decoder if it doesn't already exist.
  127. - if not self._decoder:
  128. - self._get_decoder()
  129. + # [but no need because read_chunk will do it.]
  130. + #if not self._decoder:
  131. + # self._get_decoder()
  132.  
  133. pos = endpos = None
  134. while True:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement