Guest User

Untitled

a guest
May 26th, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 39.98 KB | None | 0 0
  1. Index: encoding.c
  2. ===================================================================
  3. --- encoding.c (revision 4140)
  4. +++ encoding.c (working copy)
  5. @@ -146,6 +146,32 @@ mr_enc_dummy_p(VALUE self, SEL sel)
  6. return Qfalse;
  7. }
  8.  
  9. +// Returns the default replacement string: U+FFFD (\uFFFD) for UTF-8, UTF-16 and UTF-32, and '?' for every other encoding.
  10. +rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
  11. +{
  12. + rb_str_t *replacement_str = NULL;
  13. + if (destination == rb_encodings[ENCODING_UTF16BE]) {
  14. + replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
  15. + }
  16. + else if (destination == rb_encodings[ENCODING_UTF32BE]) {
  17. + replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
  18. + }
  19. + else if (destination == rb_encodings[ENCODING_UTF16LE]) {
  20. + replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
  21. + }
  22. + else if (destination == rb_encodings[ENCODING_UTF32LE]) {
  23. + replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
  24. + }
  25. + else if (destination == rb_encodings[ENCODING_UTF8]) {
  26. + replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
  27. + }
  28. + else {
  29. + replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
  30. + replacement_str = str_simple_transcode(replacement_str, destination);
  31. + }
  32. + return replacement_str;
  33. +}
  34. +
  35. static void
  36. define_encoding_constant(const char *name, rb_encoding_t *encoding)
  37. {
  38. @@ -291,6 +317,7 @@ Init_PreEncoding(void)
  39. add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
  40. // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
  41. add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
  42. + add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
  43. //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
  44. //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
  45. //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
  46. Index: encoding.h
  47. ===================================================================
  48. --- encoding.h (revision 4140)
  49. +++ encoding.h (working copy)
  50. @@ -148,7 +148,7 @@ enum {
  51. ENCODING_MACCYRILLIC,
  52. ENCODING_BIG5,
  53. ENCODING_EUCJP,
  54. - //ENCODING_SJIS,
  55. + ENCODING_SJIS,
  56. //ENCODING_CP932,
  57.  
  58. ENCODINGS_COUNT
  59. @@ -293,6 +293,40 @@ str_set_valid_encoding(rb_str_t *self, bool status)
  60. STRING_VALID_ENCODING);
  61. }
  62.  
  63. +typedef enum {
  64. + TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
  65. + TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING,
  66. + TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT,
  67. + TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR
  68. +} transcode_behavior_t;
  69. +
  70. +typedef enum {
  71. + ECONV_INVALID_MASK = 1,
  72. + ECONV_INVALID_REPLACE = 1 << 1,
  73. + ECONV_UNDEF_MASK = 1 << 2,
  74. + ECONV_UNDEF_REPLACE = 1 << 3,
  75. + ECONV_UNDEF_HEX_CHARREF = 1 << 4,
  76. + ECONV_PARTIAL_INPUT = 1 << 5,
  77. + ECONV_AFTER_OUTPUT = 1 << 6,
  78. + ECONV_UNIVERSAL_NEWLINE_DECORATOR = 1 << 7,
  79. + ECONV_CRLF_NEWLINE_DECORATOR = 1 << 8,
  80. + ECONV_CR_NEWLINE_DECORATOR = 1 << 9,
  81. + ECONV_XML_TEXT_DECORATOR = 1 << 10,
  82. + ECONV_XML_ATTR_CONTENT_DECORATOR = 1 << 11,
  83. + ECONV_XML_ATTR_QUOTE_DECORATOR = 1 << 12
  84. +} transcode_flags_t;
  85. +
  86. +rb_str_t *str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
  87. + int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
  88. +
  89. +static inline rb_str_t *
  90. +str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
  91. +{
  92. + return str_transcode(self, self->encoding, dst_encoding,
  93. + TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
  94. +}
  95. +
  96. +
  97. void rb_str_NSCoder_encode(void *coder, VALUE str, const char *key);
  98. VALUE rb_str_NSCoder_decode(void *coder, const char *key);
  99.  
  100. @@ -319,6 +353,10 @@ unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
  101. long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
  102. long *end_offset);
  103. void rb_str_force_encoding(VALUE str, rb_encoding_t *encoding);
  104. +rb_str_t *str_need_string(VALUE str);
  105. +rb_str_t *replacement_string_for_encoding(rb_encoding_t* enc);
  106. +void str_replace_with_string(rb_str_t *self, rb_str_t *source);
  107. +
  108.  
  109. #if defined(__cplusplus)
  110. } // extern "C"
  111. Index: inits.c
  112. ===================================================================
  113. --- inits.c (revision 4140)
  114. +++ inits.c (working copy)
  115. @@ -58,6 +58,7 @@ void Init_ObjC(void);
  116. void Init_BridgeSupport(void);
  117. void Init_FFI(void);
  118. void Init_Dispatch(void);
  119. +void Init_Transcode(void);
  120. void Init_PostVM(void);
  121.  
  122. void
  123. @@ -110,5 +111,6 @@ rb_call_inits()
  124. Init_BridgeSupport();
  125. Init_FFI();
  126. Init_Dispatch();
  127. + Init_Transcode();
  128. Init_PostVM();
  129. }
  130. Index: rakelib/builder/builder.rb
  131. ===================================================================
  132. --- rakelib/builder/builder.rb (revision 4140)
  133. +++ rakelib/builder/builder.rb (working copy)
  134. @@ -6,7 +6,7 @@ OBJS = %w{
  135. random range rational re ruby signal sprintf st string struct time
  136. util variable version thread id objc bs ucnv encoding main dln dmyext marshal
  137. gcd vm_eval gc-stub bridgesupport compiler dispatcher vm symbol debugger MacRuby
  138. - MacRubyDebuggerConnector NSArray NSDictionary NSString
  139. + MacRubyDebuggerConnector NSArray NSDictionary NSString transcode
  140. }
  141.  
  142. EXTENSIONS = %w{
  143. Index: spec/frozen/tags/macruby/core/encoding/converter/asciicompat_encoding_tags.txt
  144. ===================================================================
  145. --- spec/frozen/tags/macruby/core/encoding/converter/asciicompat_encoding_tags.txt (revision 4140)
  146. +++ spec/frozen/tags/macruby/core/encoding/converter/asciicompat_encoding_tags.txt (working copy)
  147. @@ -1,7 +1,4 @@
  148. -fails:Encoding::Converter.asciicompat_encoding accepts an encoding name as a String argument
  149. fails:Encoding::Converter.asciicompat_encoding coerces non-String/Encoding objects with #to_str
  150. fails:Encoding::Converter.asciicompat_encoding accepts an Encoding object as an argument
  151. fails:Encoding::Converter.asciicompat_encoding returns a corresponding ASCII compatible encoding for ASCII-incompatible encodings
  152. -fails:Encoding::Converter.asciicompat_encoding returns nil when the given encoding is ASCII compatible
  153. fails:Encoding::Converter.asciicompat_encoding handles encoding names who resolve to nil encodings
  154. -fails:Encoding::Converter.asciicompat_encoding returns nil if called with an encoding it returned previously
  155. Index: spec/frozen/tags/macruby/core/encoding/converter/constants_tags.txt
  156. deleted file mode 100644
  157. ===================================================================
  158. --- spec/frozen/tags/macruby/core/encoding/converter/constants_tags.txt (revision 4140)
  159. +++ /dev/null (working copy)
  160. @@ -1,26 +0,0 @@
  161. -fails:Encoding::Converter::INVALID_MASK exists
  162. -fails:Encoding::Converter::INVALID_MASK has a Fixnum value
  163. -fails:Encoding::Converter::INVALID_REPLACE exists
  164. -fails:Encoding::Converter::INVALID_REPLACE has a Fixnum value
  165. -fails:Encoding::Converter::UNDEF_MASK exists
  166. -fails:Encoding::Converter::UNDEF_MASK has a Fixnum value
  167. -fails:Encoding::Converter::UNDEF_REPLACE exists
  168. -fails:Encoding::Converter::UNDEF_REPLACE has a Fixnum value
  169. -fails:Encoding::Converter::UNDEF_HEX_CHARREF exists
  170. -fails:Encoding::Converter::UNDEF_HEX_CHARREF has a Fixnum value
  171. -fails:Encoding::Converter::PARTIAL_INPUT exists
  172. -fails:Encoding::Converter::PARTIAL_INPUT has a Fixnum value
  173. -fails:Encoding::Converter::AFTER_OUTPUT exists
  174. -fails:Encoding::Converter::AFTER_OUTPUT has a Fixnum value
  175. -fails:Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR exists
  176. -fails:Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR has a Fixnum value
  177. -fails:Encoding::Converter::CRLF_NEWLINE_DECORATOR exists
  178. -fails:Encoding::Converter::CRLF_NEWLINE_DECORATOR has a Fixnum value
  179. -fails:Encoding::Converter::CR_NEWLINE_DECORATOR exists
  180. -fails:Encoding::Converter::CR_NEWLINE_DECORATOR has a Fixnum value
  181. -fails:Encoding::Converter::XML_TEXT_DECORATOR exists
  182. -fails:Encoding::Converter::XML_TEXT_DECORATOR has a Fixnum value
  183. -fails:Encoding::Converter::XML_ATTR_CONTENT_DECORATOR exists
  184. -fails:Encoding::Converter::XML_ATTR_CONTENT_DECORATOR has a Fixnum value
  185. -fails:Encoding::Converter::XML_ATTR_QUOTE_DECORATOR exists
  186. -fails:Encoding::Converter::XML_ATTR_QUOTE_DECORATOR has a Fixnum value
  187. Index: spec/frozen/tags/macruby/core/encoding/converter/convert_tags.txt
  188. ===================================================================
  189. --- spec/frozen/tags/macruby/core/encoding/converter/convert_tags.txt (revision 4140)
  190. +++ spec/frozen/tags/macruby/core/encoding/converter/convert_tags.txt (working copy)
  191. @@ -1,7 +1,2 @@
  192. -fails:Encoding::Converter#convert returns a String
  193. -fails:Encoding::Converter#convert sets the encoding of the result to the target encoding
  194. -fails:Encoding::Converter#convert transcodes the given String to the target encoding
  195. fails:Encoding::Converter#convert allows Strings of different encodings to the source encoding
  196. -fails:Encoding::Converter#convert reuses the given encoding pair if called multiple times
  197. -fails:Encoding::Converter#convert raises UndefinedConversionError if the String contains characters invalid for the target encoding
  198. -fails:Encoding::Converter#convert raises an ArgumentError if called on a finished stream
  199. +
  200. Index: spec/frozen/tags/macruby/core/encoding/converter/convpath_tags.txt
  201. ===================================================================
  202. --- spec/frozen/tags/macruby/core/encoding/converter/convpath_tags.txt (revision 4140)
  203. +++ spec/frozen/tags/macruby/core/encoding/converter/convpath_tags.txt (working copy)
  204. @@ -1,7 +1,2 @@
  205. -fails:Encoding::Converter#convpath returns an Array
  206. -fails:Encoding::Converter#convpath returns each encoding pair as a sub-Array
  207. -fails:Encoding::Converter#convpath returns each encoding as an Encoding object
  208. fails:Encoding::Converter#convpath returns multiple encoding pairs when direct conversion is impossible
  209. -fails:Encoding::Converter#convpath sets the last element of each pair to the first element of the next
  210. -fails:Encoding::Converter#convpath only lists a source encoding once
  211. fails:Encoding::Converter#convpath indicates if crlf_newline conversion would occur
  212. Index: spec/frozen/tags/macruby/core/encoding/converter/destination_encoding_tags.txt
  213. ===================================================================
  214. --- spec/frozen/tags/macruby/core/encoding/converter/destination_encoding_tags.txt (revision 4140)
  215. +++ spec/frozen/tags/macruby/core/encoding/converter/destination_encoding_tags.txt (working copy)
  216. @@ -1 +1 @@
  217. -fails:Encoding::Converter#destination_encoding returns the destination encoding as an Encoding object
  218. +
  219. Index: spec/frozen/tags/macruby/core/encoding/converter/replacement_tags.txt
  220. ===================================================================
  221. --- spec/frozen/tags/macruby/core/encoding/converter/replacement_tags.txt (revision 4140)
  222. +++ spec/frozen/tags/macruby/core/encoding/converter/replacement_tags.txt (working copy)
  223. @@ -1,8 +1,3 @@
  224. fails:Encoding::Converter#replacement returns '?' in US-ASCII when the destination encoding is not UTF-8
  225. -fails:Encoding::Converter#replacement returns � when the destination encoding is UTF-8
  226. -fails:Encoding::Converter#replacement= accepts a String argument
  227. -fails:Encoding::Converter#replacement= accepts a String argument of arbitrary length
  228. -fails:Encoding::Converter#replacement= raises an TypeError if assigned a non-String argument
  229. -fails:Encoding::Converter#replacement= sets #replacement
  230. fails:Encoding::Converter#replacement= raises an UndefinedConversionError is the argument cannot be converted into the destination encoding
  231. fails:Encoding::Converter#replacement= does not change the replacement character if the argument cannot be converted into the destination encoding
  232. Index: spec/frozen/tags/macruby/core/encoding/converter/search_convpath_tags.txt
  233. ===================================================================
  234. --- spec/frozen/tags/macruby/core/encoding/converter/search_convpath_tags.txt (revision 4140)
  235. +++ spec/frozen/tags/macruby/core/encoding/converter/search_convpath_tags.txt (working copy)
  236. @@ -1,8 +1,3 @@
  237. -fails:Encoding::Converter.search_convpath returns an Array
  238. -fails:Encoding::Converter.search_convpath returns each encoding pair as a sub-Array
  239. -fails:Encoding::Converter.search_convpath returns each encoding as an Encoding object
  240. fails:Encoding::Converter.search_convpath returns multiple encoding pairs when direct conversion is impossible
  241. -fails:Encoding::Converter.search_convpath sets the last element of each pair to the first element of the next
  242. -fails:Encoding::Converter.search_convpath only lists a source encoding once
  243. fails:Encoding::Converter.search_convpath indicates if crlf_newline conversion would occur
  244. fails:Encoding::Converter.search_convpath raises an Encoding::ConverterNotFoundError if no conversion path exists
  245. Index: spec/frozen/tags/macruby/core/encoding/converter/source_encoding_tags.txt
  246. ===================================================================
  247. --- spec/frozen/tags/macruby/core/encoding/converter/source_encoding_tags.txt (revision 4140)
  248. +++ spec/frozen/tags/macruby/core/encoding/converter/source_encoding_tags.txt (working copy)
  249. @@ -1 +1 @@
  250. -fails:Encoding::Converter#source_encoding returns the source encoding as an Encoding object
  251. +
  252. Index: string.c
  253. ===================================================================
  254. --- string.c (revision 4140)
  255. +++ string.c (working copy)
  256. @@ -251,7 +251,7 @@ str_replace_with_bytes(rb_str_t *self, const char *bytes, long len,
  257. }
  258. }
  259.  
  260. -static void
  261. +void
  262. str_replace_with_string(rb_str_t *self, rb_str_t *source)
  263. {
  264. if (self == source) {
  265. @@ -1118,7 +1118,7 @@ str_include_string(rb_str_t *self, rb_str_t *searched)
  266. self->length_in_bytes, true) != -1;
  267. }
  268.  
  269. -static rb_str_t *
  270. +rb_str_t *
  271. str_need_string(VALUE str)
  272. {
  273. switch (TYPE(str)) {
  274. @@ -1247,24 +1247,6 @@ rstr_append(VALUE str, VALUE substr)
  275. }
  276. }
  277.  
  278. -enum {
  279. - TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
  280. - TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING,
  281. - TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT,
  282. - TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR
  283. -};
  284. -
  285. -
  286. -static rb_str_t *
  287. -str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
  288. - int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
  289. -static inline rb_str_t *
  290. -str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
  291. -{
  292. - return str_transcode(self, self->encoding, dst_encoding,
  293. - TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
  294. -}
  295. -
  296. static void inline
  297. str_concat_ascii_cstr(rb_str_t *self, char *cstr)
  298. {
  299. @@ -1280,7 +1262,7 @@ str_concat_ascii_cstr(rb_str_t *self, char *cstr)
  300. }
  301. }
  302.  
  303. -static rb_str_t *
  304. +rb_str_t *
  305. str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
  306. int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str)
  307. {
  308. @@ -1844,165 +1826,6 @@ rstr_is_ascii_only(VALUE self, SEL sel)
  309. return str_is_ruby_ascii_only(RSTR(self)) ? Qtrue : Qfalse;
  310. }
  311.  
  312. -/*
  313. - * call-seq:
  314. - * str.encode(encoding [, options] ) => str
  315. - * str.encode(dst_encoding, src_encoding [, options] ) => str
  316. - * str.encode([options]) => str
  317. - *
  318. - * The first form returns a copy of <i>str</i> transcoded
  319. - * to encoding +encoding+.
  320. - * The second form returns a copy of <i>str</i> transcoded
  321. - * from src_encoding to dst_encoding.
  322. - * The last form returns a copy of <i>str</i> transcoded to
  323. - * <code>Encoding.default_internal</code>.
  324. - * By default, the first and second form raise
  325. - * Encoding::UndefinedConversionError for characters that are
  326. - * undefined in the destination encoding, and
  327. - * Encoding::InvalidByteSequenceError for invalid byte sequences
  328. - * in the source encoding. The last form by default does not raise
  329. - * exceptions but uses replacement strings.
  330. - * The <code>options</code> Hash gives details for conversion.
  331. - *
  332. - * === options
  333. - * The hash <code>options</code> can have the following keys:
  334. - * :invalid ::
  335. - * If the value is <code>:replace</code>, <code>#encode</code> replaces
  336. - * invalid byte sequences in <code>str</code> with the replacement character.
  337. - * The default is to raise the exception
  338. - * :undef ::
  339. - * If the value is <code>:replace</code>, <code>#encode</code> replaces
  340. - * characters which are undefined in the destination encoding with
  341. - * the replacement character.
  342. - * :replace ::
  343. - * Sets the replacement string to the value. The default replacement
  344. - * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
  345. - * :xml ::
  346. - * The value must be <code>:text</code> or <code>:attr</code>.
  347. - * If the value is <code>:text</code> <code>#encode</code> replaces
  348. - * undefined characters with their (upper-case hexadecimal) numeric
  349. - * character references. '&', '<', and '>' are converted to "&amp;",
  350. - * "&lt;", and "&gt;", respectively.
  351. - * If the value is <code>:attr</code>, <code>#encode</code> also quotes
  352. - * the replacement result (using '"'), and replaces '"' with "&quot;".
  353. - */
  354. -extern rb_encoding_t *default_internal;
  355. -static VALUE
  356. -rstr_encode(VALUE str, SEL sel, int argc, VALUE *argv)
  357. -{
  358. - VALUE opt = Qnil;
  359. - if (argc > 0) {
  360. - opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
  361. - if (!NIL_P(opt)) {
  362. - argc--;
  363. - }
  364. - }
  365. -
  366. - rb_str_t *self = RSTR(str);
  367. - rb_str_t *replacement_str = NULL;
  368. - rb_encoding_t *src_encoding, *dst_encoding;
  369. - int behavior_for_invalid = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  370. - int behavior_for_undefined = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  371. - if (argc == 0) {
  372. - src_encoding = self->encoding;
  373. - dst_encoding = default_internal;
  374. - behavior_for_invalid = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  375. - behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  376. - }
  377. - else if (argc == 1) {
  378. - src_encoding = self->encoding;
  379. - dst_encoding = rb_to_encoding(argv[0]);
  380. - }
  381. - else if (argc == 2) {
  382. - dst_encoding = rb_to_encoding(argv[0]);
  383. - src_encoding = rb_to_encoding(argv[1]);
  384. - }
  385. - else {
  386. - rb_raise(rb_eArgError, "wrong number of arguments (%d for 0..2)", argc);
  387. - }
  388. -
  389. - if (!NIL_P(opt)) {
  390. - VALUE invalid_val = rb_hash_aref(opt, ID2SYM(rb_intern("invalid")));
  391. - VALUE replace_sym = ID2SYM(rb_intern("replace"));
  392. - if (invalid_val == replace_sym) {
  393. - behavior_for_invalid = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  394. - }
  395. - VALUE undefined_val = rb_hash_aref(opt, ID2SYM(rb_intern("undefined")));
  396. - if (undefined_val == replace_sym) {
  397. - behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  398. - }
  399. - VALUE xml_val = rb_hash_aref(opt, ID2SYM(rb_intern("xml")));
  400. - if (xml_val == ID2SYM(rb_intern("text"))) {
  401. - behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT;
  402. - }
  403. - else if (xml_val == ID2SYM(rb_intern("attr"))) {
  404. - behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR;
  405. - }
  406. -
  407. - VALUE replacement = rb_hash_aref(opt, replace_sym);
  408. - if (!NIL_P(replacement)) {
  409. - replacement_str = str_need_string(replacement);
  410. - if ((replacement_str->encoding != dst_encoding) && (replacement_str->length_in_bytes > 0)) {
  411. - replacement_str = str_simple_transcode(replacement_str, dst_encoding);
  412. - }
  413. - if ((behavior_for_invalid != TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
  414. - && (behavior_for_undefined == TRANSCODE_BEHAVIOR_RAISE_EXCEPTION)) {
  415. - behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  416. - }
  417. - }
  418. - }
  419. -
  420. - if ((replacement_str == NULL)
  421. - && ((behavior_for_invalid == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
  422. - || (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING))) {
  423. - if (dst_encoding == rb_encodings[ENCODING_UTF16BE]) {
  424. - replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, dst_encoding));
  425. - }
  426. - else if (dst_encoding == rb_encodings[ENCODING_UTF32BE]) {
  427. - replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, dst_encoding));
  428. - }
  429. - else if (dst_encoding == rb_encodings[ENCODING_UTF16LE]) {
  430. - replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, dst_encoding));
  431. - }
  432. - else if (dst_encoding == rb_encodings[ENCODING_UTF32LE]) {
  433. - replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, dst_encoding));
  434. - }
  435. - else if (dst_encoding == rb_encodings[ENCODING_UTF8]) {
  436. - replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, dst_encoding));
  437. - }
  438. - else {
  439. - replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
  440. - replacement_str = str_simple_transcode(replacement_str, dst_encoding);
  441. - }
  442. - }
  443. -
  444. - return (VALUE)str_transcode(self, src_encoding, dst_encoding,
  445. - behavior_for_invalid, behavior_for_undefined, replacement_str);
  446. -}
  447. -
  448. -/*
  449. - * call-seq:
  450. - * str.encode!(encoding [, options] ) => str
  451. - * str.encode!(dst_encoding, src_encoding [, options] ) => str
  452. - *
  453. - * The first form transcodes the contents of <i>str</i> from
  454. - * str.encoding to +encoding+.
  455. - * The second form transcodes the contents of <i>str</i> from
  456. - * src_encoding to dst_encoding.
  457. - * The options Hash gives details for conversion. See String#encode
  458. - * for details.
  459. - * Returns the string even if no changes were made.
  460. - */
  461. -static VALUE
  462. -rstr_encode_bang(VALUE str, SEL sel, int argc, VALUE *argv)
  463. -{
  464. - rstr_modify(str);
  465. -
  466. - VALUE new_str = rstr_encode(str, sel, argc, argv);
  467. - str_replace_with_string(RSTR(str), RSTR(new_str));
  468. - return str;
  469. -}
  470. -
  471.  
  472. /*
  473. * call-seq:
  474. @@ -5958,8 +5781,6 @@ Init_String(void)
  475. rb_objc_define_method(rb_cRubyString, "partition", rstr_partition, 1);
  476. rb_objc_define_method(rb_cRubyString, "rpartition", rstr_rpartition, 1);
  477. rb_objc_define_method(rb_cRubyString, "crypt", rstr_crypt, 1);
  478. - rb_objc_define_method(rb_cRubyString, "encode", rstr_encode, -1);
  479. - rb_objc_define_method(rb_cRubyString, "encode!", rstr_encode_bang, -1);
  480.  
  481. // MacRuby extensions.
  482. rb_objc_define_method(rb_cRubyString, "transform", rstr_transform, 1);
  483. Index: transcode.c
  484. new file mode 100644
  485. ===================================================================
  486. --- /dev/null (revision 4140)
  487. +++ transcode.c (working copy)
  488. @@ -0,0 +1,450 @@
  489. +/*
  490. + * MacRuby implementation of transcode.c.
  491. + *
  492. + * This file is covered by the Ruby license. See COPYING for more details.
  493. + *
  494. + * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
  495. + * Copyright (C) 1993-2007 Yukihiro Matsumoto
  496. + * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
  497. + * Copyright (C) 2000 Information-technology Promotion Agency, Japan
  498. + */
  499. +
  500. +// Notes:
  501. +// AFAICT, we need to add support for newline decorators.
  502. +
  503. +#include "ruby.h"
  504. +#include "ruby/encoding.h"
  505. +#include "encoding.h"
  506. +
  507. +static VALUE sym_invalid;
  508. +static VALUE sym_undef;
  509. +static VALUE sym_replace;
  510. +static VALUE sym_xml;
  511. +static VALUE sym_text;
  512. +static VALUE sym_attr;
  513. +
  514. +typedef struct rb_econv_s {
  515. + rb_encoding_t *source;
  516. + rb_encoding_t *destination;
  517. + transcode_behavior_t invalid_sequence_behavior;
  518. + transcode_behavior_t undefined_conversion_behavior;
  519. + transcode_flags_t special_flags;
  520. + rb_str_t *replacement;
  521. + bool finished;
  522. +} rb_econv_t;
  523. +
  524. +VALUE rb_cEncodingConverter;
  525. +
  526. +static rb_econv_t* RConverter(VALUE self) {
  527. + rb_econv_t *conv;
  528. + Data_Get_Struct(self, rb_econv_t, conv);
  529. + return conv;
  530. +}
  531. +
  532. +static VALUE
  533. +rb_econv_alloc(VALUE klass, SEL sel)
  534. +{
  535. + rb_econv_t *conv = ALLOC(rb_econv_t);
  536. + conv->source = NULL;
  537. + conv->destination = NULL;
  538. + conv->replacement = NULL;
  539. + conv->special_flags = 0;
  540. + conv->finished = false;
  541. + return Data_Wrap_Struct(klass, 0, 0, conv);
  542. +}
  543. +
  544. +static VALUE
  545. +rb_econv_asciicompat_encoding(VALUE klass, SEL sel, VALUE arg)
  546. +{
  547. + rb_encoding_t *enc = NULL;
  548. + if (CLASS_OF(arg) == rb_cEncoding) {
  549. + enc = rb_to_encoding(arg);
  550. + }
  551. + else {
  552. + StringValue(arg);
  553. + enc = rb_enc_find(RSTRING_PTR(arg));
  554. + }
  555. +
  556. + if ((enc == NULL) || (enc->ascii_compatible)) {
  557. + return Qnil;
  558. + }
  559. + else if (UTF16_ENC(enc) || UTF32_ENC(enc)) {
  560. + return (VALUE)rb_utf8_encoding();
  561. + }
  562. + // TODO: Port MRI's table that maps ASCII-incompatible encodings to compatible ones.
  563. + rb_raise(rb_eConverterNotFoundError, "could not find ASCII-compatible encoding for %s", enc->public_name);
  564. +}
  565. +
  566. +static VALUE rb_econv_convpath(VALUE self, SEL sel);
  567. +
  568. +static VALUE
  569. +rb_econv_search_convpath(VALUE klass, SEL sel, int argc, VALUE* argv)
  570. +{
  571. + return rb_econv_convpath(rb_class_new_instance(argc, argv, klass), sel);
  572. +}
  573. +
  574. +static transcode_behavior_t
  575. +symbol_option_with_default(VALUE given_symbol, transcode_behavior_t otherwise, const char* name)
  576. +{
  577. + if (given_symbol == sym_replace) {
  578. + return TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  579. + }
  580. + else if (given_symbol == sym_attr) {
  581. + return TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR;
  582. + }
  583. + else if (given_symbol == sym_text) {
  584. + return TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT;
  585. + }
  586. + else if (!NIL_P(given_symbol)) {
  587. + rb_raise(rb_eArgError, "unknown value '%s' for option %s", StringValuePtr(given_symbol), name);
  588. + }
  589. + return otherwise;
  590. +}
  591. +
  592. +// Fills in the invalid/undefined behaviors and the replacement string from the options hash.
  593. +// :xml overrides :undef when both are given; :replace is transcoded to the destination encoding.
  594. +static void parse_conversion_options(VALUE options, transcode_behavior_t* behavior_for_invalid,
  595. + transcode_behavior_t* behavior_for_undefined, rb_str_t** replacement_str, rb_encoding_t* destination)
  596. +{
  597. + *behavior_for_invalid = symbol_option_with_default(rb_hash_aref(options, sym_invalid),
  598. + TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, "invalid-character");
  599. +
  600. + *behavior_for_undefined = symbol_option_with_default(rb_hash_aref(options, sym_undef),
  601. + TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, "undefined-conversion");
  602. +
  603. + // Because the API conflates the :xml and :undef options, we pass in the previous setting
  604. + // as the default, so an absent :xml option leaves the :undef choice untouched.
  605. + *behavior_for_undefined = symbol_option_with_default(rb_hash_aref(options, sym_xml),
  606. + *behavior_for_undefined, "xml-replacement");
  607. +
  608. + // *replacement_str is only written when :replace was actually supplied.
  609. + VALUE replacement = rb_hash_aref(options, sym_replace);
  610. + if (!NIL_P(replacement)) {
  611. + // Transcode the user's string to the destination encoding before storing it.
  612. + *replacement_str = str_simple_transcode(str_need_string(replacement), destination);
  613. + }
  614. +}
  615. +
  616. +static VALUE
  617. +rb_econv_initialize(VALUE self, SEL sel, int argc, VALUE* argv)
  618. +{
  619. + rb_econv_t *conv = RConverter(self);
  620. + VALUE sourceobj, destobj, options;
  621. + rb_scan_args(argc, argv, "21", &sourceobj, &destobj, &options);
  622. +
  623. + rb_encoding_t* source = rb_to_encoding(sourceobj);
  624. + rb_encoding_t* destination = rb_to_encoding(destobj);
  625. + rb_str_t* replacement_str = NULL;
  626. +
  627. + conv->source = source;
  628. + conv->destination = destination;
  629. +
  630. + conv->invalid_sequence_behavior = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  631. + conv->undefined_conversion_behavior = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  632. +
  633. + // Extract the options. This is a hateful, hateful API.
  634. + if (!NIL_P(options)) {
  635. +
  636. + if (FIXNUM_P(options)) {
  637. + rb_bug("fixnum arguments are not supported yet.");
  638. + }
  639. + else if (TYPE(options) == T_HASH) {
  640. + parse_conversion_options(options, &conv->invalid_sequence_behavior,
  641. + &conv->undefined_conversion_behavior, &replacement_str, destination);
  642. + }
  643. + else {
  644. + rb_raise(rb_eArgError, "expected either a hash or a fixnum as the last parameter");
  645. + }
  646. + }
  647. +
  648. + // Get the default replacement string. For UTF-[8, 16, 32] it's \uFFFD, and for others it's '?'
  649. + if (replacement_str == NULL) {
  650. + replacement_str = replacement_string_for_encoding(destination);
  651. + }
  652. + GC_WB(&conv->replacement, replacement_str);
  653. +
  654. + return self;
  655. +}
  656. +
  657. +static VALUE
  658. +rb_econv_inspect(VALUE self, SEL sel)
  659. +{
  660. + // TODO: make this comply with the MRI output when we add newline decorators
  661. + rb_econv_t *conv = RConverter(self);
  662. + return rb_sprintf("#<%s: %s to %s>", rb_obj_classname(self), conv->source->public_name,
  663. + conv->destination->public_name);
  664. +}
  665. +
  666. +static VALUE
  667. +rb_econv_convpath(VALUE self, SEL sel)
  668. +{
  669. + // In MacRuby, the convpath always looks like this:
  670. + // [[source_encoding, native UTF-16], [native UTF-16, dest_encoding]]
  671. + // The first pair is omitted when the source encoding is already native UTF-16.
  672. + rb_econv_t *conv = RConverter(self);
  673. + VALUE to_return = rb_ary_new2(2);
  674. + rb_encoding_t* nativeUTF16 = rb_encodings[ENCODING_UTF16_NATIVE];
  675. +
  676. + if (conv->source != nativeUTF16) {
  677. + rb_ary_push(to_return, rb_assoc_new((VALUE)conv->source, (VALUE)nativeUTF16));
  678. + }
  679. +
  680. + rb_ary_push(to_return, rb_assoc_new((VALUE)nativeUTF16, (VALUE)conv->destination));
  681. +
  682. + return to_return;
  683. +}
  684. +
  685. +static VALUE
  686. +rb_econv_source_encoding(VALUE self, SEL sel)
  687. +{
  688. + return (VALUE)(RConverter(self)->source);
  689. +}
  690. +
  691. +static VALUE
  692. +rb_econv_destination_encoding(VALUE self, SEL sel)
  693. +{
  694. + return (VALUE)(RConverter(self)->destination);
  695. +}
  696. +
  697. +// Since our converter is basically a black box at this point, we'll leave
  698. +// the lower-level methods unimplemented.
  699. +#define rb_econv_primitive_convert rb_f_notimplement
  700. +
  701. +static VALUE
  702. +rb_econv_convert(VALUE self, SEL sel, VALUE str)
  703. +{
  704. + rb_econv_t *conv;
  705. + Data_Get_Struct(self, rb_econv_t, conv);
  706. +
  707. + if (conv->finished) {
  708. + rb_raise(rb_eArgError, "convert() called on a finished stream");
  709. + }
  710. +
  711. + assert(conv->replacement->encoding == conv->destination);
  712. + return (VALUE)str_transcode(str_need_string(str), conv->source, conv->destination, conv->invalid_sequence_behavior, conv->undefined_conversion_behavior, conv->replacement);
  713. +}
  714. +
  715. +static VALUE
  716. +rb_econv_finish(VALUE self, SEL sel)
  717. +{
  718. + // TODO: Flesh this out later.
  719. + RConverter(self)->finished = true;
  720. + return rb_str_new2("");
  721. +}
  722. +
  723. +#define rb_econv_primitive_errinfo rb_f_notimplement
  724. +
  725. +#define rb_econv_insert_output rb_f_notimplement
  726. +
  727. +#define rb_econv_putback rb_f_notimplement
  728. +
  729. +#define rb_econv_last_error rb_f_notimplement
  730. +
  731. +static VALUE
  732. +rb_econv_replacement(VALUE self, SEL sel)
  733. +{
  734. + return (VALUE)(RConverter(self)->replacement);
  735. +}
  736. +
  737. +static VALUE
  738. +rb_econv_set_replacement(VALUE self, SEL sel, VALUE str)
  739. +{
  740. + // TODO: Should we copy this string? Probably.
  741. + rb_econv_t *conv = RConverter(self);
  742. + if (TYPE(str) != T_STRING) {
  743. + rb_raise(rb_eTypeError, "wrong argument type %s (expected String)", rb_obj_classname(str));
  744. + }
  745. + rb_str_force_encoding(str, conv->destination);
  746. + GC_WB(&conv->replacement, str_need_string(str));
  747. + return str;
  748. +}
  749. +
  750. +/*
  751. + * call-seq:
  752. + * str.encode(encoding [, options] ) => str
  753. + * str.encode(dst_encoding, src_encoding [, options] ) => str
  754. + * str.encode([options]) => str
  755. + *
  756. + * The first form returns a copy of <i>str</i> transcoded
  757. + * to encoding +encoding+.
  758. + * The second form returns a copy of <i>str</i> transcoded
  759. + * from src_encoding to dst_encoding.
  760. + * The last form returns a copy of <i>str</i> transcoded to
  761. + * <code>Encoding.default_internal</code>.
  762. + * By default, the first and second form raise
  763. + * Encoding::UndefinedConversionError for characters that are
  764. + * undefined in the destination encoding, and
  765. + * Encoding::InvalidByteSequenceError for invalid byte sequences
  766. + * in the source encoding. The last form by default does not raise
  767. + * exceptions but uses replacement strings.
  768. + * The <code>options</code> Hash gives details for conversion.
  769. + *
  770. + * === options
  771. + * The hash <code>options</code> can have the following keys:
  772. + * :invalid ::
  773. + * If the value is <code>:replace</code>, <code>#encode</code> replaces
  774. + * invalid byte sequences in <code>str</code> with the replacement character.
  775. + * The default is to raise the exception
  776. + * :undef ::
  777. + * If the value is <code>:replace</code>, <code>#encode</code> replaces
  778. + * characters which are undefined in the destination encoding with
  779. + * the replacement character.
  780. + * :replace ::
  781. + * Sets the replacement string to the value. The default replacement
  782. + * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
  783. + * :xml ::
  784. + * The value must be <code>:text</code> or <code>:attr</code>.
  785. + * If the value is <code>:text</code> <code>#encode</code> replaces
  786. + * undefined characters with their (upper-case hexadecimal) numeric
  787. + * character references. '&', '<', and '>' are converted to "&",
  788. + * "<", and ">", respectively.
  789. + * If the value is <code>:attr</code>, <code>#encode</code> also quotes
  790. + * the replacement result (using '"'), and replaces '"' with """.
  791. + */
  792. +extern rb_encoding_t *default_internal;
  793. +static VALUE
  794. +rstr_encode(VALUE str, SEL sel, int argc, VALUE *argv)
  795. +{
  796. + VALUE opt = Qnil;
  797. + if (argc > 0) {
  798. + opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
  799. + if (!NIL_P(opt)) {
  800. + argc--;
  801. + }
  802. + }
  803. +
  804. + rb_str_t *self = RSTR(str);
  805. + rb_str_t *replacement_str = NULL;
  806. + rb_encoding_t *src_encoding, *dst_encoding;
  807. + transcode_behavior_t behavior_for_invalid = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  808. + transcode_behavior_t behavior_for_undefined = TRANSCODE_BEHAVIOR_RAISE_EXCEPTION;
  809. + if (argc == 0) {
  810. + src_encoding = self->encoding;
  811. + dst_encoding = default_internal;
  812. + behavior_for_invalid = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  813. + behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  814. + }
  815. + else if (argc == 1) {
  816. + src_encoding = self->encoding;
  817. + dst_encoding = rb_to_encoding(argv[0]);
  818. + }
  819. + else if (argc == 2) {
  820. + dst_encoding = rb_to_encoding(argv[0]);
  821. + src_encoding = rb_to_encoding(argv[1]);
  822. + }
  823. + else {
  824. + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0..2)", argc);
  825. + }
  826. +
  827. + if (!NIL_P(opt)) {
  828. + parse_conversion_options(opt, &behavior_for_invalid, &behavior_for_undefined, &replacement_str, dst_encoding);
  829. + if ((replacement_str != NULL)
  830. + && (behavior_for_invalid != TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
  831. + && (behavior_for_undefined == TRANSCODE_BEHAVIOR_RAISE_EXCEPTION)) {
  832. + behavior_for_undefined = TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING;
  833. + }
  834. + }
  835. +
  836. + if ((replacement_str == NULL)
  837. + && ((behavior_for_invalid == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
  838. + || (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING))) {
  839. + replacement_str = replacement_string_for_encoding(dst_encoding);
  840. + }
  841. +
  842. + return (VALUE)str_transcode(self, src_encoding, dst_encoding,
  843. + behavior_for_invalid, behavior_for_undefined, replacement_str);
  844. +}
  845. +
  846. +/*
  847. + * call-seq:
  848. + * str.encode!(encoding [, options] ) => str
  849. + * str.encode!(dst_encoding, src_encoding [, options] ) => str
  850. + *
  851. + * The first form transcodes the contents of <i>str</i> from
  852. + * str.encoding to +encoding+.
  853. + * The second form transcodes the contents of <i>str</i> from
  854. + * src_encoding to dst_encoding.
  855. + * The options Hash gives details for conversion. See String#encode
  856. + * for details.
  857. + * Returns the string even if no changes were made.
  858. + */
  859. +static VALUE
  860. +rstr_encode_bang(VALUE str, SEL sel, int argc, VALUE *argv)
  861. +{
  862. + rstr_modify(str);
  863. +
  864. + VALUE new_str = rstr_encode(str, sel, argc, argv);
  865. + str_replace_with_string(RSTR(str), RSTR(new_str));
  866. + return str;
  867. +}
  868. +
  869. +void
  870. +Init_Transcode(void)
  871. +{
  872. + rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
  873. + rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
  874. + rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);
  875. +
  876. + rb_objc_define_method(rb_cRubyString, "encode", rstr_encode, -1);
  877. + rb_objc_define_method(rb_cRubyString, "encode!", rstr_encode_bang, -1);
  878. +
  879. + rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cObject);
  880. + rb_objc_define_method(*(VALUE *)rb_cEncodingConverter, "alloc", rb_econv_alloc, 0);
  881. + rb_objc_define_method(*(VALUE *)rb_cEncodingConverter, "asciicompat_encoding", rb_econv_asciicompat_encoding, 1);
  882. + rb_objc_define_method(*(VALUE *)rb_cEncodingConverter, "search_convpath", rb_econv_search_convpath, -1);
  883. +
  884. + rb_objc_define_method(rb_cEncodingConverter, "initialize", rb_econv_initialize, -1);
  885. + rb_objc_define_method(rb_cEncodingConverter, "inspect", rb_econv_inspect, 0);
  886. + rb_objc_define_method(rb_cEncodingConverter, "convpath", rb_econv_convpath, 0);
  887. + rb_objc_define_method(rb_cEncodingConverter, "source_encoding", rb_econv_source_encoding, 0);
  888. + rb_objc_define_method(rb_cEncodingConverter, "destination_encoding", rb_econv_destination_encoding, 0);
  889. + rb_objc_define_method(rb_cEncodingConverter, "primitive_convert", rb_econv_primitive_convert, -1);
  890. + rb_objc_define_method(rb_cEncodingConverter, "convert", rb_econv_convert, 1);
  891. + rb_objc_define_method(rb_cEncodingConverter, "finish", rb_econv_finish, 0);
  892. + rb_objc_define_method(rb_cEncodingConverter, "primitive_errinfo", rb_econv_primitive_errinfo, 0);
  893. + rb_objc_define_method(rb_cEncodingConverter, "insert_output", rb_econv_insert_output, 1);
  894. + rb_objc_define_method(rb_cEncodingConverter, "putback", rb_econv_putback, -1);
  895. + rb_objc_define_method(rb_cEncodingConverter, "last_error", rb_econv_last_error, 0);
  896. + rb_objc_define_method(rb_cEncodingConverter, "replacement", rb_econv_replacement, 0);
  897. + rb_objc_define_method(rb_cEncodingConverter, "replacement=", rb_econv_set_replacement, 1);
  898. +
  899. + sym_invalid = ID2SYM(rb_intern("invalid"));
  900. + sym_undef = ID2SYM(rb_intern("undef"));
  901. + sym_replace = ID2SYM(rb_intern("replace"));
  902. + sym_attr = ID2SYM(rb_intern("attr"));
  903. + sym_text = ID2SYM(rb_intern("text"));
  904. + sym_xml = ID2SYM(rb_intern("xml"));
  905. +
  906. + // If only these mapped to the internal enums...
  907. + rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
  908. + rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
  909. + rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
  910. + rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
  911. + rb_define_const(rb_cEncodingConverter, "UNDEF_HEX_CHARREF", INT2FIX(ECONV_UNDEF_HEX_CHARREF));
  912. + rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
  913. + rb_define_const(rb_cEncodingConverter, "AFTER_OUTPUT", INT2FIX(ECONV_AFTER_OUTPUT));
  914. + rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
  915. + rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_DECORATOR", INT2FIX(ECONV_CRLF_NEWLINE_DECORATOR));
  916. + rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_DECORATOR", INT2FIX(ECONV_CR_NEWLINE_DECORATOR));
  917. + rb_define_const(rb_cEncodingConverter, "XML_TEXT_DECORATOR", INT2FIX(ECONV_XML_TEXT_DECORATOR));
  918. + rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_DECORATOR", INT2FIX(ECONV_XML_ATTR_CONTENT_DECORATOR));
  919. + rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_DECORATOR", INT2FIX(ECONV_XML_ATTR_QUOTE_DECORATOR));
  920. +
  921. +#if 0
  922. + rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0);
  923. + rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
  924. + rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0);
  925. + rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0);
  926. + rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0);
  927. +
  928. + rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0);
  929. + rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
  930. + rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0);
  931. + rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0);
  932. + rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0);
  933. + rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0);
  934. + rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0);
  935. +
  936. + Init_newline();
  937. +#endif
  938. +}
Add Comment
Please, Sign In to add comment