Guest User

Untitled

a guest
Jul 18th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.52 KB | None | 0 0
  # Strips accidental, repeated layers of UTF-8 encoding from a string.
  #
  # An earlier version of this script called Iconv.new('UTF-8', 'ASCII') in a
  # loop and died with Iconv::IllegalSequence: that converter goes *from* ASCII
  # *to* UTF-8 (the wrong direction), and the input contains bytes above 0x7f,
  # which are not ASCII at all. Iconv was also removed from the standard library
  # in Ruby 2.0. The code below uses only core String/Array methods and mirrors
  # the Perl reference kept at the bottom of this script.

  # Removes exactly one layer of UTF-8 encoding.
  #
  # The bytes are reinterpreted as UTF-8 and every character is replaced by a
  # single byte holding its code point.
  #
  # @param bytes [String] the raw bytes to unwrap (the encoding tag is ignored)
  # @return [String, nil] the unwrapped bytes (binary encoding), or nil when
  #   there is nothing to unwrap: the bytes are plain ASCII, are not valid
  #   UTF-8, or contain a character above U+00FF that cannot fit in one byte
  def utf8_downgrade(bytes)
    text = bytes.dup.force_encoding(Encoding::UTF_8)
    return nil if text.ascii_only? || !text.valid_encoding?

    code_points = text.codepoints
    # A character above U+00FF means this level is genuine text, not a wrapper.
    return nil if code_points.any? { |code_point| code_point > 0xFF }

    code_points.pack('C*')
  end

  # Removes every layer of UTF-8 encoding that can be removed.
  #
  # Each successful step shortens the byte string, so the loop always ends.
  #
  # @param str [String] possibly multiply-encoded text; it is not modified
  # @yieldparam level [String] the intermediate bytes after each removed layer
  # @return [String] the fully unwrapped bytes, tagged with binary encoding
  def multiple_downgrade(str)
    bytes = str.b
    while (unwrapped = utf8_downgrade(bytes))
      bytes = unwrapped
      yield bytes if block_given?
    end
    bytes
  end

  str = "Award winning BBQ and southern entr\xc3\x83\xc2\x83\xc3\x82\xc2\x83\xc3\x83\xc2\x82\xc3\x82\xc2\x83\xc3\x83\xc2\x83\xc3\x82\xc2\x82\xc3\x83\xc2\x82\xc3\x82\xc2\xa9e favorites"

  # Show the intermediate result after every layer that is peeled off.
  str = multiple_downgrade(str) { |level| puts level }

  # Expected result: Latin-1 bytes. Tagged binary so it compares equal to the
  # binary string returned by multiple_downgrade.
  target = "Award winning BBQ and southern entr\xe9e favorites".b
  puts(str == target ? 'ok' : 'not ok')


  ## Works:
  # use Test::More tests => 1;
  # use Encode ();
  #
  # is(
  # multiple_downgrade(
  # "Award winning BBQ and southern entr\xc3\x83\xc2\x83\xc3\x82\xc2\x83\xc3\x83\xc2\x82\xc3\x82\xc2\x83\xc3\x83\xc2\x83\xc3\x82\xc2\x82\xc3\x83\xc2\x82\xc3\x82\xc2\xa9e favorites" ),
  # "Award winning BBQ and southern entr\xe9e favorites",
  # "4-times encoded" );
  # exit;
  #
  # sub multiple_downgrade {
  # my ( $str ) = @_;
  #
  # # These bytes are a Unicode string. After running this, multi-byte characters are now interpreted
  # # as single characters.
  # Encode::_utf8_on( $str );
  #
  # # Keep decoding while we still have UTF-8 encoding visible inside our string.
  # while ( $str =~ /[\xc0-\xff][\x80-\xbf]/ ) {
  #
  # # Unwrap a level of UTF-8 encoding. Each character is replaced by its code point.
  # utf8::downgrade( $str );
  #
  # # But! Now make perl think these bytes are actually UTF-8
  # Encode::_utf8_on( $str );
  # }
  #
  # return $str;
  # }
Add Comment
Please, Sign In to add comment