Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'iconv'
- conv = Iconv.new( 'UTF-8', 'ASCII' )
- str = "Award winning BBQ and southern entr\xc3\x83\xc2\x83\xc3\x82\xc2\x83\xc3\x83\xc2\x82\xc3\x82\xc2\x83\xc3\x83\xc2\x83\xc3\x82\xc2\x82\xc3\x83\xc2\x82\xc3\x82\xc2\xa9e favorites"
- while str =~ /[\xc0-\xff][\x80-\xbf]/
- ## Throws:
- ## multiple.rb:10:in `iconv': "\303\203\302\203\303\202\302\203\303\203\302\202\303\202\302\203"... (Iconv::IllegalSequence)
- ## from multiple.rb:10
- str = conv.iconv( str )
- puts str
- end
- target = "Award winning BBQ and southern entr\xe9e favorites"
- ## Works (equivalent Perl implementation, kept commented out):
- # use Test::More tests => 1;
- # use Encode ();
- #
- # is(
- # multiple_downgrade(
- # "Award winning BBQ and southern entr\xc3\x83\xc2\x83\xc3\x82\xc2\x83\xc3\x83\xc2\x82\xc3\x82\xc2\x83\xc3\x83\xc2\x83\xc3\x82\xc2\x82\xc3\x83\xc2\x82\xc3\x82\xc2\xa9e favorites" ),
- # "Award winning BBQ and southern entr\xe9e favorites",
- # "4-times encoded" );
- # exit;
- #
- # sub multiple_downgrade {
- # my ( $str ) = @_;
- #
- # # These bytes are a Unicode string. After running this, multi-byte characters are now interpreted
- # # as single characters.
- # Encode::_utf8_on( $str );
- #
- # # Keep decoding while we still have UTF-8 encoding visible inside our string.
- # while ( $str =~ /[\xc0-\xff][\x80-\xbf]/ ) {
- #
- # # Unwrap a level of UTF-8 encoding. Each character is replaced by its code point.
- # utf8::downgrade( $str );
- #
- # # But! Now make perl think these bytes are actually UTF-8
- # Encode::_utf8_on( $str );
- # }
- #
- # return $str;
- # }
Add Comment
Please, Sign In to add comment