Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# encoding: utf-8
# Script setup: load the standard-library Benchmark module and set the
# process-wide default internal encoding to UTF-8.
require "benchmark"

Encoding.default_internal = Encoding::UTF_8
# Candidate 1: re-tag +str+ as UTF-8 in place and, only when the bytes are not
# valid UTF-8, hand the string to replace_invalid_characters for repair.
#
# @param str [String] string to sanitize; its encoding tag is changed in place
# @return [String] the same +str+ object
def encode1(str)
  str.force_encoding(Encoding::UTF_8)
  replace_invalid_characters(str) unless str.valid_encoding?
  str
end
# Candidate 2: build a new string in which every character that is not valid
# UTF-8 is replaced by "?". The argument itself is left untouched.
#
# The string is re-tagged as UTF-8 *before* it is split into characters.
# Splitting a binary-tagged string first yields single bytes, which would turn
# every valid multibyte character (such as "£") into a run of "?".
#
# @param str [String] bytes to sanitize, in any encoding tag
# @return [String] a new string with invalid characters replaced by "?"
def encode2(str)
  utf8 = str.dup.force_encoding(Encoding::UTF_8)
  utf8.each_char.map { |char| char.valid_encoding? ? char : "?" }.join
end
# Candidate 3: re-tag +str+ as UTF-8 in place; return it unchanged when it is
# already valid, otherwise build a new string with each invalid character
# replaced by "?".
#
# @param str [String] string to sanitize; its encoding tag is changed in place
# @return [String] +str+ itself when valid, otherwise a new sanitized string
def encode3(str)
  str.force_encoding(Encoding::UTF_8)
  # Fast path: nothing to rebuild when the bytes are already well-formed.
  return str if str.valid_encoding?

  str.each_char.map { |char| char.valid_encoding? ? char : "?" }.join
end
# Candidate 4: re-tag +str+ as UTF-8 in place, then round-trip it through
# UTF-16 so the transcoder discards invalid byte sequences.
#
# NOTE: the replacement is the empty string, so invalid bytes are removed
# rather than replaced by "?" as the other candidates in this file do.
#
# @param str [String] string to sanitize; its encoding tag is changed in place
# @return [String] a new UTF-8 string with invalid sequences removed
def encode4(str)
  str.force_encoding('UTF-8')
     .encode('UTF-16', invalid: :replace, replace: '')
     .encode('UTF-8')
end
# Overwrites, in place, every character of +str+ that is not valid in the
# string's current encoding with "?".
#
# Each invalid character is swapped for exactly one "?" character, so the
# character count stays the same and the index range computed up front
# remains correct while the string is being modified.
#
# NOTE(review): character indexing on a multibyte string is presumably not
# constant-time, so this loop may be slow on long inputs - confirm if it matters.
#
# @param str [String] string to repair; mutated in place
# @return [Range] the index range that was walked (callers here ignore it)
def self.replace_invalid_characters(str)
  (0...str.size).each do |index|
    str[index] = "?" unless str[index].valid_encoding?
  end
end
# Number of calls each candidate makes inside one benchmark report.
N = 100_000

# Well-formed UTF-8 text whose encoding tag has been switched to binary.
# Frozen because every use below works on a fresh +dup+.
VALID = "foo£bar£car".encode(Encoding::UTF_8).force_encoding(Encoding::ASCII_8BIT).freeze

# Show what each candidate produces for the valid input.
puts encode1(VALID.dup)
puts encode2(VALID.dup)
puts encode3(VALID.dup)
puts encode4(VALID.dup)

# Time each candidate on the valid input. Reports are labelled so the rows of
# the result table can be told apart.
Benchmark.bmbm do |x|
  x.report("encode1") { N.times { encode1(VALID.dup) } }
  x.report("encode2") { N.times { encode2(VALID.dup) } }
  x.report("encode3") { N.times { encode3(VALID.dup) } }
  x.report("encode4") { N.times { encode4(VALID.dup) } }
end
# Binary-tagged input containing the byte pair \xC0\x8A twice; those bytes are
# not valid UTF-8. Frozen because every use below works on a fresh +dup+.
INVALID = "£foo\xC0\x8Abar\xC0\x8Acar".force_encoding(Encoding::ASCII_8BIT).freeze

# Show what each candidate produces for the invalid input.
puts encode1(INVALID.dup)
puts encode2(INVALID.dup)
puts encode3(INVALID.dup)
puts encode4(INVALID.dup)

# Time each candidate on the invalid input. Reports are labelled so the rows
# of the result table can be told apart.
Benchmark.bmbm do |x|
  x.report("encode1") { N.times { encode1(INVALID.dup) } }
  x.report("encode2") { N.times { encode2(INVALID.dup) } }
  x.report("encode3") { N.times { encode3(INVALID.dup) } }
  x.report("encode4") { N.times { encode4(INVALID.dup) } }
end
Add Comment
Please, Sign In to add comment