Guest User

Untitled

a guest
Nov 17th, 2018
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.59 KB | None | 0 0
  # encoding: utf-8
  require "benchmark"

  # Make UTF-8 the process-wide default internal encoding before any of the
  # benchmarked helpers run.
  Encoding.default_internal = Encoding::UTF_8
  5.  
  # Strategy 1: re-tag the string as UTF-8 and, only when it contains invalid
  # byte sequences, patch them in place via +replace_invalid_characters+.
  #
  # The argument itself is mutated (its encoding is changed and invalid
  # characters are overwritten), so callers that need the original bytes must
  # pass a copy. A frozen string cannot be processed.
  #
  # @param str [String] bytes to be interpreted as UTF-8 (mutated in place)
  # @return [String] the same +str+ object, now tagged as UTF-8
  def encode1(str)
    str.force_encoding(Encoding::UTF_8)
    replace_invalid_characters(str) unless str.valid_encoding?
    str
  end
  13.  
  # Strategy 2: rebuild the string character by character, substituting "?"
  # for every character that is not valid UTF-8.
  #
  # The input is re-tagged as UTF-8 on a copy *before* it is split into
  # characters. Splitting first (as this method used to do) breaks a binary
  # string into single bytes, so every valid multibyte character such as "£"
  # was wrongly turned into "??".
  #
  # @param str [String] bytes to be interpreted as UTF-8 (not mutated)
  # @return [String] a new string with invalid characters replaced by "?"
  def encode2(str)
    utf8 = str.dup.force_encoding(Encoding::UTF_8)
    utf8.chars.map { |c| c.valid_encoding? ? c : "?" }.join
  end
  17.  
  # Strategy 3: re-tag the string as UTF-8 and return it unchanged when it is
  # already valid; otherwise build a new string in which every invalid
  # character is replaced by "?".
  #
  # The argument's encoding tag is changed in place in both cases.
  #
  # @param str [String] bytes to be interpreted as UTF-8 (encoding tag mutated)
  # @return [String] +str+ itself when valid, otherwise a new sanitized string
  def encode3(str)
    str.force_encoding(Encoding::UTF_8)
    # Fast path: nothing to rebuild when the bytes are already valid UTF-8.
    return str if str.valid_encoding?

    str.chars.map { |c| c.valid_encoding? ? c : "?" }.join
  end
  26.  
  # Strategy 4: re-tag the string as UTF-8 and round-trip it through UTF-16,
  # letting the transcoder discard invalid byte sequences on the way.
  #
  # NOTE: unlike the other strategies, invalid sequences are *removed*
  # (replaced by the empty string) rather than replaced by "?".
  # The argument's encoding tag is changed in place.
  #
  # @param str [String] bytes to be interpreted as UTF-8 (encoding tag mutated)
  # @return [String] a new UTF-8 string with invalid sequences dropped
  def encode4(str)
    utf16 = str.force_encoding('UTF-8').encode('UTF-16', invalid: :replace, replace: '')
    utf16.encode('UTF-8')
  end
  # Overwrites, in place, every character of +str+ that is not valid in the
  # string's current encoding with "?".
  #
  # The character count is read once before the loop; each replacement swaps
  # one character for one character.
  #
  # @param str [String] string to sanitize (mutated in place)
  # @return [Range] the index range that was walked (callers ignore it)
  def self.replace_invalid_characters(str)
    (0...str.size).each do |i|
      str[i] = "?" unless str[i].valid_encoding?
    end
  end
  37.  
  # Number of iterations per benchmark report.
  N = 100_000
  # Valid UTF-8 bytes deliberately tagged as binary, i.e. the kind of input
  # the encode* helpers are meant to repair. Frozen; each call gets a +dup+
  # because several helpers mutate their argument.
  VALID = "foo£bar£car".encode(Encoding::UTF_8).force_encoding(Encoding::ASCII_8BIT).freeze

  # Show what each strategy produces for valid input before timing it.
  puts encode1(VALID.dup)
  puts encode2(VALID.dup)
  puts encode3(VALID.dup)
  puts encode4(VALID.dup)

  # Time each strategy on valid input; reports are labelled so the rows of the
  # result table can be told apart.
  Benchmark.bmbm do |x|
    x.report("encode1") { N.times { encode1(VALID.dup) } }
    x.report("encode2") { N.times { encode2(VALID.dup) } }
    x.report("encode3") { N.times { encode3(VALID.dup) } }
    x.report("encode4") { N.times { encode4(VALID.dup) } }
  end
  53.  
  # Binary-tagged input containing two invalid "\xC0\x8A" byte pairs.
  # String#b returns a binary-tagged copy, so the string literal itself is
  # never mutated. Frozen; each call gets a +dup+ because several helpers
  # mutate their argument.
  INVALID = "£foo\xC0\x8Abar\xC0\x8Acar".b.freeze

  # Show what each strategy produces for invalid input before timing it.
  puts encode1(INVALID.dup)
  puts encode2(INVALID.dup)
  puts encode3(INVALID.dup)
  puts encode4(INVALID.dup)

  # Time each strategy on invalid input; reports are labelled so the rows of
  # the result table can be told apart.
  Benchmark.bmbm do |x|
    x.report("encode1") { N.times { encode1(INVALID.dup) } }
    x.report("encode2") { N.times { encode2(INVALID.dup) } }
    x.report("encode3") { N.times { encode3(INVALID.dup) } }
    x.report("encode4") { N.times { encode4(INVALID.dup) } }
  end
Add Comment
Please, Sign In to add comment