Advertisement
Guest User

Untitled

a guest
Jul 17th, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.31 KB | None | 0 0
  1. - (Object) limit(limit)
  2.  
  3. 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
  4.  
  # Builds the longest prefix of `s` (defined elsewhere) that fits in
  # 255 bytes, adding whole characters only so that no multibyte
  # character is ever split. The value of this expression is the prefix.
  s.each_char.each_with_object('') do |char, result|
    # Stop before the character that would push us past the byte limit;
    # `break` makes the accumulated prefix the value of the expression.
    break result if result.bytesize + char.bytesize > 255

    result << char
  end
  12.  
  # Truncates +str+ to at most +size+ bytes without leaving a partial
  # UTF-8 character at the end of the result.
  #
  # @param str [String] the string to truncate; must be UTF-8 encoded
  # @param size [Integer] maximum byte size of the result (must be >= 0)
  # @return [String] a prefix of +str+ (after optional composition) whose
  #   bytesize is <= +size+; may be empty when not even one whole
  #   character fits
  # @raise [ArgumentError] if +str+ is not UTF-8 encoded or +size+ is negative
  def limit_bytesize(str, size)
    raise ArgumentError, "str must have UTF-8 encoding" unless str.encoding.name == 'UTF-8'
    # byteslice returns nil for a negative length; fail loudly instead.
    raise ArgumentError, "size must be non-negative" if size < 0

    # Change to canonical unicode form (compose any decomposed characters).
    # Works only if you're using active_support
    str = str.mb_chars.compose.to_s if str.respond_to?(:mb_chars)

    # Start with a string of the correct byte size, but
    # with a possibly incomplete char at the end.
    new_str = str.byteslice(0, size)

    # We need to force_encoding from utf-8 to utf-8 so ruby will re-validate
    # (idea from halfelf). The empty check stops the loop once everything
    # has been trimmed away (e.g. size smaller than the first character),
    # where new_str[-1] would otherwise be nil.
    until new_str.empty? || new_str[-1].force_encoding('utf-8').valid_encoding?
      # remove the invalid char
      new_str = new_str.slice(0..-2)
    end
    new_str
  end
  32.  
  33. >> limit_bytesize("abc\u2014d", 4)
  34. => "abc"
  35. >> limit_bytesize("abc\u2014d", 5)
  36. => "abc"
  37. >> limit_bytesize("abc\u2014d", 6)
  38. => "abc—"
  39. >> limit_bytesize("abc\u2014d", 7)
  40. => "abc—d"
  41.  
  42. >> limit_bytesize("abc\u0065\u0301d", 4)
  43. => "abce"
  44. >> limit_bytesize("abc\u0065\u0301d", 5)
  45. => "abce"
  46. >> limit_bytesize("abc\u0065\u0301d", 6)
  47. => "abcé"
  48. >> limit_bytesize("abc\u0065\u0301d", 7)
  49. => "abcéd"
  50.  
  51. >> limit_bytesize("abc\u0065\u0301d", 4)
  52. => "abc"
  53. >> limit_bytesize("abc\u0065\u0301d", 5)
  54. => "abcé"
  55. >> limit_bytesize("abc\u0065\u0301d", 6)
  56. => "abcéd"
  57.  
  # Truncate `s` to the longest valid UTF-8 prefix of at most
  # `max_bytes` bytes, using String#unpack to take raw bytes and backing
  # off one byte at a time until the last character is complete.
  s = "δogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδog"
  max_bytes = 255
  count = 0
  s2 = nil
  loop do
    # "aN" unpacks the first N raw bytes as a binary string; re-tag it as
    # UTF-8 so validity can be checked.
    s2 = s.unpack("a#{max_bytes - count}")[0]
    s2.force_encoding 'utf-8'

    # Stop once the trailing character is whole. The empty check keeps the
    # loop from calling valid_encoding? on nil if nothing valid remains.
    break if s2.empty? || s2[-1].valid_encoding?

    count += 1
  end

  puts s2
  74.  
  75. >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size
  76. => 20
  77. >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize
  78. => 80
  79. >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20)
  80. => "🔪🔪🔪🔪…"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement