Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- - (Object) limit(limit)
- 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
# Accumulate whole characters of +s+ into a fresh string, stopping just
# before the character that would push the result past 255 bytes. The value
# of this expression is the accumulated (truncated) string.
s.each_char.inject('') do |prefix, ch|
  # Stop as soon as appending the next character would exceed the limit.
  break prefix if prefix.bytesize + ch.bytesize > 255

  prefix << ch
end
# Truncates a UTF-8 string so that it occupies at most +size+ bytes, never
# leaving a partially cut multi-byte character at the end.
#
# @param str [String] the string to truncate; must be tagged as UTF-8
# @param size [Integer] maximum number of bytes in the result (>= 0)
# @return [String] a new UTF-8 string of at most +size+ bytes
# @raise [ArgumentError] if +str+ is not UTF-8 encoded or +size+ is negative
def limit_bytesize(str, size)
  raise ArgumentError, "str must have UTF-8 encoding" unless str.encoding.name == 'UTF-8'
  raise ArgumentError, "size must be non-negative" if size.negative?

  # Change to canonical unicode form (compose any decomposed characters).
  # Works only if you're using active_support
  str = str.mb_chars.compose.to_s if str.respond_to?(:mb_chars)

  # Never cut past the end of the string.
  cut = [size, str.bytesize].min

  # If the first excluded byte is a UTF-8 continuation byte (0b10xxxxxx), the
  # cut point lies inside a multi-byte character. Step back until the first
  # excluded byte is a lead byte, i.e. until the cut sits on a character
  # boundary. Stopping at zero yields an empty string instead of indexing
  # into nothing.
  if cut < str.bytesize
    cut -= 1 while cut.positive? && (str.getbyte(cut) & 0xC0) == 0x80
  end

  str.byteslice(0, cut)
end
- >> limit_bytesize("abc\u2014d", 4)
- => "abc"
- >> limit_bytesize("abc\u2014d", 5)
- => "abc"
- >> limit_bytesize("abc\u2014d", 6)
- => "abc—"
- >> limit_bytesize("abc\u2014d", 7)
- => "abc—d"
- # Without active_support (the decomposed "e" + combining accent is left as-is):
- >> limit_bytesize("abc\u0065\u0301d", 4)
- => "abce"
- >> limit_bytesize("abc\u0065\u0301d", 5)
- => "abce"
- >> limit_bytesize("abc\u0065\u0301d", 6)
- => "abcé"
- >> limit_bytesize("abc\u0065\u0301d", 7)
- => "abcéd"
- # With active_support (the string is composed first, so "é" is a single 2-byte character):
- >> limit_bytesize("abc\u0065\u0301d", 4)
- => "abc"
- >> limit_bytesize("abc\u0065\u0301d", 5)
- => "abcé"
- >> limit_bytesize("abc\u0065\u0301d", 6)
- => "abcéd"
# Truncate +s+ to at most 255 bytes without leaving a partial UTF-8 character
# at the end, then print the result.
s = "δogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδogδog"

# byteslice cuts at an exact byte offset and keeps the UTF-8 encoding tag, so
# the cut may land inside a multi-byte character. scrub('') then drops the
# incomplete trailing bytes, which replaces the retry loop that shortened the
# string one byte at a time.
s2 = s.byteslice(0, 255).scrub('')
puts s2
- >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size
- => 20
- >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize
- => 80
- >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20)
- => "🔪🔪🔪🔪…"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement