Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # typed: ignore
- # frozen_string_literal: true
- module TextFormatting
- module Inbound
- module Formatters
- class Telegram < Formatter
- # Example of payload with formatting
- #
- # {
- # text: "Normal Bold _Italic_ Strikethrough _Bold and Italic_ ~~Bold and Strikethrough~~ _~~Italic and Strikethrough~~_",
- # entities: [
- # {
- # offset: 7,
- # length: 4,
- # type: "bold"
- # },
- # {
- # offset: 21,
- # length: 13,
- # type: "strikethrough"
- # },
- # {
- # offset: 35,
- # length: 17,
- # type: "bold"
- # },
- # {
- # offset: 54,
- # length: 26,
- # type: "bold"
- # }
- # ]
- # }
# Converts the message text into GFM (GitHub-flavoured Markdown) by wrapping
# every supported formatting entity in its Markdown marker.
#
# Steps:
# 1. escape the literal `*`, `_` and `~` characters of the text and move the
#    entities so they still cover the same characters
# 2. pull whitespace at the entity edges out of the range to be wrapped
# 3. wrap each entity range in its marker, then shift the entities located
#    at or after the inserted markers
#
# The helpers update the memoized entity hashes in place, so the offsets are
# only meaningful for a single run per instance.
#
# NOTE(review): the Telegram Bot API documents entity offset/length in UTF-16
# code units, while the indexing below is per character; text containing
# characters outside the BMP (e.g. emoji) may end up mis-aligned — confirm.
#
# @return [String] the formatted text, or the untouched original text when
#   the message carries no formatting entities
sig { override.returns(::String) }
def format
  return original_text if formatting_entities.blank?

  # escape original text from GFM formatting characters like: * _ ~
  formatted_text = escape_text_and_update_entities(original_text)
  remove_trailing_whitespace_from_formatting(formatted_text)

  formatting_entities.each do |entity|
    # unsupported entity types have no marker and are left as plain text
    marker = evaluate_formatting_entity(entity[:type])
    next if marker.nil?

    offset = entity[:offset]
    length = entity[:length]

    # an entity that was trimmed down to nothing, or that points past the
    # text, has no characters to wrap; inserting an empty pair of markers
    # (e.g. "****") would only add stray characters to the output
    segment = formatted_text[offset, length]
    next if segment.nil? || segment.empty?

    formatted_text[offset, length] = "#{marker}#{segment}#{marker}"

    # the markers just added moved every character at or after this entity,
    # so the offsets of the entities located there have to follow
    shift_preceding_entities(offset, length, marker)
  end

  formatted_text
end
- private
# Escapes the GFM control characters of +original_text+ and updates every
# formatting entity so it still covers the same characters afterwards.
#
# Each escaped character gains one leading backslash, therefore, measured
# against the entity's position in the unescaped text:
# 1. an escaped character located before the entity moves its offset by 1
#    example: "a * bold" with "bold" formatted -> "a \* bold"
# 2. an escaped character located inside the entity grows its length by 1
#    example: "bo*ld" fully formatted -> "bo\*ld"
# Escaped characters located after the entity do not affect it.
#
# The entity hashes returned by +formatting_entities+ are updated in place.
#
# @param original_text [String] the unescaped message text
# @return [String] an escaped copy of the text
def escape_text_and_update_entities(original_text)
  # positions, in the unescaped text, of every character escape_text will
  # prefix with a backslash (each `~` counts on its own, like in escape_text)
  escaped_indices = original_text.each_char.with_index
                                 .select { |char, _idx| '*_~'.include?(char) }
                                 .map(&:last)

  formatting_entities.each do |entity|
    # both counts are taken against the untouched offset/length so that a
    # growing length cannot swallow characters located after the entity
    entity_start = entity[:offset]
    entity_end = entity_start + entity[:length]

    escaped_before = escaped_indices.count { |idx| idx < entity_start }
    escaped_inside = escaped_indices.count { |idx| idx >= entity_start && idx < entity_end }

    entity[:offset] += escaped_before
    entity[:length] += escaped_inside
  end

  escape_text(original_text.dup)
end
# Shrinks every formatting entity so that its range neither starts nor ends
# with whitespace; markers placed next to whitespace would otherwise end up
# wrapping the spaces together with the text.
#
# The entity hashes returned by +formatting_entities+ are updated in place:
# leading whitespace moves the offset forward and shortens the length,
# trailing whitespace only shortens the length. An entity covering nothing
# but whitespace ends up with a length of 0 and an unchanged offset.
#
# @param formatted_text [String] the (already escaped) text the entity
#   offsets refer to
# @return [Array<Hash>] the updated formatting entities
def remove_trailing_whitespace_from_formatting(formatted_text)
  formatting_entities.each do |entity|
    segment = formatted_text[entity[:offset], entity[:length]]
    # nothing to trim when the entity is empty or points past the text
    next if segment.nil? || segment.empty?

    # \A and \z anchor on the whole segment; ^ and $ would stop at the first
    # line break of a multi-line entity
    leading = segment[/\A\s*/].length
    if leading == segment.length
      entity[:length] = 0
      next
    end
    trailing = segment[/\s*\z/].length

    entity[:offset] += leading
    entity[:length] -= leading + trailing
  end
end
# Re-aligns the entity offsets after a range of the text has been wrapped
# in a formatting marker (one marker before the range, one after it).
#
# Despite the method name, entities starting before +offset+ are left
# untouched; only the entities starting at or after it are moved:
# - an entity starting inside the wrapped range sits behind the opening
#   marker only, so it moves by one marker length
# - an entity starting after the wrapped range sits behind both markers,
#   so it moves by two marker lengths
#
# example: "bold italic" with "bold" wrapped in "**" becomes
#          "**bold** italic", so the "italic" entity moves by 4
#
# The entity hashes returned by +formatting_entities+ are updated in place;
# the entity that was just wrapped is among them and moves by one marker.
#
# @param offset [Integer] start of the range that was wrapped
# @param length [Integer] length of the range that was wrapped
# @param f_entity [String] the marker added on both sides of the range
# @return [Array<Hash>] the updated formatting entities
def shift_preceding_entities(offset, length, f_entity)
  marker_size = f_entity.length
  wrapped_end = offset + length

  formatting_entities.each do |entity|
    entity_offset = entity[:offset]
    next if entity_offset < offset

    entity[:offset] += entity_offset < wrapped_end ? marker_size : 2 * marker_size
  end
end
# Maps an entity type to the GFM marker that is placed on both sides of the
# formatted text.
#
# @param type [String] the entity type taken from the payload
# @return [String, nil] the marker, or nil when the type is not supported
def evaluate_formatting_entity(type)
  case type
  when "bold" then "**"
  when "italic" then "_"
  when "strikethrough" then "~~"
  end
end
# The +:message+ entry of the source payload, memoized after the first
# lookup that yields a truthy value.
#
# @return [Object, nil] whatever +source_data+ stores under +:message+
def message
  @message ||= source_data.dig(:message)
end
# The +:text+ entry of the message, memoized after the first lookup that
# yields a truthy value.
#
# @return [String, nil] the text stored in the message
def original_text
  @original_text ||= message[:text]
end
# Prefixes every `*`, `_` and `~` of +text+ with a backslash so the
# character is not picked up as a formatting marker.
#
# @param text [String] the text to escape
# @return [String] a new, escaped string; +text+ itself is not modified
def escape_text(text)
  # a single pass over the text instead of one pass per character
  text.gsub(/[*_~]/) { |char| "\\#{char}" }
end
# The formatting entities of the message, memoized after the first lookup
# that yields a truthy value.
#
# The other helpers of this class rewrite +:offset+ and +:length+ of these
# hashes in place, so shallow copies are handed out: the hashes stored in
# the message payload keep their original values.
#
# @return [Array<Hash>, nil] copies of the entries stored under +:entities+,
#   or nil when the message has none
def formatting_entities
  @formatting_entities ||= message[:entities]&.map(&:dup)
end
- end
- end
- end
- end
Add Comment
Please, Sign In to add comment