# Untitled

# typed: ignore
# frozen_string_literal: true

module TextFormatting
  module Inbound
    module Formatters
      class Telegram < Formatter

        # Example of payload with formatting
        #
        # {
        #   text: "Normal Bold _Italic_ Strikethrough _Bold and Italic_  ~~Bold and Strikethrough~~ _~~Italic and Strikethrough~~_",
        #   entities: [
        #       {
        #           offset: 7,
        #           length: 4,
        #           type: "bold"
        #       },
        #       {
        #           offset: 21,
        #           length: 13,
        #           type: "strikethrough"
        #       },
        #       {
        #           offset: 35,
        #           length: 17,
        #           type: "bold"
        #       },
        #       {
        #           offset: 54,
        #           length: 26,
        #           type: "bold"
        #       }
        #   ]
        # }

        sig { override.returns(::String) }
        def format
          return original_text if formatting_entities.blank?

          # escape original text from GFM formatting characters like: * _ ~~
          formatted_text = escape_text_and_update_entities(original_text)
          remove_trailing_whitespace_from_formatting(formatted_text)

          # pass by each formatting entity, and apply the corresponding formatting
          # according to the offset and length range
          formatting_entities.each do |e|
            offset = e.dig(:offset)
            length = e.dig(:length)
            type = e.dig(:type)

            # get the corresponding formatting entity string
            f_entity = evaluate_formatting_entity(type)
            next if f_entity.nil?

            # capture the range to be transformed
            transformation_range = offset .. offset + length - 1
            formatted_text[transformation_range] = "#{f_entity}#{formatted_text[transformation_range]}#{f_entity}"

            # since we added characters to the string, we will need to shift all preceding formatting entities
            # by the amount of characters we added accordingly
            # this step adjusts the offset values according to the formatting applied on the range
            shift_preceding_entities(offset, length, f_entity)
          end

          formatted_text
        end

        private

        def escape_text_and_update_entities(original_text)
          # get the indices of the characters to be escaped
          # we will need those to shift entities affected
          matched_entities_indices = original_text.gsub(/\*|_|(~~)/).map { Regexp.last_match.begin(0) }

          # for each formatting entity, we need to check the indices generated by adding escape characters
          # there are two cases:
          #  1. the index is before the offset of the entity
          #     in that case, we need to shift the entity's offset by 1
          #     example:  normal * italic text
          #                        _         _
          #     the * symbol will be escaped ("*" -> "\*")
          #     therefore the offset value of the bold formatting should shift by 1
          #
          #  2. the index is between the offset and the length
          #     in that case, we need to increase the entity's length by 1
          #     example: normal italic * text
          #                     _           _
          #     the * symbol will be escaped ("*" -> "\*")
          #     therefore the length of the bold formatting should increase by 1
          formatting_entities.map! do |e|
            shift = 0

            matched_entities_indices.each do |idx|
              if e[:offset] > idx + shift
                e[:offset] += 1
                shift += 1
              end

              e[:length] += 1 if e[:offset] <= idx + shift && e[:offset] + e[:length] > idx + shift
            end

            e
          end

          escape_text(original_text.dup)
        end

        def remove_trailing_whitespace_from_formatting(formatted_text)
          formatting_entities.map! do |e|
            e_length = e.dig(:length)
            e_offset = e.dig(:offset)

            # et the index of the trailing space in the substring to be formatted
            transformation_range = e_offset .. e_offset + e_length - 1
            trailing_whitespace_idx = formatted_text[transformation_range].index(/\s+$/)
            preceding_whitespace_idx = formatted_text[transformation_range].index(/^(\s*)/)

            # calculate the difference in length
            trailing_difference = trailing_whitespace_idx.nil? ? 0 : ((e_length) - trailing_whitespace_idx)

            # calculate the difference in offset
            preceding_difference = preceding_whitespace_idx

            # subtract the length difference from the length of the entity
            # to prevent it from adding whitespace before the entity
            e[:length] -= (trailing_difference + preceding_difference)
            e[:offset] += preceding_difference

            e
          end
        end

        def shift_preceding_entities(offset, length, f_entity)
          # for each formatting entity, we need to pass by all preceding entities
          # and increase their offset by the length of the characters added for formatting
          # example: bold italic
          #          *  * _    _
          #          after parsing bold, the string will be
          #          *bold* italic
          #               _    _
          #           the italic offset is broken, so we need to increase it by 2, the length of the formatting entity added

          formatting_entities.map! do |e|
            e_offset = e.dig(:offset)

            # is the formatting entity partially or completely inside the current entity?
            if e_offset >= offset && e_offset < offset + length
              e[:offset] += f_entity.length
            # is the formatting entity completely after the current entity?
            elsif e_offset >= offset + length
              e[:offset] += 2 * f_entity.length
            end

            e
          end
        end

        def evaluate_formatting_entity(type)
          return "**" if type == "bold"
          return "_"  if type == "italic"
          return "~~" if type == "strikethrough"

          nil
        end

        def message
          @message ||= source_data.dig(:message)
        end

        def original_text
          @original_text ||= message.dig(:text)
        end

        def escape_text(text)
          text.gsub('*', '\*').gsub('_', '\_').gsub('~', '\~')
        end


        def formatting_entities
          @formatting_entities ||= message.dig(:entities)
        end

      end
    end
  end
end