Advertisement
Guest User

Untitled

a guest
Oct 20th, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.35 KB | None | 0 0
  1. require 'pdf/reader'
  2. require 'json'
  3. require 'forwardable'
  4.  
  # Receiver for PDF::Reader page walks that records every non-space glyph
  # drawn on a page as a plain Hash, so the result can be serialised to JSON.
  #
  # Each entry appended to #characters has these keys:
  #   :x, :y             - the pair returned by @state.trm_transform(0, 0) just
  #                        before the glyph is processed (presumably the glyph
  #                        origin in page space -- confirm against PageState)
  #   :scaled_font_size  - glyph width / 1000.0 multiplied by the font size.
  #                        NOTE: despite the key name this is a scaled glyph
  #                        *width*; the name is kept so existing consumers of
  #                        the JSON output keep working.
  #   :font_size         - @state.font_size at the time the glyph is processed
  #   :character         - the UTF-8 text the current font maps the glyph to
  #
  # All graphics/text state bookkeeping is delegated to the object stored in
  # @state, which #page= replaces with a new PDF::Reader::PageState.
  class JsonTextReceiver
    extend Forwardable

    # Glyphs whose UTF-8 text equals this string are not recorded.
    SPACE = " ".freeze

    attr_reader :state, :options, :characters

    ########## BEGIN FORWARDERS ##########
    # Graphics State Operators
    def_delegators :@state, :save_graphics_state, :restore_graphics_state

    # Matrix Operators
    def_delegators :@state, :concatenate_matrix

    # Text Object Operators
    def_delegators :@state, :begin_text_object, :end_text_object

    # Text State Operators
    def_delegators :@state, :set_character_spacing, :set_horizontal_text_scaling
    def_delegators :@state, :set_text_font_and_size, :font_size
    def_delegators :@state, :set_text_leading, :set_text_rendering_mode
    def_delegators :@state, :set_text_rise, :set_word_spacing

    # Text Positioning Operators
    def_delegators :@state, :move_text_position, :move_text_position_and_set_leading
    def_delegators :@state, :set_text_matrix_and_text_line_matrix, :move_to_start_of_next_line
    ########## END FORWARDERS ##########

    # Start with empty collections so #characters is an Array (not nil) even
    # if it is read before any page has been assigned.
    def initialize
      @content = []
      @characters = []
    end

    # Starting a new page: replaces the state object and clears everything
    # collected so far.
    #
    # page - the page object handed over by the walker; it must respond to
    #        #objects and #attributes.
    def page=(page)
      @state = PDF::Reader::PageState.new(page)
      @content = [] # reset here but not read anywhere in this class
      @characters = []
      @mediabox = page.objects.deref(page.attributes[:MediaBox])
    end

    #####################################################
    # Text Showing Operators
    #####################################################

    # Record text that is drawn on the page.
    def show_text(string) # Tj (AWAY)
      internal_show_text(string)
    end

    # Strings in params are recorded as text; every other element is passed
    # to the state as a displacement adjustment.
    def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
      params.each do |arg|
        if arg.is_a?(String)
          internal_show_text(arg)
        else
          @state.process_glyph_displacement(0, arg, false)
        end
      end
    end

    # Moves to the start of the next line, then records str.
    def move_to_next_line_and_show_text(str) # '
      @state.move_to_start_of_next_line
      show_text(str)
    end

    # Sets word spacing (aw) and character spacing (ac), then behaves like
    # #move_to_next_line_and_show_text.
    def set_spacing_next_line_show_text(aw, ac, string) # "
      @state.set_word_spacing(aw)
      @state.set_character_spacing(ac)
      move_to_next_line_and_show_text(string)
    end

    #####################################################
    # XObjects
    #####################################################

    # Walks form XObjects with this same receiver so their glyphs are
    # recorded too; any other kind of XObject is ignored.
    def invoke_xobject(label)
      @state.invoke_xobject(label) do |xobj|
        xobj.walk(self) if xobj.is_a?(PDF::Reader::FormXObject)
      end
    end

    private

    # Unpacks string into glyph codes with the current font, appends one Hash
    # per non-space glyph to @characters and advances the state after every
    # glyph (spaces included).
    #
    # Raises PDF::Reader::MalformedPDFError when no current font is set.
    def internal_show_text(string)
      font = @state.current_font
      raise PDF::Reader::MalformedPDFError, "current font is invalid" if font.nil?

      font.unpack(string).each do |glyph_code|
        # capture the position before the displacement for this glyph is applied
        newx, newy = @state.trm_transform(0, 0)
        utf8_chars = font.to_utf8(glyph_code)
        is_space = utf8_chars == SPACE

        # apply the glyph displacement for the current glyph so the next
        # glyph will appear in the correct position
        glyph_width = font.glyph_width(glyph_code) / 1000.0
        unless is_space
          @characters << {
            x: newx,
            y: newy,
            scaled_font_size: glyph_width * @state.font_size,
            font_size: @state.font_size,
            character: utf8_chars
          }
        end
        @state.process_glyph_displacement(glyph_width, 0, is_space)
      end
    end
  end
  107.  
  # Driver: walks every page of a PDF with a new JsonTextReceiver and prints
  # that page's recorded characters as pretty-printed JSON (one JSON array per
  # page, written to stdout in page order).
  #
  # The PDF path is taken from the first command-line argument; when none is
  # given the original hard-coded file name is used.
  pdf_path = ARGV.fetch(0, "WFD_I15_VCQM2D.pdf")

  PDF::Reader.open(pdf_path) do |reader|
    reader.pages.each do |page|
      receiver = JsonTextReceiver.new
      page.walk(receiver)
      puts JSON.pretty_generate(receiver.characters)
    end
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement