Advertisement
Guest User

Untitled

a guest
Mar 28th, 2017
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.09 KB | None | 0 0
  require 'tempfile'
  require 'google/cloud/vision'

  # Namespace for services built on the Google Cloud Vision client.
  module GoogleCloudVision
    # Extracts the text of a PDF, one string per page: every page is rendered
    # to a temporary JPEG file and sent through Google Cloud Vision document
    # text detection.
    #
    # NOTE(review): this class calls a +log+ helper and uses +Magick+ (RMagick),
    # neither of which is defined or required in this file; confirm the host
    # application provides both.
    class OcrPdfService
      PDF_EXTENSION = '.pdf'.freeze

      attr_accessor :pdf_filename

      # @param pdf_filename [String, #to_s] path of the PDF to OCR
      # @raise [RuntimeError] when GOOGLE_CLOUD_KEYFILE is unset or empty, or
      #   when +pdf_filename+ does not carry a ".pdf" extension (any letter case)
      def initialize(pdf_filename)
        # Only the presence of the variable is checked here; presumably the
        # Google Cloud client reads the keyfile path itself (TODO confirm).
        raise "GOOGLE_CLOUD_KEYFILE env variable required" if ENV['GOOGLE_CLOUD_KEYFILE'].to_s.empty?
        raise "Input must be PDF" unless pdf?(pdf_filename)

        self.pdf_filename = pdf_filename
      end

      # Runs OCR over every page of the PDF.
      #
      # @return [Array<String>] recognised text per page, in page order
      def perform
        pages = []
        Magick::ImageList.new(pdf_filename).each_with_index do |page_img, i|
          pages << ocr_page(page_img, i)
        end
        pages
      end

      private

      # True when +filename+ has a real ".pdf" extension. Using File.extname
      # (rather than splitting on ".") rejects names such as "pdf" or
      # "report.pdf." that merely end in those letters; nil is rejected too.
      def pdf?(filename)
        File.extname(filename.to_s).downcase == PDF_EXTENSION
      end

      # Writes one rendered page to a temporary JPEG, OCRs it and returns the
      # recognised text. The temporary file is closed and removed when the
      # block exits, whether or not OCR succeeded.
      def ocr_page(page_img, index)
        Tempfile.create(%w[image .jpg]) do |tempfile|
          log :writing_image, tempfile.path
          page_img.write(tempfile.path)
          image = vision.image(tempfile.path)
          annotation = vision.annotate(image, document: true)
          text = annotation.text.to_s
          log pdf_filename, index, text
          text
        end
      ensure
        # Release the page's pixel data right away instead of waiting for GC,
        # so a long PDF does not keep every rendered page in memory.
        page_img.destroy!
      end

      # Memoized Vision client for the project named by GOOGLE_CLOUD_PROJECT_ID.
      def vision
        @vision ||= Google::Cloud::Vision.new(project: ENV['GOOGLE_CLOUD_PROJECT_ID'])
      end
    end
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement