Advertisement
Guest User

Untitled

a guest
May 26th, 2019
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.96 KB | None | 0 0
  1. import pdf2image
  2. from PIL import Image
  3. import time
  4. import urllib.request
  5.  
  6.  
  7. def download_lidl1():
  8. url = 'https://media.lidl-flyer.com/cdce4d18-7327-11e9-a340-005056ab0fb6/GAZETKA-OD-13.05-Oferta-wa%C5%BCna-od-13.05-do-15.05-01.pdf'
  9. urllib.request.urlretrieve(url, 'lidl/1/lidl1.pdf')
  10.  
  11. def download_lidl2():
  12. url = 'https://media.lidl-flyer.com/72e91026-6ff7-11e9-a340-005056ab0fb6/KATALOG-OD-13.05-Oferta-wa%C5%BCna-od-13.05-do-18.05-02.pdf'
  13. urllib.request.urlretrieve(url, 'lidl/2/lidl2.pdf')
  14.  
  15.  
  16. #DECLARE CONSTANTS
  17. PDF_PATH_LIDL1 = "lidl/1/lidl1.pdf"
  18. PDF_PATH_LIDL2 = "lidl/2/lidl2.pdf"
  19. DPI = 200
  20. OUTPUT_FOLDER = None
  21. FIRST_PAGE = None
  22. LAST_PAGE = None
  23. FORMAT = 'jpg'
  24. THREAD_COUNT = 1
  25. USERPWD = None
  26. USE_CROPBOX = False
  27. STRICT = False
  28.  
  29. def pdftopil_lidl1():
  30. #This method reads a pdf and converts it into a sequence of images
  31. #PDF_PATH sets the path to the PDF file
  32. #dpi parameter assists in adjusting the resolution of the image
  33. #output_folder parameter sets the path to the folder to which the PIL images can be stored (optional)
  34. #first_page parameter allows you to set a first page to be processed by pdftoppm
  35. #last_page parameter allows you to set a last page to be processed by pdftoppm
  36. #fmt parameter allows to set the format of pdftoppm conversion (PpmImageFile, TIFF)
  37. #thread_count parameter allows you to set how many thread will be used for conversion.
  38. #userpw parameter allows you to set a password to unlock the converted PDF
  39. #use_cropbox parameter allows you to use the crop box instead of the media box when converting
  40. #strict parameter allows you to catch pdftoppm syntax error with a custom type PDFSyntaxError
  41.  
  42. pil_images_lidl1 = pdf2image.convert_from_path(PDF_PATH_LIDL1, dpi=DPI, output_folder=OUTPUT_FOLDER, first_page=FIRST_PAGE, last_page=LAST_PAGE, fmt=FORMAT, thread_count=THREAD_COUNT, userpw=USERPWD, use_cropbox=USE_CROPBOX, strict=STRICT)
  43. return pil_images_lidl1
  44.  
  45. def pdftopil_lidl2():
  46.  
  47. pil_images_lidl2 = pdf2image.convert_from_path(PDF_PATH_LIDL2, dpi=DPI, output_folder=OUTPUT_FOLDER, first_page=FIRST_PAGE, last_page=LAST_PAGE, fmt=FORMAT, thread_count=THREAD_COUNT, userpw=USERPWD, use_cropbox=USE_CROPBOX, strict=STRICT)
  48. return pil_images_lidl2
  49.  
  50.  
  51. def save_images_lidl1(pil_images_lidl1):
  52. #This method helps in converting the images in PIL Image file format to the required image format
  53. index = 1
  54. for image in pil_images_lidl1:
  55. image.save("lidl/1/page_" + str(index) + ".jpg")
  56. index += 1
  57.  
  58. def save_images_lidl2(pil_images_lidl2):
  59. index = 1
  60. for image in pil_images_lidl2:
  61. image.save("lidl/2/page_" + str(index) + ".jpg")
  62. print(index)
  63. index += 1
  64.  
  65.  
  66. if __name__ == "__main__":
  67. download_lidl1()
  68. pil_images_lidl1 = pdftopil_lidl1()
  69. save_images_lidl1(pil_images_lidl1)
  70.  
  71. download_lidl2()
  72. pil_images_lidl2 = pdftopil_lidl2()
  73. save_images_lidl2(pil_images_lidl2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement