Advertisement
steve-shambles-2109

PDF JPG X V0.6

Dec 3rd, 2019
1,075
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.89 KB | None | 0 0
  1. """PDF JPG X V0.6
  2.   GUI for jpg extraction from .pdf files
  3.  
  4.   Written for practice by Steve Shambles March 2019
  5.   Updated a little December 2019
  6.   https://stevepython.wordpress.com/
  7.  
  8.   Extraction routine by Ned Batchelder
  9.   https://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html"""
  10.  
  11. import os
  12. from tkinter import Button, filedialog, LabelFrame, Listbox, Tk
  13.  
  14. root = Tk()
  15. root.title('PDF JPG X')
  16.  
  17. def double_clicked(event):
  18.     """Double clicked mouse over on item."""
  19.     ev_w = event.widget
  20.     idx = int(ev_w.curselection()[0])
  21.     value = ev_w.get(idx)
  22.     temp_val = value.replace(' ', '')
  23.     file_path = str(save_dir)+'/'+str(temp_val)+'.jpg'
  24.     os.startfile(file_path)
  25.  
  26.  
  27. def pdf_file():
  28.     """Select a pdf clicked, open filedialog showing only .pdf files."""
  29.     global file_selected
  30.     file_selected = filedialog.askopenfilename(title='Select A PDF file',
  31.                                                filetypes=(('Pdf Files', '*.pdf'),))
  32.  
  33. def get_jpgs():
  34.     """Extract all found .jpgs in pdf file, by Ned Batchelder"""
  35.     global save_dir
  36.  
  37.     with open(file_selected, 'rb') as file:
  38.         pdf = file.read()
  39.  
  40.     # Ask for a save directory for .jpgs
  41.     save_dir = filedialog.askdirectory()
  42.     print(save_dir)
  43.     lst_bx.insert('end', save_dir)
  44.  
  45.     startmark = b'\xff\xd8'
  46.     startfix = 0
  47.     endmark = b'\xff\xd9'
  48.     endfix = 2
  49.     i = 0
  50.  
  51.     njpg = 0
  52.     while True:
  53.         istream = pdf.find(b'stream', i)
  54.         if istream < 0:
  55.             break
  56.         istart = pdf.find(startmark, istream, istream + 20)
  57.         if istart < 0:
  58.             i = istream + 20
  59.             continue
  60.         iend = pdf.find(b'endstream', istart)
  61.         if iend < 0:
  62.             raise Exception('Didnt find end of stream!')
  63.         iend = pdf.find(endmark, iend - 20)
  64.         if iend < 0:
  65.             raise Exception('Didnt find end of JPG!')
  66.  
  67.         istart += startfix
  68.         iend += endfix
  69.         lst_bx.insert('end', 'JPG %d' % (njpg))
  70.         #print('JPG %d from %d to %d' % (njpg, istart, iend))
  71.         jpg = pdf[istart:iend]
  72.         with open(str(save_dir)+'\jpg%d.jpg' % njpg, 'wb') as jpgfile:
  73.             jpgfile.write(jpg)
  74.  
  75.         njpg += 1
  76.         i = iend
  77.  
  78.  
  79. main_frame = LabelFrame(root, text='PDF JPG X. V0.5')
  80. main_frame.grid(padx=5, pady=5)
  81.  
  82. sel_pdf_btn = Button(main_frame, bg='orange', text='Select PDF', command=pdf_file)
  83. sel_pdf_btn.grid(pady=10, padx=10)
  84.  
  85. get_jpgs_btn = Button(main_frame, bg='springgreen', text='  Get jpgs  ', command=get_jpgs)
  86. get_jpgs_btn.grid(pady=10, padx=10, row=0, column=1)
  87.  
  88. lbox_frame = LabelFrame(root, text='Jpgs saved. Dble click to view')
  89. lbox_frame.grid(padx=5, pady=5)
  90.  
  91. lst_bx = Listbox(
  92.     master=lbox_frame,
  93.     selectmode='single',
  94.     width=26,
  95.     height=10,
  96.     fg='black',
  97.     bg='skyblue')
  98.  
  99. lst_bx.grid()
  100. lst_bx.bind('<Double-1>', double_clicked)
  101.  
  102. root.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement