Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """PDF JPG X V0.6
- GUI for jpg extraction from .pdf files
- Written for practice by Steve Shambles March 2019
- Updated a little December 2019
- https://stevepython.wordpress.com/
- Extraction routine by Ned Batchelder
- https://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html"""
- import os
- from tkinter import Button, filedialog, LabelFrame, Listbox, Tk
# Top-level application window; the button frame and the listbox frame
# created further down are both attached to it.
root = Tk()
root.title('PDF JPG X')
def double_clicked(event):
    """Open the extracted jpg whose listbox row was double-clicked.

    The row text (e.g. ``'JPG 3'``) is turned into a file name by removing
    spaces, lower-casing and appending ``.jpg`` (``'jpg3.jpg'``), which is
    the name ``get_jpgs`` uses when writing images into the module-level
    ``save_dir``.

    Does nothing when no row is selected, or when the row does not map to
    an existing file (the first row inserted by ``get_jpgs`` is the save
    directory itself, not an image).

    Args:
        event: Tk event whose ``widget`` is the listbox that was clicked.
    """
    widget = event.widget
    selection = widget.curselection()
    if not selection:
        # A double-click on an empty listbox yields an empty selection tuple.
        return

    label = widget.get(int(selection[0]))
    file_name = label.replace(' ', '').lower() + '.jpg'
    file_path = os.path.join(str(save_dir), file_name)
    if not os.path.isfile(file_path):
        # Not an image row (e.g. the directory row) or the file has gone.
        return

    # NOTE: os.startfile is only available on Windows.
    os.startfile(file_path)
def pdf_file():
    """Ask the user for a PDF and remember the choice.

    Opens a file-open dialog restricted to ``*.pdf`` and stores whatever the
    dialog returns in the module-level ``file_selected``, which ``get_jpgs``
    later opens.
    """
    global file_selected
    pdf_only = (('Pdf Files', '*.pdf'),)
    file_selected = filedialog.askopenfilename(
        title='Select A PDF file',
        filetypes=pdf_only,
    )
def _iter_jpgs(pdf):
    """Yield the bytes of each jpg found inside the PDF bytes, in file order.

    Scanning approach by Ned Batchelder: look for a ``stream`` keyword, accept
    it only if a jpg start marker (FF D8) follows within a few bytes, then
    take everything up to the end marker (FF D9) located near the matching
    ``endstream`` keyword.

    Args:
        pdf: Entire PDF file content as ``bytes``.

    Raises:
        ValueError: A jpg start was found but no ``endstream`` keyword or no
            jpg end marker follows it.
    """
    start_marker = b'\xff\xd8'
    end_marker = b'\xff\xd9'
    window = 20  # how far from a keyword the markers are searched for

    pos = 0
    while True:
        istream = pdf.find(b'stream', pos)
        if istream < 0:
            return

        istart = pdf.find(start_marker, istream, istream + window)
        if istart < 0:
            # This stream does not begin with a jpg; move past it.
            pos = istream + window
            continue

        iend = pdf.find(b'endstream', istart)
        if iend < 0:
            raise ValueError('Didnt find end of stream!')

        # Search for the end marker shortly before 'endstream', but never
        # from before the start of the image itself.
        iend = pdf.find(end_marker, max(iend - window, istart))
        if iend < 0:
            raise ValueError('Didnt find end of JPG!')

        iend += len(end_marker)  # include the end marker in the slice
        yield pdf[istart:iend]
        pos = iend


def get_jpgs():
    """Extract every jpg found in the selected PDF into a chosen directory.

    Reads the file named by the module-level ``file_selected``, asks for a
    destination directory, stores it in the module-level ``save_dir`` and
    adds it as a row to the listbox. Each image found is then listed as
    ``'JPG <n>'`` and written to ``jpg<n>.jpg`` in that directory.

    Returns without doing anything when no PDF has been selected yet, or
    when the directory dialog yields an empty result (e.g. it is cancelled);
    in the latter case a previously stored ``save_dir`` is left untouched so
    rows from an earlier run keep pointing at the right place.

    Raises:
        ValueError: The PDF contains a jpg start with no matching end.
    """
    global save_dir

    # file_selected only exists once pdf_file() has run, and is empty when
    # that dialog produced no path.
    pdf_path = globals().get('file_selected')
    if not pdf_path:
        return

    with open(pdf_path, 'rb') as src:
        pdf = src.read()

    # Ask for a save directory for .jpgs
    chosen_dir = filedialog.askdirectory()
    if not chosen_dir:
        return
    save_dir = chosen_dir
    lst_bx.insert('end', save_dir)

    for njpg, jpg in enumerate(_iter_jpgs(pdf)):
        lst_bx.insert('end', 'JPG %d' % njpg)
        out_path = os.path.join(save_dir, 'jpg%d.jpg' % njpg)
        with open(out_path, 'wb') as jpgfile:
            jpgfile.write(jpg)
# --- Build the window contents and start the Tk event loop. ---

# Top frame: the two action buttons side by side.
# Caption version matches the module docstring (V0.6); it previously read V0.5.
main_frame = LabelFrame(root, text='PDF JPG X. V0.6')
main_frame.grid(padx=5, pady=5)

sel_pdf_btn = Button(main_frame, bg='orange', text='Select PDF',
                     command=pdf_file)
sel_pdf_btn.grid(pady=10, padx=10)

get_jpgs_btn = Button(main_frame, bg='springgreen', text=' Get jpgs ',
                      command=get_jpgs)
get_jpgs_btn.grid(pady=10, padx=10, row=0, column=1)

# Bottom frame: listbox that get_jpgs fills with the save directory and one
# row per extracted image.
lbox_frame = LabelFrame(root, text='Jpgs saved. Dble click to view')
lbox_frame.grid(padx=5, pady=5)

lst_bx = Listbox(
    master=lbox_frame,
    selectmode='single',
    width=26,
    height=10,
    fg='black',
    bg='skyblue',
)
lst_bx.grid()
# Double left-click on a row opens the corresponding image.
lst_bx.bind('<Double-1>', double_clicked)

root.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement