Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import sys

from PIL import Image
def moreBreaks(pic, low_limit, high_limit, flat_tolerance=5, jump_threshold=96):
    """Look for extra cut rows between two existing breaks.

    The picture is converted to grayscale and every row ``y`` with
    ``low_limit <= y < high_limit`` is examined. A row is a candidate when
    either

    * its gray values span at most ``flat_tolerance`` levels (a nearly
      uniform row), or
    * the mean absolute difference between it and row ``y + 1`` exceeds
      ``jump_threshold`` (a sharp vertical transition).

    Each run of consecutive candidate rows is collapsed into a single break
    at ``run_start + run_length // 2``.

    Args:
        pic: image object providing ``convert``, ``size`` and ``crop``.
        low_limit: first row to examine.
        high_limit: row at which examination stops (not examined itself,
            but read as the successor of row ``high_limit - 1``).
        flat_tolerance: largest max-min spread for a row to count as uniform.
        jump_threshold: mean row-to-row difference above which a transition
            counts as a break.

    Returns:
        Ascending list of accepted break rows; empty when nothing was found
        (in that case 'No new breaks found' is printed).
    """
    gray = pic.convert('L')
    width = gray.size[0]

    def read_row(y):
        # NOTE(review): when y equals the image height the crop box lies
        # outside the picture; what Pillow returns there should be confirmed.
        return list(gray.crop((0, y, width, y + 1)).getdata())

    # Only two neighbouring rows are needed at any time, so walk the range
    # with a sliding pair instead of caching every row.
    candidates = set()
    if low_limit < high_limit:
        current = read_row(low_limit)
        for y in range(low_limit, high_limit):
            following = read_row(y + 1)
            is_flat = max(current) - min(current) <= flat_tolerance
            mean_jump = sum(abs(a - b) for a, b in zip(current, following)) / width
            if is_flat or mean_jump > jump_threshold:
                candidates.add(y)
            current = following

    accepted_breaks = []
    if not candidates:
        print('No new breaks found')
        return accepted_breaks

    # Collapse each run of consecutive candidate rows to its midpoint.
    ordered = sorted(candidates)
    run_start = ordered[0]
    run_length = 1
    for y in ordered[1:]:
        if y == run_start + run_length:
            run_length += 1
        else:
            accepted_breaks.append(run_start + run_length // 2)
            run_start = y
            run_length = 1
    accepted_breaks.append(run_start + run_length // 2)
    return accepted_breaks
def _solidRunBreaks(column):
    """Return the midpoint of every run of identical pure-black (0) or pure-white (255) values in column."""
    breaks = []
    total = len(column)
    i = 0
    while i < total:
        value = column[i]
        if value in (0, 255):
            end = i
            while end < total and column[end] == value:
                end += 1
            breaks.append(i + (end - i) // 2)
            i = end
        else:
            i += 1
    return breaks


def _choosePageBreaks(breaks, total_height, max_height):
    """Greedily pick page boundaries from breaks so that pages stay under max_height where possible."""
    page_breaks = [0]
    while page_breaks[-1] < total_height:
        top = page_breaks[-1]
        limit = top + max_height
        # Furthest break that still keeps the page shorter than max_height.
        chosen = max([top] + [b for b in breaks if b < limit])
        if chosen == top:
            # No usable break inside the page: take the nearest one beyond the
            # limit. `breaks` contains total_height, so this is never empty.
            chosen = min(b for b in breaks if b >= limit)
        page_breaks.append(chosen)
    return page_breaks


def cutPic(pic_path, rename=True):
    """Split a tall picture into page-sized pictures saved next to it.

    A picture is processed only if it is taller than 1.6 times its width and
    its path contains neither '-Prepaginized' nor '-ExceedingSize'.
    Candidate cut rows are the midpoints of solid black/white bands (found in
    a one-pixel-wide grayscale reduction of the picture), refined with
    ``moreBreaks`` for gaps longer than 1.5 pages or when no band was found
    at all. Pages are written as ``<name>-NN<ext>``.

    Args:
        pic_path: path of the picture file to split.
        rename: when true, pages taller than the page limit get the
            '-ExceedingSize' marker in their file name and the source file
            is renamed with the '-Prepaginized' marker afterwards.

    Returns:
        None.
    """
    exceedingSizeWarning = '-ExceedingSize'
    prepaginizedWarning = '-Prepaginized'
    page_ratio = 1.6
    # splitext only touches the extension of the final path component, so
    # dots in directory names cannot corrupt the generated file names.
    root, ext = os.path.splitext(pic_path)

    with Image.open(pic_path) as pic:
        print()
        print('File:', pic_path)
        width, height = pic.size
        max_height = page_ratio * width
        print('Picture size:', pic.size)
        if (height <= max_height
                or prepaginizedWarning in pic_path
                or exceedingSizeWarning in pic_path):
            print('Procession not needed.')
            return None
        print('Max page height:', max_height)

        # One gray value per row. Image.LANCZOS is the same filter formerly
        # exposed as Image.ANTIALIAS, which Pillow 10 removed.
        column = list(
            pic.convert('L').resize((1, height), Image.LANCZOS).getdata()
        )

        breaks = [0] + _solidRunBreaks(column) + [len(column)]
        print('Breaks:', breaks)

        # Search inside overly long gaps; when there is no solid band at all
        # (only the two outer limits), search the whole picture once.
        no_solid_band = len(breaks) == 2
        additional_breaks = []
        for upper, lower in zip(breaks, breaks[1:]):
            if no_solid_band or lower - upper > max_height * 1.5:
                additional_breaks.extend(moreBreaks(pic, upper, lower))
        breaks = sorted(breaks + additional_breaks)
        print('Breaks:', breaks)

        page_breaks = _choosePageBreaks(breaks, len(column), max_height)

        for i, (top, bottom) in enumerate(zip(page_breaks, page_breaks[1:])):
            page_pic = pic.crop((0, top, width, bottom))
            if rename and page_pic.size[1] > max_height:
                exceeding_marker = exceedingSizeWarning
            else:
                exceeding_marker = ''
            page_path = '{}-{:02d}{}{}'.format(root, i, exceeding_marker, ext)
            print('Page path:', page_path)
            page_pic.save(page_path)

    # Rename only after the source file has been closed.
    if rename:
        os.rename(pic_path, root + prepaginizedWarning + ext)
def cutPicFolder(folder_path):
    """Run cutPic on every PNG/JPEG file below folder_path.

    The tree is walked recursively; files whose lower-cased name ends with
    '.png', '.jpg' or '.jpeg' are collected and processed in sorted path
    order.

    Args:
        folder_path: root directory to scan.

    Returns:
        None.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    # The full list is built before any picture is processed, so files that
    # cutPic writes during this run are not picked up again.
    filelist = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(folder_path)
        for filename in filenames
        if filename.lower().endswith(image_suffixes)
    )
    for picture_path in filelist:
        cutPic(picture_path)
if __name__ == "__main__":
    # Folder to paginate: the first command-line argument if given, otherwise
    # the location the script was originally written for.
    # A single file can be processed with cutPic(path, rename=False) instead.
    default_folder = '/home/hellerick/Documents/Akiba/Manga/Ensemble (mangatraders.org)/'
    cutPicFolder(sys.argv[1] if len(sys.argv) > 1 else default_folder)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement