Advertisement
Hellerick_Ferlibay

Webcomic cutter.py

Oct 18th, 2016
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.55 KB | None | 0 0
import os
import re
import sys

from PIL import Image
  3.  
  4.  
  5. def moreBreaks(pic, low_limit, high_limit):
  6. ##    print('== More breaks ==')
  7. ##    print(pic, low_limit, high_limit)
  8.     new_breaks = []
  9.     pic = pic.convert('L')
  10.     lines = {}
  11.     for y in range(low_limit, high_limit+1):
  12.         lines[y] = pic.crop((0,y,pic.size[0],y+1)).getdata()
  13.     for y in range(low_limit, high_limit):
  14.         if max(lines[y])-min(lines[y]) <= 5:
  15.             new_breaks = new_breaks + [y]
  16.         if sum([abs(lines[y][x]-lines[y+1][x]) for x in range(pic.size[0])])/pic.size[0]>96:
  17.             new_breaks = new_breaks + [y]
  18.     accepted_breaks = []
  19.     if len(new_breaks)>0:
  20.         y = min(new_breaks)
  21.         while y < high_limit:
  22.             d = 0
  23.             while y+d in new_breaks: d += 1
  24.             accepted_breaks = accepted_breaks + [y + d//2]
  25.             y = min([high_limit]+[line for line in new_breaks if line > y+d])
  26.     else:
  27.         print('No new breaks found')
  28. ##    print('New breaks count:', len(new_breaks))
  29. ##    print('Accepted breaks count:', len(accepted_breaks))
  30. ##    print('Accepted breaks:', accepted_breaks)
  31. ##    import pdb; pdb.set_trace()
  32.     return accepted_breaks
  33.  
  34.  
  35. def cutPic(pic_path, rename=True):
  36.     exceedingSizeWarning = '-ExceedingSize'
  37.     prepaginizedWarning = '-Prepaginized'
  38.     page_ratio = 1.6
  39.     with Image.open(pic_path) as pic:
  40.         print()
  41.         print('File:', pic_path)
  42.         column = list(pic.convert('L').resize(
  43.             (1,pic.size[1]),
  44.             Image.ANTIALIAS).getdata())
  45.         max_height = page_ratio * pic.size[0]
  46.         print('Picture size:', pic.size)
  47.         if pic.size[1]<=max_height or (prepaginizedWarning in pic_path) or (exceedingSizeWarning in pic_path):
  48.             print('Procession not needed.')
  49.             return None
  50.         print('Max page height:', max_height)
  51. ##        blacks = [i for i, l in enumerate(column) if l == 0]
  52. ##        whites = [i for i, l in enumerate(column) if l == 255]
  53.         breaks = []
  54.         i = 0
  55.         while i<len(column):
  56.             if column[i] in [0,255]:
  57.                 j = 0
  58.                 while i+j<len(column) and (column[i] == column[i+j]):
  59.                     j += 1
  60.                 breaks = breaks + [i+j//2]
  61.                 i += j
  62.             else:
  63.                 i += 1
  64.         breaks = [0] + breaks + [len(column)]
  65.         additional_breaks = []
  66.         print('Breaks:',breaks)
  67.         for i,b in enumerate(breaks[:-1]):
  68.             if breaks[i+1]-breaks[i] > max_height*1.5:
  69.                 additional_breaks = additional_breaks + moreBreaks(pic,breaks[i],breaks[i+1])
  70.         if len(breaks)==2:
  71.             additional_breaks = additional_breaks + moreBreaks(pic,*breaks)
  72. ##        import pdb; pdb.set_trace()
  73.         breaks = sorted(breaks + additional_breaks)
  74.         print('Breaks:', breaks)
  75.         page_breaks = [0]
  76.         while page_breaks[-1]<len(column):
  77. ##            import pdb; pdb.set_trace()
  78.             new_page_break = max([page_breaks[-1]]+[b for b in breaks if b < page_breaks[-1]+max_height])
  79.             if new_page_break == page_breaks[-1]:
  80.                 new_page_break = min([b for b in breaks if b >= page_breaks[-1]+max_height])
  81.             page_breaks = page_breaks + [new_page_break]
  82. ##        print('Page breaks:', page_breaks)
  83.         for i in range(len(page_breaks)-1):
  84.             page_pic = pic.crop([
  85.                     0,
  86.                     page_breaks[i],
  87.                     pic.size[0],
  88.                     page_breaks[i+1]
  89.                 ])
  90.             if page_pic.size[1]>max_height and rename:
  91.                 exceeding_marker = exceedingSizeWarning
  92.             else:
  93.                 exceeding_marker = ''
  94.             page_path = re.sub(r'(.*)\.',r'\1-{:02d}{}.'.format(i, exceeding_marker),pic_path)
  95.             print('Page path:', page_path)
  96.             page_pic.save(page_path)
  97.         if rename: os.rename(
  98.                 pic_path,
  99.                 re.sub(r'(.*)\.',r'\1{}.'.format(prepaginizedWarning),pic_path)
  100.             )
  101. ##    import pdb; pdb.set_trace()
  102.  
  103.  
  104. def cutPicFolder(folder_path):
  105.     filelist = []
  106.     for a,b,c in os.walk(folder_path):
  107.         for file in c:
  108.             if file.lower().endswith(('.png','.jpg','.jpeg')):
  109.                 filelist = filelist + [os.path.join(a, file)]
  110.     filelist.sort()
  111.     for file in filelist: cutPic(file)
  112.  
  113.  
  114. if __name__=="__main__":
  115. ##    cutPic('/home/hellerick/Documents/Akiba/Manga/Ensemble (mangatraders.org)/Selected/0000.jpg', rename=False)
  116.     cutPicFolder('/home/hellerick/Documents/Akiba/Manga/Ensemble (mangatraders.org)/')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement