Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # Usage: DEBUG=1 python rotate_angles.py *.png
- import os
- import sys
- import multiprocessing
- import subprocess
- import numpy as np
- import matplotlib.pyplot as plt
- import skimage
- import cv2
- import cv2.cuda
- import cv2.cuda as cuda
- from scipy import ndimage, signal
- from scipy.optimize import linprog
- from skimage import transform
- from skimage.exposure import rescale_intensity
- from skimage.io import imsave
- from skimage.transform import radon, rotate
- from skimage.color import rgb2gray, rgba2rgb, gray2rgb
- import time
- MAX_ANGLE = 1.5
- ANGLE_STEP = 0.001 # Don't go under .01 if not using GPU
- PROMINENCE = 250
- MAX_BORDER = 200
- IGNORE_BORDER = 150
- SIDE_MARGIN = int(os.getenv("SIDE_MARGIN", 270))
- TOP_MARGIN = int(os.getenv("TOP_MARGIN", 290))
- DEBUG = os.getenv("DEBUG") is not None
- ANALYZE_ONLY = os.getenv("ANALYZE_ONLY") is not None
- YON_KOMA = os.getenv("YON_KOMA") is not None
- NOCROP = os.getenv("NOCROP") is not None
- NOROTATE = os.getenv("NOROTATE") is not None
- GIMP_PATH = os.getenv("GIMP_PATH", "C:/Program Files/GIMP 2/bin/gimp-console-2.10.exe")
- USE_GPU = True
- def smart_remove_margin(radon_slice, img_height, img_width):
- """ Try to find the page margins, and remove them from calculations
- Margins must be >70% img width apart, >10% img height prominence,
- and at least 90% the max white value on the radon transform slice.
- FOR THIS TO WORK, SCANNER LID MUST BE BLACK!."""
- margin_peaks, _ = signal.find_peaks(
- radon_slice, distance=0.7 * img_width,
- prominence=0.1 * img_height, height=0.9 * max(radon_slice))
- if margin_peaks.size == 0:
- return radon_slice
- elif margin_peaks.size == 1:
- if margin_peaks[0] < (radon_slice.size / 2):
- return radon_slice[margin_peaks[0]:]
- else:
- return radon_slice[: margin_peaks[0]]
- else:
- return radon_slice[margin_peaks[0]: margin_peaks[-1]]
- def rotate_img_cuda(img: cuda.GpuMat, angle):
- angle = np.deg2rad(angle)
- (w, h) = img.size()
- center = w // 2
- cos_a, sin_a = np.cos(angle), np.sin(angle)
- R = np.array([[cos_a, sin_a, -center * (cos_a + sin_a - 1)],
- [-sin_a, cos_a, -center * (cos_a - sin_a - 1)],
- [0, 0, 1]])
- return cv2.cuda.warpPerspective(src=img,
- dsize=(w, h),
- M=R,
- flags=cv2.INTER_LINEAR)
- def _find_variances(img, angles):
- """
- Find variance of second derivative of radon transform at specified angles
- Parameters
- ----------
- img :
- a single channel float32 numpy array
- angles :
- list of angles to try
- Returns
- -------
- variances_gpu :
- Variance of second derivative of radon transform at specified angles.
- """
- # This bit is copied from skimage.transform.radon
- # Basically rotate an image then finding its projection onto one dimension (aka summing up the columns)
- diagonal = int(np.ceil(np.sqrt(2) * max(img.shape)))
- pad = [diagonal - s for s in img.shape]
- new_center = [(s + p) // 2 for s, p in zip(img.shape, pad)]
- old_center = [s // 2 for s in img.shape]
- pad_before = [nc - oc for oc, nc in zip(old_center, new_center)]
- pad_width = [(pb, p - pb) for pb, p in zip(pad_before, pad)]
- # Upload image to GPU
- img_gpu = cv2.cuda.GpuMat()
- img_gpu.upload(img)
- # Pad image so no pixels are lost when rotating
- img_gpu = cv2.cuda.copyMakeBorder(src=img_gpu,
- top=pad_width[0][0],
- bottom=pad_width[0][1],
- left=pad_width[1][0],
- right=pad_width[1][1],
- borderType=cv2.BORDER_CONSTANT,
- value=[0])
- # Output variable
- variances_gpu = np.zeros(len(angles), dtype=img.dtype)
- # rotate images and reduce
- for i, angle in enumerate(angles):
- img_gpu_rot = rotate_img_cuda(img_gpu, -angle)
- # Sum up columns into 1D vector
- img_gpu_rot_sum = cv2.cuda.reduce(img_gpu_rot, dim=0, reduceOp=cv2.REDUCE_SUM)
- # Find laplacian sum vector
- laplace_filter = cv2.cuda.createLaplacianFilter(srcType=img_gpu_rot_sum.type(),
- dstType=img_gpu_rot_sum.type())
- img_gpu_rot_sum_laplaced = laplace_filter.apply(img_gpu_rot_sum)
- # Find mean and std deviation of laplaced vector
- img_gpu_rot_sum_laplaced_stddev = cv2.cuda.meanStdDev(img_gpu_rot_sum_laplaced)
- # Variance = std dev ^ 2
- variances_gpu[i] = img_gpu_rot_sum_laplaced_stddev.download()[0][1] ** 2
- return variances_gpu
- def _find_best_angle_in_range(img, angles, filename=None):
- img = skimage.util.invert(img)
- img = img[IGNORE_BORDER:-IGNORE_BORDER, IGNORE_BORDER:-IGNORE_BORDER]
- if USE_GPU:
- variances = _find_variances(img, angles)
- else:
- # Take the radon transformation of the image against those angles.
- # Note circle=False: we don't want any image cropping
- radons = radon(img, angles, circle=False, preserve_range=False)
- # Take the second derivative of the radon transform for each angle
- laplacians = [ndimage.laplace(radons[:, i])
- for i in range(angles.size)]
- # Turns out chopping off the edges works too. Commenting "smart" way out.
- # h, w = img.shape
- # laplacians = [ndimage.laplace(smart_remove_margin(radons[:, i], h, w))
- # for i in range(angles.size)]
- # Find the variance of the second derivative of each angle. This is
- # a single number showing how "straight" some edges in the image are.
- variances = np.array([np.var(laplacian) for laplacian in laplacians])
- # Find the peaks in those variance values among the angles. Ideally
- # there should be one peak; but things are not ideal. There are generally
- # two more peaks: one alinging the page border (which might not be the
- # panel borders, and one at 0 because the image is sharpest not rotated.
- # For tankoubon pages, there is normally only one sharp page border, and
- # this peak can generally be filtered with prominence.
- peaks = np.array([])
- prominence = PROMINENCE
- while peaks.size == 0 and prominence >= 10:
- peaks, _ = signal.find_peaks(variances, distance=3,
- prominence=prominence)
- prominence /= 2
- # The pixel grid peak, if present along with other peaks, is removed.
- peak_at_zero = [i for i, idx in enumerate(peaks)
- if idx == int((angles.size - 1) / 2)]
- if peak_at_zero and peaks.size > 1:
- peaks = np.delete(peaks, peak_at_zero[0])
- # Debugging plots
- if DEBUG and filename:
- plt.clf()
- plt.plot(angles, variances)
- plt.plot(angles[peaks], variances[peaks], "xm")
- plt.grid(visible=True, which="both")
- [plt.annotate(f"{angle:.3f}\u00b0", (angle, value),
- textcoords="offset points", xytext=(0, 5), ha="center")
- for angle, value in zip(angles[peaks], variances[peaks])]
- plt.savefig(filename + "-straightness.png")
- # The final angle to rotate is then returned.
- if peaks.size > 0:
- best_peak = max(peaks, key=lambda i: variances[i])
- best_angle = angles[best_peak]
- alternatives = angles[[pk for pk in peaks if pk != best_peak]].tolist()
- else:
- best_angle = 0
- alternatives = []
- if DEBUG:
- print(f"> {filename}: {angles[peaks]} - "
- f"{variances[peaks].round(decimals=2)}",
- file=sys.stderr)
- return best_angle, alternatives
- def find_best_angle(img, filename=None):
- if NOROTATE:
- return 0, [], 0, 0
- # List all the possible rotation angles to be tried
- angles = np.arange(-MAX_ANGLE, MAX_ANGLE + ANGLE_STEP, ANGLE_STEP)
- angles = angles.round(decimals=3)
- rotate_angle, alts = _find_best_angle_in_range(img, angles, filename)
- # Do that again, but for angles around 90 degrees
- angles = np.arange(90 - MAX_ANGLE, 90 + MAX_ANGLE + ANGLE_STEP, ANGLE_STEP)
- angles = angles.round(decimals=3)
- skew_angle, _ = _find_best_angle_in_range(img, angles, f"{filename}-skew")
- shear_angle = skew_angle - 90 - rotate_angle
- # Find how many pixels to shear transform. GIMP doesn't take an angle.
- img = rotate(img, -rotate_angle, resize=True, order=3)
- shear_pixels = img.shape[1] * np.tan(shear_angle * np.pi / 180)
- if DEBUG:
- print(f"> {filename} shear_pixels: {shear_pixels}")
- return rotate_angle, alts, shear_angle, shear_pixels
- def find_constraint_matrices_old(convex_polygon, R=0.7):
- X = convex_polygon[:, 0]
- Y = convex_polygon[:, 1]
- A0 = np.column_stack((np.roll(Y, -1) - Y, X - np.roll(X, -1)))
- B0 = np.sum(np.multiply(A0, convex_polygon), axis=1).reshape(A0.shape[0], 1)
- A1 = (A0 * np.matrix([[0, 0, R, R], [0, 1, 0, 1]]))
- A1 = np.reshape(A1, (A1.size, 1))
- A0 = np.column_stack((np.repeat(A0, 4, axis=0), A1))
- B0 = np.repeat(B0, 4, axis=0)
- return A0, B0
- def find_constraint_matrices(convex_polygon, R=0.7):
- """
- Gets constraint matrices for a linear programming problem that finds the
- largest rectangle with aspect ratio width:height = R:1 that fits inside a convex
- polygon.
- Parameters
- ----------
- convex_polygon :
- a (N x 2) numpy array which contains points describing a convex polygon.
- The Directio must be counter-clockwise
- R :
- Ratio of width to height of rectangle to find
- Returns
- -------
- A :
- Constraint matrix A_ub
- b :
- Constraint vector b_ub
- """
- NUM_VARIABLES = 3 # x, y, and h
- CPS = 4 # Constraints Per Segment, one for each point of the rectangle
- num_points = len(convex_polygon) # number of points in convex hull polygon
- X = convex_polygon[:, 0] # list of x coordinates
- Y = convex_polygon[:, 1] # list of y coordinates
- y_delta = np.roll(Y, -1) - Y # y_{n+1} - y_{n}
- x_delta = np.roll(X, -1) - X # x_{n+1} - x_{n}
- # Create vector b
- b = y_delta * X - x_delta * Y
- b = np.repeat(b, CPS) # replicate 3 times each value in b (4 total copies)
- b = b.reshape((-1, 1)) # Convert B into column vector
- # Create array A
- A = np.zeros((num_points * CPS, NUM_VARIABLES))
- A[:, 0] = np.repeat(y_delta, CPS) # The x coefficient is the same for all 4 points
- A[:, 1] = np.repeat(-1 * x_delta, CPS) # The y coefficient is the same for all 4 points
- A[1::CPS, 2] = -1 * x_delta # stride index starting at 1, constraints for top left point
- A[2::CPS, 2] = R * y_delta # constraints for bottom right point
- A[3::CPS, 2] = R * y_delta - x_delta # constraints for top right point
- return A, b
- def _find_crop_using_page_borders(img, filename=None):
- img_shape_orig = img.shape
- img_area = img.shape[0] * img.shape[1]
- img = skimage.img_as_ubyte(img)
- if DEBUG and filename:
- img_orig = img.copy()
- # Roughly level the page
- black_pt, white_pt = np.percentile(img, (20, 50))
- img = rescale_intensity(img, in_range=(black_pt, white_pt))
- if DEBUG:
- print(f"> levels: ({black_pt}, {white_pt})")
- # Surface-blur to smooth out noises
- img = cv2.bilateralFilter(img, 9, 150, 150)
- # Convert to binary image
- # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
- # cv2.THRESH_BINARY, 115, 4)
- _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
- img = cv2.medianBlur(img, 11)
- # Extract contours from edges. The largest of contours will be our page
- # THIS DOESN'T WORK FOR MANY PAGES WITH ARTWORK TO THE EDGES
- contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL,
- cv2.CHAIN_APPROX_SIMPLE)
- # Approximate a polygon (most of the time a quadrilateral) from the contour
- # max epsilon (error distance) is 0.01% of image width (basically w/e)
- polys = [cv2.approxPolyDP(contour, 0.0001 * img.shape[1], closed=True)
- for contour in contours]
- page_poly = max(polys, key=lambda p: cv2.contourArea(cv2.convexHull(p)))
- convex_poly = cv2.convexHull(page_poly)
- poly_area = cv2.contourArea(convex_poly)
- if DEBUG and filename:
- img_c = gray2rgb(img_orig)
- img_c = cv2.drawContours(img_c, contours, -1, (255, 0, 255), 1)
- img_c = cv2.drawContours(img_c, [page_poly], -1, (0, 0, 255), 5)
- img_c = cv2.drawContours(img_c, [convex_poly], -1, (255, 0, 0), 6)
- imsave(f"{filename}-crop.jpg", img_c)
- if DEBUG:
- print(f"> poly_area %: {100 * poly_area / img_area}%")
- # Only accept if the page area found is > 80% of image size
- if poly_area / img_area < 0.8:
- print(f"NO CROP FOUND FOR {filename}, EDIT MANUALLY.")
- return (0, 0), (img_shape_orig[1], img_shape_orig[0]), False
- # Run the polygon through a linear optimizer to find the max rectangle
- # with fixed 7:10 ratio that fits inside it
- poly_matrix = convex_poly.reshape(convex_poly.shape[0],
- convex_poly.shape[2])
- R = 0.7
- A, b = find_constraint_matrices(poly_matrix, R)
- bounds = [(0, img.shape[1]), (0, img.shape[0]), (0, img.shape[0])]
- solution = linprog(np.matrix('0; 0; -1'), A, b, bounds=bounds)
- x, y, h = np.rint(solution.x).astype(int)
- w = int(np.rint(h * R))
- if DEBUG and filename:
- img_c = cv2.rectangle(img_c, (x, y), (x + w, y + h), (0, 255, 0), 10)
- imsave(f"{filename}-crop.jpg", img_c)
- return (x, y), (w, h), True
- def _find_crop_4koma(img, filename=None):
- h_grad = np.gradient(np.sum(img, axis=0))
- v_grad = np.gradient(np.sum(img, axis=1))
- find_peakz = lambda x, height: \
- signal.find_peaks(x, height=height, distance=3)[0]
- try:
- panel_left = [x for x in find_peakz(-h_grad, 200) if x > MAX_BORDER][0]
- panel_right = [x for x in find_peakz(h_grad, 200)
- if x < img.shape[1] - MAX_BORDER][-1]
- panel_top = [x for x in find_peakz(-v_grad, 200) if x > MAX_BORDER][0]
- panel_bottom = [x for x in find_peakz(v_grad, 200)
- if x < img.shape[0] - MAX_BORDER][-1]
- except:
- print(f"{filename} crop: failed to find panel borders")
- return (0, 0), (img.shape[1], img.shape[0]), False
- if DEBUG:
- print(f"> {filename} panels: ({panel_top}, {panel_left}, "
- f"{panel_bottom}, {panel_right})")
- # if panel_left > MAX_BORDER or panel_top > MAX_BORDER or\
- # panel_bottom < img.shape[1] - MAX_BORDER or\
- # panel_right < img.shape[0] - MAX_BORDER:
- # print(f"{filename} crop: panel borders don't look right...")
- # return (0, 0), (img.shape[1], img.shape[0])
- crop_left = panel_left - SIDE_MARGIN
- crop_right = panel_right + SIDE_MARGIN
- # v_center = (panel_bottom + panel_top) / 2
- crop_width = crop_right - crop_left
- crop_height = crop_width * 10 / 7
- # crop_top = v_center - (crop_height / 2)
- crop_top = panel_top - TOP_MARGIN
- if DEBUG and filename:
- img_c = gray2rgb(skimage.img_as_ubyte(img))
- panels = np.array([[panel_left, panel_top],
- [panel_left, panel_bottom],
- [panel_right, panel_bottom],
- [panel_right, panel_top]])
- img_c = cv2.drawContours(img_c, [panels], -1, (255, 0, 0), 6)
- imsave(f"{filename}-crop.jpg", img_c)
- return (crop_left, int(round(crop_top))), \
- (crop_width, int(round(crop_height))), True
- def find_crop(img, rotate_angle, shear_angle, filename=None):
- if abs(rotate_angle) > 0:
- img = rotate(img, -rotate_angle, resize=True, order=3)
- tan_shear_angle = np.tan(-shear_angle * np.pi / 180)
- transform_matrix = np.array(
- [[1, 0, 0],
- [tan_shear_angle, 1, -img.shape[1] * tan_shear_angle / 2],
- [0, 0, 1]])
- img = transform.warp(img, inverse_map=transform_matrix, order=3)
- if NOCROP:
- return (0, 0), (img.shape[1], img.shape[0]), False
- if DEBUG and filename:
- img_c = gray2rgb(skimage.img_as_ubyte(img))
- imsave(f"{filename}-crop.jpg", img_c)
- if YON_KOMA:
- return _find_crop_4koma(img, filename)
- return _find_crop_using_page_borders(img, filename)
- GIMP_BATCH = """
- (let* ((input-file "{infile}")
- (output-file "{outfile}")
- (rotates '({rotates}))
- (shear-pixels {shear})
- (image (car (gimp-file-load RUN-NONINTERACTIVE input-file input-file)))
- (orig-layer (car (gimp-image-get-active-layer image)))
- (rotate-layer (lambda (theta)
- (let ((layer-name (string-append (number->string theta) " deg"))
- (new-layer (car (gimp-layer-copy orig-layer FALSE)))
- (theta-rads (* theta 0.0174533)))
- (gimp-item-set-name new-layer layer-name)
- (gimp-image-insert-layer image new-layer 0 -1)
- (gimp-context-set-interpolation INTERPOLATION-NOHALO)
- (gimp-item-transform-rotate new-layer theta-rads TRUE 0 0))))
- (rotated-layers (reverse (map rotate-layer rotates)))
- (top-layer (if (null? rotated-layers) orig-layer (caar rotated-layers)))
- (width (car (gimp-drawable-width top-layer)))
- (height (car (gimp-drawable-height top-layer)))
- (offset-x (car (gimp-drawable-offsets top-layer)))
- (offset-y (cadr (gimp-drawable-offsets top-layer))))
- (if (not (null? rotated-layers))
- (gimp-image-remove-layer image orig-layer)
- '())
- (if (= shear-pixels 0)
- top-layer
- (car (gimp-item-transform-shear
- top-layer ORIENTATION-VERTICAL shear-pixels)))
- (gimp-image-resize image width height (- offset-x) (- offset-y))
- (gimp-image-resize image {width} {height} (- {offset_x}) (- {offset_y}))
- (file-psd-save RUN-NONINTERACTIVE image top-layer
- output-file output-file 1 0)
- (gimp-image-delete image))
- """
- def rotate_and_crop_with_gimp(filename, rotates, shear, crop_offset, crop_size):
- outfile = f"{os.path.splitext(filename)[0]}.psd"
- str_rotates = " ".join([str(a) for a in rotates if abs(a) > 0.0001])
- gimp_batch_cmd = GIMP_BATCH.format(
- infile=filename, outfile=outfile, rotates=str_rotates, shear=shear,
- offset_x=crop_offset[0], offset_y=crop_offset[1],
- width=crop_size[0], height=crop_size[1])
- gimp_cmd = [GIMP_PATH, "-i", "-d", "-f", "-n",
- "-b", gimp_batch_cmd, "-b", "(gimp-quit 0)"]
- subprocess.run(gimp_cmd)
- def process_file(filename):
- start = time.time()
- # Read image file and convert to grayscale
- img = plt.imread(filename)
- if img.ndim == 3 and img.shape[2] == 4:
- img = rgba2rgb(img)
- if img.ndim == 3 and img.shape[2] == 3:
- img = rgb2gray(img)
- rotate_angle, alts, shear_angle, shear_pixels = \
- find_best_angle(img, filename)
- crop_offset, crop_size, crop_found = find_crop(img, rotate_angle, shear_angle, filename)
- print(f"{filename}: rotate {rotate_angle:.3f}\u00b0 clockwise." +
- (f" Alternatives: {alts}." if alts else "") +
- f" Shear: {shear_angle:.3f}\u00b0" +
- f" Crop: {crop_offset}, {crop_size}.")
- if not ANALYZE_ONLY:
- rotate_and_crop_with_gimp(
- filename, alts + [rotate_angle], shear_pixels,
- crop_offset, crop_size)
- runtime = time.time() - start
- print(f"Runtime for {filename}: {runtime}s")
- return rotate_angle, alts, shear_angle, crop_offset, crop_size, crop_found
- if __name__ == "__main__":
- files = sys.argv[1:]
- # IMO it's easier to just edit this list instead of passing arguments.
- files = ["work/3.png",
- "work/4.png",
- "work/5.png",
- "work/6.png",
- "work/7.png",
- "work/8.png",
- "work/9.png",
- "work/10.png",
- "work/11.png",
- "work/12.png",
- "work/13.png",
- "work/14.png",
- "work/15.png",
- "work/16.png",
- "work/17.png",
- "work/18.png"]
- print(f"> files: {files}, ANALYZE_ONLY={ANALYZE_ONLY}, "
- f"YON_KOMA={YON_KOMA}, NOROTATE={NOROTATE}, "
- f"SIDE_MARGIN={SIDE_MARGIN}, TOP_MARGIN={TOP_MARGIN}, "
- f"NOCROP={NOCROP}, GIMP_PATH={GIMP_PATH}")
- prog_start = time.time()
- if USE_GPU:
- # I think GPU is getting thrashed when using multicore so just do it sequentially.
- results = []
- for file in files:
- results.append(process_file(file))
- results = list(zip(files, results))
- else:
- with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
- results = list(zip(files, pool.map(process_file, files)))
- total_runtime = time.time() - prog_start
- print(f"{total_runtime=}s")
- [print(f"{filename}: rotate {rotate:.3f}\u00b0 clockwise." +
- (f" Alternatives: {alts}." if alts else "") +
- f" Shear: {shear:.3f}\u00b0" +
- f" Crop: {crop_offset}, {crop_size}." +
- f" Crop Found: {crop_found}.")
- for filename, (rotate, alts, shear, crop_offset, crop_size, crop_found)
- in results]
- print(f"Manual crop required:")
- for result in results:
- if result[1][5] == False:
- print(f"{result[0]}")
- print("done")
- # if not ANALYZE_ONLY:
- # args = [(filename, alts + [rotate], shear, crop_offset, crop_size)
- # for filename, (rotate, alts, shear, crop_offset, crop_size)
- # in results]
- # pool.starmap(rotate_and_crop_with_gimp, args)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement