Bakkin Auto Straighten GPU 2023-12-28

#!/usr/bin/env python
# Usage: DEBUG=1 python rotate_angles.py *.png
import os
import sys
import multiprocessing
import subprocess

import numpy as np
import matplotlib.pyplot as plt
import skimage
import cv2
import cv2.cuda
import cv2.cuda as cuda
from scipy import ndimage, signal
from scipy.optimize import linprog
from skimage import transform
from skimage.exposure import rescale_intensity
from skimage.io import imsave
from skimage.transform import radon, rotate
from skimage.color import rgb2gray, rgba2rgb, gray2rgb
import time

MAX_ANGLE = 1.5
ANGLE_STEP = 0.001  # Don't go under .01 if not using GPU
PROMINENCE = 250
MAX_BORDER = 200
IGNORE_BORDER = 150

SIDE_MARGIN = int(os.getenv("SIDE_MARGIN", 270))
TOP_MARGIN = int(os.getenv("TOP_MARGIN", 290))

DEBUG = os.getenv("DEBUG") is not None
ANALYZE_ONLY = os.getenv("ANALYZE_ONLY") is not None
YON_KOMA = os.getenv("YON_KOMA") is not None
NOCROP = os.getenv("NOCROP") is not None
NOROTATE = os.getenv("NOROTATE") is not None
GIMP_PATH = os.getenv("GIMP_PATH", "C:/Program Files/GIMP 2/bin/gimp-console-2.10.exe")
USE_GPU = True


def smart_remove_margin(radon_slice, img_height, img_width):
    """ Try to find the page margins, and remove them from calculations
        Margins must be >70% img width apart, >10% img height prominence,
        and at least 90% the max white value on the radon transform slice.
        FOR THIS TO WORK, SCANNER LID MUST BE BLACK!."""
    margin_peaks, _ = signal.find_peaks(
        radon_slice, distance=0.7 * img_width,
        prominence=0.1 * img_height, height=0.9 * max(radon_slice))

    if margin_peaks.size == 0:
        return radon_slice

    elif margin_peaks.size == 1:
        if margin_peaks[0] < (radon_slice.size / 2):
            return radon_slice[margin_peaks[0]:]
        else:
            return radon_slice[: margin_peaks[0]]

    else:
        return radon_slice[margin_peaks[0]: margin_peaks[-1]]


def rotate_img_cuda(img: cuda.GpuMat, angle):
    angle = np.deg2rad(angle)
    (w, h) = img.size()
    center = w // 2
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    R = np.array([[cos_a, sin_a, -center * (cos_a + sin_a - 1)],
                  [-sin_a, cos_a, -center * (cos_a - sin_a - 1)],
                  [0, 0, 1]])

    return cv2.cuda.warpPerspective(src=img,
                                    dsize=(w, h),
                                    M=R,
                                    flags=cv2.INTER_LINEAR)


def _find_variances(img, angles):
    """
    Find variance of second derivative of radon transform at specified angles

    Parameters
    ----------
    img :
        a single channel float32 numpy array
    angles :
        list of angles to try

    Returns
    -------
    variances_gpu :
        Variance of second derivative of radon transform at specified angles.
    """

    # This bit is copied from skimage.transform.radon
    # Basically rotate an image then finding its projection onto one dimension (aka summing up the columns)
    diagonal = int(np.ceil(np.sqrt(2) * max(img.shape)))
    pad = [diagonal - s for s in img.shape]
    new_center = [(s + p) // 2 for s, p in zip(img.shape, pad)]
    old_center = [s // 2 for s in img.shape]
    pad_before = [nc - oc for oc, nc in zip(old_center, new_center)]
    pad_width = [(pb, p - pb) for pb, p in zip(pad_before, pad)]

    # Upload image to GPU
    img_gpu = cv2.cuda.GpuMat()
    img_gpu.upload(img)

    # Pad image so no pixels are lost when rotating
    img_gpu = cv2.cuda.copyMakeBorder(src=img_gpu,
                                      top=pad_width[0][0],
                                      bottom=pad_width[0][1],
                                      left=pad_width[1][0],
                                      right=pad_width[1][1],
                                      borderType=cv2.BORDER_CONSTANT,
                                      value=[0])

    # Output variable
    variances_gpu = np.zeros(len(angles), dtype=img.dtype)

    # rotate images and reduce
    for i, angle in enumerate(angles):
        img_gpu_rot = rotate_img_cuda(img_gpu, -angle)

        # Sum up columns into 1D vector
        img_gpu_rot_sum = cv2.cuda.reduce(img_gpu_rot, dim=0, reduceOp=cv2.REDUCE_SUM)

        # Find laplacian sum vector
        laplace_filter = cv2.cuda.createLaplacianFilter(srcType=img_gpu_rot_sum.type(),
                                                        dstType=img_gpu_rot_sum.type())
        img_gpu_rot_sum_laplaced = laplace_filter.apply(img_gpu_rot_sum)

        # Find mean and std deviation of laplaced vector
        img_gpu_rot_sum_laplaced_stddev = cv2.cuda.meanStdDev(img_gpu_rot_sum_laplaced)

        # Variance = std dev ^ 2
        variances_gpu[i] = img_gpu_rot_sum_laplaced_stddev.download()[0][1] ** 2

    return variances_gpu


def _find_best_angle_in_range(img, angles, filename=None):
    img = skimage.util.invert(img)
    img = img[IGNORE_BORDER:-IGNORE_BORDER, IGNORE_BORDER:-IGNORE_BORDER]

    if USE_GPU:
        variances = _find_variances(img, angles)
    else:
        # Take the radon transformation of the image against those angles.
        # Note circle=False: we don't want any image cropping
        radons = radon(img, angles, circle=False, preserve_range=False)

        # Take the second derivative of the radon transform for each angle
        laplacians = [ndimage.laplace(radons[:, i])
                      for i in range(angles.size)]

        # Turns out chopping off the edges works too. Commenting "smart" way out.
        # h, w = img.shape
        # laplacians = [ndimage.laplace(smart_remove_margin(radons[:, i], h, w))
        #              for i in range(angles.size)]

        # Find the variance of the second derivative of each angle. This is
        # a single number showing how "straight" some edges in the image are.
        variances = np.array([np.var(laplacian) for laplacian in laplacians])

    # Find the peaks in those variance values among the angles. Ideally
    # there should be one peak; but things are not ideal. There are generally
    # two more peaks: one alinging the page border (which might not be the
    # panel borders, and one at 0 because the image is sharpest not rotated.
    # For tankoubon pages, there is normally only one sharp page border, and
    # this peak can generally be filtered with prominence.
    peaks = np.array([])
    prominence = PROMINENCE
    while peaks.size == 0 and prominence >= 10:
        peaks, _ = signal.find_peaks(variances, distance=3,
                                     prominence=prominence)
        prominence /= 2

    # The pixel grid peak, if present along with other peaks, is removed.
    peak_at_zero = [i for i, idx in enumerate(peaks)
                    if idx == int((angles.size - 1) / 2)]
    if peak_at_zero and peaks.size > 1:
        peaks = np.delete(peaks, peak_at_zero[0])

    # Debugging plots
    if DEBUG and filename:
        plt.clf()
        plt.plot(angles, variances)
        plt.plot(angles[peaks], variances[peaks], "xm")
        plt.grid(visible=True, which="both")
        [plt.annotate(f"{angle:.3f}\u00b0", (angle, value),
                      textcoords="offset points", xytext=(0, 5), ha="center")
         for angle, value in zip(angles[peaks], variances[peaks])]
        plt.savefig(filename + "-straightness.png")

    # The final angle to rotate is then returned.
    if peaks.size > 0:
        best_peak = max(peaks, key=lambda i: variances[i])
        best_angle = angles[best_peak]
        alternatives = angles[[pk for pk in peaks if pk != best_peak]].tolist()
    else:
        best_angle = 0
        alternatives = []

    if DEBUG:
        print(f"> {filename}: {angles[peaks]} - "
              f"{variances[peaks].round(decimals=2)}",
              file=sys.stderr)
    return best_angle, alternatives


def find_best_angle(img, filename=None):
    if NOROTATE:
        return 0, [], 0, 0

    # List all the possible rotation angles to be tried
    angles = np.arange(-MAX_ANGLE, MAX_ANGLE + ANGLE_STEP, ANGLE_STEP)
    angles = angles.round(decimals=3)
    rotate_angle, alts = _find_best_angle_in_range(img, angles, filename)

    # Do that again, but for angles around 90 degrees
    angles = np.arange(90 - MAX_ANGLE, 90 + MAX_ANGLE + ANGLE_STEP, ANGLE_STEP)
    angles = angles.round(decimals=3)
    skew_angle, _ = _find_best_angle_in_range(img, angles, f"{filename}-skew")
    shear_angle = skew_angle - 90 - rotate_angle

    # Find how many pixels to shear transform. GIMP doesn't take an angle.
    img = rotate(img, -rotate_angle, resize=True, order=3)
    shear_pixels = img.shape[1] * np.tan(shear_angle * np.pi / 180)

    if DEBUG:
        print(f"> {filename} shear_pixels: {shear_pixels}")
    return rotate_angle, alts, shear_angle, shear_pixels


def find_constraint_matrices_old(convex_polygon, R=0.7):
    X = convex_polygon[:, 0]
    Y = convex_polygon[:, 1]
    A0 = np.column_stack((np.roll(Y, -1) - Y, X - np.roll(X, -1)))
    B0 = np.sum(np.multiply(A0, convex_polygon), axis=1).reshape(A0.shape[0], 1)

    A1 = (A0 * np.matrix([[0, 0, R, R], [0, 1, 0, 1]]))
    A1 = np.reshape(A1, (A1.size, 1))

    A0 = np.column_stack((np.repeat(A0, 4, axis=0), A1))
    B0 = np.repeat(B0, 4, axis=0)
    return A0, B0


def find_constraint_matrices(convex_polygon, R=0.7):
    """
    Gets constraint matrices for a linear programming problem that finds the
    largest rectangle with aspect ratio width:height = R:1 that fits inside a convex
    polygon.

    Parameters
    ----------
    convex_polygon :
        a (N x 2) numpy array which contains points describing a convex polygon.
        The Directio must be counter-clockwise
    R :
        Ratio of width to height of rectangle to find

    Returns
    -------
    A :
        Constraint matrix A_ub
    b :
        Constraint vector b_ub
    """
    NUM_VARIABLES = 3  # x, y, and h
    CPS = 4  # Constraints Per Segment, one for each point of the rectangle

    num_points = len(convex_polygon)  # number of points in convex hull polygon
    X = convex_polygon[:, 0]  # list of x coordinates
    Y = convex_polygon[:, 1]  # list of y coordinates

    y_delta = np.roll(Y, -1) - Y  # y_{n+1} - y_{n}
    x_delta = np.roll(X, -1) - X  # x_{n+1} - x_{n}

    # Create vector b
    b = y_delta * X - x_delta * Y
    b = np.repeat(b, CPS)  # replicate 3 times each value in b (4 total copies)
    b = b.reshape((-1, 1))  # Convert B into column vector

    # Create array A
    A = np.zeros((num_points * CPS, NUM_VARIABLES))
    A[:, 0] = np.repeat(y_delta, CPS)  # The x coefficient is the same for all 4 points
    A[:, 1] = np.repeat(-1 * x_delta, CPS)  # The y coefficient is the same for all 4 points
    A[1::CPS, 2] = -1 * x_delta  # stride index starting at 1, constraints for top left point
    A[2::CPS, 2] = R * y_delta  # constraints for bottom right point
    A[3::CPS, 2] = R * y_delta - x_delta  # constraints for top right point
    return A, b


def _find_crop_using_page_borders(img, filename=None):
    img_shape_orig = img.shape
    img_area = img.shape[0] * img.shape[1]
    img = skimage.img_as_ubyte(img)
    if DEBUG and filename:
        img_orig = img.copy()

    # Roughly level the page
    black_pt, white_pt = np.percentile(img, (20, 50))
    img = rescale_intensity(img, in_range=(black_pt, white_pt))
    if DEBUG:
        print(f"> levels: ({black_pt}, {white_pt})")

    # Surface-blur to smooth out noises
    img = cv2.bilateralFilter(img, 9, 150, 150)

    # Convert to binary image
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    #                            cv2.THRESH_BINARY, 115, 4)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
    img = cv2.medianBlur(img, 11)

    # Extract contours from edges. The largest of contours will be our page
    # THIS DOESN'T WORK FOR MANY PAGES WITH ARTWORK TO THE EDGES
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)

    # Approximate a polygon (most of the time a quadrilateral) from the contour
    # max epsilon (error distance) is 0.01% of image width (basically w/e)
    polys = [cv2.approxPolyDP(contour, 0.0001 * img.shape[1], closed=True)
             for contour in contours]
    page_poly = max(polys, key=lambda p: cv2.contourArea(cv2.convexHull(p)))
    convex_poly = cv2.convexHull(page_poly)
    poly_area = cv2.contourArea(convex_poly)

    if DEBUG and filename:
        img_c = gray2rgb(img_orig)
        img_c = cv2.drawContours(img_c, contours, -1, (255, 0, 255), 1)
        img_c = cv2.drawContours(img_c, [page_poly], -1, (0, 0, 255), 5)
        img_c = cv2.drawContours(img_c, [convex_poly], -1, (255, 0, 0), 6)
        imsave(f"{filename}-crop.jpg", img_c)

    if DEBUG:
        print(f"> poly_area %: {100 * poly_area / img_area}%")

    # Only accept if the page area found is > 80% of image size
    if poly_area / img_area < 0.8:
        print(f"NO CROP FOUND FOR {filename}, EDIT MANUALLY.")
        return (0, 0), (img_shape_orig[1], img_shape_orig[0]), False

    # Run the polygon through a linear optimizer to find the max rectangle
    # with fixed 7:10 ratio that fits inside it
    poly_matrix = convex_poly.reshape(convex_poly.shape[0],
                                      convex_poly.shape[2])

    R = 0.7
    A, b = find_constraint_matrices(poly_matrix, R)

    bounds = [(0, img.shape[1]), (0, img.shape[0]), (0, img.shape[0])]
    solution = linprog(np.matrix('0; 0; -1'), A, b, bounds=bounds)
    x, y, h = np.rint(solution.x).astype(int)
    w = int(np.rint(h * R))

    if DEBUG and filename:
        img_c = cv2.rectangle(img_c, (x, y), (x + w, y + h), (0, 255, 0), 10)
        imsave(f"{filename}-crop.jpg", img_c)

    return (x, y), (w, h), True


def _find_crop_4koma(img, filename=None):
    h_grad = np.gradient(np.sum(img, axis=0))
    v_grad = np.gradient(np.sum(img, axis=1))
    find_peakz = lambda x, height: \
        signal.find_peaks(x, height=height, distance=3)[0]

    try:
        panel_left = [x for x in find_peakz(-h_grad, 200) if x > MAX_BORDER][0]
        panel_right = [x for x in find_peakz(h_grad, 200)
                       if x < img.shape[1] - MAX_BORDER][-1]
        panel_top = [x for x in find_peakz(-v_grad, 200) if x > MAX_BORDER][0]
        panel_bottom = [x for x in find_peakz(v_grad, 200)
                        if x < img.shape[0] - MAX_BORDER][-1]
    except:
        print(f"{filename} crop: failed to find panel borders")
        return (0, 0), (img.shape[1], img.shape[0]), False

    if DEBUG:
        print(f"> {filename} panels: ({panel_top}, {panel_left}, "
              f"{panel_bottom}, {panel_right})")

    # if panel_left > MAX_BORDER or panel_top > MAX_BORDER or\
    #    panel_bottom < img.shape[1] - MAX_BORDER or\
    #    panel_right < img.shape[0] - MAX_BORDER:
    #    print(f"{filename} crop: panel borders don't look right...")
    #    return (0, 0), (img.shape[1], img.shape[0])

    crop_left = panel_left - SIDE_MARGIN
    crop_right = panel_right + SIDE_MARGIN
    # v_center = (panel_bottom + panel_top) / 2
    crop_width = crop_right - crop_left
    crop_height = crop_width * 10 / 7
    # crop_top = v_center - (crop_height / 2)
    crop_top = panel_top - TOP_MARGIN

    if DEBUG and filename:
        img_c = gray2rgb(skimage.img_as_ubyte(img))
        panels = np.array([[panel_left, panel_top],
                           [panel_left, panel_bottom],
                           [panel_right, panel_bottom],
                           [panel_right, panel_top]])
        img_c = cv2.drawContours(img_c, [panels], -1, (255, 0, 0), 6)
        imsave(f"{filename}-crop.jpg", img_c)

    return (crop_left, int(round(crop_top))), \
           (crop_width, int(round(crop_height))), True


def find_crop(img, rotate_angle, shear_angle, filename=None):
    if abs(rotate_angle) > 0:
        img = rotate(img, -rotate_angle, resize=True, order=3)
        tan_shear_angle = np.tan(-shear_angle * np.pi / 180)
        transform_matrix = np.array(
            [[1, 0, 0],
             [tan_shear_angle, 1, -img.shape[1] * tan_shear_angle / 2],
             [0, 0, 1]])
        img = transform.warp(img, inverse_map=transform_matrix, order=3)
    if NOCROP:
        return (0, 0), (img.shape[1], img.shape[0]), False

    if DEBUG and filename:
        img_c = gray2rgb(skimage.img_as_ubyte(img))
        imsave(f"{filename}-crop.jpg", img_c)

    if YON_KOMA:
        return _find_crop_4koma(img, filename)
    return _find_crop_using_page_borders(img, filename)


GIMP_BATCH = """
(let* ((input-file "{infile}")
       (output-file "{outfile}")
       (rotates '({rotates}))
       (shear-pixels {shear})
       (image (car (gimp-file-load RUN-NONINTERACTIVE input-file input-file)))
       (orig-layer (car (gimp-image-get-active-layer image)))
       (rotate-layer (lambda (theta)
            (let ((layer-name (string-append (number->string theta) " deg"))
                  (new-layer (car (gimp-layer-copy orig-layer FALSE)))
                  (theta-rads (* theta 0.0174533)))
                (gimp-item-set-name new-layer layer-name)
                (gimp-image-insert-layer image new-layer 0 -1)
                (gimp-context-set-interpolation INTERPOLATION-NOHALO)
                (gimp-item-transform-rotate new-layer theta-rads TRUE 0 0))))
       (rotated-layers (reverse (map rotate-layer rotates)))
       (top-layer (if (null? rotated-layers) orig-layer (caar rotated-layers)))
       (width (car (gimp-drawable-width top-layer)))
       (height (car (gimp-drawable-height top-layer)))
       (offset-x (car (gimp-drawable-offsets top-layer)))
       (offset-y (cadr (gimp-drawable-offsets top-layer))))
    (if (not (null? rotated-layers))
        (gimp-image-remove-layer image orig-layer)
        '())
    (if (= shear-pixels 0)
        top-layer
        (car (gimp-item-transform-shear
                top-layer ORIENTATION-VERTICAL shear-pixels)))
    (gimp-image-resize image width height (- offset-x) (- offset-y))
    (gimp-image-resize image {width} {height} (- {offset_x}) (- {offset_y}))
    (file-psd-save RUN-NONINTERACTIVE image top-layer
                   output-file output-file 1 0)
    (gimp-image-delete image))
"""


def rotate_and_crop_with_gimp(filename, rotates, shear, crop_offset, crop_size):
    outfile = f"{os.path.splitext(filename)[0]}.psd"
    str_rotates = " ".join([str(a) for a in rotates if abs(a) > 0.0001])
    gimp_batch_cmd = GIMP_BATCH.format(
        infile=filename, outfile=outfile, rotates=str_rotates, shear=shear,
        offset_x=crop_offset[0], offset_y=crop_offset[1],
        width=crop_size[0], height=crop_size[1])
    gimp_cmd = [GIMP_PATH, "-i", "-d", "-f", "-n",
                "-b", gimp_batch_cmd, "-b", "(gimp-quit 0)"]
    subprocess.run(gimp_cmd)


def process_file(filename):
    start = time.time()
    # Read image file and convert to grayscale
    img = plt.imread(filename)
    if img.ndim == 3 and img.shape[2] == 4:
        img = rgba2rgb(img)
    if img.ndim == 3 and img.shape[2] == 3:
        img = rgb2gray(img)

    rotate_angle, alts, shear_angle, shear_pixels = \
        find_best_angle(img, filename)

    crop_offset, crop_size, crop_found = find_crop(img, rotate_angle, shear_angle, filename)

    print(f"{filename}: rotate {rotate_angle:.3f}\u00b0 clockwise." +
          (f" Alternatives: {alts}." if alts else "") +
          f" Shear: {shear_angle:.3f}\u00b0" +
          f" Crop: {crop_offset}, {crop_size}.")

    if not ANALYZE_ONLY:
        rotate_and_crop_with_gimp(
            filename, alts + [rotate_angle], shear_pixels,
            crop_offset, crop_size)

    runtime = time.time() - start
    print(f"Runtime for {filename}: {runtime}s")
    return rotate_angle, alts, shear_angle, crop_offset, crop_size, crop_found


if __name__ == "__main__":
    files = sys.argv[1:]

    # IMO it's easier to just edit this list instead of passing arguments.
    files = ["work/3.png",
             "work/4.png",
             "work/5.png",
             "work/6.png",
             "work/7.png",
             "work/8.png",
             "work/9.png",
             "work/10.png",
             "work/11.png",
             "work/12.png",
             "work/13.png",
             "work/14.png",
             "work/15.png",
             "work/16.png",
             "work/17.png",
             "work/18.png"]

    print(f"> files: {files}, ANALYZE_ONLY={ANALYZE_ONLY}, "
          f"YON_KOMA={YON_KOMA}, NOROTATE={NOROTATE}, "
          f"SIDE_MARGIN={SIDE_MARGIN}, TOP_MARGIN={TOP_MARGIN}, "
          f"NOCROP={NOCROP}, GIMP_PATH={GIMP_PATH}")

    prog_start = time.time()

    if USE_GPU:
        # I think GPU is getting thrashed when using multicore so just do it sequentially.
        results = []
        for file in files:
            results.append(process_file(file))
        results = list(zip(files, results))
    else:
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            results = list(zip(files, pool.map(process_file, files)))

    total_runtime = time.time() - prog_start
    print(f"{total_runtime=}s")

    [print(f"{filename}: rotate {rotate:.3f}\u00b0 clockwise." +
           (f" Alternatives: {alts}." if alts else "") +
           f" Shear: {shear:.3f}\u00b0" +
           f" Crop: {crop_offset}, {crop_size}." +
           f" Crop Found: {crop_found}.")
     for filename, (rotate, alts, shear, crop_offset, crop_size, crop_found)
     in results]

    print(f"Manual crop required:")
    for result in results:
        if result[1][5] == False:
            print(f"{result[0]}")

    print("done")

    # if not ANALYZE_ONLY:
    #    args = [(filename, alts + [rotate], shear, crop_offset, crop_size)
    #            for filename, (rotate, alts, shear, crop_offset, crop_size)
    #            in results]
    #    pool.starmap(rotate_and_crop_with_gimp, args)