Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """This program demonstrates how perspective control could work in Lensfun. It
- must be converted to Lensfun C++ code of course. However, its structure is
- already very similar to how Lensfun is structured.
- Usage:
- python3 perspective_control.py <image files>
- Every image file must have a sidecar JSON file with the extension .json with
- the following content:
- [
- 18,
- 1.534,
- 6,
- [ 8, 59, 289, 229, 8, 289],
- [ 188, 154, 187, 154, 188, 187]
- ]
- Here, 18 is the focal length in mm, 1.534 is the crop factor, 6 is a scaling
- parameter, the first array are the x values of the control points, and the
- second array are the y values. The (x, y) values must be image pixel
- coordinates. There are 4, 6, or 8 points allowed, see
- `initialize_perspective_correction` below.
- """
- import sys, subprocess, os, array, json, multiprocessing, tempfile
- from math import sin, cos, tan, atan, sqrt, copysign, acos, log
- from math import pi as π
def read_ppm(input_file, read_data=True):
    """Read a binary PPM ("P6") image from the file-like object `input_file`.

    The maxval of the PPM must be 255.  If `read_data` is true, the return
    value is ``(image_data, width, height)``, with `image_data` being a flat
    ``array.array("B")`` of interleaved RGB bytes; otherwise, only
    ``(width, height)`` is returned and the pixel data is not read.
    """
    def read_token(skip_trailing_whitespace=True):
        # Return the next whitespace-delimited header token, skipping "#"
        # comment lines.  `skip_trailing_whitespace` must be False for the
        # very last header token (the maxval), because the single whitespace
        # character after it is the only separator before the binary pixel
        # data and must not be over-consumed.
        nonlocal current_character
        token = b""
        while current_character:
            if current_character == b"#":
                # Skip the comment up to (but not including) the end of line.
                while current_character and current_character not in b"\r\n":
                    current_character = input_file.read(1)
            elif current_character.isspace():
                if not token:
                    # Leading whitespace (e.g. the newline terminating a
                    # comment): skip it instead of returning an empty token.
                    # (The original code returned b"" here, which broke PPM
                    # headers containing comments.)
                    current_character = input_file.read(1)
                else:
                    if skip_trailing_whitespace:
                        while current_character.isspace():
                            current_character = input_file.read(1)
                    return token
            else:
                token += current_character
                current_character = input_file.read(1)
        # EOF: return whatever was accumulated instead of an implicit None.
        return token
    current_character = input_file.read(1)
    magic_number = read_token()
    assert magic_number == b"P6"
    width = int(read_token())
    height = int(read_token())
    max_value = int(read_token(skip_trailing_whitespace=False))
    assert max_value == 255
    if read_data:
        image_data = array.array("B")
        image_data.fromfile(input_file, width * height * 3)
        # byteswap() is a no-op for 1-byte items; kept for symmetry with
        # write_image_file.
        image_data.byteswap()
        return image_data, width, height
    else:
        return width, height
def read_image_file(filepath):
    """Decode the image `filepath` with ImageMagick's "convert" and return the
    result of `read_ppm` on its PPM output: ``(image_data, width, height)``.

    Fix: the original opened ``os.devnull`` for stderr and never closed the
    handle, leaking one file descriptor per call; ``subprocess.DEVNULL``
    avoids the leak.
    """
    convert_process = subprocess.Popen(
        ["convert", filepath, "-set", "colorspace", "RGB", "ppm:-"],
        stderr=subprocess.DEVNULL, stdout=subprocess.PIPE)
    return read_ppm(convert_process.stdout)
def write_image_file(image_data, width, height, filepath):
    """Write the RGB `image_data` (flat ``array.array("B")``) as `filepath`,
    converting from PPM with ImageMagick's "convert".

    The caller's `image_data` is byte-swapped in place for writing and
    restored afterwards (a no-op for 1-byte items, kept for symmetry with
    `read_ppm`).  Fix: the ``delete=False`` temporary file was never removed
    by the original code; it is now deleted after the conversion, and the
    byte order is restored even if "convert" fails.
    """
    image_data.byteswap()
    try:
        with tempfile.NamedTemporaryFile(delete=False) as outfile:
            outfile.write("P6\n{} {}\n255\n".format(width, height).encode("ascii") + image_data.tobytes())
        try:
            subprocess.call(["convert", outfile.name, "-set", "colorspace", "RGB", filepath])
        finally:
            os.remove(outfile.name)
    finally:
        image_data.byteswap()
def intersection(x1, y1, x2, y2, x3, y3, x4, y4):
    """Return the intersection point of the line through (x1, y1)/(x2, y2) and
    the line through (x3, y3)/(x4, y4).

    For parallel lines, signed infinities are returned instead of raising.
    Fix: the fallback used ``math.copysign`` although only
    ``from math import copysign`` is in scope, so parallel lines raised
    ``NameError`` instead of returning infinities.
    """
    A = x1 * y2 - y1 * x2
    B = x3 * y4 - y3 * x4
    C = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    numerator_x = (A * (x3 - x4) - B * (x1 - x2))
    numerator_y = (A * (y3 - y4) - B * (y1 - y2))
    try:
        return numerator_x / C, numerator_y / C
    except ZeroDivisionError:
        # Parallel lines: point at infinity, keeping the direction's sign.
        return copysign(float("inf"), numerator_x), copysign(float("inf"), numerator_y)
- # In the following, I refer to these two rotation matrices: (See
- # <http://en.wikipedia.org/wiki/Rotation_matrix#In_three_dimensions>.)
- #
- # ⎛ 1 0 0 ⎞
- # Rₓ(ϑ) = ⎜ 0 cos ϑ - sin ϑ ⎟
- # ⎝ 0 sin ϑ cos ϑ ⎠
- #
- # ⎛ cos ϑ 0 sin ϑ ⎞
- # R_y(ϑ) = ⎜ 0 1 0 ⎟
- # ⎝- sin ϑ 0 cos ϑ ⎠
- #
- # ⎛ cos ϑ - sin ϑ 0 ⎞
- # R_z(ϑ) = ⎜ sin ϑ cos ϑ 0 ⎟
- # ⎝ 0 0 1 ⎠
def rotate_ρ_δ(ρ, δ, x, y, z):
    """Rotate the point (x, y, z) by the combined matrix Rₓ(δ) · R_y(ρ) and
    return the rotated coordinates as a 3-tuple."""
    sin_ρ, cos_ρ = sin(ρ), cos(ρ)
    sin_δ, cos_δ = sin(δ), cos(δ)
    # Rows of the product matrix Rₓ(δ) · R_y(ρ).
    matrix = (
        (cos_ρ, 0, sin_ρ),
        (sin_ρ * sin_δ, cos_δ, - cos_ρ * sin_δ),
        (- sin_ρ * cos_δ, sin_δ, cos_ρ * cos_δ),
    )
    point = (x, y, z)
    return tuple(sum(coefficient * coordinate
                     for coefficient, coordinate in zip(row, point))
                 for row in matrix)
def rotate_ρ_δ_ρh(ρ, δ, ρ_h, x, y, z):
    """Rotate the point (x, y, z) by the combined matrix
    R_y(ρₕ) · Rₓ(δ) · R_y(ρ) and return the rotated coordinates."""
    sin_ρ, cos_ρ = sin(ρ), cos(ρ)
    sin_δ, cos_δ = sin(δ), cos(δ)
    sin_h, cos_h = sin(ρ_h), cos(ρ_h)
    # First row of the product matrix.
    a11 = cos_ρ * cos_h - sin_ρ * cos_δ * sin_h
    a12 = sin_δ * sin_h
    a13 = sin_ρ * cos_h + cos_ρ * cos_δ * sin_h
    # Second row (does not depend on ρₕ).
    a21 = sin_ρ * sin_δ
    a22 = cos_δ
    a23 = - cos_ρ * sin_δ
    # Third row.
    a31 = - cos_ρ * sin_h - sin_ρ * cos_δ * cos_h
    a32 = sin_δ * cos_h
    a33 = - sin_ρ * sin_h + cos_ρ * cos_δ * cos_h
    return (a11 * x + a12 * y + a13 * z,
            a21 * x + a22 * y + a23 * z,
            a31 * x + a32 * y + a33 * z)
def calculate_angles(x, y, f, normalized_in_millimeters):
    """Determine the three rotation angles of the perspective correction.

    `x` and `y` are the coordinates of the 4, 6, or 8 control points,
    already normalised to the model coordinate system (see `Modifier`).
    `f` is the focal length in mm; `normalized_in_millimeters` converts
    between millimetres and normalised units.

    Returns the tuple (ρ, δ, ρ_h, f_normalized, final_rotation, center_x,
    center_y): the three rotation angles, the focal length in normalised
    units, a flag (-1, 0, or +1) for a final ±90° image rotation, and the
    center of gravity of the control points.
    """
    # Calculate the center of gravity of the control points
    if len(x) == 8:
        center_x = sum(x) / 8
        center_y = sum(y) / 8
    else:
        center_x = sum(x[:4]) / 4
        center_y = sum(y[:4]) / 4
    # Vertical vanishing point: intersection of the two "vertical" lines.
    x_v, y_v = intersection(x[0], y[0], x[1], y[1], x[2], y[2], x[3], y[3])
    if len(x) == 8:
        # The problem is over-determined.  I prefer the fourth line over the
        # focal length.  Maybe this is useful in cases where the focal length
        # is not known.
        x_h, y_h = intersection(x[4], y[4], x[5], y[5], x[6], y[6], x[7], y[7])
        try:
            # The two vanishing points fix the focal length (in normalised
            # units); sqrt raises ValueError if the radicand is negative.
            f_normalized = sqrt(- x_h * x_v - y_h * y_v)
        except ValueError:
            # FixMe: Is really always an f given (even if it is inaccurate)?
            f_normalized = f / normalized_in_millimeters
        else:
            print(f_normalized * normalized_in_millimeters, "mm")
    else:
        f_normalized = f / normalized_in_millimeters
    # Calculate vertex in polar coordinates
    ρ = atan(- x_v / f_normalized)
    δ = π / 2 - atan(- y_v / sqrt(x_v**2 + f_normalized**2))
    if rotate_ρ_δ(ρ, δ, center_x, center_y, f_normalized)[2] < 0:
        # We have to move the vertex into the nadir instead of the zenith.
        δ -= π
    # Sum of the unit vectors from the vanishing point towards the first and
    # third control point; its dominant component tells whether the "vertical"
    # control lines are actually more horizontal than vertical.
    a = normalize(x_v - x[0], y_v - y[0])
    b = normalize(x_v - x[2], y_v - y[2])
    c = (a[0] + b[0], a[1] + b[1])
    if abs(c[0]) > abs(c[1]):
        # The first two lines denote *horizontal* lines, the last two
        # *verticals*.  The final rotation is by final_rotation·π/2.
        final_rotation = copysign(1, ρ)
    else:
        final_rotation = 0
    # Calculate angle of intersection of horizontal great circle with equator,
    # after the vertex was moved into the zenith
    if len(x) == 4:
        # This results from assuming that at y = 0, there is a horizontal in
        # the picture.  y = 0 is arbitrary, but then, so is every other value.
        ρ_h = 0
    else:
        # Project the two control points of the horizontal line onto the
        # sphere and rotate them so that the vertical vanishing point is in
        # the zenith.
        z4 = z5 = f_normalized
        x4_, y4_, z4_ = rotate_ρ_δ(ρ, δ, x[4], y[4], z4)
        x5_, y5_, z5_ = rotate_ρ_δ(ρ, δ, x[5], y[5], z5)
        if y4_ == y5_:
            # The horizontal vanishing point is perfectly to the left/right, so
            # no rotation necessary.  ρ_h is undefined if also y4_ == 0
            # (horizontal great circle is on the equator), but we simply set
            # ρ_h = 0 in this case, too.
            ρ_h = 0
        else:
            # Intersection of the (rotated) horizontal line with the x-z
            # plane, found by linear interpolation at y = 0.
            λ = y4_ / (y4_ - y5_)
            x_h = x4_ + λ * (x5_ - x4_)
            z_h = z4_ + λ * (z5_ - z4_)
            if z_h == 0:
                ρ_h = 0 if x_h > 0 else π
            else:
                ρ_h = π / 2 - atan(x_h / z_h)
            if rotate_ρ_δ_ρh(ρ, δ, ρ_h, center_x, center_y, f_normalized)[2] < 0:
                # We have to move the vertex to the left instead of right
                ρ_h -= π
    return ρ, δ, ρ_h, f_normalized, final_rotation, center_x, center_y
def generate_rotation_matrix(ρ1, δ, ρ2, d):
    """Returns a rotation matrix which combines three rotations.  First, around the
    y axis by ρ₁, then, around the x axis by δ, and finally, around the y axis
    again by ρ₂.  The matrix is returned as a flat 9-tuple in row-major order.

    `d` (between -1 and +1) rescales the total rotation angle: d = 0 leaves it
    unchanged, negative values reduce it, positive values increase it
    (logarithmically compressed).
    """
    # We calculate the quaternion by multiplying the three quaternions for the
    # three rotations (in reverse order).  We use quaternions here to be able
    # to apply the d parameter in a reasonable way.
    s_ρ2, c_ρ2 = sin(ρ2/2), cos(ρ2/2)
    s_δ, c_δ = sin(δ/2), cos(δ/2)
    s_ρ1, c_ρ1 = sin(ρ1/2), cos(ρ1/2)
    w = c_ρ2 * c_δ * c_ρ1 - s_ρ2 * c_δ * s_ρ1
    x = c_ρ2 * s_δ * c_ρ1 + s_ρ2 * s_δ * s_ρ1
    y = c_ρ2 * c_δ * s_ρ1 + s_ρ2 * c_δ * c_ρ1
    z = c_ρ2 * s_δ * s_ρ1 - s_ρ2 * s_δ * c_ρ1
    # Now, decompose the quaternion into θ and the axis unit vector.  Clamp w
    # first: rounding errors may push it marginally outside [-1, 1], which
    # would make acos raise a ValueError.
    w = max(-1.0, min(1.0, w))
    θ = 2 * acos(w)
    if θ > π:
        θ -= 2*π
    s_θ = sin(θ/2)
    if s_θ == 0:
        # Zero (or full-turn) rotation: the axis is undefined and the original
        # code divided by zero here.  The correct result is the identity.
        return 1.0, 0.0, 0.0, \
               0.0, 1.0, 0.0, \
               0.0, 0.0, 1.0
    x, y, z = x / s_θ, y / s_θ, z / s_θ
    # Apply d
    compression = 10
    θ *= d + 1 if d <= 0 else 1 + 1 / compression * log(compression * d + 1)
    if θ > 0.9 * π:
        θ = 0.9 * π
    elif θ < - 0.9 * π:
        θ = - 0.9 * π
    # Compose the quaternion again.
    w = cos(θ/2)
    s_θ = sin(θ/2)
    x, y, z = x * s_θ, y * s_θ, z * s_θ
    # Convert the quaternion to a rotation matrix, see e.g.
    # <https://en.wikipedia.org/wiki/Rotation_matrix#Quaternion>.  This matrix
    # is (if d=0): R_y(ρ2) · Rₓ(δ) · R_y(ρ1)
    A11, A12, A13 = 1 - 2 * y**2 - 2 * z**2, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w
    A21, A22, A23 = 2 * x * y + 2 * z * w, 1 - 2 * x**2 - 2 * z**2, 2 * y * z - 2 * x * w
    A31, A32, A33 = 2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x**2 - 2 * y**2
    return A11, A12, A13, \
           A21, A22, A23, \
           A31, A32, A33
def normalize(x, y):
    """Return the 2-vector (x, y) scaled to unit length.

    For the zero vector, signed infinities are returned instead of raising.
    Fix: the fallback used ``math.copysign`` although only
    ``from math import copysign`` is in scope, so the zero vector raised
    ``NameError`` instead of returning infinities.
    """
    norm = sqrt(x**2 + y**2)
    try:
        return x / norm, y / norm
    except ZeroDivisionError:
        return copysign(float("inf"), x), copysign(float("inf"), y)
class Modifier:
    """First, the coordinate system.  It is right-handed.  The centre of the image
    is the origin of the x-y plane.  x is to the right, y to the bottom, and z
    into the back.  All rotations rotate points in mathematical positive
    direction around the axes.  The axes keep their position (extrinsic
    rotations).

    The general algorithm is as follows: The original image (note that the
    sensor sees everything mirror-inverted, but this is annihilated by the
    back-projection later) is positioned parallel to the x-y plane at z = -
    focal length.  Then, it is central-projected to the full sphere (radius
    equals focal length), so it gets positive z coordinates and can be viewed
    properly from the origin.

    This way, also the vertical vanishing point is on the sphere at (- ρ, 90° -
    δ).  Then, the sphere with the image is rotated around the y axis by ρ so
    that the vanishing point is on the y-z plane with z > 0.  Then, the sphere
    is rotated around the x axis by δ so that the vanishing point is in the
    zenith – where it belongs.

    Next, the intersection of the horizontal control line with the x-z plane is
    determined.  Its rectascension is called 90° - ρₕ.  So, the sphere is
    rotated around the y axis a second time, this time by ρₕ.  Now, the right
    vanishing point truly is to the right.

    The rotated image plane is central-projected back into the sensor plane at
    z = - focal length.  In general, it will be way off the center of the
    image, so that the image must be shifted.  We shift so that the image
    center is a fixed point.  If however the image center is not visible in the
    result, or too close to the pole (i.e. magnification > 10), we use the
    center of gravity of the control points instead.

    Finally, the scaling in the center of the image is kept constant.  One can
    finetune with an additional scaling, but this way, one has a good starting
    point.

    Note that in reality, this whole process is performed backwards because we
    make a lookup in the original picture.  In particular, the rotation by ρ,
    δ, ρₕ becomes - ρₕ, - δ, - ρ.  We still need the forward direction in some
    places.

    And then there is the d parameter which allows finetuning of the
    correction.  It is between -1 and 1, with 0 meaning full correction, -1 no
    correction, and 1 increased rotation by factor 1.25.  For example, if you
    have tilted the camera by 40°, d = -1 means the original tilt of 40°, d = 0
    means no tilt (perfect correction), and d = 1 means a tilt of 10° in the
    opposite direction (over-correction).  This way, one can finetune the slope
    of the lines as well as the aspect ratio.
    """

    def __init__(self, crop_factor, width, height):
        # Only landscape orientation is handled by this demo.
        assert width > height
        self.crop_factor, self.width, self.height = crop_factor, width, height
        aspect_ratio = width / height
        aspect_ratio_correction = sqrt(1 + aspect_ratio**2)
        # Half of the full-frame diagonal (sqrt(36² + 24²)/2 mm), scaled by
        # the crop factor and the sensor's aspect ratio: the millimetre
        # equivalent of one normalised unit.
        self.normalized_in_millimeters = sqrt(36**2 + 24**2) / 2 / aspect_ratio_correction / self.crop_factor
        # Pixel → normalised coordinates and back: the image height is mapped
        # to the range −1 … +1.
        self.norm_scale = 2 / (height - 1)
        self.norm_unscale = (height - 1) / 2
        # Normalised coordinates of the top-left pixel's offset from the
        # image centre.
        self.center_x = width / height
        self.center_y = 1

    def initialize(self, focal_length):
        # Focal length in mm, as used by initialize_perspective_correction.
        self.focal_length = focal_length

    def initialize_perspective_correction(self, x, y, d):
        """Depending on the number of control points given, there are three possible
        modes:

        4 control points: c0 and c1 define one vertical line, c2 and c3 the other.
        The focal length is used to get the proper aspect ratio.

        6 control points: c0 to c3 like above.  c4 and c5 define a horizontal line.
        The focal length is used to get the proper aspect ratio.

        8 control points: c0 to c5 like above.  c6 and c7 define a second
        horizontal line.  The focal length given is discarded and calculated from
        the control points.

        If the lines constructed from the first four control points are more
        horizontal than vertical, in all of the above, "horizontal" and
        "vertical" need to be swapped.

        All control points must be given as pixel coordinates in the original
        image.  In particular, cropping, rotating, shifting, or scaling must be
        switched off or removed by mapping.  For best precision,
        anti-distortion should have been applied already.  Moreover, fisheye
        images must have been transformed into rectilinear of the same focal
        length.  The control points can be written to a sidecar file because
        they describe the perspective correction universally,
        i.e. independently of all corrections that can be activated (scaling,
        anti-distortion, projection transform, etc).

        FixMe: Maybe a static routine should be offered which takes a modifier,
        and an (x, y) pair, and maps them on x and y coordinates as if only the
        anti-distortion and the geometry transformation to rectilinear was
        active.

        The d parameter is supposed to be offered to the user as a slider.  It
        can take values from -1 to +1.  0 denotes the perfect correction.  -1
        is the unchanged image.  +1 is an increase of the tilting angle by 25%.
        """
        if self.focal_length <= 0 or len(x) not in [4, 6, 8] or len(x) != len(y):
            # Don't add any callback
            return False
        # Clamp d into its documented range −1 … +1.
        if d <= - 1:
            d = - 1
        elif d >= 1:
            d = 1

        # x, y = x[:4], y[:4]   # For testing only four points
        # x[0:6] = x[0], x[2], x[1], x[3], x[0], x[1]   # For testing horizontal mode
        # y[0:6] = y[0], y[2], y[1], y[3], y[0], y[1]   # For testing horizontal mode
        # x[6:8] = [x[1], x[3]]   # for faking an explicit second horizontal line
        # y[6:8] = [y[1], y[3]]   # for faking an explicit second horizontal line

        # Convert the control points from pixel to normalised coordinates.
        x = [value * self.norm_scale - self.center_x for value in x]
        y = [value * self.norm_scale - self.center_y for value in y]
        ρ, δ, ρ_h, f_normalized, final_rotation, center_of_control_points_x, center_of_control_points_y = \
            calculate_angles(x, y, self.focal_length, self.normalized_in_millimeters)

        # Transform center point to get shift
        z = rotate_ρ_δ_ρh(ρ, δ, ρ_h, 0, 0, f_normalized)[2]
        # If the image centre is too much outside, or even at infinity, take the
        # center of gravity of the control points instead.
        new_image_center = "control points center" if z <= 0 or f_normalized / z > 10 else "old image center"

        # Generate a rotation matrix in forward direction, for getting the
        # proper shift of the image center.
        A11, A12, A13, \
        A21, A22, A23, \
        A31, A32, A33 = generate_rotation_matrix(ρ, δ, ρ_h, d)
        if new_image_center == "old image center":
            x, y, z = A13 * f_normalized, A23 * f_normalized, A33 * f_normalized
        elif new_image_center == "control points center":
            x, y, z = A11 * center_of_control_points_x + A12 * center_of_control_points_y + A13 * f_normalized, \
                      A21 * center_of_control_points_x + A22 * center_of_control_points_y + A23 * f_normalized, \
                      A31 * center_of_control_points_x + A32 * center_of_control_points_y + A33 * f_normalized
        if z <= 0:
            # The chosen image centre ends up behind the camera: give up.
            return False
        # This is the mapping scale in the image center
        mapping_scale = f_normalized / z
        # Central projection through the origin
        Δa = - x * mapping_scale
        Δb = - y * mapping_scale

        # print(ρ * 180 / π, δ * 180 / π, ρ_h * 180 / π, final_rotation * 90)

        # Finally, generate a rotation matrix in backward (lookup) direction
        A11, A12, A13, \
        A21, A22, A23, \
        A31, A32, A33 = generate_rotation_matrix(- ρ_h, - δ, - ρ, d)
        if final_rotation:
            # This matrix is: R_y(- ρ) · Rₓ(- δ) · R_y(- ρₕ) ·
            # R_z(final_rotation·π/2).  final_rotation is either -1 or +1.
            A11, A12, A13 = final_rotation * A12, - final_rotation * A11, A13
            A21, A22, A23 = final_rotation * A22, - final_rotation * A21, A23
            A31, A32, A33 = final_rotation * A32, - final_rotation * A31, A33
            Δa, Δb = final_rotation * Δb, - final_rotation * Δa

        # The occurrences of mapping_scale here avoid an additional
        # multiplication in the inner loop of perspective_correction_callback.
        self.callback_data = A11 * mapping_scale, A12 * mapping_scale, A13, \
                             A21 * mapping_scale, A22 * mapping_scale, A23, \
                             A31 * mapping_scale, A32 * mapping_scale, A33, \
                             f_normalized, Δa / mapping_scale, Δb / mapping_scale
        return True

    @staticmethod
    def perspective_correction_callback(data, iocoord, offset, count):
        # Map `count` normalised coordinate pairs in `iocoord` (starting at
        # `offset`) through the backward rotation + central projection stored
        # in `data` by initialize_perspective_correction.  Points that end up
        # behind the camera become NaN.
        A11, A12, A13, \
        A21, A22, A23, \
        A31, A32, A33 = data[:9]
        f_normalized = data[9]
        Δa, Δb = data[10:12]
        for i in range(count):
            x = iocoord[offset + i * 2] - Δa
            y = iocoord[offset + i * 2 + 1] - Δb
            z_ = A31 * x + A32 * y + A33 * f_normalized
            if z_ > 0:
                x_ = A11 * x + A12 * y + A13 * f_normalized
                y_ = A21 * x + A22 * y + A23 * f_normalized
                # Central projection through the origin
                stretch_factor = f_normalized / z_
                iocoord[offset + i * 2] = x_ * stretch_factor
                iocoord[offset + i * 2 + 1] = y_ * stretch_factor
            else:
                iocoord[offset + i * 2] = iocoord[offset + i * 2 + 1] = float("nan")

    @staticmethod
    def scaling_callback(data, iocoord, offset, count):
        # Uniformly scale `count` coordinate pairs by data[0].
        scaling = data[0]
        for i in range(count):
            iocoord[offset + i * 2] *= scaling
            iocoord[offset + i * 2 + 1] *= scaling

    def apply_perspective_correction(self, xu, yu, width, height, res):
        # Fill `res` (flat array of length width·height·2) with the source
        # pixel coordinates from which each destination pixel of the
        # width×height grid starting at pixel (xu, yu) must be sampled.
        y = yu * self.norm_scale - self.center_y
        offset = 0   # In the C code, res itself is moved forward
        for j in range(height):
            # Generate one row of normalised destination coordinates, …
            x = xu * self.norm_scale - self.center_x
            for i in range(width):
                res[offset + i * 2] = x
                res[offset + i * 2 + 1] = y
                x += self.norm_scale
            #self.scaling_callback([self.scaling_factor], res, offset, width)
            # … map it through the correction, …
            self.perspective_correction_callback(self.callback_data, res, offset, width)
            # … and convert the results back to pixel coordinates.
            for i in range(width):
                res[offset] = (res[offset] + self.center_x) * self.norm_unscale
                res[offset + 1] = (res[offset + 1] + self.center_y) * self.norm_unscale
                offset += 2
            y += self.norm_scale
def process_image(filepath, d, index):
    """Apply perspective correction to the image `filepath` and write the result.

    `d` is the correction strength slider value (see `Modifier`), `index` is
    only used in the output filename.  The correction parameters (focal
    length, crop factor, scaling, control points) are read from the JSON
    sidecar file next to the image.
    """
    image_data, width, height = read_image_file(filepath)
    # Fix: json.load(open(...)) leaked the sidecar file handle; close it
    # deterministically with a context manager.
    with open(os.path.splitext(filepath)[0] + ".json") as sidecar_file:
        f, crop_factor, shrinking, x, y = json.load(sidecar_file)
    modifier = Modifier(crop_factor, width, height)
    modifier.initialize(f)
    if modifier.initialize_perspective_correction(x, y, d):
        res = array.array("f", width * height * 2 * [0])
        modifier.scaling_factor = shrinking
        modifier.apply_perspective_correction(0, 0, width, height, res)
        destination_image_data = array.array("B", width * height * 3 * [0])
        for y in range(height):
            for x in range(width):
                offset = (width * y + x) * 2
                try:
                    a = int(round(res[offset]))
                    b = int(round(res[offset + 1]))
                except ValueError:
                    # NaN: the looked-up source point lies behind the camera.
                    continue
                # Nearest-neighbour lookup in the source image.
                if 0 <= a < width and 0 <= b < height:
                    destination_offset = (width * y + x) * 3
                    image_offset = (width * b + a) * 3
                    destination_image_data[destination_offset] = image_data[image_offset]
                    destination_image_data[destination_offset + 1] = image_data[image_offset + 1]
                    destination_image_data[destination_offset + 2] = image_data[image_offset + 2]
    else:
        # No correction could be set up: pass the image through unchanged.
        destination_image_data = image_data
    basename, extension = os.path.splitext(filepath)
    write_image_file(destination_image_data, width, height, "{}_{:03}_{}{}".format(basename, index, d, extension))
def main():
    """Process every image file given on the command line in a worker pool."""
    pool = multiprocessing.Pool()
    results = set()
    for filename in sys.argv[1:]:
        results.add(pool.apply_async(process_image, (filename, 0, 0)))
        # for i, d in enumerate(numpy.arange(-1, 1.02, 0.02)):
        #     results.add(pool.apply_async(process_image, (filename, d, i)))
    pool.close()
    pool.join()
    for result in results:
        # Just to get tracebacks if necessary
        result.get()

# Fix: the pool setup must be guarded.  multiprocessing re-imports this module
# in the worker processes (always under the "spawn" start method, e.g. on
# Windows and macOS); without the guard each worker would try to create a pool
# of its own.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement