%%capture
!pip install kornia
!pip install kornia-rs
Image Alignment by Homography Optimization
import io
import requests
def download_image(url: str, filename: str = "") -> str:
= url.split("/")[-1] if len(filename) == 0 else filename
filename # Download
= io.BytesIO(requests.get(url).content)
bytesio # Save file
with open(filename, "wb") as outfile:
outfile.write(bytesio.getbuffer())
return filename
"https://github.com/kornia/data/raw/main/homography/H1to2p")
download_image("https://github.com/kornia/data/raw/main/homography/img1.ppm")
download_image("https://github.com/kornia/data/raw/main/homography/img2.ppm") download_image(
'img2.ppm'
Import needed libraries
import os
from typing import List
import kornia as K
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from kornia.geometry import resize
# computer vision libs :D
Define the hyper parameters to perform the online optimisation
float = 1e-3 # the gradient optimisation update step
learning_rate: int = 100 # the number of iterations until convergence
num_iterations: int = 6 # the total number of image pyramid levels
num_levels: float = 1e-8 # the optimisation error tolerance
error_tol:
int = 100 # print log every N iterations
log_interval: = K.utils.get_cuda_or_mps_device_if_available()
device print("Using ", device)
Using cpu
Define a container to hold the homography as a nn.Parameter
so that cen be used by the autograd within the torch.optim
framework.
We initialize the homography with the identity transformation.
class MyHomography(nn.Module):
def __init__(self) -> None:
super().__init__()
self.homography = nn.Parameter(torch.Tensor(3, 3))
self.reset_parameters()
def reset_parameters(self):
self.homography)
torch.nn.init.eye_(
def forward(self) -> torch.Tensor:
return torch.unsqueeze(self.homography, dim=0) # 1x3x3
Read the images and the ground truth homograpy to convert to tensor. In addition, we normalize the homography in order to smooth the gradiens during the optimisation process.
= K.io.load_image("img1.ppm", K.io.ImageLoadType.RGB32, device=device)[None, ...]
img_src: torch.Tensor = K.io.load_image("img2.ppm", K.io.ImageLoadType.RGB32, device=device)[None, ...]
img_dst: torch.Tensor print(img_src.shape)
print(img_dst.shape)
= np.loadtxt("H1to2p")
dst_homo_src_gt = torch.from_numpy(dst_homo_src_gt)[None].float().to(device)
dst_homo_src_gt print(dst_homo_src_gt.shape)
print(dst_homo_src_gt)
= img_src.shape[-2:]
height, width
# warp image in normalized coordinates
= K.geometry.normal_transform_pixel(height, width, device=device)
normal_transform_pixel: torch.Tensor
= normal_transform_pixel @ dst_homo_src_gt @ torch.inverse(normal_transform_pixel)
dst_homo_src_gt_norm: torch.Tensor
= K.geometry.homography_warp(img_src, torch.inverse(dst_homo_src_gt_norm), (height, width))
img_src_to_dst_gt: torch.Tensor
= K.utils.tensor_to_image(K.color.bgr_to_rgb(img_src))
img_src_vis: np.ndarray = K.utils.tensor_to_image(K.color.bgr_to_rgb(img_dst))
img_dst_vis: np.ndarray = K.utils.tensor_to_image(K.color.bgr_to_rgb(img_src_to_dst_gt)) img_src_to_dst_gt_vis: np.ndarray
torch.Size([1, 3, 640, 800])
torch.Size([1, 3, 640, 800])
torch.Size([1, 3, 3])
tensor([[[ 8.7977e-01, 3.1245e-01, -3.9431e+01],
[-1.8389e-01, 9.3847e-01, 1.5316e+02],
[ 1.9641e-04, -1.6015e-05, 1.0000e+00]]])
Show the source image, the target and the source image warped to the target using the ground truth homography transformation.
= plt.subplots(1, 3, sharey=True)
fig, (ax1, ax2, ax3) 15)
fig.set_figheight(15)
fig.set_figwidth(
ax1.imshow(img_src_vis)"Source image")
ax1.set_title(
ax2.imshow(img_dst_vis)"Destination image")
ax2.set_title(
ax3.imshow(img_src_to_dst_gt_vis)"Source to Destination image")
ax3.set_title( plt.show()
Initialize the homography warper and pass the parameters to the torch.optim.Adam
optimizer to perform an online gradient descent optimisation to approximate the mapping transformation between the two images.
# create homography parameters
= MyHomography().to(device)
dst_homo_src
# create optimizer
= optim.Adam(dst_homo_src.parameters(), lr=learning_rate)
optimizer
# send data to device
= img_src.to(device), img_dst.to(device) img_src, img_dst
In order to perform the online optimisation, we will apply a know fine-to-coarse strategy. For this reason, we precompute a gaussian pyramid from each image with a certain number of levels.
### compute Gaussian Pyramid
def get_gaussian_pyramid(img: torch.Tensor, num_levels: int) -> List[torch.Tensor]:
r"""Utility function to compute a gaussian pyramid."""
= []
pyramid
pyramid.append(img)for _ in range(num_levels - 1):
= pyramid[-1]
img_curr = K.geometry.pyrdown(img_curr)
img_down
pyramid.append(img_down)return pyramid
# compute the gaussian pyramids
= get_gaussian_pyramid(img_src, num_levels)
img_src_pyr: List[torch.Tensor] = get_gaussian_pyramid(img_dst, num_levels) img_dst_pyr: List[torch.Tensor]
Main optimization loop
Define the loss function to minimize the photometric error at each pyramid level:
$ L = |I_{ref} - (I_{dst}, H_{ref}^{dst}))|$
def compute_scale_loss(
img_src: torch.Tensor,
img_dst: torch.Tensor,
dst_homo_src: nn.Module,
optimizer: torch.optim,int,
num_iterations: float,
error_tol: -> torch.Tensor:
) assert len(img_src.shape) == len(img_dst.shape), (img_src.shape, img_dst.shape)
# init loop parameters
= torch.tensor(error_tol)
loss_tol = torch.finfo(img_src.dtype).max
loss_prev
for i in range(num_iterations):
# create homography warper
= torch.inverse(dst_homo_src)
src_homo_dst: torch.Tensor
= img_src.shape[-2:]
_height, _width = K.geometry.HomographyWarper(_height, _width)
warper = warper(img_src, src_homo_dst)
img_src_to_dst
# compute and mask loss
= F.l1_loss(img_src_to_dst, img_dst, reduction="none") # 1x3xHxW
loss
= warper(torch.ones_like(img_src), src_homo_dst)
ones = loss.masked_select(ones > 0.9).mean()
loss
# compute gradient and update optimizer parameters
optimizer.zero_grad()
loss.backward() optimizer.step()
Run the main body loop to warp the images from each pyramid level and evaluate the loss to perform gradient update.
# pyramid loop
for iter_idx in range(num_levels):
# get current pyramid data
int = (num_levels - 1) - iter_idx
scale: = img_src_pyr[scale]
img_src = img_dst_pyr[scale]
img_dst
# compute scale loss
compute_scale_loss(img_src, img_dst, dst_homo_src(), optimizer, num_iterations, error_tol)
print(f"Optimization iteration: {iter_idx}/{num_levels}")
# merge warped and target image for visualization
= img_src.shape[-2:]
h, w = K.geometry.HomographyWarper(h, w)
warper = warper(img_src, torch.inverse(dst_homo_src()))
img_src_to_dst = warper(img_src, torch.inverse(dst_homo_src_gt_norm))
img_src_to_dst_gt
# compute the reprojection error
= F.l1_loss(img_src_to_dst, img_src_to_dst_gt, reduction="none")
error print(f"Reprojection error: {error.mean()}")
# show data
= K.utils.tensor_to_image(K.color.bgr_to_rgb(img_src))
img_src_vis = K.utils.tensor_to_image(K.color.bgr_to_rgb(img_dst))
img_dst_vis = 0.65 * img_src_to_dst + 0.35 * img_dst
img_src_to_dst_merge = K.utils.tensor_to_image(K.color.bgr_to_rgb(img_src_to_dst_merge))
img_src_to_dst_vis = K.utils.tensor_to_image(K.color.bgr_to_rgb(img_src_to_dst_gt))
img_src_to_dst_gt_vis
= error.mean(dim=1, keepdim=True)
error_sum = K.utils.tensor_to_image(error_sum)
error_vis
# show the original images at each scale level, the result of warping using
# the homography at moment, and the estimated error against the GT homography.
%matplotlib inline
= plt.subplots(1, 5, sharey=False)
fig, (ax1, ax2, ax3, ax4, ax5) 15)
fig.set_figheight(15)
fig.set_figwidth(
ax1.imshow(img_src_vis)"Source image")
ax1.set_title(
ax2.imshow(img_dst_vis)"Destination image")
ax2.set_title(
ax3.imshow(img_src_to_dst_vis)"Source to Destination image")
ax3.set_title(
ax4.imshow(img_src_to_dst_gt_vis)"Source to Destination image GT")
ax4.set_title(
="gray", vmin=0, vmax=1)
ax5.imshow(error_vis, cmap"Error")
ax5.set_title( plt.show()
Optimization iteration: 0/6
Reprojection error: 0.17220830917358398
Optimization iteration: 1/6
Reprojection error: 0.11565501242876053
Optimization iteration: 2/6
Reprojection error: 0.018368173390626907
Optimization iteration: 3/6
Reprojection error: 0.013175368309020996
Optimization iteration: 4/6
Reprojection error: 0.008068887516856194
Optimization iteration: 5/6
Reprojection error: 0.005315570626407862