如何为这些 'simple' 图像编写图像拼接软件？

Question

TLDR: 需要帮助尝试计算 2 个图形之间的重叠区域。

所以我正在尝试拼接这两张图片：

因为我知道我要拼接的图像肯定来自同一图像，所以我觉得我应该能够自己编写代码。使用像 OpenCV 这样的库对我来说有点矫枉过正。

我目前的想法是，我可以通过对每张图片执行以下步骤来简化此任务：

使用 PIL 加载图像
将图像转换为黑白（PIL 图像模式“L”）
[可选：通过肉眼检查将图像裁剪到重叠区域]
创建向量row_sum，它是每行的总和
[可选：log row_sum，以减少我们正在处理的值的大小]
绘制 row_sum 的图。

这样可以把一个（潜在的）(3*2) 维问题——二维图像上每个像素有 3 个 RGB 通道——简化为 (1*2) 维问题，即二维图像上每个像素只有一个灰度值。然后，按行求和再把它简化为一维问题。

我用下面的代码来实现上面的：

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

class Stitcher():
    """Compare two image files through their per-row brightness sums."""

    def combine_2(self, img1, img2):
        """Load both images uncropped and plot them with their log row sums.

        Args:
            img1: Path (or file object) of the first image.
            img2: Path (or file object) of the second image.
        """
        # Manual crop that was tried earlier: get_cropped_bw(img1, 115, img2, 80)
        first, second = self.get_cropped_bw(img1, 0, img2, 0)

        # Sum along axis 1 (one value per image row), then take the log.
        log_sums = [np.log(pixels.sum(1)) for pixels in (first, second)]

        self.plot_4x4(first, second, log_sums[0], log_sums[1])

    def get_cropped_bw(self, img1, img1_keep_from, img2, img2_keep_till):
        """Return both images as mode-"L" arrays, optionally cropped by rows.

        Args:
            img1: First image, opened with ``Image.open``.
            img1_keep_from: Row index from which the first image is kept;
                ``0`` keeps the whole image.
            img2: Second image, opened with ``Image.open``.
            img2_keep_till: Row index up to which the second image is kept;
                ``0`` keeps the whole image.

        Returns:
            A ``(data1, data2)`` tuple of NumPy arrays.
        """
        first = Image.open(img1).convert("L")
        second = Image.open(img2).convert("L")

        data1 = np.array(first)
        if img1_keep_from != 0:
            data1 = data1[img1_keep_from:]

        data2 = np.array(second)
        # A plain [:0] slice would be empty, hence the explicit zero check.
        if img2_keep_till != 0:
            data2 = data2[:img2_keep_till]

        return data1, data2

    def plot_4x4(self, thr1, thr2, row_sum1, row_sum2):
        """Show both images above scatter plots of their row sums.

        Despite the name, the figure is a 2x2 grid: images on the top row,
        row-sum plots on the bottom row, with the y axis shared per row.

        Args:
            thr1: Pixel array of the first image.
            thr2: Pixel array of the second image.
            row_sum1: Row sums of the first image (plotted as black dots).
            row_sum2: Row sums of the second image (plotted as red dots).
        """
        fig, ax = plt.subplots(2, 2, sharey="row", constrained_layout=True)

        for column, pixels in enumerate((thr1, thr2)):
            ax[0, column].imshow(pixels, cmap="Greys")

        for column, (sums, marker) in enumerate(((row_sum1, "k."),
                                                 (row_sum2, "r."))):
            ax[1, column].plot(sums, marker)

        # Only the bottom-left axes carries labels.
        ax[1, 0].set(xlabel="Index Value", ylabel="Row Sum")

        plt.show()


# Paths of the two image parts to compare, written as raw strings so the
# backslashes are kept literally (presumably Windows-style paths relative
# to the working directory - adjust to your own layout).
imgs = (r"combine\imgs\test_image_part_1.jpg",
        r"combine\imgs\test_image_part_2.jpg")

# Build the helper and plot both images together with their log row sums.
s = Stitcher()
s.combine_2(*imgs)

这给了我这张图：

（我添加了那些黄色框，以指示重叠区域。）

这是我卡住的地方。我想准确地找到：

第一个图像黄色框左侧的索引值和
第二张图片黄色框右侧的索引值。

我将重叠区域定义为：第一幅图的结尾与第二幅图的开头相“匹配”的最长范围。另外，如果两者的行和并不完全相同（例如其中一个是另一个按某个因子缩放后的结果），又该如何寻找重叠区域？

我觉得这可能是一个可以使用点积来找到 2 个图之间的相似性的问题？但是我想不出如何实现这个。

Answer 1

我从中获得了比预期更多的乐趣。我使用 opencv 编写了这个，但这只是为了加载和显示图像。其他一切都是用 numpy 完成的，所以将其转换为 PIL 应该不会太困难。

我使用的是暴力匹配器。我还写了一个运行时间短得多的随机启动爬山算法，但我不能保证它一定能找到正确答案，因为梯度空间并不平滑。我本来不打算把它放进代码里，因为它又长又简陋，但如果你确实需要更高的时间效率，我可以稍后再补充。

我在图像中添加了随机裁剪和一些椒盐噪声以测试稳健性。

暴力匹配器的出发点是：我们并不知道两幅图像的哪一部分相互重叠，因此需要让较小的图像在较大的图像上从左到右、从上到下逐个位置滑动比较（类似卷积）。这意味着我们的搜索空间是：

horizontal = small_width + big_width
vertical = small_height + big_height
area = horizontal * vertical

搜索空间会随着图像尺寸的增加而迅速增大。我通过为更大的重叠面积加分来激励算法，但重叠区域内的颜色差异会让它扣掉更多的分数。

以下是该程序执行过程中的一些图片

import cv2
import numpy as np
import random

# randomly snips edges
def randCrop(image, maxMargin):
    """Return a view of ``image`` with a random margin trimmed off each edge.

    Four margins are drawn with ``random.randint(0, maxMargin)`` in the
    order top, bottom, left, right.

    The end of each slice is computed from the image size instead of being
    written as a negative index: ``-0`` is ``0``, so ``image[a:-0]`` would
    select nothing whenever a bottom or right margin of 0 is drawn.

    Args:
        image: Array with at least two dimensions (rows, columns).
        maxMargin: Largest margin, in pixels, that may be removed per edge.

    Returns:
        The cropped slice of ``image`` (not a copy).
    """
    top, bottom, left, right = [random.randint(0, maxMargin) for _ in range(4)]
    height, width = image.shape[:2]

    # Clamp at 0 so an oversized margin yields an empty crop rather than
    # being reinterpreted as a negative (from-the-end) index.
    row_end = max(0, height - bottom)
    col_end = max(0, width - right)
    return image[top:row_end, left:col_end]

# adds noise to image
def saltPepper(image, minNoise, maxNoise):
    """Overwrite randomly chosen pixels of ``image`` with random values.

    The number of writes is drawn from ``[minNoise, maxNoise]``; each write
    picks a column, then a row, then a value in ``[0, 255]``. The same pixel
    may be hit more than once.

    Args:
        image: 2-D array, modified in place.
        minNoise: Smallest number of noise writes.
        maxNoise: Largest number of noise writes.

    Returns:
        The same ``image`` object that was passed in.
    """
    rows, cols = image.shape
    remaining = random.randint(minNoise, maxNoise)
    while remaining > 0:
        col = random.randint(0, cols - 1)
        row = random.randint(0, rows - 1)
        image[row, col] = random.randint(0, 255)
        remaining -= 1
    return image

# evaluate layout
def getScore(one, two):
    """Return how many positions of ``one`` and ``two`` hold different values.

    The comparison is done directly with ``np.not_equal`` instead of
    subtracting in both directions and taking the minimum. The result is the
    same for integer images, but it no longer depends on unsigned wraparound
    and it does not count equal infinities as different (``inf - inf`` is
    NaN, which is non-zero).

    Args:
        one: First array.
        two: Second array, broadcast-compatible with ``one``.

    Returns:
        The number of differing elements.
    """
    return np.count_nonzero(np.not_equal(one, two))

# return 2d random position within range
def randPos(img, big_shape):
    """Pick a random top-left corner at which ``img`` fits in ``big_shape``.

    Args:
        img: 2-D array to be placed.
        big_shape: ``(height, width)`` of the canvas.

    Returns:
        ``[x, y]`` with ``0 <= x <= width - img_width`` and
        ``0 <= y <= height - img_height``.
    """
    canvas_h, canvas_w = big_shape
    img_h, img_w = img.shape

    # Column first, then row: the draw order fixes the random sequence.
    col = random.randint(0, canvas_w - img_w)
    row = random.randint(0, canvas_h - img_h)
    return [col, row]

# overlays small image onto big image
def overlay(small, big, pos):
    """Return a copy of ``big`` with ``small`` pasted at ``pos``.

    Args:
        small: 2-D array to paste.
        big: 2-D canvas; left unmodified.
        pos: ``(x, y)`` of the top-left corner of ``small`` on the canvas.

    Returns:
        A new array holding the canvas with ``small`` written into it.
    """
    left, top = pos[0], pos[1]
    rows, cols = small.shape

    canvas = big.copy()
    canvas[top:top + rows, left:left + cols] = small
    return canvas

# calculates overlap region
def overlap(one, two, pos_one, pos_two):
    """Return the bounds of the rectangle shared by two placed images.

    Args:
        one: First 2-D array.
        two: Second 2-D array.
        pos_one: ``(x, y)`` top-left corner of ``one``.
        pos_two: ``(x, y)`` top-left corner of ``two``.

    Returns:
        ``[left, right, top, bottom]``. When the images do not intersect on
        an axis, the upper bound on that axis is smaller than the lower one.
    """
    h1, w1 = one.shape
    h2, w2 = two.shape
    x1, y1 = pos_one
    x2, y2 = pos_two

    # The shared span starts at the later start and ends at the earlier end.
    return [
        max(x1, x2),
        min(x1 + w1, x2 + w2),
        max(y1, y2),
        min(y1 + h1, y2 + h2),
    ]

# wrapper for overlay + getScore
def fullScore(one, two, pos_one, pos_two, big_empty):
    """Score a placement of ``two`` relative to ``one`` on a shared canvas.

    The score is the area of the overlap rectangle minus twice the value
    returned by ``getScore`` for the two overlapping patches.

    Args:
        one: First 2-D image.
        two: Second 2-D image (the one being moved around).
        pos_one: ``(x, y)`` of ``one`` on the canvas.
        pos_two: ``(x, y)`` of ``two`` on the canvas.
        big_empty: Blank 2-D canvas; it is copied, never modified.

    Returns:
        The placement score, or ``-99999999`` when ``two`` would not lie
        entirely inside the canvas.
    """
    x, y = pos_two
    h, w = two.shape
    canvas_h, canvas_w = big_empty.shape

    fits = x >= 0 and y >= 0 and x + w <= canvas_w and y + h <= canvas_h
    if not fits:
        return -99999999

    # Render each image on its own copy of the canvas.
    layer_one = overlay(one, big_empty, pos_one)
    layer_two = overlay(two, big_empty, pos_two)

    # Keep only the region both images cover.
    left, right, top, bottom = overlap(one, two, pos_one, pos_two)
    patch_one = layer_one[top:bottom, left:right]
    patch_two = layer_two[top:bottom, left:right]

    # NOTE(review): if the images were disjoint on both axes, the two
    # negative extents would multiply to a positive area; the callers in
    # this file appear to keep the images at least touching - confirm.
    area = (right - left) * (bottom - top)
    return area - getScore(patch_one, patch_two) * 2

# do brute force
def bruteForce(one, two):
    """Try every placement of the smaller image around the larger one.

    The larger image is fixed at ``[sw, sh]`` (the smaller image's width and
    height) on a blank canvas padded by the smaller image's size on every
    side, and each candidate position of the smaller image is scored with
    ``fullScore``. Progress and every new best score are printed.

    Args:
        one: First 2-D image.
        two: Second 2-D image.

    Returns:
        ``(best_pos, [sw, sh], small, big, empty)``: the best ``[x, y]`` of
        the smaller image, the fixed position of the larger image, both
        images, and the blank canvas.
    """
    # Decide which image is the small one by pixel count; ties make `two`
    # the small image.
    h, w = one.shape
    one_size = h * w
    h, w = two.shape
    two_size = h * w
    small, big = (one, two) if one_size < two_size else (two, one)

    # Canvas: the big image plus one small-image margin on each side.
    sh, sw = small.shape
    bh, bw = big.shape
    total_height = bh + sh * 2
    total_width = bw + sw * 2
    empty = np.zeros((total_height, total_width), np.uint8)

    best_score = -999999
    best_pos = None

    # Scan every top-left corner at which the small image is tried.
    # (To watch the scan, overlay both images on `empty` here and show the
    # result with cv2.imshow / cv2.waitKey(1).)
    for y in range(total_height - sh):
        print("y: " + str(y) + " || " + str(empty.shape))
        for x in range(total_width - sw):
            score = fullScore(big, small, [sw, sh], [x, y], empty)
            if score <= best_score:
                continue
            best_score = score
            best_pos = [x, y]
            print("best_score: " + str(best_score))

    return best_pos, [sw, sh], small, big, empty

# do a step of hill climber
def hillStep(one, two, best_pos, big_empty, step):
    """Score the position reached by moving the second image by ``step``.

    Args:
        one: First 2-D image.
        two: Second 2-D image.
        best_pos: ``[pos_one, pos_two]``; neither entry is modified.
        big_empty: Blank canvas passed on to ``fullScore``.
        step: ``(dx, dy)`` offset applied to ``pos_two``.

    Returns:
        ``(score, new_pos)`` where ``new_pos`` is the shifted copy of
        ``pos_two`` and ``score`` is its ``fullScore``.
    """
    # Work on a copy so the caller's current best position stays intact.
    moved = best_pos[1][:]
    moved[0] = moved[0] + step[0]
    moved[1] = moved[1] + step[1]

    score = fullScore(one, two, best_pos[0], moved, big_empty)
    return score, moved

# hunt around for good position
# let's do a random-start hillclimber
def randHill(one, two, shape):
    """Search for the best placement of ``two`` by random-restart hill climbing.

    Image ``one`` stays at ``[w, h]`` (the width and height of ``two``) on a
    blank canvas of the given ``shape``. Each of the 200 restarts draws a
    random position for ``two`` with ``randPos`` and then repeatedly sweeps
    its 8 neighbouring positions, moving whenever ``fullScore`` improves,
    until a whole sweep brings no improvement. Progress is printed.

    Args:
        one: First image (kept fixed).
        two: Second image (the one that is moved).
        shape: ``(height, width)`` of the canvas.

    Returns:
        ``(g_best_score, g_best_pos)`` where ``g_best_pos`` is
        ``[pos_one, pos_two]`` of the best-scoring restart.
    """
    # set up empty images
    big_empty = np.zeros(shape, np.uint8);

    # set global best
    g_best_score = -999999;
    g_best_pos = None;

    # lets do 200 iterations
    iters = 200;
    for a in range(iters):
        # progress check
        print(str(a) + " of " + str(iters));

        # start with random position
        # (image one is anchored one `two`-sized margin in from the corner)
        h,w = two.shape[:2];
        pos_one = [w,h];
        pos_two = randPos(two, shape);

        # get score
        best_score = fullScore(one, two, pos_one, pos_two, big_empty);
        best_pos = [pos_one, pos_two];

        # hill climb (only on second image)
        while True:
            # end condition: no step improves score
            end_flag = True;

            # 8-way
            for y in range(-1, 1+1):
                for x in range(-1, 1+1):
                    if x != 0 or y != 0:
                        # get score and update
                        # hillStep starts from best_pos[1], so an accepted
                        # move shifts the base point for the neighbours that
                        # are still to be tried in this same sweep
                        score, new_pos = hillStep(one, two, best_pos, big_empty, [x,y]);
                        if score > best_score:
                            best_score = score;
                            best_pos[1] = new_pos[:];
                            end_flag = False;

            # end
            if end_flag:
                break;
            else:
                # show (optional live preview, disabled)
                # prog = overlay(one, big_empty, best_pos[0]);
                # prog = overlay(two, prog, best_pos[1]);
                # cv2.imshow("prog", prog);
                # cv2.waitKey(1);
                pass;

        # check for new global best
        if best_score > g_best_score:
            g_best_score = best_score;
            # shallow copy of the [pos_one, pos_two] pair
            g_best_pos = best_pos[:];
            print("top score: " + str(g_best_score));
    return g_best_score, g_best_pos;

# load both images and reduce them to a single grayscale channel
top = cv2.imread("top.jpg")
bottom = cv2.imread("bottom.jpg")
top = cv2.cvtColor(top, cv2.COLOR_BGR2GRAY)
bottom = cv2.cvtColor(bottom, cv2.COLOR_BGR2GRAY)

# randomly crop up to 20 pixels off every edge of each image
top = randCrop(top, 20)
bottom = randCrop(bottom, 20)

# randomly add noise: 200 to 1000 random pixel writes per image, in place
saltPepper(top, 200, 1000)
saltPepper(bottom, 200, 1000)

# set up max image (assume no overlap whatsoever): the size of `top` plus
# twice the size of `bottom` along each axis, which leaves room for the
# [w, h] offset randHill gives the first image
tw = 0
th = 0
h, w = top.shape
tw += w
th += h
h, w = bottom.shape
tw += w*2
th += h*2

# do random-start hill climb
_, best_pos = randHill(top, bottom, (th, tw))

# show: paste both images at their best positions onto a blank canvas
empty = np.zeros((th, tw), np.uint8)
pos1, pos2 = best_pos
image = overlay(top, empty, pos1)
image = overlay(bottom, image, pos2)

# do brute force (exhaustive alternative to the hill climb above)
# small_pos, big_pos, small, big, empty = bruteForce(top, bottom)
# image = overlay(big, empty, big_pos)
# image = overlay(small, image, small_pos)

# recolor overlap: replicate the gray image into three channels, then
# brighten channel 0 inside the overlap rectangle
h, w = empty.shape
color = np.zeros((h, w, 3), np.uint8)
l, r, t, b = overlap(top, bottom, pos1, pos2)
color[:, :, 0] = image
color[:, :, 1] = image
color[:, :, 2] = image
# Widen before adding so values above 155 saturate at 255; a plain
# `+= 100` on uint8 wraps around and turns bright pixels dark.
color[t:b, l:r, 0] = np.minimum(
    color[t:b, l:r, 0].astype(np.uint16) + 100, 255
).astype(np.uint8)

# show images and wait for a key press
cv2.imshow("top", top)
cv2.imshow("bottom", bottom)
cv2.imshow("overlayed", image)
cv2.imshow("Color", color)
cv2.waitKey(0)

编辑：我添加了随机启动的爬山算法。

如何为这些 'simple' 图像编写图像拼接软件？

How to code up an image stitching software for these 'simple' images?

python

image-processing

graph

overlap