How to locate and extract a maze from a photo without being sensitive to warp or light

I have asked several questions on SO about locating and extracting a maze from a photo, but none of the answers I got work across different photos, not even across my four test photos. Every time I tweaked the code to make it work for one photo, the remaining photos failed, due to warped corners/parts or lighting, etc. I feel I need to find a method that is insensitive to warped images as well as to different light intensities or different colors of the maze walls (the lines inside the maze).

I have been trying to get this to work for three weeks, without success. Before I give up on the idea, I would like to ask: is it possible to locate and extract a maze from a photo using image processing only, without AI? If yes, can you show me how to do it?

The code and pictures are below:

import cv2
import numpy as np

from skimage.feature import corner_harris, corner_peaks
from skimage.morphology import reconstruction, binary_erosion
from skimage.morphology.convex_hull import convex_hull_image
from skimage.util import invert

# Read as grayscale and binarize; with THRESH_OTSU set, the fixed threshold
# value (100) is ignored and Otsu's threshold is used instead
maze = cv2.imread("simple.jpg", 0)
ret, maze = cv2.threshold(maze, 100, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
h, w = maze.shape

# Seed a reconstruction by dilation from a small patch at the image center
# (assumes the maze is roughly centered); this keeps only the white (paper)
# regions connected to that central patch
seed = np.zeros_like(maze)
size = 40
hh = h // 2
hw = w // 2
seed[hh-size:hh+size, hw-size:hw+size] = maze[hh-size:hh+size, hw-size:hw+size]
rec1 = reconstruction(seed, maze)
seed2 = np.ones_like(rec1)
ker = np.ones((2, 2))
# Eroding the white areas slightly thickens the dark maze walls
rec1_thicker = cv2.erode(rec1, ker, iterations=1)

# Reconstruction by erosion from an all-white seed that matches the image only
# inside a central window: dark structures that are not reachable from that
# window are filled with white
seed2 = seed2 * 255
size2 = 240
lhh = hh - size2
hhh = hh + size2
lhw = hw - size2
hhw = hw + size2
seed2[lhh:hhh, lhw:hhw] = rec1_thicker[lhh:hhh, lhw:hhw]
rec2 = reconstruction(seed2, rec1_thicker, method='erosion')
# Invert so the maze is bright, take its convex hull, and detect the hull's
# corners (ideally the four maze corners) with Harris
rec2_inv = invert(rec2 / 255.)
hull = convex_hull_image(rec2_inv)
hull_eroded = binary_erosion(hull, selem=np.ones((5, 5)))
coords = corner_peaks(corner_harris(hull_eroded), min_distance=5, threshold_rel=0.02)

import matplotlib.pyplot as plt
fig, axe = plt.subplots(1,4,figsize=(16,8))
axe[0].imshow(maze, 'gray')
axe[1].imshow(rec1, 'gray')
axe[2].imshow(rec2, 'gray')
axe[3].imshow(hull, 'gray')

Here are the output images:

As you can see, the 3rd plot is the extracted maze. This code works well, but only for two of the photos, in this case simple.jpg and maze.jpg...

If you try it on hard.jpg, it looks like this:

And it also fails on middle.jpg:

I have uploaded all four test photos to OneDrive for anyone interested to try.
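
As an aside, coords is computed above but never used. If the four detected corners are reliable, the warp sensitivity could be reduced by rectifying the maze with a perspective transform. A minimal sketch, assuming coords and maze from the code above (the output size side is an arbitrary choice):

pts = coords[:, ::-1].astype(np.float32)  # corner_peaks returns (row, col); swap to (x, y)
s = pts.sum(axis=1)
d = np.diff(pts, axis=1).ravel()          # y - x for each corner
src = np.float32([pts[s.argmin()],        # top-left: smallest x + y
                  pts[d.argmin()],        # top-right: smallest y - x
                  pts[s.argmax()],        # bottom-right: largest x + y
                  pts[d.argmax()]])       # bottom-left: largest y - x
side = 800
dst = np.float32([[0, 0], [side - 1, 0], [side - 1, side - 1], [0, side - 1]])
M = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(maze, M, (side, side))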


Update 1

I plotted all the masks to see what each one does.

mask = (sat < 16).astype(np.uint8) * 255
mask1 = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT, (31, 31)))
mask2 = cv2.copyMakeBorder(mask1, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
mask3 = cv2.morphologyEx(mask2, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (201, 201)))

plt.figure(figsize=(18, 8))
plt.subplot(1, 6, 1), plt.imshow(maze[..., ::-1]), plt.title('White balanced image')
plt.subplot(1, 6, 2), plt.imshow(sat, 'gray'), plt.title('Saturation channel')
plt.subplot(1, 6, 3), plt.imshow(mask, 'gray'), plt.title('sat < 16')
plt.subplot(1, 6, 4), plt.imshow(mask1, 'gray'), plt.title('closed')
plt.subplot(1, 6, 5), plt.imshow(mask2, 'gray'), plt.title('border')
plt.subplot(1, 6, 6), plt.imshow(mask3, 'gray'), plt.title('rect')
plt.tight_layout(), plt.show()

So it seems to me that mask2, which adds a border around the whole image, is unnecessary. Why do we need mask2?
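
My current guess: by default, cv2.erode treats pixels outside the image as white (borderValue defaults to morphologyDefaultBorderValue()), so a white region touching the image border can survive the erosion step of the large opening. Padding with an explicit black border first makes the opening strict at the image edges. A small synthetic check of that assumption:

demo = np.zeros((100, 100), np.uint8)
demo[:30, :] = 255  # white strip touching the top border
k = cv2.getStructuringElement(cv2.MORPH_RECT, (41, 41))

# Without padding, the strip survives the opening (the border acts as white for erosion)
opened = cv2.morphologyEx(demo, cv2.MORPH_OPEN, k)

# With a 1-pixel black border, the opening removes the strip completely
padded = cv2.copyMakeBorder(demo, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
opened_padded = cv2.morphologyEx(padded, cv2.MORPH_OPEN, k)

print(opened.max(), opened_padded.max())  # 255 0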

I also found that mask2 and mask3 are 2 pixels larger in each dimension:

maze.shape, sat.shape, mask.shape, mask1.shape, mask2.shape, mask3.shape
((4000, 1840, 3),
 (4000, 1840),
 (4000, 1840),
 (4000, 1840),
 (4002, 1842),
 (4002, 1842))

Why?



For the given four images, I can obtain quite good results using the following workflow:

  • White balance the input image to enforce nearly white paper. For that, I use a small patch from the center of the image, and from that patch, I take the pixel with the highest R + G + B value, assuming the maze is always located near the center of the image, and that the small patch contains some pixels from the white paper.
  • Mask the white paper using the saturation channel of the HSV color space, and (roughly) crop that part from the image.
  • On that crop, run the existing reconstruction approach.

Here are the results:

maze.jpg

simple.jpg

middle.jpg

hard.jpg

Here is the full code:

import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage.morphology import binary_erosion, reconstruction
from skimage.morphology.convex_hull import convex_hull_image


# 
def simple_white_balancing(image):
    h, w = image.shape[:2]
    patch = image[int(h/2-20):int(h/2+20), int(w/2-20):int(w/2+20)]
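    # minMaxLoc returns (minVal, maxVal, minLoc, maxLoc); [3] is the (x, y)
    # location of the brightest (highest R + G + B) pixel, assumed to be paper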
    x, y = cv2.minMaxLoc(np.sum(patch.astype(int), axis=2))[3]
    white_b, white_g, white_r = patch[y, x, ...].astype(float)
    lum = (white_r + white_g + white_b) / 3
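    # Scale each channel so the chosen reference pixel maps to neutral gray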
    image[..., 0] = np.clip(image[..., 0] * lum / white_b, 0, 255)
    image[..., 1] = np.clip(image[..., 1] * lum / white_g, 0, 255)
    image[..., 2] = np.clip(image[..., 2] * lum / white_r, 0, 255)
    return image


for file in ['maze.jpg', 'simple.jpg', 'middle.jpg', 'hard.jpg']:

    # Read image
    img = cv2.imread(file)

    # Initialize hull image
    h, w = img.shape[:2]
    hull = np.zeros((h, w), np.uint8)

    # Simple white balancing, cf. 
    img = cv2.GaussianBlur(img, (11, 11), None)
    maze = simple_white_balancing(img.copy())

    # Mask low saturation area
    sat = cv2.cvtColor(maze, cv2.COLOR_BGR2HSV)[..., 1]
    mask = (sat < 16).astype(np.uint8) * 255
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                            cv2.getStructuringElement(cv2.MORPH_RECT,
                                                      (31, 31)))
    mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN,
                            cv2.getStructuringElement(cv2.MORPH_RECT,
                                                      (201, 201)))

    # Find largest contour in mask (w.r.t. the OpenCV version)
    cnts = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnt = max(cnts, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)

    # Crop to low saturation area
    cut = cv2.cvtColor(maze[y+1:y+1+h, x+1:x+1+w], cv2.COLOR_BGR2GRAY)

    # Use existing reconstruction approach on low saturation area
    h_c, w_c = cut.shape
    seed = np.zeros_like(cut)
    size = 40
    hh = h_c // 2
    hw = w_c // 2
    seed[hh-size:hh+size, hw-size:hw+size] = cut[hh-size:hh+size, hw-size:hw+size]
    rec = reconstruction(seed, cut)
    rec = cv2.erode(rec, np.ones((2, 2)), iterations=1)

    seed = np.ones_like(rec) * 255
    size = 240
    seed[hh-size:hh+size, hw-size:hw+size] = rec[hh-size:hh+size, hw-size:hw+size]
    rec = reconstruction(seed, rec, method='erosion').astype(np.uint8)
    rec = cv2.threshold(rec, np.quantile(rec, 0.25), 255, cv2.THRESH_BINARY_INV)[1]

    hull[y+1:y+1+h, x+1:x+1+w] = convex_hull_image(rec) * 255

    plt.figure(figsize=(18, 8))
    plt.subplot(1, 5, 1), plt.imshow(img[..., ::-1]), plt.title('Original image')
    plt.subplot(1, 5, 2), plt.imshow(maze[..., ::-1]), plt.title('White balanced image')
    plt.subplot(1, 5, 3), plt.imshow(sat, 'gray'), plt.title('Saturation channel')
    plt.subplot(1, 5, 4), plt.imshow(hull, 'gray'), plt.title('Obtained convex hull')
    plt.subplot(1, 5, 5), plt.imshow(cv2.bitwise_and(img, img, mask=hull)[..., ::-1])
    plt.tight_layout(), plt.savefig(file + 'output.png'), plt.show()

Of course, there is no guarantee that this approach will work on the next five or so images you are going to process. In general, try to standardize the image acquisition (rotation, lighting) to get more consistent images. Otherwise, you will end up needing some machine learning approach...
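
If the acquisition really cannot be standardized, one more thing worth trying (an untested sketch, not validated on these four images) is to flatten the illumination before thresholding by dividing the grayscale image by a smooth background estimate:

gray = cv2.cvtColor(cv2.imread('maze.jpg'), cv2.COLOR_BGR2GRAY)
background = cv2.medianBlur(gray, 51)           # coarse paper-brightness estimate; kernel size is a guess
flat = cv2.divide(gray, background, scale=255)  # divide out the lighting gradient
binary = cv2.threshold(flat, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]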

----------------------------------------
System information
----------------------------------------
Platform:      Windows-10-10.0.16299-SP0
Python:        3.9.1
PyCharm:       2021.1.1
Matplotlib:    3.4.1
NumPy:         1.20.2
OpenCV:        4.5.1
scikit-image:  0.18.1
----------------------------------------