文字全景拼接
Panorama stitching for text
我正在寻找一个好用的文本全景拼接库。我试过了 OpenCV 和 OpenPano,它们对普通照片都很有效,但对文本图像效果不好。例如,我需要拼接以下 3 张图片:
图像之间有大约 45% 的重叠。
如果有一个选项可以让提到的库之一在文本图像上很好地工作,而不是寻找另一个库,那就太好了。
- 我需要库在 linux arm 上工作。
OpenPano 拼接文本失败,因为它无法检索 足够的特征点 (或关键点)来进行拼接过程。
文本拼接不需要对旋转鲁棒的匹配方法,只需要对平移鲁棒即可。OpenCV 恰好提供了这样一个功能,它叫做模板匹配(Template Matching)。
我将开发的解决方案基于此 OpenCV 的功能。
管道
我现在将解释我的解决方案的主要步骤(有关详细信息,请查看下面提供的代码)。
匹配过程
为了匹配两个连续的图像(在matchImages
函数中完成,见下面的代码):
- 我们创建了一个 模板 图像,如下所示:
这一步在我的代码中由函数 genTemplate 完成。
- 我们向第二张图片添加黑边(我们要在其中找到 模板 )。如果输入图像中的文本 未对齐 (尽管这些示例图像就是这种情况),则此步骤是必要的。这是边距处理后图像的样子。如您所见,只需要图像下方和上方的边距:
模板图像理论上可以在这张加了黑边的图像中的任何位置被找到。此过程在 addBlackMargins 函数中完成。
- 我们对模板图像以及要在其中查找模板的图像都应用 Canny 滤波器(在 mat2Edges 函数中完成)。这会提高匹配过程的鲁棒性。这是一个例子:
- 我们使用 matchTemplate 函数将模板与图像进行匹配,并通过 minMaxLoc 函数获取最佳匹配位置。
正在计算最终图像大小
此步骤包括计算 最终矩阵 的 大小 ,我们将在其中将所有图像拼接在一起。如果所有输入图像的高度不同,则尤其需要这样做。
此步骤在 calcFinalImgSize
函数内完成。我不会在这里详细介绍,因为尽管它看起来有点复杂(至少对我而言),但这只是简单的数学运算(加法、减法、乘法)。想看懂公式就拿笔和纸吧
拼接过程
一旦我们有了每个输入图像的匹配位置,我们只需要做简单的数学运算即可复制 在最终图像的正确位置输入图像。同样,我建议您检查代码以了解实现细节(请参阅 stitchImages
函数)。
结果
这是您输入图像的结果:
如您所见,结果并不是“像素级完美”,但对于 OCR 来说应该足够好了。
这是输入图像不同高度的另一个结果:
代码 (Python)
我的程序是用 Python 写的,使用了 cv2(OpenCV)和 numpy 模块。不过,它可以(很容易地)移植到其他语言,例如 C++、Java 和 C#。
import numpy as np
import cv2
def genTemplate(img, h_templ_ratio=None):
    """Crop the right-hand part of *img* to use as a matching template.

    The crop keeps every row and the columns from
    ``int(width * (1 - ratio))`` up to the right edge of the image.

    Args:
        img: image array exposing ``.shape`` and 2-D slicing
            (e.g. what ``cv2.imread`` returns).
        h_templ_ratio: horizontal fraction of the image kept as template.
            When ``None`` (the default) the module-level ``H_templ_ratio``
            is used, which is the original behaviour.

    Returns:
        The cropped slice of ``img``.
    """
    if h_templ_ratio is None:
        # Fall back to the global set by the script entry point.
        h_templ_ratio = H_templ_ratio
    w = img.shape[1]
    # Left bound of the template, relative to the input image.
    x1 = int(float(w) * (1 - h_templ_ratio))
    return img[:, x1:]
def mat2Edges(img):
    """Return the Canny edge map of *img*, using thresholds 100 and 200."""
    return cv2.Canny(img, 100, 200)
def addBlackMargins(img, top, bottom, left, right):
    """Return a copy of *img* surrounded by black (zero) margins.

    Args:
        img: image array of shape ``(h, w)`` or ``(h, w, channels)``.
        top, bottom, left, right: margin widths in pixels.

    Returns:
        A new ``uint8`` array of shape
        ``(h + top + bottom, w + left + right[, channels])`` with ``img``
        copied at row ``top`` and column ``left`` and zeros elsewhere.
    """
    h, w = img.shape[:2]
    # Keep the trailing dimensions of the input instead of hard-coding
    # 3 channels, so grayscale and 4-channel images work as well.
    padded_shape = (h + top + bottom, w + left + right) + tuple(img.shape[2:])
    result = np.zeros(padded_shape, np.uint8)
    result[top:top + h, left:left + w] = img
    return result
def calcFinalImgSize(imgs, loc, h_templ_ratio=None):
    """Compute the canvas size needed to stitch all images together.

    Args:
        imgs: sequence of image arrays, in stitching order.
        loc: sequence of ``(x, y)`` template positions; ``loc[i]`` is where
            the template cut from ``imgs[i]`` was found in ``imgs[i + 1]``.
        h_templ_ratio: horizontal fraction of each image used as template.
            When ``None`` (the default) the module-level ``H_templ_ratio``
            is used, which is the original behaviour.

    Returns:
        A tuple ``(y_offset, h_final, w_final)``: the row at which the first
        image must be placed, then the height and width of the final image.
    """
    if h_templ_ratio is None:
        # Fall back to the global set by the script entry point.
        h_templ_ratio = H_templ_ratio

    # Largest margins needed above and below the first image.
    max_margin_top = max_margin_bottom = 0
    current_margin_top = current_margin_bottom = 0

    h_init, w_init = imgs[0].shape[:2]
    w_final = w_init
    x_origin = 0  # left edge of the most recently placed image

    for i in range(len(loc)):
        h, w = imgs[i].shape[:2]
        h2, w2 = imgs[i + 1].shape[:2]
        loc_x, loc_y = loc[i][0], loc[i][1]

        # Vertical drift relative to the first image. This assumes the
        # template's top-left corner is at row 0 of its source image.
        current_margin_top += loc_y
        current_margin_bottom += (h2 - loc_y) - h
        max_margin_top = max(max_margin_top, current_margin_top)
        max_margin_bottom = max(max_margin_bottom, current_margin_bottom)

        # Use the same template x-offset as stitchImages, so the width is
        # exactly the right-most column written (no rounding slack), and
        # never let a narrow later image shrink the canvas.
        x_templ = int(float(w) * (1 - h_templ_ratio))
        x_origin += x_templ - loc_x
        w_final = max(w_final, x_origin + w2)

    h_final = h_init + max_margin_top + max_margin_bottom
    return (max_margin_top, h_final, w_final)
def matchImages(imgs, templates_loc):
    """Match each image with the one that follows it (1->2, 2->3, ...).

    For every consecutive pair, a template is cut from the first image and
    located in the second one with ``cv2.matchTemplate`` on Canny edge maps.
    The position found, expressed relative to the un-padded second image,
    is appended to ``templates_loc`` and printed.

    Args:
        imgs: sequence of input images, in stitching order.
        templates_loc: list that receives one ``(x, y)`` tuple per pair.
    """
    for idx in range(len(imgs) - 1):
        edge_template = mat2Edges(genTemplate(imgs[idx]))

        # The searched image must be larger than the template, so it is
        # padded vertically by one template height on each side.
        pad_y = edge_template.shape[0]
        pad_x = 0
        padded = addBlackMargins(imgs[idx + 1], pad_y, pad_y, pad_x, pad_x)

        scores = cv2.matchTemplate(mat2Edges(padded), edge_template, cv2.TM_CCOEFF)
        best = cv2.minMaxLoc(scores)[3]  # location of the maximum score

        # Remove the padding to get coordinates in the original image.
        match_pos = (best[0] - pad_x, best[1] - pad_y)
        templates_loc.append(match_pos)
        print("max_loc", match_pos)
def stitchImages(imgs, templates_loc):
    """Copy all images onto one canvas at their matched positions.

    Args:
        imgs: sequence of input images, in stitching order.
        templates_loc: ``(x, y)`` template positions, one per consecutive
            pair of images (as filled in by ``matchImages``).

    Returns:
        A 3-channel ``uint8`` array containing the stitched result.
    """
    # Canvas size, plus the row at which the first image has to start.
    top_offset, canvas_h, canvas_w = calcFinalImgSize(imgs, templates_loc)
    canvas = np.zeros((canvas_h, canvas_w, 3), np.uint8)

    # Place the first image.
    first_h, first_w = imgs[0].shape[:2]
    canvas[top_offset:top_offset + first_h, 0:first_w] = imgs[0]

    # Top-left corner of the most recently placed image.
    row, col = top_offset, 0

    for idx in range(len(templates_loc)):
        prev_w = imgs[idx].shape[:2][1]
        nxt = imgs[idx + 1]
        nxt_h, nxt_w = nxt.shape[:2]
        match = templates_loc[idx]

        # Left edge of the template inside the previous image.
        templ_x = int(float(prev_w) * (1 - H_templ_ratio))

        row = row - match[1]
        col = col + templ_x - match[0]
        canvas[row:row + nxt_h, col:col + nxt_w] = nxt
    return canvas
if __name__ == '__main__':
    # Input images, in left-to-right stitching order.
    imgs = []
    for path in ('part1.jpg', 'part2.jpg', 'part3.jpg'):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing or unreadable file with None.
            raise IOError("could not read input image: " + path)
        imgs.append(img)

    # Horizontal ratio of each input that is kept to create a template.
    # The functions above read it as a module-level global.
    H_templ_ratio = 0.45

    templates_loc = []  # templates location, filled in by matchImages
    matchImages(imgs, templates_loc)
    result = stitchImages(imgs, templates_loc)

    cv2.imshow("result", result)
    # Without waitKey the HighGUI window is never serviced and the script
    # would exit immediately.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
OpenCV 3 提供了一个 Stitcher 类,它既可以拼接文本图像,也可以拼接照片。
import cv2

# Replace with your own image file names.
imageFiles = ['part1.jpg', 'part2.jpg', 'part3.jpg']

images = []
for filename in imageFiles:
    img = cv2.imread(filename)
    images.append(img)

# OpenCV 3.x name; newer releases expose it as cv2.Stitcher_create().
stitcher = cv2.createStitcher()
status, result = stitcher.stitch(images)
我用你的图片得到了这个结果。
我正在寻找一个好用的文本全景拼接库。我试过了 OpenCV 和 OpenPano,它们对普通照片都很有效,但对文本图像效果不好。例如,我需要拼接以下 3 张图片:
图像之间有大约 45% 的重叠。
如果有一个选项可以让提到的库之一在文本图像上很好地工作,而不是寻找另一个库,那就太好了。
- 我需要库在 linux arm 上工作。
OpenPano 拼接文本失败,因为它无法检索 足够的特征点 (或关键点)来进行拼接过程。
文本拼接不需要对旋转鲁棒的匹配方法,只需要对平移鲁棒即可。OpenCV 恰好提供了这样一个功能,它叫做模板匹配(Template Matching)。
我将开发的解决方案基于此 OpenCV 的功能。
管道
我现在将解释我的解决方案的主要步骤(有关详细信息,请查看下面提供的代码)。
匹配过程
为了匹配两个连续的图像(在matchImages
函数中完成,见下面的代码):
- 我们创建了一个 模板 图像,如下所示:
这一步在我的代码中由函数 genTemplate 完成。
- 我们向第二张图片添加黑边(我们要在其中找到 模板 )。如果输入图像中的文本 未对齐 (尽管这些示例图像就是这种情况),则此步骤是必要的。这是边距处理后图像的样子。如您所见,只需要图像下方和上方的边距:
模板图像理论上可以在这张加了黑边的图像中的任何位置被找到。此过程在 addBlackMargins 函数中完成。
- 我们对模板图像以及要在其中查找模板的图像都应用 Canny 滤波器(在 mat2Edges 函数中完成)。这会提高匹配过程的鲁棒性。这是一个例子:
- 我们使用 matchTemplate 函数将模板与图像进行匹配,并通过 minMaxLoc 函数获取最佳匹配位置。
正在计算最终图像大小
此步骤包括计算 最终矩阵 的 大小 ,我们将在其中将所有图像拼接在一起。如果所有输入图像的高度不同,则尤其需要这样做。
此步骤在 calcFinalImgSize
函数内完成。我不会在这里详细介绍,因为尽管它看起来有点复杂(至少对我而言),但这只是简单的数学运算(加法、减法、乘法)。想看懂公式就拿笔和纸吧
拼接过程
一旦我们有了每个输入图像的匹配位置,我们只需要做简单的数学运算即可复制 在最终图像的正确位置输入图像。同样,我建议您检查代码以了解实现细节(请参阅 stitchImages
函数)。
结果
这是您输入图像的结果:
如您所见,结果并不是“像素级完美”,但对于 OCR 来说应该足够好了。
这是输入图像高度不同时的另一个结果:
代码 (Python)
我的程序是用 Python 写的,使用了 cv2(OpenCV)和 numpy 模块。不过,它可以(很容易地)移植到其他语言,例如 C++、Java 和 C#。
import numpy as np
import cv2
def genTemplate(img, h_templ_ratio=None):
    """Crop the right-hand part of *img* to use as a matching template.

    The crop keeps every row and the columns from
    ``int(width * (1 - ratio))`` up to the right edge of the image.

    Args:
        img: image array exposing ``.shape`` and 2-D slicing
            (e.g. what ``cv2.imread`` returns).
        h_templ_ratio: horizontal fraction of the image kept as template.
            When ``None`` (the default) the module-level ``H_templ_ratio``
            is used, which is the original behaviour.

    Returns:
        The cropped slice of ``img``.
    """
    if h_templ_ratio is None:
        # Fall back to the global set by the script entry point.
        h_templ_ratio = H_templ_ratio
    w = img.shape[1]
    # Left bound of the template, relative to the input image.
    x1 = int(float(w) * (1 - h_templ_ratio))
    return img[:, x1:]
def mat2Edges(img):
    """Return the Canny edge map of *img*, using thresholds 100 and 200."""
    return cv2.Canny(img, 100, 200)
def addBlackMargins(img, top, bottom, left, right):
    """Return a copy of *img* surrounded by black (zero) margins.

    Args:
        img: image array of shape ``(h, w)`` or ``(h, w, channels)``.
        top, bottom, left, right: margin widths in pixels.

    Returns:
        A new ``uint8`` array of shape
        ``(h + top + bottom, w + left + right[, channels])`` with ``img``
        copied at row ``top`` and column ``left`` and zeros elsewhere.
    """
    h, w = img.shape[:2]
    # Keep the trailing dimensions of the input instead of hard-coding
    # 3 channels, so grayscale and 4-channel images work as well.
    padded_shape = (h + top + bottom, w + left + right) + tuple(img.shape[2:])
    result = np.zeros(padded_shape, np.uint8)
    result[top:top + h, left:left + w] = img
    return result
def calcFinalImgSize(imgs, loc, h_templ_ratio=None):
    """Compute the canvas size needed to stitch all images together.

    Args:
        imgs: sequence of image arrays, in stitching order.
        loc: sequence of ``(x, y)`` template positions; ``loc[i]`` is where
            the template cut from ``imgs[i]`` was found in ``imgs[i + 1]``.
        h_templ_ratio: horizontal fraction of each image used as template.
            When ``None`` (the default) the module-level ``H_templ_ratio``
            is used, which is the original behaviour.

    Returns:
        A tuple ``(y_offset, h_final, w_final)``: the row at which the first
        image must be placed, then the height and width of the final image.
    """
    if h_templ_ratio is None:
        # Fall back to the global set by the script entry point.
        h_templ_ratio = H_templ_ratio

    # Largest margins needed above and below the first image.
    max_margin_top = max_margin_bottom = 0
    current_margin_top = current_margin_bottom = 0

    h_init, w_init = imgs[0].shape[:2]
    w_final = w_init
    x_origin = 0  # left edge of the most recently placed image

    for i in range(len(loc)):
        h, w = imgs[i].shape[:2]
        h2, w2 = imgs[i + 1].shape[:2]
        loc_x, loc_y = loc[i][0], loc[i][1]

        # Vertical drift relative to the first image. This assumes the
        # template's top-left corner is at row 0 of its source image.
        current_margin_top += loc_y
        current_margin_bottom += (h2 - loc_y) - h
        max_margin_top = max(max_margin_top, current_margin_top)
        max_margin_bottom = max(max_margin_bottom, current_margin_bottom)

        # Use the same template x-offset as stitchImages, so the width is
        # exactly the right-most column written (no rounding slack), and
        # never let a narrow later image shrink the canvas.
        x_templ = int(float(w) * (1 - h_templ_ratio))
        x_origin += x_templ - loc_x
        w_final = max(w_final, x_origin + w2)

    h_final = h_init + max_margin_top + max_margin_bottom
    return (max_margin_top, h_final, w_final)
def matchImages(imgs, templates_loc):
    """Match each image with the one that follows it (1->2, 2->3, ...).

    For every consecutive pair, a template is cut from the first image and
    located in the second one with ``cv2.matchTemplate`` on Canny edge maps.
    The position found, expressed relative to the un-padded second image,
    is appended to ``templates_loc`` and printed.

    Args:
        imgs: sequence of input images, in stitching order.
        templates_loc: list that receives one ``(x, y)`` tuple per pair.
    """
    for idx in range(len(imgs) - 1):
        edge_template = mat2Edges(genTemplate(imgs[idx]))

        # The searched image must be larger than the template, so it is
        # padded vertically by one template height on each side.
        pad_y = edge_template.shape[0]
        pad_x = 0
        padded = addBlackMargins(imgs[idx + 1], pad_y, pad_y, pad_x, pad_x)

        scores = cv2.matchTemplate(mat2Edges(padded), edge_template, cv2.TM_CCOEFF)
        best = cv2.minMaxLoc(scores)[3]  # location of the maximum score

        # Remove the padding to get coordinates in the original image.
        match_pos = (best[0] - pad_x, best[1] - pad_y)
        templates_loc.append(match_pos)
        print("max_loc", match_pos)
def stitchImages(imgs, templates_loc):
    """Copy all images onto one canvas at their matched positions.

    Args:
        imgs: sequence of input images, in stitching order.
        templates_loc: ``(x, y)`` template positions, one per consecutive
            pair of images (as filled in by ``matchImages``).

    Returns:
        A 3-channel ``uint8`` array containing the stitched result.
    """
    # Canvas size, plus the row at which the first image has to start.
    top_offset, canvas_h, canvas_w = calcFinalImgSize(imgs, templates_loc)
    canvas = np.zeros((canvas_h, canvas_w, 3), np.uint8)

    # Place the first image.
    first_h, first_w = imgs[0].shape[:2]
    canvas[top_offset:top_offset + first_h, 0:first_w] = imgs[0]

    # Top-left corner of the most recently placed image.
    row, col = top_offset, 0

    for idx in range(len(templates_loc)):
        prev_w = imgs[idx].shape[:2][1]
        nxt = imgs[idx + 1]
        nxt_h, nxt_w = nxt.shape[:2]
        match = templates_loc[idx]

        # Left edge of the template inside the previous image.
        templ_x = int(float(prev_w) * (1 - H_templ_ratio))

        row = row - match[1]
        col = col + templ_x - match[0]
        canvas[row:row + nxt_h, col:col + nxt_w] = nxt
    return canvas
if __name__ == '__main__':
    # Input images, in left-to-right stitching order.
    imgs = []
    for path in ('part1.jpg', 'part2.jpg', 'part3.jpg'):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing or unreadable file with None.
            raise IOError("could not read input image: " + path)
        imgs.append(img)

    # Horizontal ratio of each input that is kept to create a template.
    # The functions above read it as a module-level global.
    H_templ_ratio = 0.45

    templates_loc = []  # templates location, filled in by matchImages
    matchImages(imgs, templates_loc)
    result = stitchImages(imgs, templates_loc)

    cv2.imshow("result", result)
    # Without waitKey the HighGUI window is never serviced and the script
    # would exit immediately.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
OpenCV 3 提供了一个 Stitcher 类,它既可以拼接文本图像,也可以拼接照片。
import cv2

# Replace with your own image file names.
imageFiles = ['part1.jpg', 'part2.jpg', 'part3.jpg']

images = []
for filename in imageFiles:
    img = cv2.imread(filename)
    images.append(img)

# OpenCV 3.x name; newer releases expose it as cv2.Stitcher_create().
stitcher = cv2.createStitcher()
status, result = stitcher.stitch(images)
我用你的图片得到了这个结果。