确定报纸文章中的栏数
Determining the number of columns within a newspaper article
假设需要分析下面这篇报纸文章的栏数(正确结果应该是 3 个文本栏)。我尝试使用 Python 的 cv2 库来检索栏数,并在 Stack Overflow 上找到了以下建议:Detect number of rows and columns in table image with OpenCV
但是,该解决方案针对的是结构良好的表格(table),因此可以很容易地提取行数和列数,而报纸文章并没有这样的结构。基于该解决方案,这是我想出的:
import numpy as np
from imutils import contours
import cv2
# Asker's attempt: adapt a table row/column counter to a newspaper article.
# Contours are sorted top-to-bottom and grouped into "rows" by the vertical
# position of their centroids; the length of the second row is reported as
# the column count.

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('example_newspaper_article.png')
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('Could not read example_newspaper_article.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# NOTE: with THRESH_OTSU set, OpenCV picks the threshold itself, so the
# explicit 240 has no effect on the result.
thresh = cv2.threshold(blur, 240, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and paint every small one (area < 10000) with a 30 px thick
# white outline so that neighbouring glyphs merge into larger blobs
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # 2 vs. 3 return values
for c in cnts:
    area = cv2.contourArea(c)
    if area < 10000:
        cv2.drawContours(thresh, [c], -1, (255, 255, 255), 30)

# No inversion is applied: the thresholded mask is used as-is
invert = thresh
offset, old_cY, first = 10, 0, True
# Initialised up front so they exist even when the first centroid lies within
# `offset` pixels of the top edge or when no contour is found at all
row, table = [], []
visualize = cv2.cvtColor(invert, cv2.COLOR_GRAY2BGR)

# Find contours, sort from top-to-bottom and then sum up column/rows
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) > 0:
    # sort_contours cannot unpack an empty contour list
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

for c in cnts:
    # Find centroid; contours with zero area have no defined centroid
    M = cv2.moments(c)
    if M["m00"] == 0:
        continue
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])

    # New row: the centroid moved down by more than `offset` pixels.
    # The row collected so far is stored and a fresh one is started.
    if (abs(cY) - abs(old_cY)) > offset:
        first = False
        old_cY = cY
        table.append(row)
        row = []

    # Cell in same row
    if ((abs(cY) - abs(old_cY)) <= offset) or first:
        row.append(1)

    # Uncomment to visualize
    #cv2.circle(visualize, (cX, cY), 10, (36, 255, 12), -1)
    #cv2.imshow('visualize', visualize)
    #cv2.waitKey(200)

print('Rows: {}'.format(len(table)))
# The column count is read from the second stored row; report 0 instead of
# crashing when fewer than two rows were collected
print('Columns: {}'.format(len(table[1]) if len(table) > 1 else 0))
cv2.imshow('invert', invert)
cv2.imshow('thresh', thresh)
cv2.waitKey()
我认为,增加 drawContours 方法的厚度参数会有所帮助,但不幸的是,这并不能解决问题。结果如下所示:
我想,在文本区域上绘制矩形会更有帮助吗?
有谁知道解决方案并可以帮助我吗?
提前致谢!
每当遇到这样的任务时,我都会沿 y 轴统计像素数,并尝试找出相邻像素列之间的(较大)差异。以下是我的完整处理流程:
- 将图像转换为灰度;使用 Otsu 的逆二进制阈值在黑色背景上获得白色像素。
- 做一些形态学上的闭合,这里使用一个大的垂直线核来连接同一列中的所有像素。
- 计算所有白色像素;计算相邻列之间的绝对差异。
- 在该“信号”中查找峰值 – 可以手动查找,也可以如此处所示使用 `scipy.signal.find_peaks`。峰值标识每个文本栏的开始和结束,因此文本栏的数量是峰值数量的一半。
这是包括一些可视化的完整代码:
import cv2
import matplotlib.pyplot as plt # Only for visualization output
import numpy as np
from scipy import signal
from skimage import io # Only for web grabbing images
def analyse_columns(closed_mask, min_peak_distance=50):
    """Estimate the number of text columns from a vertically closed mask.

    Parameters
    ----------
    closed_mask : 2-D array
        Binary mask whose foreground pixels are 255 (the values are divided
        by 255 before counting), e.g. the thresholded image after closing
        with a tall vertical kernel.
    min_peak_distance : int, optional
        Minimal horizontal distance in samples between two detected peaks,
        passed to ``scipy.signal.find_peaks`` as ``distance``.

    Returns
    -------
    n_cols : int
        Half the number of detected peaks, rounded down; every column
        contributes one peak at its start and one at its end.
    peaks : ndarray
        Indices of the detected peaks in ``y_count_diff``.
    y_count : ndarray
        Number of foreground pixels in every pixel column.
    y_count_diff : ndarray
        Absolute difference between neighbouring entries of ``y_count``.
    """
    # Count white pixels along y-axis
    y_count = np.sum(closed_mask / 255, 0)
    # Calculate absolute difference between neighbouring x-axis values
    y_count_diff = np.abs(np.diff(y_count))
    # Find peaks in that "signal"
    peaks = signal.find_peaks(y_count_diff, distance=min_peak_distance)[0]
    # Builtin int instead of np.int, which was removed in NumPy 1.24
    n_cols = int(peaks.shape[0] // 2)
    return n_cols, peaks, y_count, y_count_diff


if __name__ == '__main__':
    # Read image from web (attention: RGB order here, scikit-image)
    image = io.imread('https://i.stack.imgur.com/jbAeZ.png')

    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Inverse binary threshold by Otsu's
    thr = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

    # Morphological closing with large vertical line kernel (as tall as the
    # image) to connect all pixels belonging to the same text column
    thr_mod = cv2.morphologyEx(thr, cv2.MORPH_CLOSE, np.ones((image.shape[0], 1)))

    # Number of columns is half the number of found peaks
    n_cols, peaks, y_count, y_count_diff = analyse_columns(thr_mod)

    # Text output
    print('Number of columns: ' + str(n_cols))

    # Some visualization output
    plt.figure(0)
    plt.subplot(221)
    plt.imshow(image)
    plt.title('Original image')
    plt.subplot(222)
    plt.imshow(thr_mod, cmap='gray')
    plt.title('Thresholded, morphlogically closed image')
    plt.subplot(223)
    plt.plot(y_count)
    plt.plot(peaks, y_count[peaks], 'r.')
    plt.title('Summed white pixels along y-axis')
    plt.subplot(224)
    plt.plot(y_count_diff)
    plt.plot(peaks, y_count_diff[peaks], 'r.')
    plt.title('Absolute difference in summed white pixels')
    plt.tight_layout()
    plt.show()
文本输出:
Number of columns: 3
可视化输出:
局限性:如果您的图像倾斜等,您可能会得到不好的结果。如果你有很多(大)图像穿过文本列,你也可能会得到不好的结果。一般来说,您需要调整给定实现中的细节以满足您的实际需求(没有给出更多示例)。
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.5
Matplotlib: 3.3.1
NumPy: 1.19.1
OpenCV: 4.4.0
SciPy: 1.5.2
----------------------------------------
您可以在搜索列之前以稍微不同的方式准备图像。例如,您可以先水平连接文本(使用一些形态学操作)。这将为您提供具有一定高度的轮廓(标题将垂直连接为每行一个轮廓,列中的文本将连接为每行一个轮廓)。然后搜索所有轮廓并在高于您设置的特定值(可以手动计算或设置)的轮廓上绘制边界矩形。之后用更大的内核(水平和垂直)再次执行形态学操作,这样如果它们靠得很近,您就可以将所有剩余的文本连接起来。
这是一个示例代码:
import cv2
import numpy as np
# Count text columns by (1) joining glyphs horizontally, (2) blanking every
# blob taller than the mean blob height, (3) joining what is left in both
# directions and (4) counting the remaining external contours.

img = cv2.imread("columns.png")  # read image
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("Could not read columns.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale transform
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]  # Otsu threshold

# First closing: 5x10 kernel (rows x cols) connects blobs in x direction
kernel = np.ones((5, 10), dtype=np.uint8)
closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
cv2.imwrite("closing1.png", closing)

# Search for contours. Index [-2] selects the contour list regardless of
# whether findContours returns two values or three (OpenCV 3.x).
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

# Heights of all contour bounding rectangles
heights = [cv2.boundingRect(cnt)[3] for cnt in contours]

# The mean height serves as boundary, but this will probably not be the case
# on other samples - you would need to write a function to determine this
# boundary or set it manually. Falls back to 0 when nothing was found.
boundary = np.mean(heights) if heights else 0

# Paint every contour that is higher than the boundary black, i.e. remove it
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # bounding rectangle of the contour
    if h > boundary:
        cv2.rectangle(closing, (x, y), (x + w, y + h), (0, 0, 0), -1)
cv2.imwrite("closing1-filled.png", closing)

# Second closing: 25x25 kernel connects blobs in x and y direction
kernel = np.ones((25, 25), dtype=np.uint8)
closing = cv2.morphologyEx(closing, cv2.MORPH_CLOSE, kernel)
cv2.imwrite("closing2.png", closing)

# Search for contours again; each remaining contour is counted as a column
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
print("Number of columns: ", len(contours))  # this is the number of columns

# Draw the bounding rectangle of every column on the original image
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # position and size of the column
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
cv2.imwrite("result.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
结果:
列数:3
第 1 步:
第 2 步:
第 3 步:
假设需要分析下面这篇报纸文章的栏数(正确结果应该是 3 个文本栏)。我尝试使用 Python 的 cv2 库来检索栏数,并在 Stack Overflow 上找到了以下建议:Detect number of rows and columns in table image with OpenCV
但是,该解决方案针对的是结构良好的表格(table),因此可以很容易地提取行数和列数,而报纸文章并没有这样的结构。基于该解决方案,这是我想出的:
import numpy as np
from imutils import contours
import cv2
# Asker's attempt: adapt a table row/column counter to a newspaper article.
# Contours are sorted top-to-bottom and grouped into "rows" by the vertical
# position of their centroids; the length of the second row is reported as
# the column count.

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('example_newspaper_article.png')
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError('Could not read example_newspaper_article.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# NOTE: with THRESH_OTSU set, OpenCV picks the threshold itself, so the
# explicit 240 has no effect on the result.
thresh = cv2.threshold(blur, 240, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and paint every small one (area < 10000) with a 30 px thick
# white outline so that neighbouring glyphs merge into larger blobs
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # 2 vs. 3 return values
for c in cnts:
    area = cv2.contourArea(c)
    if area < 10000:
        cv2.drawContours(thresh, [c], -1, (255, 255, 255), 30)

# No inversion is applied: the thresholded mask is used as-is
invert = thresh
offset, old_cY, first = 10, 0, True
# Initialised up front so they exist even when the first centroid lies within
# `offset` pixels of the top edge or when no contour is found at all
row, table = [], []
visualize = cv2.cvtColor(invert, cv2.COLOR_GRAY2BGR)

# Find contours, sort from top-to-bottom and then sum up column/rows
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) > 0:
    # sort_contours cannot unpack an empty contour list
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

for c in cnts:
    # Find centroid; contours with zero area have no defined centroid
    M = cv2.moments(c)
    if M["m00"] == 0:
        continue
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])

    # New row: the centroid moved down by more than `offset` pixels.
    # The row collected so far is stored and a fresh one is started.
    if (abs(cY) - abs(old_cY)) > offset:
        first = False
        old_cY = cY
        table.append(row)
        row = []

    # Cell in same row
    if ((abs(cY) - abs(old_cY)) <= offset) or first:
        row.append(1)

    # Uncomment to visualize
    #cv2.circle(visualize, (cX, cY), 10, (36, 255, 12), -1)
    #cv2.imshow('visualize', visualize)
    #cv2.waitKey(200)

print('Rows: {}'.format(len(table)))
# The column count is read from the second stored row; report 0 instead of
# crashing when fewer than two rows were collected
print('Columns: {}'.format(len(table[1]) if len(table) > 1 else 0))
cv2.imshow('invert', invert)
cv2.imshow('thresh', thresh)
cv2.waitKey()
我认为,增加 drawContours 方法的厚度参数会有所帮助,但不幸的是,这并不能解决问题。结果如下所示:
我想,在文本区域上绘制矩形会更有帮助吗? 有谁知道解决方案并可以帮助我吗? 提前致谢!
每当遇到这样的任务时,我都会沿 y 轴统计像素数,并尝试找出相邻像素列之间的(较大)差异。以下是我的完整处理流程:
- 将图像转换为灰度;使用 Otsu 的逆二进制阈值在黑色背景上获得白色像素。
- 做一些形态学上的闭合,这里使用一个大的垂直线核来连接同一列中的所有像素。
- 计算所有白色像素;计算相邻列之间的绝对差异。
- 在该“信号”中查找峰值 – 可以手动查找,也可以如此处所示使用 `scipy.signal.find_peaks`。峰值标识每个文本栏的开始和结束,因此文本栏的数量是峰值数量的一半。
这是包括一些可视化的完整代码:
import cv2
import matplotlib.pyplot as plt # Only for visualization output
import numpy as np
from scipy import signal
from skimage import io # Only for web grabbing images
def analyse_columns(closed_mask, min_peak_distance=50):
    """Estimate the number of text columns from a vertically closed mask.

    Parameters
    ----------
    closed_mask : 2-D array
        Binary mask whose foreground pixels are 255 (the values are divided
        by 255 before counting), e.g. the thresholded image after closing
        with a tall vertical kernel.
    min_peak_distance : int, optional
        Minimal horizontal distance in samples between two detected peaks,
        passed to ``scipy.signal.find_peaks`` as ``distance``.

    Returns
    -------
    n_cols : int
        Half the number of detected peaks, rounded down; every column
        contributes one peak at its start and one at its end.
    peaks : ndarray
        Indices of the detected peaks in ``y_count_diff``.
    y_count : ndarray
        Number of foreground pixels in every pixel column.
    y_count_diff : ndarray
        Absolute difference between neighbouring entries of ``y_count``.
    """
    # Count white pixels along y-axis
    y_count = np.sum(closed_mask / 255, 0)
    # Calculate absolute difference between neighbouring x-axis values
    y_count_diff = np.abs(np.diff(y_count))
    # Find peaks in that "signal"
    peaks = signal.find_peaks(y_count_diff, distance=min_peak_distance)[0]
    # Builtin int instead of np.int, which was removed in NumPy 1.24
    n_cols = int(peaks.shape[0] // 2)
    return n_cols, peaks, y_count, y_count_diff


if __name__ == '__main__':
    # Read image from web (attention: RGB order here, scikit-image)
    image = io.imread('https://i.stack.imgur.com/jbAeZ.png')

    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Inverse binary threshold by Otsu's
    thr = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

    # Morphological closing with large vertical line kernel (as tall as the
    # image) to connect all pixels belonging to the same text column
    thr_mod = cv2.morphologyEx(thr, cv2.MORPH_CLOSE, np.ones((image.shape[0], 1)))

    # Number of columns is half the number of found peaks
    n_cols, peaks, y_count, y_count_diff = analyse_columns(thr_mod)

    # Text output
    print('Number of columns: ' + str(n_cols))

    # Some visualization output
    plt.figure(0)
    plt.subplot(221)
    plt.imshow(image)
    plt.title('Original image')
    plt.subplot(222)
    plt.imshow(thr_mod, cmap='gray')
    plt.title('Thresholded, morphlogically closed image')
    plt.subplot(223)
    plt.plot(y_count)
    plt.plot(peaks, y_count[peaks], 'r.')
    plt.title('Summed white pixels along y-axis')
    plt.subplot(224)
    plt.plot(y_count_diff)
    plt.plot(peaks, y_count_diff[peaks], 'r.')
    plt.title('Absolute difference in summed white pixels')
    plt.tight_layout()
    plt.show()
文本输出:
Number of columns: 3
可视化输出:
局限性:如果您的图像倾斜等,您可能会得到不好的结果。如果你有很多(大)图像穿过文本列,你也可能会得到不好的结果。一般来说,您需要调整给定实现中的细节以满足您的实际需求(没有给出更多示例)。
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.5
Matplotlib: 3.3.1
NumPy: 1.19.1
OpenCV: 4.4.0
SciPy: 1.5.2
----------------------------------------
您可以在搜索列之前以稍微不同的方式准备图像。例如,您可以先水平连接文本(使用一些形态学操作)。这将为您提供具有一定高度的轮廓(标题将垂直连接为每行一个轮廓,列中的文本将连接为每行一个轮廓)。然后搜索所有轮廓并在高于您设置的特定值(可以手动计算或设置)的轮廓上绘制边界矩形。之后用更大的内核(水平和垂直)再次执行形态学操作,这样如果它们靠得很近,您就可以将所有剩余的文本连接起来。
这是一个示例代码:
import cv2
import numpy as np
# Count text columns by (1) joining glyphs horizontally, (2) blanking every
# blob taller than the mean blob height, (3) joining what is left in both
# directions and (4) counting the remaining external contours.

img = cv2.imread("columns.png")  # read image
if img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("Could not read columns.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale transform
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]  # Otsu threshold

# First closing: 5x10 kernel (rows x cols) connects blobs in x direction
kernel = np.ones((5, 10), dtype=np.uint8)
closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
cv2.imwrite("closing1.png", closing)

# Search for contours. Index [-2] selects the contour list regardless of
# whether findContours returns two values or three (OpenCV 3.x).
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

# Heights of all contour bounding rectangles
heights = [cv2.boundingRect(cnt)[3] for cnt in contours]

# The mean height serves as boundary, but this will probably not be the case
# on other samples - you would need to write a function to determine this
# boundary or set it manually. Falls back to 0 when nothing was found.
boundary = np.mean(heights) if heights else 0

# Paint every contour that is higher than the boundary black, i.e. remove it
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # bounding rectangle of the contour
    if h > boundary:
        cv2.rectangle(closing, (x, y), (x + w, y + h), (0, 0, 0), -1)
cv2.imwrite("closing1-filled.png", closing)

# Second closing: 25x25 kernel connects blobs in x and y direction
kernel = np.ones((25, 25), dtype=np.uint8)
closing = cv2.morphologyEx(closing, cv2.MORPH_CLOSE, kernel)
cv2.imwrite("closing2.png", closing)

# Search for contours again; each remaining contour is counted as a column
contours = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
print("Number of columns: ", len(contours))  # this is the number of columns

# Draw the bounding rectangle of every column on the original image
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)  # position and size of the column
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
cv2.imwrite("result.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
结果:
列数:3
第 1 步:
第 2 步:
第 3 步: