将扫描文档中的图表转换为数据

Question

我目前正在尝试编写一个程序，用来从书中一些不常见的图表里提取数据。我扫描了这本书的页面，并想使用 OpenCV 检测图表中的一些特征，以便将其转换为可用数据。在左图中，我要找的是各个“三角形”的高度；在右图中，我要找的是从中心到虚线与灰色区域交点的距离。在这两种情况下，我都想将这些值转换为数字数据以供进一步使用。

我首先想到的是检测图表中的线条，希望能以某种方式测量它们的长度或位置。为此，我使用了霍夫线变换。以下代码片段展示了我目前的进展。

import numpy as np
import cv2

# Read the scanned page. cv2.imread returns None instead of raising when the
# file is missing or unreadable, so fail early with a clear message.
img = cv2.imread('test2.jpg')
if img is None:
    raise FileNotFoundError("could not read image 'test2.jpg'")
# Convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Apply Canny edge detection (thresholds 50 and 150, aperture size 3)
edges = cv2.Canny(gray, 50, 150, apertureSize=3)

# Probabilistic Hough line detection: rho step 1, theta step 1 degree,
# accumulator threshold 100, minimum segment length 50, maximum gap 20.
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=50, maxLineGap=20)

# HoughLinesP yields None (not an empty array) when no segment is found, so
# guard the loop; the image is still written, just without any overlay.
if lines is not None:
    for line in lines:
        # Each entry wraps one (x1, y1, x2, y2) segment; convert the NumPy
        # integers to plain ints before handing them to the drawing call.
        x1, y1, x2, y2 = map(int, line[0])
        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

cv2.imwrite('linesDetected.jpg', img)

唯一的问题是这个检测算法一点都不准确，至少对我的图表来说是这样。要从图表中提取数据，线条检测必须比较准确。有什么办法可以做到这一点吗？还是说我检测线条的策略从一开始就错了？我是否应该从检测其他东西入手，比如圆、物体大小、轮廓或颜色？

Answer 1

使用颜色分割是将此图转换为数据的简便方法。此方法确实需要一些手动标注。图形被分割后，统计每种颜色的像素数量。可以参考 OpenCV 库自带示例文件中的 'watershed' 示例：

import numpy as np
import cv2 as cv
from common import Sketcher

class App:
    """Interactive marker-based watershed segmentation of one image.

    The user paints numbered markers through a ``Sketcher`` attached to the
    'img' window; each time the sketch changes, the watershed is recomputed
    and shown as a color overlay, which is also written to disk.
    """

    def __init__(self, fn):
        """Load the image at *fn*, resize it to 654x654 and set up marker state."""
        self.img = cv.resize(cv.imread(fn), (654, 654))
        rows, cols = self.img.shape[:2]
        # One int32 label per pixel; 0 means "not marked yet".
        self.markers = np.zeros((rows, cols), np.int32)
        self.markers_vis = self.img.copy()
        self.cur_marker = 1
        # Palette of 12 colors: every index of a 2x2x3 grid, scaled by 123.
        palette = list(np.ndindex(2, 2, 3))
        self.colors = np.int32(palette) * 123
        self.auto_update = True
        self.sketch = Sketcher('img', [self.markers_vis, self.markers], self.get_colors)

    def get_colors(self):
        """Return ``(color, marker)`` for the active marker.

        ``color`` is the palette entry of the current marker as a list of
        plain ints; ``marker`` is the current marker number itself.
        """
        label = self.cur_marker
        return [int(channel) for channel in self.colors[label]], label

    def watershed(self):
        """Run the watershed on a copy of the markers, then show and save the result."""
        labels = self.markers.copy()
        cv.watershed(self.img, labels)
        cv.imshow('img', self.img)
        # np.maximum maps any negative label to palette index 0.
        overlay = self.colors[np.maximum(labels, 0)]
        blended = cv.addWeighted(self.img, 0.5, overlay, 0.5, 0.0, dtype=cv.CV_8UC3)
        overlay_u8 = np.array(overlay, np.uint8)
        cv.imshow('overlay', overlay_u8)
        cv.imwrite('/home/stephen/Desktop/overlay.png', overlay_u8)
        cv.imshow('watershed', blended)

    def run(self):
        """Poll the keyboard until both windows report -1 or Esc is pressed.

        Keys '1'..'9' select the active marker; a dirty sketch triggers a
        fresh watershed while ``auto_update`` is set.
        """
        while not (cv.getWindowProperty('img', 0) == -1 and cv.getWindowProperty('watershed', 0) == -1):
            key = cv.waitKey(50)
            if ord('1') <= key <= ord('9'):
                self.cur_marker = key - ord('0')
                print('marker: ', self.cur_marker)
            if self.sketch.dirty and self.auto_update:
                self.watershed()
                self.sketch.dirty = False
            if key == 27:  # Esc
                break
        cv.destroyAllWindows()


# Resolve the input image through OpenCV's sample-file lookup, then start the
# interactive watershed tool and block until the user closes it.
fn = '/home/stephen/Desktop/test.png'
app = App(cv.samples.findFile(fn))
app.run()

输出将是这样的图像：

您可以使用以下代码统计每种颜色的像素数量：

# Extract the values from the image
vals = []
overlay_path = '/home/stephen/Desktop/overlay.png'
img = cv.imread(overlay_path)
# cv.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError(f"could not read image {overlay_path!r}")
# Get the colors in the image: one row per pixel, then the unique rows
flat = img.reshape(-1, img.shape[-1])
colors = np.unique(flat, axis=0)
# Iterate through the colors (ignore the first and last colors)
for color in colors[1:-1]:
    # Widen to a signed integer type before the +/-1 so that channel values
    # 0 and 255 cannot wrap around in uint8 arithmetic, then clamp the
    # tolerance window to the valid 0..255 range.
    channels = color.astype(int)
    lower = np.clip(channels - 1, 0, 255)
    upper = np.clip(channels + 1, 0, 255)
    mask = cv.inRange(img, lower, upper)
    # The mask holds 0 or 255 per pixel. Count the matching pixels directly:
    # adding uint8 rows with the builtin sum() wraps modulo 256 and does not
    # yield a pixel count.
    vals.append(cv.countNonZero(mask))
    cv.imshow('mask', mask)
    cv.waitKey(0)
cv.destroyAllWindows()

并使用此代码打印出输出数据：

# Pair each category label with the corresponding measured value and print the
# pairs. Assumes vals holds one entry per label in this order -- TODO confirm.
names = ['alcohol', 'esters', 'biter', 'hoppy', 'acid', 'zoetheid', 'mout']
labelled = list(zip(names, vals))
print(labelled)

输出为：

[('alcohol', 22118), ('esters', 26000), ('biter', 16245), ('hoppy', 21170), ('acid', 19156), ('zoetheid', 11090), ('mout', 7167)]

将扫描文档中的图表转换为数据

Converting graphs from a scanned document into data

python

opencv

hough-transform