Google Vision OCR 返回的信息过多
google vision OCR returning too much information
我创建了一个简单的类来测试 Google Vision OCR API。我传入一张只包含 5 个字母的简单图片,它应该返回字符串 "CRAIG"。然而这个 API 调用返回了很多额外的信息:
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 183,
"y": 105
},
{
"x": 674,
"y": 105
},
{
"x": 674,
"y": 253
},
{
"x": 183,
"y": 253
}
]
},
"symbols": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 183,
"y": 105
},
{
"x": 257,
"y": 105
},
{
"x": 257,
"y": 253
},
{
"x": 183,
"y": 253
}
]
},
"text": "C",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 249,
"y": 105
},
{
"x": 371,
"y": 105
},
{
"x": 371,
"y": 253
},
{
"x": 249,
"y": 253
}
]
},
"text": "R",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 459,
"y": 105
},
{
"x": 581,
"y": 105
},
{
"x": 581,
"y": 253
},
{
"x": 459,
"y": 253
}
]
},
"text": "A",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 582,
"y": 105
},
{
"x": 638,
"y": 105
},
{
"x": 638,
"y": 253
},
{
"x": 582,
"y": 253
}
]
},
"text": "I",
"confidence": 0.98
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
],
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 636,
"y": 105
},
{
"x": 674,
"y": 105
},
{
"x": 674,
"y": 253
},
{
"x": 636,
"y": 253
}
]
},
"text": "G",
"confidence": 0.99
}
],
"confidence": 0.98
}
我怎样才能只得到返回的字母?
代码:
/// <summary>
/// Runs document text detection on <c>vision.jpg</c> and writes the words of
/// every detected paragraph to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // Point GOOGLE_APPLICATION_CREDENTIALS at the credentials JSON file.
    // NOTE(review): this path looks truncated — confirm it before running.
    string credentialPath = @"C:\Users385\nodal.json";
    System.Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", credentialPath);

    // Create the Vision client.
    var client = ImageAnnotatorClient.Create();

    // Read the image from disk.
    var image = Image.FromFile("vision.jpg");

    // Run document text detection on the image.
    var response = client.DetectDocumentText(image);

    // Walk pages -> blocks -> paragraphs.
    foreach (var page in response.Pages)
    {
        foreach (var block in page.Blocks)
        {
            foreach (var paragraph in block.Paragraphs)
            {
                // Each Word is converted with its own ToString() and the
                // results are joined with newlines.
                string joinedWords = string.Join("\n", paragraph.Words);
                Console.WriteLine(joinedWords);
            }
        }
    }
}
我传入的图片是我用画图 (Paint) 画出来的一个简单的单词:
经过一些研究,以下内容为我提供了单词以及更清晰的输出:
Block Text at (183, 105) - (674, 105) - (674, 253) - (183, 253)
Paragraph at (183, 105) - (674, 105) - (674, 253) - (183, 253)
Word: CRAIG
方法:
// Walk pages -> blocks -> paragraphs -> words, printing the bounding box of
// each block and paragraph and the text of each word.
foreach (var page in response.Pages)
{
    foreach (var block in page.Blocks)
    {
        // Render the block's corner points as "(x, y) - (x, y) - ...".
        var blockCorners = block.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
        string blockBox = string.Join(" - ", blockCorners);
        Console.WriteLine($"Block {block.BlockType} at {blockBox}");

        foreach (var paragraph in block.Paragraphs)
        {
            var paragraphCorners = paragraph.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
            string paragraphBox = string.Join(" - ", paragraphCorners);
            Console.WriteLine($" Paragraph at {paragraphBox}");

            foreach (var word in paragraph.Words)
            {
                // A word is the concatenation of the text of its symbols.
                string wordText = string.Concat(word.Symbols.Select(symbol => symbol.Text));
                Console.WriteLine($" Word: {wordText}");
            }
        }
    }
}
尝试将下面这一行:
var response = client.DetectDocumentText(image);
改为:
var response = client.DetectText(image);
说明
以下是来自 Google Cloud Vision API 文档的一些信息:
Vision API 可以从图像中检测和提取文本。有两种支持光学字符识别 (OCR) 的注释功能:
TEXT_DETECTION 检测并从任何图像中提取文本。例如,照片可能包含街道标志或交通标志。 JSON 包括整个提取的字符串,以及单个单词及其边界框。
DOCUMENT_TEXT_DETECTION 也从图像中提取文本,但针对密集文本和文档优化了响应。 JSON 包括页面、块、段落、单词和中断信息。
我创建了一个简单的类来测试 Google Vision OCR API。我传入一张只包含 5 个字母的简单图片,它应该返回字符串 "CRAIG"。然而这个 API 调用返回了很多额外的信息:
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 183,
"y": 105
},
{
"x": 674,
"y": 105
},
{
"x": 674,
"y": 253
},
{
"x": 183,
"y": 253
}
]
},
"symbols": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 183,
"y": 105
},
{
"x": 257,
"y": 105
},
{
"x": 257,
"y": 253
},
{
"x": 183,
"y": 253
}
]
},
"text": "C",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 249,
"y": 105
},
{
"x": 371,
"y": 105
},
{
"x": 371,
"y": 253
},
{
"x": 249,
"y": 253
}
]
},
"text": "R",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 459,
"y": 105
},
{
"x": 581,
"y": 105
},
{
"x": 581,
"y": 253
},
{
"x": 459,
"y": 253
}
]
},
"text": "A",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"vertices": [
{
"x": 582,
"y": 105
},
{
"x": 638,
"y": 105
},
{
"x": 638,
"y": 253
},
{
"x": 582,
"y": 253
}
]
},
"text": "I",
"confidence": 0.98
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
],
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"boundingBox": {
"vertices": [
{
"x": 636,
"y": 105
},
{
"x": 674,
"y": 105
},
{
"x": 674,
"y": 253
},
{
"x": 636,
"y": 253
}
]
},
"text": "G",
"confidence": 0.99
}
],
"confidence": 0.98
}
我怎样才能只得到返回的字母?
代码:
/// <summary>
/// Runs document text detection on <c>vision.jpg</c> and writes the words of
/// every detected paragraph to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // Point GOOGLE_APPLICATION_CREDENTIALS at the credentials JSON file.
    // NOTE(review): this path looks truncated — confirm it before running.
    string credentialPath = @"C:\Users385\nodal.json";
    System.Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", credentialPath);

    // Create the Vision client.
    var client = ImageAnnotatorClient.Create();

    // Read the image from disk.
    var image = Image.FromFile("vision.jpg");

    // Run document text detection on the image.
    var response = client.DetectDocumentText(image);

    // Walk pages -> blocks -> paragraphs.
    foreach (var page in response.Pages)
    {
        foreach (var block in page.Blocks)
        {
            foreach (var paragraph in block.Paragraphs)
            {
                // Each Word is converted with its own ToString() and the
                // results are joined with newlines.
                string joinedWords = string.Join("\n", paragraph.Words);
                Console.WriteLine(joinedWords);
            }
        }
    }
}
我传入的图片是我用画图 (Paint) 画出来的一个简单的单词:
经过一些研究,以下内容为我提供了单词以及更清晰的输出:
Block Text at (183, 105) - (674, 105) - (674, 253) - (183, 253)
Paragraph at (183, 105) - (674, 105) - (674, 253) - (183, 253)
Word: CRAIG
方法:
// Walk pages -> blocks -> paragraphs -> words, printing the bounding box of
// each block and paragraph and the text of each word.
foreach (var page in response.Pages)
{
    foreach (var block in page.Blocks)
    {
        // Render the block's corner points as "(x, y) - (x, y) - ...".
        var blockCorners = block.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
        string blockBox = string.Join(" - ", blockCorners);
        Console.WriteLine($"Block {block.BlockType} at {blockBox}");

        foreach (var paragraph in block.Paragraphs)
        {
            var paragraphCorners = paragraph.BoundingBox.Vertices.Select(v => $"({v.X}, {v.Y})");
            string paragraphBox = string.Join(" - ", paragraphCorners);
            Console.WriteLine($" Paragraph at {paragraphBox}");

            foreach (var word in paragraph.Words)
            {
                // A word is the concatenation of the text of its symbols.
                string wordText = string.Concat(word.Symbols.Select(symbol => symbol.Text));
                Console.WriteLine($" Word: {wordText}");
            }
        }
    }
}
尝试将下面这一行:
var response = client.DetectDocumentText(image);
改为:
var response = client.DetectText(image);
说明
以下是来自 Google Cloud Vision API 文档的一些信息:
Vision API 可以从图像中检测和提取文本。有两种支持光学字符识别 (OCR) 的注释功能:
TEXT_DETECTION 检测并从任何图像中提取文本。例如,照片可能包含街道标志或交通标志。 JSON 包括整个提取的字符串,以及单个单词及其边界框。
DOCUMENT_TEXT_DETECTION 也从图像中提取文本,但针对密集文本和文档优化了响应。 JSON 包括页面、块、段落、单词和中断信息。