从单词中提取字符时,为什么同一个单词里相同字符的置信度值不同?
When I extract the characters from a word, why are the confidence values of identical characters within that word different?
我正在从图像中提取单词,并检查该单词中每个字符的置信度。即使是同一个单词里的同一个字符,每次得到的置信度也不相同。
示例如下。
以单词 01.01.2012 为例:字符 0 在其中出现了 3 次,而这 3 次得到的置信度各不相同。
这是输入图像,输出是文本文件,如下所示。我只需要数字数据:如果图像中的某个单词包含 0 到 9 之间的数字,我就把该单词、对应的字符及其置信度写入文件,如下所示。我为 0 到 9 的每个数字各生成了一个文件;作为参考,这里给出字符 0 对应的文件。
WORD CHAR confidence
7/11/2014 0 94.3153
01.01.2012 0 91.9117
01.01.2012 0 95.059
01.01.2012 0 95.1877
31.12.2012 0 92.1003
05.07.2013 0 94.4376
05.07.2013 0 97.3389
05.07.2013 0 92.4576
2012 0 94.0608
2012 0 93.1969
31.12.2012 0 93.8993
31.12.2011 0 94.513
606 0 93.5746
405 0 93.6727
109.821 0 96.2786
331.028 0 96.1837
109.506; 0 93.1421
109.506; 0 93.7133
110.427 0 93.7141
946.130 0 96.3438
200.274 0 95.7532
200.274 0 94.8678
10.553.331 0 96.3162
10.186.341 0 94.15
63.401 0 94.6042
346.350 0 96.2305
343.044 0 95.9801
346.350; 0 93.5741
343.044 0 93.8484
284.506 0 94.6761
0 0 95.9303
420 0 94.0067
0 0 93.2645
7.355.042 0 95.9187
9.108 0 96.3331
10. 0 93.9019
12.042 0 97.3823
294.704 0 93.4084
4.350 0 96.0915
0 0 95.4884
20.559.209 0 95.4821
20.559.209 0 94.1849
19.207.660 0 95.9933
19.207.660 0 95.1577
31.12.2012 0 92.7785
31.12.2011 0 94.6773
14.054 0 95.3734
44.706 0 93.6371
58.760 0 97.2008
4.111.720 0 94.4336
2.873.806 0 95.8218
11.083.608 0 96.1708
11.083.608 0 94.3456
10.721.302 0 93.3877
10.721.302 0 94.978
5.045.424 0 95.424
4.242.083 0 95.424
谁能告诉我这些置信度背后的计算原理是什么?
#include "./include/header.h"
#include "./include/enum.h"
/// Bounding box of one recognised element together with its OCR text.
/// The functions in this file store text lines, words and single symbols in it.
/// Every member has a defined default so a freshly declared RECT never carries
/// indeterminate coordinates or a wild text pointer.
class RECT
{
public:
    int col = 0;     ///< column (x) of the left edge, in image pixels
    int row = 0;     ///< row (y) of the top edge, in image pixels
    int width = 0;   ///< box width in pixels
    int height = 0;  ///< box height in pixels
    /// NUL-terminated recognised text. The producers in this file allocate it
    /// with calloc(); RECT itself does not own or release it, so copies of a
    /// RECT share the same buffer.
    char *ocrResult = nullptr;
};
/// Holds the working buffers of one OCR run: the gray/binary page image, a
/// second copy of it, and the gray-level histogram used for thresholding.
/// Call initialize() before use and release() when done.
class OCR
{
public:
    int *g_pixelBuffer = nullptr;         ///< one int per pixel: gray value, later 0/1 after binarisation
    int *g_pixelBufferForWord = nullptr;  ///< second per-pixel buffer of the same size
    int *g_histogram = nullptr;           ///< gray-level counts, indexed by pixel value
    int g_Id = 1;                         ///< next ID written by dumpIntoFile()

    /// Allocates zero-filled buffers for an image of `row` x `col` pixels.
    /// @return Success, or MemoryNotAllocated if any allocation fails (in which
    ///         case nothing stays allocated).
    int initialize (const int row, const int col)
    {
        // Drop buffers from a previous call so re-initialising does not leak.
        release();

        /// find the size of the image
        const int size = row * col;
        // The thresholding code walks histogram bins 0..255 regardless of the
        // image size, so the histogram must never be smaller than 256 entries.
        const int histogramSize = (size < 256) ? 256 : size;

        g_pixelBuffer = static_cast<int *>(calloc(size, sizeof(int)));
        g_pixelBufferForWord = static_cast<int *>(calloc(size, sizeof(int)));
        g_histogram = static_cast<int *>(calloc(histogramSize, sizeof(int)));

        /// Check for proper allocation; free whatever did succeed on failure.
        if(g_pixelBuffer == NULL || g_pixelBufferForWord == NULL || g_histogram == NULL)
        {
            release();
            return MemoryNotAllocated;
        }
        g_Id = 1;
        return Success;
    }
    vector<RECT > processImage(Mat &image, int size, int ,int );
    void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);

    /// Frees all buffers. Safe to call repeatedly or before initialize().
    void release()
    {
        free(g_pixelBuffer);
        free(g_pixelBufferForWord);
        free(g_histogram);
        // Reset so a second release() cannot double-free.
        g_pixelBuffer = nullptr;
        g_pixelBufferForWord = nullptr;
        g_histogram = nullptr;
    }
};
// Per-digit report files: myfileN receives the lines that
// dumpNumberConfidenceIntoFile() writes for recognised characters starting
// with digit N. They are namespace-scope objects, so the files "0.txt" ..
// "9.txt" are opened (relative to the working directory) during start-up,
// before main() runs; main() then writes the column header into each one.
ofstream myfile1("1.txt");
ofstream myfile2("2.txt");
ofstream myfile3("3.txt");
ofstream myfile4("4.txt");
ofstream myfile5("5.txt");
ofstream myfile6("6.txt");
ofstream myfile7("7.txt");
ofstream myfile8("8.txt");
ofstream myfile9("9.txt");
ofstream myfile0("0.txt");
// Forward declarations of the free helper functions defined further down.

/// Marks the outline of the box (staCol, staRow)-(edCol, edRow) with the value
/// 255 in a buffer whose row length is given by the last parameter.
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow
, int *PixelBufferForWord, int);
/// Writes "word, character, confidence" to the matching per-digit file when
/// Char starts with a digit '0'..'9'.
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char);
/// Crops one text line out of the page buffer, runs OCR on it and appends the
/// words (and, via getCharDataFromWord, their symbols) to rectBuffer.
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer);
/// Debug helper: writes a rectangular region of a pixel buffer as a plain
/// "P1" bitmap text file.
void dumpDataIntoFile (int *, int collenth, int strow, int stcol,
int enrow, int encols, char *output);
//void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);
/// Converts the image to gray, builds the histogram and binarises the OCR's
/// pixel buffer.
void GetBinaryImage(Mat &image ,OCR *,const int size);
/// Picks a threshold from the OCR's histogram and rewrites its pixel buffer
/// as 0/1 values.
void getBinaryImage(OCR *,int size);
/// Crops one word out of the line sub-image, runs OCR on it and appends each
/// recognised symbol to rectBuffer.
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord);
int main(int argc ,char **argv)
{
int rs = Success;
Mat image = imread(argv[1]);//read the image
if(!image.data){
cout << "can't able to read the image" << endl;
return 0;
}
int rows = image.rows;// get the rows
int cols = image.cols;// get the col
int size = rows * cols;// get the size
OCR ocr;
/// Allocate or initialize memory
rs = ocr.initialize (rows, cols);
/// check proper allocation
if(rs == MemoryNotAllocated) return rs;
myfile1 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile2 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile3 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile4 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile5 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile6 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile7 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile8 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile9 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile0 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
vector <RECT> rectBuffer = ocr.processImage(image, size, rows, cols);
ocr.dumpIntoFile(rectBuffer, argv[2] );
//dumpDataIntoFile (ocr.g_pixelBufferForWord, cols, 0, 0, rows - 1, cols - 1, ( char *)"test123456.pbm");
ocr.release();
}
/// Binarises `image`, runs Tesseract over the whole page and returns one RECT
/// per recognised text line, each followed by the RECTs that
/// getWordDataFromLine() appends for its words and symbols.
///
/// @param image  input image (handed to GetBinaryImage)
/// @param size   number of pixels, rows * cols
/// @param rows   image height in pixels
/// @param cols   image width in pixels
/// @return       the collected rectangles; each ocrResult is calloc()'d and
///               must be free()'d by the caller.
/// Terminates the process with exit(1) if Tesseract cannot be initialised or
/// a text copy cannot be allocated.
vector <RECT> OCR ::processImage(Mat &image, int size, int rows, int cols)
{
    GetBinaryImage (image, this, size);// convert the image into the binary
    for(int i = 0; i < size; i++)
    {
        g_pixelBufferForWord[i] = g_pixelBuffer[i];
    }
    // dumpDataIntoFile (this, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
    tesseract::TessBaseAPI tess;
    if (tess.Init("/usr/share/tesseract/tessdata", "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        exit(1);
    }
    // NOTE(review): the 0/1 int buffer is passed as a 4-bytes-per-pixel image;
    // confirm this is the pixel layout Tesseract is meant to see.
    tess.SetImage(reinterpret_cast<unsigned char *>(g_pixelBuffer), cols, rows, sizeof(int)
                  ,sizeof(int) * cols);
    tess.Recognize(0);
    tesseract::ResultIterator *riLine = tess.GetIterator();
    const tesseract::PageIteratorLevel levelLine = tesseract:: RIL_TEXTLINE;
    vector <RECT> rectBuffer;
    if(riLine!=0)
    {
        do {
            char *Line = riLine->GetUTF8Text(levelLine);
            if(Line != NULL)
            {
                int startCol, startRow, endCol, endRow;
                riLine->BoundingBox(levelLine, &startCol, &startRow, &endCol, &endRow);
                int width = endCol - startCol + 1;
                int height = endRow - startRow + 1;
                // The "+ 1" can push the box one pixel past the image when the
                // reported right/bottom edge already equals the image size;
                // keep it inside so the crop below never reads out of bounds.
                if(startCol + width > cols) width = cols - startCol;
                if(startRow + height > rows) height = rows - startRow;

                RECT rectLine;
                rectLine.col = startCol;
                rectLine.row = startRow;
                rectLine.width = width;
                rectLine.height = height;
                const int length = strlen(Line) + 1;
                rectLine.ocrResult = static_cast<char *>(calloc( length, sizeof(char)));
                if(rectLine.ocrResult == NULL){
                    cout << "rectLine.ocrResult is not allocate"<< endl;
                    exit(1);
                }
                strcpy(rectLine.ocrResult, Line);
                rectBuffer.push_back(rectLine);
                getWordDataFromLine(g_pixelBuffer,g_pixelBufferForWord, rectLine,cols, rectBuffer);
                // GetUTF8Text hands back memory from new[]; it must be
                // released with delete[], not free().
                delete [] Line;
            }
        } while (riLine->Next(levelLine));
    }
    //dumpIntoFile(rectBuffer, argv[2]);
    // The iterator points into data owned by the API object, so destroy it
    // before End() tears that data down (same order as the word/char passes).
    delete riLine;
    tess.End();
    return rectBuffer;
}
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer)
{
int index;
int *SubImageBuffer = (int *)calloc(rectLine.width * rectLine.height, sizeof(int));
if(!SubImageBuffer){
cout << "SubImageBuffer not allocate" << endl;
}
int i = 0;
for(int r = rectLine.row ; r < rectLine.row + rectLine.height; r++)
{
for(int c = rectLine.col ; c < rectLine.col + rectLine.width; c++)
{
index = r * mainImageCol + c;
SubImageBuffer[i++] = PixelBuffer[index];
}
}
//dumpDataIntoFile (SubImageBuffer, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessWord;
if (tessWord.Init("/usr/share/tesseract/tessdata", "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
tessWord.SetImage((unsigned char*)SubImageBuffer, rectLine.width,
rectLine.height, sizeof(int) ,sizeof(int) * rectLine.width);
tessWord.Recognize(0);
tesseract::ResultIterator *riWord = tessWord.GetIterator();
tesseract::PageIteratorLevel levelWord = tesseract:: RIL_WORD;
RECT rectWord;
if(riWord!=0)
{
do {
char *Word = riWord->GetUTF8Text(levelWord);
if(Word != NULL)
{
int startCol, startRow, endCol, endRow;
int staCol = 0, staRow = 0, edCol = 0, edRow = 0;
riWord->BoundingBox(levelWord, &startCol, &startRow, &endCol, &endRow);
staCol = startCol;
staRow = startRow;
edCol = endCol;
edRow = endRow;
staCol += rectLine.col;
staRow += rectLine.row;
edRow += rectLine.row;
edCol += rectLine.col;
rectWord.col = staCol;
rectWord.row = staRow;
rectWord.width = edCol - staCol + 1;
rectWord.height = edRow - staRow + 1;
int length = strlen(Word) + 1;
rectWord.ocrResult = (char *)calloc( length, sizeof(char));
if(rectWord.ocrResult == NULL){
cout << "rectWord.ocrResult is not allocate"<< endl;
exit(1);
}
strcpy(rectWord.ocrResult, Word);
rectBuffer.push_back(rectWord);
//displayBoundingBox(staCol, staRow, edCol, edRow ,PixelBufferForWord, mainImageCol);
getCharDataFromWord(SubImageBuffer, PixelBufferForWord, startCol, startRow ,endCol ,endRow,
rectLine,mainImageCol, rectBuffer, rectWord);
//delete Word;
free(Word);
}
}while (riWord->Next(levelWord));
}
delete riWord;
tessWord.End();
free(SubImageBuffer);
}
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord)
{
int index;
int width = endColWord - startColWord + 1;
int height = endRowWord - startRowWord + 1;
int *SubImageBufferForChar = (int *)calloc(width * height, sizeof(int));
if(!SubImageBufferForChar){
cout << "SubImageBuffer not read" << endl;
}
int i = 0;
for(int r = startRowWord ; r <= endRowWord; r++)
{
for(int c = startColWord; c <= endColWord; c++)
{
index = r * rectLine.width + c;
SubImageBufferForChar[i++] = PixelBuffer[index];
}
}
//dumpDataIntoFile (SubImageBufferForChar, width, 0, 0, height - 1, width - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessChar;
if (tessChar.Init("/usr/share/tesseract/tessdata", "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
tessChar.SetImage((unsigned char*)SubImageBufferForChar, width,
height, sizeof(int) ,sizeof(int) * width);
tessChar.Recognize(0);
tesseract::ResultIterator *riChar = tessChar.GetIterator();
tesseract::PageIteratorLevel levelChar = tesseract:: RIL_SYMBOL;
RECT rectChar;
if(riChar!=0)
{
do {
char *Char = riChar->GetUTF8Text(levelChar);
if(Char != NULL)
{
float conf = riChar->Confidence(levelChar);
int startCol, startRow, endCol, endRow;
riChar->BoundingBox(levelChar, &startCol, &startRow, &endCol, &endRow);
startCol += rectWord.col;
startRow += rectWord.row;
endRow += rectWord.row;
endCol += rectWord.col;
rectChar.col = startCol;
rectChar.row = startRow;
rectChar.width = endCol - startCol + 1;
rectChar.height = endRow - startRow + 1;
int length = strlen(Char) + 1;
rectChar.ocrResult = (char *)calloc( length, sizeof(char));
if(rectChar.ocrResult == NULL){
cout << "rectChar.ocrResult is not allocate"<< endl;
exit(1);
}
strcpy(rectChar.ocrResult, Char);
rectBuffer.push_back(rectChar);
dumpNumberConfidenceIntoFile(rectWord.ocrResult, conf, Char);
//displayBoundingBox(startCol, startRow, endCol, endRow ,PixelBufferForChar, mainImageCol);
//delete Char;
free(Char);
}
} while (riChar->Next(levelChar));
}
delete riChar;
tessChar.End();
free(SubImageBufferForChar);
}
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char)
{
if(Char[0] >= '0' && Char[0] <= '9')
{
if(Char[0] == '0'){
myfile0 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '1'){
myfile1 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '2'){
myfile2 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '3'){
myfile3 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '4'){
myfile4 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '5'){
myfile5 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '6'){
myfile6 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '7'){
myfile7 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '8'){
myfile8 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '9'){
myfile9 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
}
}
/// Writes the collected rectangles to `outputFile` as a tab-separated table:
/// a header row, then one row per RECT with a running ID (taken from g_Id,
/// which keeps counting across calls), the box geometry and the recognised
/// text up to, but not including, its first newline.
///
/// @param rectBuffer  rectangles to write; every ocrResult must be a valid C string
/// @param outputFile  path of the file to create
void OCR ::dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile)
{
    ofstream report(outputFile);
    report << "ID" << '\t' << "CORD_X" << '\t' << "CORD_Y" << '\t'
           << "CORD_W" << '\t' << "CORD_H" << '\t' << "STRING" << endl;

    for(RECT &entry : rectBuffer)
    {
        report << g_Id++ << '\t' << entry.col << '\t' << entry.row << '\t'
               << entry.width << '\t' << entry.height << '\t';

        // Emit the text one character at a time, stopping at end of string or
        // at the first line break so each record stays on a single row.
        for(const char *p = entry.ocrResult; *p != '\0' && *p != '\n'; ++p)
        {
            report << *p;
        }
        report << endl;
    }
}
/// Binarises ocr->g_pixelBuffer in place.
///
/// A threshold is chosen from the 256-bin gray-level histogram in
/// ocr->g_histogram by maximising q1 * q2 * (u1 - u2)^2, where q1/q2 are the
/// pixel counts at or below / above the candidate level and u1/u2 the mean
/// gray level of each group. Pixels brighter than the threshold become 0, all
/// others become 1.
///
/// @param ocr   buffers to work on; the histogram must already be filled
/// @param size  number of pixels in g_pixelBuffer
void getBinaryImage(OCR *ocr, int size)
{
    const long long total = size;

    // Sum of (level * count) over the whole histogram. Widen before
    // multiplying: level * count overflows int on images of several megapixels.
    long long weightedTotal = 0;
    for(int level = 0; level < 256; level++)
        weightedTotal += static_cast<long long>(level) * ocr->g_histogram[level];

    long long q1 = 0;             // pixels with gray level <= t
    long long weightedBelow = 0;  // sum of (level * count) for levels <= t
    double var_max = 0;
    int threshold = 0;
    for(int t = 0; t < 256; t++)
    {
        q1 += ocr->g_histogram[t];
        weightedBelow += static_cast<long long>(t) * ocr->g_histogram[t];

        // An empty group has no mean; skip instead of dividing by zero.
        if(q1 == 0) continue;
        const long long q2 = total - q1;
        if(q2 <= 0) break;   // every remaining level leaves the upper group empty

        // Exact integer sums with double means avoid the rounding that float
        // accumulators suffer once the sums exceed 2^24.
        const double u1 = static_cast<double>(weightedBelow) / q1;
        const double u2 = static_cast<double>(weightedTotal - weightedBelow) / q2;
        const double result = static_cast<double>(q1) * q2 * (u1 - u2) * (u1 - u2);
        if(result > var_max)
        {
            threshold = t;
            var_max = result;
        }
    }

    for(int i = 0; i < size; i++)
    {
        ocr->g_pixelBuffer[i] = (ocr->g_pixelBuffer[i] > threshold) ? 0 : 1;
    }
}
/// Fills ocr->g_pixelBuffer with the gray version of `image` (plain average of
/// the three channels), counts the gray levels into ocr->g_histogram and then
/// lets getBinaryImage() turn the buffer into a 0/1 image.
///
/// @param image  input image; it is split into three channel planes
/// @param ocr    destination buffers
/// @param size   number of pixels to process
void GetBinaryImage(Mat &image ,OCR *ocr ,const int size)
{
    Mat planes[3];
    split(image, planes);                 // one plane per colour channel
    const uchar *blue = planes[0].data;
    const uchar *green = planes[1].data;
    const uchar *red = planes[2].data;

    // Gray value and histogram count are produced in the same pass.
    for(int px = 0; px < size; ++px)
    {
        const int gray = (red[px] + green[px] + blue[px]) / 3;
        ocr->g_pixelBuffer[px] = gray;
        ocr->g_histogram[gray]++;
    }

    getBinaryImage (ocr, size);           // threshold the gray buffer
}
/// Debug helper: writes the region [strow..enrow] x [stcol..encols] of
/// `pixelBuffer` to `output` as a plain-text "P1" bitmap. Every non-zero
/// pixel is written as 1, every zero pixel as 0.
///
/// @param pixelBuffer  source pixels, one int each
/// @param collenth     row length of pixelBuffer in pixels
/// @param strow,stcol  first row / column of the region (inclusive)
/// @param enrow,encols last row / column of the region (inclusive)
/// @param output       path of the file to create
void dumpDataIntoFile (int *pixelBuffer, int collenth, int strow, int stcol,
                       int enrow, int encols, char *output)
{
    std::ofstream bitmap(output);

    // Header: magic number, then width and height of the dumped region.
    bitmap << "P1" << std::endl;
    bitmap << (encols - stcol + 1) << " " << (enrow - strow + 1) << std::endl;

    for(int r = strow; r <= enrow; ++r)
    {
        for(int c = stcol; c <= encols; ++c)
        {
            const bool set = pixelBuffer[r * collenth + c] != 0;
            bitmap << (set ? "1" : "0") << " ";
        }
        bitmap << std::endl;
    }
    bitmap.close();
}
/// Draws the outline of a rectangle into a pixel buffer by setting every
/// pixel on its four edges to 255.
///
/// @param staCol,staRow       top-left corner of the box (inclusive)
/// @param edCol,edRow         bottom-right corner of the box (inclusive)
/// @param PixelBufferForWord  buffer to draw into, one int per pixel
/// @param mainImageCol        row length of the buffer in pixels
/// The caller must make sure all four corners lie inside the buffer.
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow , int *PixelBufferForWord,int mainImageCol)
{
    // Both loops run up to and including the end coordinate; with exclusive
    // bounds the bottom-right corner pixel was never drawn.
    for(int i = staRow; i <= edRow ; i++ ) {
        PixelBufferForWord[i * mainImageCol + staCol] = 255;   // left edge
        PixelBufferForWord[i * mainImageCol + edCol] = 255;    // right edge
    }
    for(int j = staCol; j <= edCol; j++ ) {
        PixelBufferForWord[staRow * mainImageCol + j] = 255;   // top edge
        PixelBufferForWord[edRow * mainImageCol + j] = 255;    // bottom edge
    }
}
问:置信度是如何计算的?
置信度算法计算的是识别出的字符与可用字符之间的距离。
此链接中的 "linguistic analysis" 一节提供了您要找的答案:https://github.com/tesseract-ocr/docs/blob/master/tesseracticdar2007.pdf
我正在从图像中提取单词,并检查该单词中每个字符的置信度。即使是同一个单词里的同一个字符,每次得到的置信度也不相同。
示例如下。
以单词 01.01.2012 为例:字符 0 在其中出现了 3 次,而这 3 次得到的置信度各不相同。
这是输入图像,输出是文本文件,如下所示。我只需要数字数据:如果图像中的某个单词包含 0 到 9 之间的数字,我就把该单词、对应的字符及其置信度写入文件,如下所示。我为 0 到 9 的每个数字各生成了一个文件;作为参考,这里给出字符 0 对应的文件。
WORD CHAR confidence
7/11/2014 0 94.3153
01.01.2012 0 91.9117
01.01.2012 0 95.059
01.01.2012 0 95.1877
31.12.2012 0 92.1003
05.07.2013 0 94.4376
05.07.2013 0 97.3389
05.07.2013 0 92.4576
2012 0 94.0608
2012 0 93.1969
31.12.2012 0 93.8993
31.12.2011 0 94.513
606 0 93.5746
405 0 93.6727
109.821 0 96.2786
331.028 0 96.1837
109.506; 0 93.1421
109.506; 0 93.7133
110.427 0 93.7141
946.130 0 96.3438
200.274 0 95.7532
200.274 0 94.8678
10.553.331 0 96.3162
10.186.341 0 94.15
63.401 0 94.6042
346.350 0 96.2305
343.044 0 95.9801
346.350; 0 93.5741
343.044 0 93.8484
284.506 0 94.6761
0 0 95.9303
420 0 94.0067
0 0 93.2645
7.355.042 0 95.9187
9.108 0 96.3331
10. 0 93.9019
12.042 0 97.3823
294.704 0 93.4084
4.350 0 96.0915
0 0 95.4884
20.559.209 0 95.4821
20.559.209 0 94.1849
19.207.660 0 95.9933
19.207.660 0 95.1577
31.12.2012 0 92.7785
31.12.2011 0 94.6773
14.054 0 95.3734
44.706 0 93.6371
58.760 0 97.2008
4.111.720 0 94.4336
2.873.806 0 95.8218
11.083.608 0 96.1708
11.083.608 0 94.3456
10.721.302 0 93.3877
10.721.302 0 94.978
5.045.424 0 95.424
4.242.083 0 95.424
谁能告诉我这些置信度背后的计算原理是什么?
#include "./include/header.h"
#include "./include/enum.h"
/// Bounding box of one recognised element together with its OCR text.
/// The functions in this file store text lines, words and single symbols in it.
/// Every member has a defined default so a freshly declared RECT never carries
/// indeterminate coordinates or a wild text pointer.
class RECT
{
public:
    int col = 0;     ///< column (x) of the left edge, in image pixels
    int row = 0;     ///< row (y) of the top edge, in image pixels
    int width = 0;   ///< box width in pixels
    int height = 0;  ///< box height in pixels
    /// NUL-terminated recognised text. The producers in this file allocate it
    /// with calloc(); RECT itself does not own or release it, so copies of a
    /// RECT share the same buffer.
    char *ocrResult = nullptr;
};
/// Holds the working buffers of one OCR run: the gray/binary page image, a
/// second copy of it, and the gray-level histogram used for thresholding.
/// Call initialize() before use and release() when done.
class OCR
{
public:
    int *g_pixelBuffer = nullptr;         ///< one int per pixel: gray value, later 0/1 after binarisation
    int *g_pixelBufferForWord = nullptr;  ///< second per-pixel buffer of the same size
    int *g_histogram = nullptr;           ///< gray-level counts, indexed by pixel value
    int g_Id = 1;                         ///< next ID written by dumpIntoFile()

    /// Allocates zero-filled buffers for an image of `row` x `col` pixels.
    /// @return Success, or MemoryNotAllocated if any allocation fails (in which
    ///         case nothing stays allocated).
    int initialize (const int row, const int col)
    {
        // Drop buffers from a previous call so re-initialising does not leak.
        release();

        /// find the size of the image
        const int size = row * col;
        // The thresholding code walks histogram bins 0..255 regardless of the
        // image size, so the histogram must never be smaller than 256 entries.
        const int histogramSize = (size < 256) ? 256 : size;

        g_pixelBuffer = static_cast<int *>(calloc(size, sizeof(int)));
        g_pixelBufferForWord = static_cast<int *>(calloc(size, sizeof(int)));
        g_histogram = static_cast<int *>(calloc(histogramSize, sizeof(int)));

        /// Check for proper allocation; free whatever did succeed on failure.
        if(g_pixelBuffer == NULL || g_pixelBufferForWord == NULL || g_histogram == NULL)
        {
            release();
            return MemoryNotAllocated;
        }
        g_Id = 1;
        return Success;
    }
    vector<RECT > processImage(Mat &image, int size, int ,int );
    void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);

    /// Frees all buffers. Safe to call repeatedly or before initialize().
    void release()
    {
        free(g_pixelBuffer);
        free(g_pixelBufferForWord);
        free(g_histogram);
        // Reset so a second release() cannot double-free.
        g_pixelBuffer = nullptr;
        g_pixelBufferForWord = nullptr;
        g_histogram = nullptr;
    }
};
// Per-digit report files: myfileN receives the lines that
// dumpNumberConfidenceIntoFile() writes for recognised characters starting
// with digit N. They are namespace-scope objects, so the files "0.txt" ..
// "9.txt" are opened (relative to the working directory) during start-up,
// before main() runs; main() then writes the column header into each one.
ofstream myfile1("1.txt");
ofstream myfile2("2.txt");
ofstream myfile3("3.txt");
ofstream myfile4("4.txt");
ofstream myfile5("5.txt");
ofstream myfile6("6.txt");
ofstream myfile7("7.txt");
ofstream myfile8("8.txt");
ofstream myfile9("9.txt");
ofstream myfile0("0.txt");
// Forward declarations of the free helper functions defined further down.

/// Marks the outline of the box (staCol, staRow)-(edCol, edRow) with the value
/// 255 in a buffer whose row length is given by the last parameter.
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow
, int *PixelBufferForWord, int);
/// Writes "word, character, confidence" to the matching per-digit file when
/// Char starts with a digit '0'..'9'.
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char);
/// Crops one text line out of the page buffer, runs OCR on it and appends the
/// words (and, via getCharDataFromWord, their symbols) to rectBuffer.
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer);
/// Debug helper: writes a rectangular region of a pixel buffer as a plain
/// "P1" bitmap text file.
void dumpDataIntoFile (int *, int collenth, int strow, int stcol,
int enrow, int encols, char *output);
//void dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile);
/// Converts the image to gray, builds the histogram and binarises the OCR's
/// pixel buffer.
void GetBinaryImage(Mat &image ,OCR *,const int size);
/// Picks a threshold from the OCR's histogram and rewrites its pixel buffer
/// as 0/1 values.
void getBinaryImage(OCR *,int size);
/// Crops one word out of the line sub-image, runs OCR on it and appends each
/// recognised symbol to rectBuffer.
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord);
int main(int argc ,char **argv)
{
int rs = Success;
Mat image = imread(argv[1]);//read the image
if(!image.data){
cout << "can't able to read the image" << endl;
return 0;
}
int rows = image.rows;// get the rows
int cols = image.cols;// get the col
int size = rows * cols;// get the size
OCR ocr;
/// Allocate or initialize memory
rs = ocr.initialize (rows, cols);
/// check proper allocation
if(rs == MemoryNotAllocated) return rs;
myfile1 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile2 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile3 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile4 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile5 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile6 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile7 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' <<"confidence" << endl;
myfile8 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile9 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
myfile0 << "WORD" << '\t' << '\t' << "CHAR" << '\t' << '\t' << "confidence" << endl;
vector <RECT> rectBuffer = ocr.processImage(image, size, rows, cols);
ocr.dumpIntoFile(rectBuffer, argv[2] );
//dumpDataIntoFile (ocr.g_pixelBufferForWord, cols, 0, 0, rows - 1, cols - 1, ( char *)"test123456.pbm");
ocr.release();
}
/// Binarises `image`, runs Tesseract over the whole page and returns one RECT
/// per recognised text line, each followed by the RECTs that
/// getWordDataFromLine() appends for its words and symbols.
///
/// @param image  input image (handed to GetBinaryImage)
/// @param size   number of pixels, rows * cols
/// @param rows   image height in pixels
/// @param cols   image width in pixels
/// @return       the collected rectangles; each ocrResult is calloc()'d and
///               must be free()'d by the caller.
/// Terminates the process with exit(1) if Tesseract cannot be initialised or
/// a text copy cannot be allocated.
vector <RECT> OCR ::processImage(Mat &image, int size, int rows, int cols)
{
    GetBinaryImage (image, this, size);// convert the image into the binary
    for(int i = 0; i < size; i++)
    {
        g_pixelBufferForWord[i] = g_pixelBuffer[i];
    }
    // dumpDataIntoFile (this, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
    tesseract::TessBaseAPI tess;
    if (tess.Init("/usr/share/tesseract/tessdata", "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        exit(1);
    }
    // NOTE(review): the 0/1 int buffer is passed as a 4-bytes-per-pixel image;
    // confirm this is the pixel layout Tesseract is meant to see.
    tess.SetImage(reinterpret_cast<unsigned char *>(g_pixelBuffer), cols, rows, sizeof(int)
                  ,sizeof(int) * cols);
    tess.Recognize(0);
    tesseract::ResultIterator *riLine = tess.GetIterator();
    const tesseract::PageIteratorLevel levelLine = tesseract:: RIL_TEXTLINE;
    vector <RECT> rectBuffer;
    if(riLine!=0)
    {
        do {
            char *Line = riLine->GetUTF8Text(levelLine);
            if(Line != NULL)
            {
                int startCol, startRow, endCol, endRow;
                riLine->BoundingBox(levelLine, &startCol, &startRow, &endCol, &endRow);
                int width = endCol - startCol + 1;
                int height = endRow - startRow + 1;
                // The "+ 1" can push the box one pixel past the image when the
                // reported right/bottom edge already equals the image size;
                // keep it inside so the crop below never reads out of bounds.
                if(startCol + width > cols) width = cols - startCol;
                if(startRow + height > rows) height = rows - startRow;

                RECT rectLine;
                rectLine.col = startCol;
                rectLine.row = startRow;
                rectLine.width = width;
                rectLine.height = height;
                const int length = strlen(Line) + 1;
                rectLine.ocrResult = static_cast<char *>(calloc( length, sizeof(char)));
                if(rectLine.ocrResult == NULL){
                    cout << "rectLine.ocrResult is not allocate"<< endl;
                    exit(1);
                }
                strcpy(rectLine.ocrResult, Line);
                rectBuffer.push_back(rectLine);
                getWordDataFromLine(g_pixelBuffer,g_pixelBufferForWord, rectLine,cols, rectBuffer);
                // GetUTF8Text hands back memory from new[]; it must be
                // released with delete[], not free().
                delete [] Line;
            }
        } while (riLine->Next(levelLine));
    }
    //dumpIntoFile(rectBuffer, argv[2]);
    // The iterator points into data owned by the API object, so destroy it
    // before End() tears that data down (same order as the word/char passes).
    delete riLine;
    tess.End();
    return rectBuffer;
}
void getWordDataFromLine(const int *PixelBuffer, int *PixelBufferForWord, RECT &rectLine,
int mainImageCol, vector <RECT> &rectBuffer)
{
int index;
int *SubImageBuffer = (int *)calloc(rectLine.width * rectLine.height, sizeof(int));
if(!SubImageBuffer){
cout << "SubImageBuffer not allocate" << endl;
}
int i = 0;
for(int r = rectLine.row ; r < rectLine.row + rectLine.height; r++)
{
for(int c = rectLine.col ; c < rectLine.col + rectLine.width; c++)
{
index = r * mainImageCol + c;
SubImageBuffer[i++] = PixelBuffer[index];
}
}
//dumpDataIntoFile (SubImageBuffer, w, 0, 0, h - 1, w - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessWord;
if (tessWord.Init("/usr/share/tesseract/tessdata", "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
tessWord.SetImage((unsigned char*)SubImageBuffer, rectLine.width,
rectLine.height, sizeof(int) ,sizeof(int) * rectLine.width);
tessWord.Recognize(0);
tesseract::ResultIterator *riWord = tessWord.GetIterator();
tesseract::PageIteratorLevel levelWord = tesseract:: RIL_WORD;
RECT rectWord;
if(riWord!=0)
{
do {
char *Word = riWord->GetUTF8Text(levelWord);
if(Word != NULL)
{
int startCol, startRow, endCol, endRow;
int staCol = 0, staRow = 0, edCol = 0, edRow = 0;
riWord->BoundingBox(levelWord, &startCol, &startRow, &endCol, &endRow);
staCol = startCol;
staRow = startRow;
edCol = endCol;
edRow = endRow;
staCol += rectLine.col;
staRow += rectLine.row;
edRow += rectLine.row;
edCol += rectLine.col;
rectWord.col = staCol;
rectWord.row = staRow;
rectWord.width = edCol - staCol + 1;
rectWord.height = edRow - staRow + 1;
int length = strlen(Word) + 1;
rectWord.ocrResult = (char *)calloc( length, sizeof(char));
if(rectWord.ocrResult == NULL){
cout << "rectWord.ocrResult is not allocate"<< endl;
exit(1);
}
strcpy(rectWord.ocrResult, Word);
rectBuffer.push_back(rectWord);
//displayBoundingBox(staCol, staRow, edCol, edRow ,PixelBufferForWord, mainImageCol);
getCharDataFromWord(SubImageBuffer, PixelBufferForWord, startCol, startRow ,endCol ,endRow,
rectLine,mainImageCol, rectBuffer, rectWord);
//delete Word;
free(Word);
}
}while (riWord->Next(levelWord));
}
delete riWord;
tessWord.End();
free(SubImageBuffer);
}
void getCharDataFromWord(const int *PixelBuffer,int *PixelBufferForChar, int startColWord ,int startRowWord,
int endColWord, int endRowWord, RECT &rectLine,
int mainImageCol,vector<RECT > &rectBuffer, RECT &rectWord)
{
int index;
int width = endColWord - startColWord + 1;
int height = endRowWord - startRowWord + 1;
int *SubImageBufferForChar = (int *)calloc(width * height, sizeof(int));
if(!SubImageBufferForChar){
cout << "SubImageBuffer not read" << endl;
}
int i = 0;
for(int r = startRowWord ; r <= endRowWord; r++)
{
for(int c = startColWord; c <= endColWord; c++)
{
index = r * rectLine.width + c;
SubImageBufferForChar[i++] = PixelBuffer[index];
}
}
//dumpDataIntoFile (SubImageBufferForChar, width, 0, 0, height - 1, width - 1, (char *)"test123.pbm");
tesseract::TessBaseAPI tessChar;
if (tessChar.Init("/usr/share/tesseract/tessdata", "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
tessChar.SetImage((unsigned char*)SubImageBufferForChar, width,
height, sizeof(int) ,sizeof(int) * width);
tessChar.Recognize(0);
tesseract::ResultIterator *riChar = tessChar.GetIterator();
tesseract::PageIteratorLevel levelChar = tesseract:: RIL_SYMBOL;
RECT rectChar;
if(riChar!=0)
{
do {
char *Char = riChar->GetUTF8Text(levelChar);
if(Char != NULL)
{
float conf = riChar->Confidence(levelChar);
int startCol, startRow, endCol, endRow;
riChar->BoundingBox(levelChar, &startCol, &startRow, &endCol, &endRow);
startCol += rectWord.col;
startRow += rectWord.row;
endRow += rectWord.row;
endCol += rectWord.col;
rectChar.col = startCol;
rectChar.row = startRow;
rectChar.width = endCol - startCol + 1;
rectChar.height = endRow - startRow + 1;
int length = strlen(Char) + 1;
rectChar.ocrResult = (char *)calloc( length, sizeof(char));
if(rectChar.ocrResult == NULL){
cout << "rectChar.ocrResult is not allocate"<< endl;
exit(1);
}
strcpy(rectChar.ocrResult, Char);
rectBuffer.push_back(rectChar);
dumpNumberConfidenceIntoFile(rectWord.ocrResult, conf, Char);
//displayBoundingBox(startCol, startRow, endCol, endRow ,PixelBufferForChar, mainImageCol);
//delete Char;
free(Char);
}
} while (riChar->Next(levelChar));
}
delete riChar;
tessChar.End();
free(SubImageBufferForChar);
}
void dumpNumberConfidenceIntoFile(char *word, float confi, char *Char)
{
if(Char[0] >= '0' && Char[0] <= '9')
{
if(Char[0] == '0'){
myfile0 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '1'){
myfile1 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '2'){
myfile2 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '3'){
myfile3 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '4'){
myfile4 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '5'){
myfile5 << word << '\t' << '\t' << Char << '\t' << '\t' <<confi << endl;
}
else if(Char[0] == '6'){
myfile6 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '7'){
myfile7 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '8'){
myfile8 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
else if(Char[0] == '9'){
myfile9 << word << '\t' << '\t' << Char << '\t' << '\t' << confi << endl;
}
}
}
/// Writes the collected rectangles to `outputFile` as a tab-separated table:
/// a header row, then one row per RECT with a running ID (taken from g_Id,
/// which keeps counting across calls), the box geometry and the recognised
/// text up to, but not including, its first newline.
///
/// @param rectBuffer  rectangles to write; every ocrResult must be a valid C string
/// @param outputFile  path of the file to create
void OCR ::dumpIntoFile(vector<RECT > &rectBuffer, char *outputFile)
{
    ofstream report(outputFile);
    report << "ID" << '\t' << "CORD_X" << '\t' << "CORD_Y" << '\t'
           << "CORD_W" << '\t' << "CORD_H" << '\t' << "STRING" << endl;

    for(RECT &entry : rectBuffer)
    {
        report << g_Id++ << '\t' << entry.col << '\t' << entry.row << '\t'
               << entry.width << '\t' << entry.height << '\t';

        // Emit the text one character at a time, stopping at end of string or
        // at the first line break so each record stays on a single row.
        for(const char *p = entry.ocrResult; *p != '\0' && *p != '\n'; ++p)
        {
            report << *p;
        }
        report << endl;
    }
}
/// Binarises ocr->g_pixelBuffer in place.
///
/// A threshold is chosen from the 256-bin gray-level histogram in
/// ocr->g_histogram by maximising q1 * q2 * (u1 - u2)^2, where q1/q2 are the
/// pixel counts at or below / above the candidate level and u1/u2 the mean
/// gray level of each group. Pixels brighter than the threshold become 0, all
/// others become 1.
///
/// @param ocr   buffers to work on; the histogram must already be filled
/// @param size  number of pixels in g_pixelBuffer
void getBinaryImage(OCR *ocr, int size)
{
    const long long total = size;

    // Sum of (level * count) over the whole histogram. Widen before
    // multiplying: level * count overflows int on images of several megapixels.
    long long weightedTotal = 0;
    for(int level = 0; level < 256; level++)
        weightedTotal += static_cast<long long>(level) * ocr->g_histogram[level];

    long long q1 = 0;             // pixels with gray level <= t
    long long weightedBelow = 0;  // sum of (level * count) for levels <= t
    double var_max = 0;
    int threshold = 0;
    for(int t = 0; t < 256; t++)
    {
        q1 += ocr->g_histogram[t];
        weightedBelow += static_cast<long long>(t) * ocr->g_histogram[t];

        // An empty group has no mean; skip instead of dividing by zero.
        if(q1 == 0) continue;
        const long long q2 = total - q1;
        if(q2 <= 0) break;   // every remaining level leaves the upper group empty

        // Exact integer sums with double means avoid the rounding that float
        // accumulators suffer once the sums exceed 2^24.
        const double u1 = static_cast<double>(weightedBelow) / q1;
        const double u2 = static_cast<double>(weightedTotal - weightedBelow) / q2;
        const double result = static_cast<double>(q1) * q2 * (u1 - u2) * (u1 - u2);
        if(result > var_max)
        {
            threshold = t;
            var_max = result;
        }
    }

    for(int i = 0; i < size; i++)
    {
        ocr->g_pixelBuffer[i] = (ocr->g_pixelBuffer[i] > threshold) ? 0 : 1;
    }
}
/// Fills ocr->g_pixelBuffer with the gray version of `image` (plain average of
/// the three channels), counts the gray levels into ocr->g_histogram and then
/// lets getBinaryImage() turn the buffer into a 0/1 image.
///
/// @param image  input image; it is split into three channel planes
/// @param ocr    destination buffers
/// @param size   number of pixels to process
void GetBinaryImage(Mat &image ,OCR *ocr ,const int size)
{
    Mat planes[3];
    split(image, planes);                 // one plane per colour channel
    const uchar *blue = planes[0].data;
    const uchar *green = planes[1].data;
    const uchar *red = planes[2].data;

    // Gray value and histogram count are produced in the same pass.
    for(int px = 0; px < size; ++px)
    {
        const int gray = (red[px] + green[px] + blue[px]) / 3;
        ocr->g_pixelBuffer[px] = gray;
        ocr->g_histogram[gray]++;
    }

    getBinaryImage (ocr, size);           // threshold the gray buffer
}
/// Debug helper: writes the region [strow..enrow] x [stcol..encols] of
/// `pixelBuffer` to `output` as a plain-text "P1" bitmap. Every non-zero
/// pixel is written as 1, every zero pixel as 0.
///
/// @param pixelBuffer  source pixels, one int each
/// @param collenth     row length of pixelBuffer in pixels
/// @param strow,stcol  first row / column of the region (inclusive)
/// @param enrow,encols last row / column of the region (inclusive)
/// @param output       path of the file to create
void dumpDataIntoFile (int *pixelBuffer, int collenth, int strow, int stcol,
                       int enrow, int encols, char *output)
{
    std::ofstream bitmap(output);

    // Header: magic number, then width and height of the dumped region.
    bitmap << "P1" << std::endl;
    bitmap << (encols - stcol + 1) << " " << (enrow - strow + 1) << std::endl;

    for(int r = strow; r <= enrow; ++r)
    {
        for(int c = stcol; c <= encols; ++c)
        {
            const bool set = pixelBuffer[r * collenth + c] != 0;
            bitmap << (set ? "1" : "0") << " ";
        }
        bitmap << std::endl;
    }
    bitmap.close();
}
/// Draws the outline of a rectangle into a pixel buffer by setting every
/// pixel on its four edges to 255.
///
/// @param staCol,staRow       top-left corner of the box (inclusive)
/// @param edCol,edRow         bottom-right corner of the box (inclusive)
/// @param PixelBufferForWord  buffer to draw into, one int per pixel
/// @param mainImageCol        row length of the buffer in pixels
/// The caller must make sure all four corners lie inside the buffer.
void displayBoundingBox(int staCol, int staRow, int edCol, int edRow , int *PixelBufferForWord,int mainImageCol)
{
    // Both loops run up to and including the end coordinate; with exclusive
    // bounds the bottom-right corner pixel was never drawn.
    for(int i = staRow; i <= edRow ; i++ ) {
        PixelBufferForWord[i * mainImageCol + staCol] = 255;   // left edge
        PixelBufferForWord[i * mainImageCol + edCol] = 255;    // right edge
    }
    for(int j = staCol; j <= edCol; j++ ) {
        PixelBufferForWord[staRow * mainImageCol + j] = 255;   // top edge
        PixelBufferForWord[edRow * mainImageCol + j] = 255;    // bottom edge
    }
}
问:置信度是如何计算的?
置信度算法计算的是识别出的字符与可用字符之间的距离。此链接中的 "linguistic analysis" 一节提供了您要找的答案:https://github.com/tesseract-ocr/docs/blob/master/tesseracticdar2007.pdf