c# 使用 charlesw tesseract 生成 hocr 文件
c# generate hocr file using charlesw tesseract
如何使用 tesseract 包装器生成 hocr here
目前我需要将 tessdata 的位置动态添加到环境变量和运行我的代码
System.Diagnostics.Process pProcess = new System.Diagnostics.Process();
pProcess.StartInfo.FileName = System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]) + @"\tesseract-3.05.00dev-win32-vc19\tesseract.exe";
string inputImg = @"00067.jpg";
string hocrLocation = @"00067";
string argsPdf = "\"" + inputImg + "\"" + " " + "\"" + hocrLocation + "\"" + " hocr ";
Console.WriteLine(argsPdf);
pProcess.StartInfo.Arguments = argsPdf;
pProcess.StartInfo.CreateNoWindow = false;
pProcess.StartInfo.UseShellExecute = false;
pProcess.StartInfo.RedirectStandardOutput = true;
pProcess.Start();
string strOutput = pProcess.StandardOutput.ReadToEnd();
Console.WriteLine("OUtput: " + strOutput);
pProcess.WaitForExit();
然后我找到了 tesseract 包装器。如何使用包装器生成 hocr 文件?我找不到如何操作的示例。
这是我正在使用的当前代码(来自示例),但是如何输出 hocr 文件?
var testImagePath = "./phototest.tif";
if (args.Length > 0)
{
testImagePath = args[0];
}
try
{
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(testImagePath))
{
using (var page = engine.Process(img))
{
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
string hocrText = page.GetHOCRText(pageNum - 1);
如何使用 tesseract 包装器生成 hocr here
目前我需要将 tessdata 的位置动态添加到环境变量和运行我的代码
System.Diagnostics.Process pProcess = new System.Diagnostics.Process();
pProcess.StartInfo.FileName = System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]) + @"\tesseract-3.05.00dev-win32-vc19\tesseract.exe";
string inputImg = @"00067.jpg";
string hocrLocation = @"00067";
string argsPdf = "\"" + inputImg + "\"" + " " + "\"" + hocrLocation + "\"" + " hocr ";
Console.WriteLine(argsPdf);
pProcess.StartInfo.Arguments = argsPdf;
pProcess.StartInfo.CreateNoWindow = false;
pProcess.StartInfo.UseShellExecute = false;
pProcess.StartInfo.RedirectStandardOutput = true;
pProcess.Start();
string strOutput = pProcess.StandardOutput.ReadToEnd();
Console.WriteLine("OUtput: " + strOutput);
pProcess.WaitForExit();
然后我找到了 tesseract 包装器。如何使用包装器生成 hocr 文件?我找不到如何操作的示例。
这是我正在使用的当前代码(来自示例),但是如何输出 hocr 文件?
var testImagePath = "./phototest.tif";
if (args.Length > 0)
{
testImagePath = args[0];
}
try
{
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(testImagePath))
{
using (var page = engine.Process(img))
{
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
string hocrText = page.GetHOCRText(pageNum - 1);