从图像到字符串的手写识别
Handwriting recognition from image to string
我正在使用 Encog,并运行了其中的 OCR 示例,它工作正常。但是,我想传入一个图像文件(png、jpg 等)作为参数,该图像包含要识别的文本。然后,系统应该返回一个包含相同文本的字符串。
有人做过类似的事情吗?我应该如何开始?
谢谢!
第 1 步:在 GUI 中创建文件输入并从用户获取文件
JFileChooser fc;
JButton b, b1;
JTextField tf;
FileInputStream in;
Socket s;
DataOutputStream dout;
DataInputStream din;
int i;
public void actionPerformed(ActionEvent e) {
try {
if (e.getSource() == b) {
int x = fc.showOpenDialog(null);
if (x == JFileChooser.APPROVE_OPTION) {
fileToBeSent = fc.getSelectedFile();
tf.setText(f1.getAbsolutePath());
b1.setEnabled(true);
} else {
fileToBeSent = null;
tf.setText(null;);
b1.setEnabled(false);
}
}
if (e.getSource() == b1) {
send();
}
} catch (Exception ex) {
}
}
/**
 * Reads the file currently selected in the chooser one value at a time and
 * prints each value (as a decimal number) to standard output.
 *
 * <p>The selected file's absolute path is also written to {@code tf}. Each
 * value read is stored in the field {@code i}; once the loop finishes,
 * {@code i} holds {@code -1} (the end-of-stream marker).
 *
 * @throws IOException if the file cannot be opened or read
 */
public void copy() throws IOException {
    File f1 = fc.getSelectedFile();
    tf.setText(f1.getAbsolutePath());
    // try-with-resources closes the stream even when read() throws; the
    // original left the FileInputStream open.
    try (FileInputStream stream = new FileInputStream(f1.getAbsolutePath())) {
        in = stream;
        while ((i = in.read()) != -1) {
            System.out.print(i);
        }
    }
}
/**
 * Writes the current value of the field {@code i} to {@code dout} and
 * flushes the stream.
 *
 * <p>NOTE(review): only a single value is written, not the contents of the
 * selected file - confirm this is the intended behaviour.
 *
 * @throws IOException if writing to or flushing the stream fails
 */
public void send() throws IOException {
    final DataOutputStream out = dout;
    out.write(i);
    out.flush();
}
第 2 步:向下采样
/**
 * Builds the training set from {@code imageList}, downsamples it and
 * creates the feed-forward network.
 *
 * <p>For every image pair an ideal output vector of length
 * {@code outputCount} is built with {@code 1} at the pair's identity index
 * and {@code -1} everywhere else. The hidden layer sizes are read from the
 * "hidden1" and "hidden2" arguments.
 *
 * @throws IOException if an image cannot be read or decoded
 */
private void processNetwork() throws IOException {
    System.out.println("Downsampling images...");

    for (final ImagePair pair : this.imageList) {
        final MLData ideal = new BasicMLData(this.outputCount);
        final int idx = pair.getIdentity();
        for (int i = 0; i < this.outputCount; i++) {
            ideal.setData(i, (i == idx) ? 1 : -1);
        }

        // Load the image belonging to THIS pair. The original called
        // fc.getFile(), which JFileChooser does not provide and which would
        // not vary per pair anyway.
        // NOTE(review): assumes ImagePair exposes getFile() - confirm.
        final Image img = ImageIO.read(pair.getFile());
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the input.
            throw new IOException("Unsupported or unreadable image: " + pair.getFile());
        }
        final ImageMLData data = new ImageMLData(img);
        this.training.add(data, ideal);
    }

    final String strHidden1 = getArg("hidden1");
    final String strHidden2 = getArg("hidden2");

    this.training.downsample(this.downsampleHeight, this.downsampleWidth);

    final int hidden1 = Integer.parseInt(strHidden1);
    final int hidden2 = Integer.parseInt(strHidden2);

    this.network = EncogUtility.simpleFeedForward(
            this.training.getInputSize(), hidden1, hidden2,
            this.training.getIdealSize(), true);
    System.out.println("Created network: " + this.network.toString());
}
第 3 步:使用训练集开始训练
/**
 * Trains {@code network} on {@code training} using resilient propagation
 * with a reset strategy.
 *
 * <p>Reads the "mode", "minutes", "strategyerror" and "strategycycles"
 * arguments. When the mode is "gui" (case-insensitive) training runs through
 * the training dialog; otherwise it runs on the console for the configured
 * number of minutes.
 *
 * @throws IOException declared by the signature; not thrown directly by the visible code
 */
private void processTrain() throws IOException {
    final String mode = getArg("mode");
    final String minutesArg = getArg("minutes");
    final String errorArg = getArg("strategyerror");
    final String cyclesArg = getArg("strategycycles");

    System.out.println("Training Beginning... Output patterns=" + this.outputCount);

    final double resetError = Double.parseDouble(errorArg);
    final int resetCycles = Integer.parseInt(cyclesArg);

    final ResilientPropagation trainer = new ResilientPropagation(this.network, this.training);
    trainer.addStrategy(new ResetStrategy(resetError, resetCycles));

    final boolean useGui = mode.equalsIgnoreCase("gui");
    if (!useGui) {
        // The minutes argument is only parsed for console training.
        final int minutes = Integer.parseInt(minutesArg);
        EncogUtility.trainConsole(trainer, this.network, this.training, minutes);
    } else {
        TrainingDialog.trainDialog(trainer, this.network, this.training);
    }

    System.out.println("Training Stopped...");
}
第 4 步:将采样后的文件传给神经网络
/**
 * Classifies the image named by the "image" argument and prints the
 * identity associated with the winning output neuron.
 *
 * <p>The image is downsampled with {@code downsampleHeight} and
 * {@code downsampleWidth} before it is presented to the network.
 *
 * @throws IOException if the image file cannot be read or decoded
 */
public void processWhatIs() throws IOException {
    final String filename = getArg("image");
    final File file = new File(filename);
    final Image img = ImageIO.read(file);
    if (img == null) {
        // ImageIO.read returns null when no registered reader can decode the
        // file; fail with a clear message instead of passing null onward.
        throw new IOException("Unsupported or unreadable image: " + filename);
    }
    final ImageMLData input = new ImageMLData(img);
    input.downsample(this.downsample, false, this.downsampleHeight,
            this.downsampleWidth, 1, -1);
    final int winner = this.network.winner(input);
    System.out.println("What is: " + filename + ", it seems to be: "
            + this.neuron2identity.get(winner));
}
第 5 步:检查结果
我正在使用 Encog,并运行了其中的 OCR 示例,它工作正常。但是,我想传入一个图像文件(png、jpg 等)作为参数,该图像包含要识别的文本。然后,系统应该返回一个包含相同文本的字符串。
有人做过类似的事情吗?我应该如何开始?
谢谢!
第 1 步:在 GUI 中创建文件输入并从用户获取文件
JFileChooser fc;
JButton b, b1;
JTextField tf;
FileInputStream in;
Socket s;
DataOutputStream dout;
DataInputStream din;
int i;
public void actionPerformed(ActionEvent e) {
try {
if (e.getSource() == b) {
int x = fc.showOpenDialog(null);
if (x == JFileChooser.APPROVE_OPTION) {
fileToBeSent = fc.getSelectedFile();
tf.setText(f1.getAbsolutePath());
b1.setEnabled(true);
} else {
fileToBeSent = null;
tf.setText(null;);
b1.setEnabled(false);
}
}
if (e.getSource() == b1) {
send();
}
} catch (Exception ex) {
}
}
/**
 * Reads the file currently selected in the chooser one value at a time and
 * prints each value (as a decimal number) to standard output.
 *
 * <p>The selected file's absolute path is also written to {@code tf}. Each
 * value read is stored in the field {@code i}; once the loop finishes,
 * {@code i} holds {@code -1} (the end-of-stream marker).
 *
 * @throws IOException if the file cannot be opened or read
 */
public void copy() throws IOException {
    File f1 = fc.getSelectedFile();
    tf.setText(f1.getAbsolutePath());
    // try-with-resources closes the stream even when read() throws; the
    // original left the FileInputStream open.
    try (FileInputStream stream = new FileInputStream(f1.getAbsolutePath())) {
        in = stream;
        while ((i = in.read()) != -1) {
            System.out.print(i);
        }
    }
}
/**
 * Writes the current value of the field {@code i} to {@code dout} and
 * flushes the stream.
 *
 * <p>NOTE(review): only a single value is written, not the contents of the
 * selected file - confirm this is the intended behaviour.
 *
 * @throws IOException if writing to or flushing the stream fails
 */
public void send() throws IOException {
    final DataOutputStream out = dout;
    out.write(i);
    out.flush();
}
第 2 步:向下采样
/**
 * Builds the training set from {@code imageList}, downsamples it and
 * creates the feed-forward network.
 *
 * <p>For every image pair an ideal output vector of length
 * {@code outputCount} is built with {@code 1} at the pair's identity index
 * and {@code -1} everywhere else. The hidden layer sizes are read from the
 * "hidden1" and "hidden2" arguments.
 *
 * @throws IOException if an image cannot be read or decoded
 */
private void processNetwork() throws IOException {
    System.out.println("Downsampling images...");

    for (final ImagePair pair : this.imageList) {
        final MLData ideal = new BasicMLData(this.outputCount);
        final int idx = pair.getIdentity();
        for (int i = 0; i < this.outputCount; i++) {
            ideal.setData(i, (i == idx) ? 1 : -1);
        }

        // Load the image belonging to THIS pair. The original called
        // fc.getFile(), which JFileChooser does not provide and which would
        // not vary per pair anyway.
        // NOTE(review): assumes ImagePair exposes getFile() - confirm.
        final Image img = ImageIO.read(pair.getFile());
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the input.
            throw new IOException("Unsupported or unreadable image: " + pair.getFile());
        }
        final ImageMLData data = new ImageMLData(img);
        this.training.add(data, ideal);
    }

    final String strHidden1 = getArg("hidden1");
    final String strHidden2 = getArg("hidden2");

    this.training.downsample(this.downsampleHeight, this.downsampleWidth);

    final int hidden1 = Integer.parseInt(strHidden1);
    final int hidden2 = Integer.parseInt(strHidden2);

    this.network = EncogUtility.simpleFeedForward(
            this.training.getInputSize(), hidden1, hidden2,
            this.training.getIdealSize(), true);
    System.out.println("Created network: " + this.network.toString());
}
第 3 步:使用训练集开始训练
/**
 * Trains {@code network} on {@code training} using resilient propagation
 * with a reset strategy.
 *
 * <p>Reads the "mode", "minutes", "strategyerror" and "strategycycles"
 * arguments. When the mode is "gui" (case-insensitive) training runs through
 * the training dialog; otherwise it runs on the console for the configured
 * number of minutes.
 *
 * @throws IOException declared by the signature; not thrown directly by the visible code
 */
private void processTrain() throws IOException {
    final String mode = getArg("mode");
    final String minutesArg = getArg("minutes");
    final String errorArg = getArg("strategyerror");
    final String cyclesArg = getArg("strategycycles");

    System.out.println("Training Beginning... Output patterns=" + this.outputCount);

    final double resetError = Double.parseDouble(errorArg);
    final int resetCycles = Integer.parseInt(cyclesArg);

    final ResilientPropagation trainer = new ResilientPropagation(this.network, this.training);
    trainer.addStrategy(new ResetStrategy(resetError, resetCycles));

    final boolean useGui = mode.equalsIgnoreCase("gui");
    if (!useGui) {
        // The minutes argument is only parsed for console training.
        final int minutes = Integer.parseInt(minutesArg);
        EncogUtility.trainConsole(trainer, this.network, this.training, minutes);
    } else {
        TrainingDialog.trainDialog(trainer, this.network, this.training);
    }

    System.out.println("Training Stopped...");
}
第 4 步:将采样后的文件传给神经网络
/**
 * Classifies the image named by the "image" argument and prints the
 * identity associated with the winning output neuron.
 *
 * <p>The image is downsampled with {@code downsampleHeight} and
 * {@code downsampleWidth} before it is presented to the network.
 *
 * @throws IOException if the image file cannot be read or decoded
 */
public void processWhatIs() throws IOException {
    final String filename = getArg("image");
    final File file = new File(filename);
    final Image img = ImageIO.read(file);
    if (img == null) {
        // ImageIO.read returns null when no registered reader can decode the
        // file; fail with a clear message instead of passing null onward.
        throw new IOException("Unsupported or unreadable image: " + filename);
    }
    final ImageMLData input = new ImageMLData(img);
    input.downsample(this.downsample, false, this.downsampleHeight,
            this.downsampleWidth, 1, -1);
    final int winner = this.network.winner(input);
    System.out.println("What is: " + filename + ", it seems to be: "
            + this.neuron2identity.get(winner));
}
第 5 步:检查结果