Poor accuracy on MNIST csv data in TensorFlow
I am trying the MNIST-for-beginners tutorial with csv data.
I got the csv data from here and turned each label into a one-hot vector.
Each row has 794 dims (columns 1~10 are the one-hot label, 11~794 are the pixels).
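For reference, a minimal numpy sketch of how such a file could be produced from the raw label-first layout (the file names and the no-header assumption are mine, not from the original post):

import numpy as np

# Assumed input: one digit label in column 0, 784 pixel columns after it, no header.
raw = np.genfromtxt("data/mnist_train.csv", delimiter=",")

labels = raw[:, 0].astype(int)   # digit 0..9
pixels = raw[:, 1:]              # 784 pixel values

onehot = np.eye(10)[labels]      # one row of the 10x10 identity per example

# 10 one-hot columns followed by 784 pixel columns = 794 columns per row
np.savetxt("data/mnist_train_onehot.csv",
           np.hstack([onehot, pixels]), delimiter=",")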
Here is the code I wrote, and it gives terrible accuracy.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import tensorflow as tf
import numpy as np

FLAGS = None

def main(_):
    # Import data
    def csv_to_numpy_array(filepath, delimiter):
        return np.genfromtxt(filepath, delimiter=delimiter, dtype=None)

    def import_data():
        print("loading training data")
        traindata = csv_to_numpy_array("data/mnist_train_onehot.csv", delimiter=",")
        [trainY, trainX] = np.hsplit(traindata, [10])
        print("loading test data")
        testdata = csv_to_numpy_array("data/mnist_test_onehot.csv", delimiter=",")  # test path assumed to mirror the training file
        [testY, testX] = np.hsplit(testdata, [10])
        return trainX, trainY, testX, testY

    x_train, y_train, x_test, y_test = import_data()
    numX = x_train.shape[1]  # 784
    numY = y_train.shape[1]  # 10

    # Prepare the placeholders
    x = tf.placeholder(tf.float32, [None, numX])   # input box
    y_ = tf.placeholder(tf.float32, [None, numY])  # output box

    # define weights and biases
    w = tf.Variable(tf.zeros([numX, numY]))
    b = tf.Variable(tf.zeros([numY]))

    # create the model
    def model(X, w, b):
        pyx = tf.nn.softmax(tf.matmul(X, w) + b)
        return pyx

    y = model(x, w, b)

    # cost function
    loss = -tf.reduce_sum(y_ * tf.log(y))

    # the loss and acc
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    init = tf.initialize_all_variables()
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Train
    for i in range(1000):
        ind = np.random.choice(100, 100)
        x_train_batch = x_train[ind]
        y_train_batch = y_train[ind]
        # run optimization op (backprop) and cost op (to get loss value)
        _, c = sess.run([train_step, loss], feed_dict={x: x_train_batch, y_: y_train_batch})
        if i % 50 == 0:
            train_acc = accuracy.eval({x: x_train_batch, y_: y_train_batch})
            print('step: %d, acc: %6.3f' % (i, train_acc))

    # Test trained model
    print(sess.run(accuracy, feed_dict={x: x_test,
                                        y_: y_test}))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
The accuracy comes out at 0.098, which is essentially chance level for 10 classes.
Can someone try this code and tell me what is wrong with it?
Thank you very much.
Possible problems:
1- Initialize your variables randomly instead of with zeros.
2- You may have a misunderstanding about the .csv file format: the place you got the .csv files from says the format is label, pix-11, pix-12, pix-13, ... (a single label column, not ten one-hot columns).
3- Try tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=..., labels=...)). The method you are using to compute the loss is numerically unstable; see the toy demo after this list.
Update: do not apply tf.nn.softmax yourself in this case, because tf.nn.softmax_cross_entropy_with_logits performs the softmax normalization and the cross entropy internally. (Thanks to @ml4294's comment.)
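To see why point 3 matters, here is a minimal toy demo (the logit values are made up, not from the post): with a large logit gap, the hand-rolled softmax-plus-log loss blows up, while the fused op stays finite.

import tensorflow as tf

# Toy demo: tf.nn.softmax underflows to exactly 0.0 for the losing class,
# so the hand-written loss evaluates log(0) = -inf; the fused op is computed
# in a numerically safe way and stays finite.
logits = tf.constant([[1000.0, 0.0]])   # model is very confident...
labels = tf.constant([[0.0, 1.0]])      # ...and completely wrong

naive = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), axis=1)
fused = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

with tf.Session() as sess:
    print(sess.run(naive))  # [inf]   -- gradient is useless from here on
    print(sess.run(fused))  # [1000.] -- large but finite, gradients still flow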
Here is your code with the necessary changes. Specifically, tf.nn.softmax_cross_entropy_with_logits does the heavy lifting of computing the cross entropy for you. The other improvement is using loss = tf.reduce_mean(...) instead of loss = tf.reduce_sum(...). That makes your training correction the average of all the errors instead of their sum. With sum you get wild, uncontrolled training swings, and you have to compensate with a very small gradient-descent factor. If you ever find yourself using a gradient-descent learning rate above 1 or below 0.1, you can probably fix it by using reduce_mean for the loss; a quick toy illustration of the scaling difference follows.
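As a rough sketch (the numbers are made up): with a batch of 100 examples, the summed loss, and hence its gradient, is about 100 times larger than the averaged loss, so any learning rate tuned for one breaks for the other.

import numpy as np

# Hypothetical per-example cross-entropy losses for a batch of 100
per_example_loss = np.full(100, 2.5)

print(per_example_loss.sum())   # 250.0 -> grows with batch size
print(per_example_loss.mean())  # 2.5   -> independent of batch size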
Here is your code with those changes:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

import tensorflow as tf
import numpy as np

FLAGS = None

def main(_):
    # Import data
    def csv_to_numpy_array(filepath, delimiter):
        return np.genfromtxt(filepath, delimiter=delimiter, dtype=None)

    def import_data():
        print("loading training data")
        traindata = csv_to_numpy_array("data/mnist_train_onehot.csv", delimiter=",")
        [trainY, trainX] = np.hsplit(traindata, [10])
        print("loading test data")
        testdata = csv_to_numpy_array("data/mnist_test_onehot.csv", delimiter=",")  # test path assumed to mirror the training file
        [testY, testX] = np.hsplit(testdata, [10])
        return trainX, trainY, testX, testY

    x_train, y_train, x_test, y_test = import_data()
    numX = x_train.shape[1]  # 784
    numY = y_train.shape[1]  # 10

    # Prepare the placeholders
    x = tf.placeholder(tf.float32, [None, numX])   # input box
    y_ = tf.placeholder(tf.float32, [None, numY])  # output box

    # define weights and biases
    w = tf.Variable(tf.zeros([numX, numY]))
    b = tf.Variable(tf.zeros([numY]))

    # raw logits: the softmax is applied inside the loss op below
    y = tf.matmul(x, w) + b

    # unused for this model
    keep_prob = tf.placeholder(tf.float32)

    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    percent_correct = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Train
    for i in range(1000):
        # sample the batch from the whole training set, not just its first 100 rows
        ind = np.random.choice(x_train.shape[0], 100)
        x_train_batch = x_train[ind]
        y_train_batch = y_train[ind]
        # run optimization op (backprop) and cost op (to get loss value)
        _, c = sess.run([train_step, loss], feed_dict={x: x_train_batch, y_: y_train_batch})
        if i % 50 == 0:
            train_acc = percent_correct.eval({x: x_train_batch, y_: y_train_batch})
            print('step: %d, acc: %6.3f' % (i, train_acc))

    # Test trained model
    print(sess.run(percent_correct, feed_dict={x: x_test,
                                               y_: y_test}))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
Here is a link to several different MNIST TensorFlow architectures, all in one script: https://github.com/panchishin/learn-to-tensorflow/blob/master/examples/mnist_model_comparison.py