Weights and Biases not updating in tensorflow
I made this neural network to tell whether a house is a good buy or a bad buy. For some reason the code isn't updating the weights and biases. My loss stays the same. Here's my code:
import pandas as pd
import tensorflow as tf

data = pd.read_csv("E:/workspace_py/datasets/good_bad_buy.csv")

features = data.drop(['index', 'good buy'], axis = 1)
lbls = data.drop(['index', 'area', 'bathrooms', 'price', 'sq_price'], axis = 1)

features = features[0:20]
lbls = lbls[0:20]

print(features)
print(lbls)

n_examples = len(lbls)

# Model

# Hyper parameters
epochs = 100
learning_rate = 0.1
batch_size = 1

input_data = tf.placeholder('float', [None, 4])
labels = tf.placeholder('float', [None, 1])

weights = {
    'hl1': tf.Variable(tf.random_normal([4, 10])),
    'hl2': tf.Variable(tf.random_normal([10, 10])),
    'hl3': tf.Variable(tf.random_normal([10, 4])),
    'ol': tf.Variable(tf.random_normal([4, 1]))
}

biases = {
    'hl1': tf.Variable(tf.random_normal([10])),
    'hl2': tf.Variable(tf.random_normal([10])),
    'hl3': tf.Variable(tf.random_normal([4])),
    'ol': tf.Variable(tf.random_normal([1]))
}

hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1']))
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2']))
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3']))
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol']))

loss = tf.reduce_mean((labels - ol)**2)
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

iterations = int(n_examples/batch_size)

for epoch_no in range(epochs):
    ptr = 0
    for iteration_no in range(iterations):
        epoch_input = features[ptr:ptr+batch_size]
        epoch_label = lbls[ptr: ptr+batch_size]
        ptr = ptr + batch_size
        _, err = sess.run([train, loss], feed_dict={input_data: features, labels: lbls})
    print("Error at epoch ", epoch_no, ": ", err)

print(sess.run(ol, feed_dict={input_data: [[2104, 3, 399900, 190.0665]]}))
Here's the dataset:
Features:
area bathrooms price sq_price
0 2104 3 399900 190.066540
1 1600 3 329900 206.187500
2 2400 3 369000 153.750000
3 1416 2 232000 163.841808
4 3000 4 539900 179.966667
5 1985 4 299900 151.083123
6 1534 3 314900 205.280313
7 1427 3 198999 139.452698
8 1380 3 212000 153.623188
9 1494 3 242500 162.315930
10 1940 4 239999 123.710825
11 2000 3 347000 173.500000
12 1890 3 329999 174.602645
13 4478 5 699900 156.297454
14 1268 3 259900 204.968454
15 2300 4 449900 195.608696
16 1320 2 299900 227.196970
17 1236 3 199900 161.731392
18 2609 4 499998 191.643542
19 3031 4 599000 197.624546
labels:
good buy
0 1.0
1 0.0
2 1.0
3 0.0
4 1.0
5 0.0
6 0.0
7 1.0
8 0.0
9 0.0
10 1.0
11 1.0
12 1.0
13 1.0
14 0.0
15 1.0
16 0.0
17 1.0
18 1.0
19 1.0
Any suggestions on how to fix this? I've tried tf.reduce_sum in place of tf.reduce_mean. I've also tried a bigger batch_size.
I'm not sure whether this is the problem you're running into, but if the inputs are too large, the sigmoid's gradients become very small, which makes the updates very slow.
To check whether that's your case, try initializing all the weights to very small values. You can tune this by setting the standard deviation of your random normals:
tf.Variable(tf.random_normal([4, 10], stddev=0.1))
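As a quick numeric illustration of that saturation (my addition, not part of the original answer): with unnormalized inputs like price = 399900, the pre-activations are huge, and the sigmoid derivative s(x)*(1-s(x)) is effectively zero, so almost nothing flows back through the network:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

z = 50.0  # still small compared to these feature scales
print(sigmoid(z))                     # ~1.0 (saturated)
print(sigmoid(z) * (1 - sigmoid(z)))  # ~1.9e-22 (vanishing gradient)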
There are a couple of things wrong with your code.
First, you mean
epoch_input = features[ptr:ptr+batch_size]
epoch_label = lbls[ptr: ptr+batch_size]
ptr = ptr + batch_size
# _, err = sess.run([train, loss], feed_dict={input_data: features, labels: lbls})
_, err = sess.run([train, loss], feed_dict={input_data: epoch_input, labels: epoch_label})
Now it actually uses mini-batches.
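Put back into the training loop, the corrected version looks like this (same variable names as in the question):
for epoch_no in range(epochs):
    ptr = 0
    for iteration_no in range(iterations):
        epoch_input = features[ptr:ptr+batch_size]
        epoch_label = lbls[ptr:ptr+batch_size]
        ptr = ptr + batch_size
        _, err = sess.run([train, loss],
                          feed_dict={input_data: epoch_input, labels: epoch_label})
    print("Error at epoch ", epoch_no, ": ", err)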
Debugging the gradients:
You can always inspect quantities by adding
loss = tf.Print(loss, [tf.reduce_sum(weights['hl1'])])
This prints the elements of the list [tf.reduce_sum(weights['hl1'])]. To investigate your problem further, you can inspect the gradients rather than just calling minimize:
grads = tf.reduce_sum(tf.gradients(loss, ol)[0])
sess.run(grads, {input_data: features, labels: lbls})
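In the same spirit (a small extension of the snippet above, not from the original answer), you can look at the gradients with respect to the weights directly:
w_grads = tf.gradients(loss, [weights['hl1'], weights['ol']])
print(sess.run(w_grads, {input_data: features, labels: lbls}))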
Finally, the loss function is inappropriate/numerically unstable for classification. With your version, I get:
variables
Variable:0
Variable_1:0
Variable_2:0
Variable_3:0
Variable_4:0
Variable_5:0
Variable_6:0
Variable_7:0
I tensorflow/core/kernels/logging_ops.cc:79] [-6.2784553]
-----------------------------------------
name MatMul_grad
gradient [[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
value [[-0.59977376 -0.30060738 0.55068201 0.15304407 1.39992142 0.07495346
-0.87189424 -0.22595075 -0.30094525 -1.2688272 ]
[-0.44018757 1.08651936 -0.26267499 -0.54463315 0.47019768 0.69873857
0.56195319 0.20222363 0.38143152 -0.92212462]
[-0.39977714 -1.07244122 0.41926911 1.4951371 -2.28751612 0.45676312
0.88010246 -0.88077509 -1.25860023 0.56874037]
[-0.98260719 -1.30747247 -1.4460088 1.0717535 0.08794415 -0.53184992
-1.17537284 -0.51598179 -0.15323587 0.91142744]]
-----------------------------------------
name MatMul_1_grad
gradient [[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
value [[-0.1170694 0.12174897 0.91696155 0.59427398 0.90844423 0.29010534
-0.34039831 -0.62824941 0.37833953 0.27777222]
[-0.34947088 1.09264851 0.27353975 1.31722498 -0.42032316 -2.74952078
-0.66349608 -0.61844724 -0.82141227 1.21691799]
[ 0.10453336 -1.68631995 0.45700032 -1.58120835 -1.23378754 -0.05648948
-1.64761281 -0.57684237 -0.06499017 -0.49623618]
[ 1.47821534 -0.5329541 0.09209292 1.78089786 1.71149898 0.30547267
0.39544162 1.00369155 1.0097307 -0.92320329]
[ 1.27038908 -2.17246103 -0.31276336 0.8945803 0.30964327 1.15329361
0.9711507 -0.36301252 -0.05652813 0.63399518]
[-0.30909851 -0.41660413 -0.50603527 0.11735299 -0.26837045 0.16547598
-0.33875859 -0.46821991 0.25723135 -0.80380815]
[-0.86255074 -1.11751068 0.01365725 0.66119182 0.48947951 1.6353699
-0.794447 0.43182942 -0.97692633 -1.62605619]
[ 1.38552308 0.83679706 -0.87287223 2.59401655 -0.61855 0.38301265
1.09983373 0.49209142 1.03003716 -1.33537853]
[ 0.74452382 1.57940936 -0.90974236 -1.2211293 -1.1076287 0.92846316
-0.46856263 -0.3179535 0.75120807 -0.86442506]
[ 0.31622764 -0.35965034 -0.02351121 -0.0650174 0.4714573 0.35687482
1.43354905 0.39608309 0.42744714 -0.37226421]]
-----------------------------------------
name MatMul_2_grad
gradient [[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
value [[-1.50904143 0.00228321 1.45787132 0.68312413]
[-0.16627057 1.31303644 1.16326404 0.72901946]
[ 0.8004092 0.37329885 0.89361066 -0.19850619]
[ 1.58354807 -1.05612624 0.69891322 -0.32565734]
[-1.57602286 -0.41256282 0.69086516 -0.54095054]
[ 1.72376788 -0.53928965 -0.71574098 -0.94974124]
[-0.62061429 1.51380932 -0.72585452 -0.07695383]
[ 0.35537818 1.49691582 0.03931179 0.93435526]
[ 0.20697887 1.39266443 0.73217523 -0.64737892]
[ 1.00519872 0.90984046 1.68565321 -0.28157935]]
-----------------------------------------
name MatMul_3_grad
gradient [[ 0.]
[ 0.]
[ 0.]
[ 0.]]
value [[ 0.94082022]
[ 0.14753926]
[-0.08765228]
[ 1.32516992]]
-----------------------------------------
name Add_grad
gradient [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
value [ 1.71239722 1.12632215 0.75409448 0.01951236 0.32135537 -1.46281374
0.40413955 0.54653352 -0.57894999 0.2746354 ]
-----------------------------------------
name Add_1_grad
gradient [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
value [ 0.74800217 -0.43517059 -0.77706921 1.46858656 1.09103405 -0.46681881
0.6126743 -2.27877688 1.48809242 -1.19616997]
-----------------------------------------
name Add_2_grad
gradient [ 0. 0. 0. 0.]
value [-0.12137324 -0.23238407 0.17909229 -0.75496733]
-----------------------------------------
name Add_3_grad
gradient [ 0.]
value [-0.91176724]
As you can see, almost all the gradients are zero. Why?
- By definition, (labels - ol) lies in [0, 1]
- Squaring it makes the value much smaller than 1
- The derivative of the sigmoid s(x) is s'(x) = s(x)*(1-s(x))
The gradient gets multiplied by these terms, each of which is again much smaller than 1.
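To make that concrete (a worked example I'm adding, assuming label y = 1): for L = (y - s(z))**2, the gradient with respect to the logit z is dL/dz = -2*(y - s(z))*s(z)*(1 - s(z)). Since s'(z) peaks at 0.25, |dL/dz| never exceeds 0.5, and it collapses to essentially zero as soon as the sigmoid saturates:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

for z in [0.0, 5.0, 20.0]:
    s = sigmoid(z)
    print(z, -2 * (1.0 - s) * s * (1 - s))
# 0.0  -> -0.25
# 5.0  -> ~ -8.9e-05
# 20.0 -> ~ -8.5e-18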
But after switching to sparse_softmax_cross_entropy_with_logits, which is numerically stable and operates in the log domain, I get:
variables
Variable:0
Variable_1:0
Variable_2:0
Variable_3:0
Variable_4:0
Variable_5:0
Variable_6:0
Variable_7:0
-----------------------------------------
name MatMul_grad
gradient [[ -1.42780918e-05 -1.96137808e-05 -2.44040220e-05 -2.25691911e-05
0.00000000e+00 2.95208647e-05 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ -2.54181440e-08 -3.49168410e-08 -4.34445262e-08 -4.01781257e-08
0.00000000e+00 5.25536308e-08 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ -2.45539122e-03 -3.37296468e-03 -4.19673882e-03 -3.88120394e-03
0.00000000e+00 5.07667707e-03 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ -1.42123906e-06 -1.95235293e-06 -2.42917258e-06 -2.24653377e-06
0.00000000e+00 2.93850212e-06 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
value [[ 0.43133125 -0.40009859 -0.08456381 0.59587955 0.57171088 -0.9824872
1.18876612 0.9704771 0.74798232 0.15660612]
[-1.18380785 0.22617982 -1.15734088 -0.50478351 1.43819618 1.55950046
-1.1510663 -0.88835335 0.58378232 0.56860197]
[ 0.29826403 0.02192715 0.62225986 2.47716165 -0.9223454 1.70159853
-1.03968358 -0.26019615 -0.33808291 -0.30873826]
[ 0.59774327 -1.28855145 -0.43420359 -0.4413566 -0.19220066 0.96984953
-0.04922202 0.32994318 -1.05539823 -0.80112725]]
-----------------------------------------
name MatMul_1_grad
gradient [[ 0.00000000e+00 1.15650124e-03 0.00000000e+00 0.00000000e+00
6.59449317e-04 -1.09400018e-03 0.00000000e+00 -4.02117817e-04
5.44495881e-04 -8.90314346e-04]
[ 0.00000000e+00 7.24206184e-05 0.00000000e+00 0.00000000e+00
4.12950030e-05 -6.85067716e-05 0.00000000e+00 -2.51807924e-05
3.40965707e-05 -5.57518724e-05]
[ 0.00000000e+00 2.38713808e-03 0.00000000e+00 0.00000000e+00
1.36117137e-03 -2.25812919e-03 0.00000000e+00 -8.30012548e-04
1.12389564e-03 -1.83770037e-03]
[ 0.00000000e+00 9.52679198e-03 0.00000000e+00 0.00000000e+00
5.43227792e-03 -9.01193265e-03 0.00000000e+00 -3.31248436e-03
4.48533799e-03 -7.33405072e-03]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ 0.00000000e+00 6.51591457e-03 0.00000000e+00 0.00000000e+00
3.71544389e-03 -6.16377220e-03 0.00000000e+00 -2.26559630e-03
3.06777749e-03 -5.01617463e-03]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]]
value [[ 0.38902158 -2.14370036 -1.02228141 -0.6492967 1.87193418 -0.06453216
1.0013988 -1.26857054 0.59826601 0.45045251]
[ 0.51465249 -1.09108925 -0.21368918 -0.49310678 -0.87893176 -0.07944249
-0.15810326 1.65703297 1.01812947 -0.95572269]
[-1.76351583 -1.46950841 1.43533802 2.15617752 1.30682683 0.77409673
-1.50309181 0.81978178 0.6672287 -0.434971 ]
[-0.7291944 2.16516733 -1.39850736 -1.06059277 0.40035763 1.23335707
-0.03707252 1.88107574 0.09459961 2.11439633]
[-1.39152992 -1.39924514 -0.35704514 -0.71152836 -2.68857026 0.78129828
-1.0077033 -1.26149333 0.4403404 -0.10159389]
[ 0.37354535 0.12654085 0.7632165 -0.76493222 0.68177891 -0.34254205
-1.11582613 2.60665917 1.53196526 -0.867055 ]
[ 0.62746197 -0.01072595 3.26629376 1.28371656 -0.88725293 3.55530715
0.67065352 -0.61927503 1.20604384 -0.87207574]
[-0.68954837 1.89912283 0.90083456 0.02054735 -0.23425011 0.39949065
-0.08969283 -0.75943565 1.0924015 0.28920195]
[-0.64865923 -1.29299021 -0.39945969 0.02289505 1.46024895 0.94282049
-0.99704605 -1.36124468 0.76788425 0.86770487]
[ 0.63794595 1.68530416 -0.15548207 -0.22658408 -0.45446202 -0.77308726
-0.12694608 1.17369819 2.25879693 0.20346723]]
-----------------------------------------
name MatMul_2_grad
gradient [[ 0. 0. 0. 0. ]
[-0.02205572 0. 0.00960038 0. ]
[ 0. 0. 0. 0. ]
[ 0. 0. 0. 0. ]
[-0.01932034 0. 0.00840973 0. ]
[-0.01617817 0. 0.00704201 0. ]
[ 0. 0. 0. 0. ]
[-0.05091252 0. 0.02216113 0. ]
[-0.0189826 0. 0.00826272 0. ]
[-0.01993647 0. 0.00867792 0. ]]
value [[-0.18724969 -0.0544498 -0.69153035 0.47535184]
[-0.75444973 -1.33321464 -0.13066645 1.56889391]
[-0.6458627 1.17859495 -0.75926393 0.30138403]
[ 1.0069555 -0.69344127 0.49295315 0.54917085]
[-0.55954564 -1.13277721 -0.37167427 -0.64837182]
[ 0.93753678 1.12197697 0.63789612 0.52438796]
[ 0.77543265 -1.241382 1.78230286 -0.6928125 ]
[ 0.95383584 -2.00331807 1.63409865 -0.36474878]
[-0.73891008 2.066082 -0.94303596 -0.42322466]
[ 0.38519588 0.03278512 -0.3487882 -1.50447905]]
-----------------------------------------
name MatMul_3_grad
gradient [[ 0.08460998]
[ 0. ]
[ 0.16564058]
[ 0. ]]
value [[-0.35376808]
[-0.07330427]
[ 0.15398768]
[-0.06484076]]
-----------------------------------------
name Add_grad
gradient [ -8.22783885e-09 -1.13025616e-08 -1.40629695e-08 -1.30056375e-08
0.00000000e+00 1.70115797e-08 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00]
value [-1.00038147 -0.56519473 0.59372097 -1.1646167 -0.16213787 -0.69313556
0.62788707 1.03768504 0.57876503 -0.5201084 ]
-----------------------------------------
name Add_1_grad
gradient [ 0.00000000e+00 1.28705375e-08 0.00000000e+00 0.00000000e+00
7.33891703e-09 -1.21749730e-08 0.00000000e+00 -4.47511184e-09
6.05961770e-09 -9.90818183e-09]
value [ 0.02854451 -1.46039021 -0.03916361 0.40116394 0.16030532 0.88267213
-0.46328214 0.18927227 -1.7536788 -0.46590349]
-----------------------------------------
name Add_2_grad
gradient [ -1.84504412e-08 0.00000000e+00 8.03108247e-09 0.00000000e+00]
value [ 0.94534302 -0.9080081 -1.86719894 -1.31547296]
-----------------------------------------
name Add_3_grad
gradient [ 0.29727879 -0.29727876]
value [ 0.07999782 -0.75647992]
This time the gradients, although small, are non-zero.
The code to reproduce this is:
import numpy as np
import tensorflow as tf

features = [
    [2104, 3, 399900, 190.066540],
    [1600, 3, 329900, 206.187500],
    [2400, 3, 369000, 153.750000],
    [1416, 2, 232000, 163.841808],
    [3000, 4, 539900, 179.966667],
    [1985, 4, 299900, 151.083123],
    [1534, 3, 314900, 205.280313],
    [1427, 3, 198999, 139.452698],
    [1380, 3, 212000, 153.623188],
    [1494, 3, 242500, 162.315930],
    [1940, 4, 239999, 123.710825],
    [2000, 3, 347000, 173.500000],
    [1890, 3, 329999, 174.602645],
    [4478, 5, 699900, 156.297454],
    [1268, 3, 259900, 204.968454],
    [2300, 4, 449900, 195.608696],
    [1320, 2, 299900, 227.196970],
    [1236, 3, 199900, 161.731392],
    [2609, 4, 499998, 191.643542],
    [3031, 4, 599000, 197.624546]]
lbls = [1,0,1,0,1,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1]

features = np.array(features, dtype=np.float32)
lbls = np.array(lbls, dtype=np.int32)

n_examples = len(lbls)
epochs = 100
learning_rate = 0.1
batch_size = 1

input_data = tf.placeholder('float', [None, 4])
labels = tf.placeholder('int32', [None])

weights = {
    'hl1': tf.Variable(tf.random_normal([4, 10])),
    'hl2': tf.Variable(tf.random_normal([10, 10])),
    'hl3': tf.Variable(tf.random_normal([10, 4])),
    'ol': tf.Variable(tf.random_normal([4, 1]))
}

biases = {
    'hl1': tf.Variable(tf.random_normal([10])),
    'hl2': tf.Variable(tf.random_normal([10])),
    'hl3': tf.Variable(tf.random_normal([4])),
    # 'ol': tf.Variable(tf.random_normal([1])),
    'ol': tf.Variable(tf.random_normal([2]))
}

hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1']))
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2']))
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3']))
# ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol']))
logits = tf.add(tf.matmul(hl3, weights['ol']), biases['ol'])
# ol = tf.Print(ol, [tf.reduce_sum(weights['hl1'])])

# loss = tf.reduce_mean((labels - ol)**2)
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = tf.reduce_mean(cost)
optimizer = tf.train.AdamOptimizer(learning_rate)

iterations = int(n_examples/batch_size)

def debug_minimize(optimizer, loss, sess):
    from tensorflow.python.ops import variables
    from tensorflow.python.framework import ops
    # get all variables
    var_list = (variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    print('variables')
    for v in var_list:
        print('   ', v.name)
    # get all gradients
    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars)
    zipped_val = sess.run(grads_and_vars, {input_data: features, labels: lbls})
    for rsl, tensor in zip(zipped_val, grads_and_vars):
        print('-----------------------------------------')
        print('name', tensor[0].name.replace('/tuple/control_dependency_1:0', '').replace('gradients/', ''))
        print('gradient', rsl[0])
        print('value', rsl[1])
    return train_op

sess = tf.Session()
sess.run(tf.global_variables_initializer())
debug_minimize(optimizer, loss, sess)
A few things to consider:
- The mini-batches are never actually used, because you feed in features and lbls instead of epoch_input and epoch_label.
- You don't preprocess your data in any way, so it is completely out of range; i.e., my code below normalizes the features by their mean and stddev. You could also consider batch_normalization.
- You never evaluate the error at any point. You need a holdout training and test set. My code below doesn't hold out data, but it does measure percent accuracy rather than only the loss (which is a weak proxy for the error, so you shouldn't call it err).
- You initialize the biases to random normals. You probably just want to start them at zero.
- You should probably use tf.layers or another high-level API (see the sketch right after this list).
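For that last point, here is a minimal sketch of the same network written with tf.layers (my sketch, not the original answer's code; it assumes the input_data and labels placeholders from the reproduction code above, and relies on tf.layers.dense's defaults, which initialize biases to zero):
net = tf.layers.dense(input_data, 10, activation=tf.nn.relu)
net = tf.layers.dense(net, 10, activation=tf.nn.relu)
net = tf.layers.dense(net, 4, activation=tf.nn.relu)
logits = tf.layers.dense(net, 2)  # no activation; pass raw logits to the loss
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)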
The code below achieves 95% training accuracy. To estimate test accuracy, you'd want to evaluate against a holdout dataset that wasn't used for training.
#!/usr/bin/env python
import sys
import pandas as pd
import numpy as np
import tensorflow as tf

data = pd.read_csv("data.csv")

features = data.drop(['good buy'], axis = 1)
lbls = data.drop([ 'area', 'bathrooms', 'price', 'sq_price'], axis = 1)

features = features[0:20]
lbls = lbls[0:20]

mu = np.mean(features, axis=0)
sigma = (np.std(features, axis=0))
features = (features - mu) / sigma

n_examples = len(lbls)

# Model

# Hyper parameters
epochs = 100
learning_rate = 0.01
batch_size = 5

input_data = tf.placeholder('float', [None, 4])
labels = tf.placeholder('float', [None, 1])

weights = {
    'hl1': tf.Variable(tf.random_normal([4, 10])),
    'hl2': tf.Variable(tf.random_normal([10, 10])),
    'hl3': tf.Variable(tf.random_normal([10, 4])),
    'ol': tf.Variable(tf.random_normal([4, 1]))
}

biases = {
    'hl1': tf.Variable(tf.zeros([10])),
    'hl2': tf.Variable(tf.zeros([10])),
    'hl3': tf.Variable(tf.zeros([4])),
    'ol': tf.Variable(tf.zeros([1]))
}

hl1 = tf.nn.relu(tf.add(tf.matmul(input_data, weights['hl1']), biases['hl1']))
hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights['hl2']), biases['hl2']))
hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights['hl3']), biases['hl3']))
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl3, weights['ol']), biases['ol']))

loss = tf.reduce_mean((labels - ol)**2)
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

iterations = int(n_examples/batch_size)

def training_accuracy():
    foo, = sess.run([ol], feed_dict={input_data: features, labels: lbls})
    return (float(np.count_nonzero(np.equal(np.round(foo), lbls))) / float(lbls.shape[0]))

print("Initial training accuracy %f" % training_accuracy())

for epoch_no in range(epochs):
    ptr = 0
    for iteration_no in range(iterations):
        epoch_input = features[ptr:ptr+batch_size]
        epoch_label = lbls[ptr: ptr+batch_size]
        ptr = (ptr + batch_size) % len(features)
        _, err = sess.run([train, loss], feed_dict={input_data: epoch_input, labels: epoch_label})
    print("Error at epoch ", epoch_no, ": ", err)
    print("  Training accuracy %f" % training_accuracy())
Also, please don't post usage questions like this on GitHub; they belong on Stack Overflow.