tensorflow ValueError: scan/while/Merge_1:0

How can I fix this error.. I don't know why it occurs.. it has been blocking me for two days.

Sigh..

    ValueError: The shape for my_cost_value/scan/while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (), but has shape <unknown> after one iteration. Provide shape invariants using either the shape_invariants argument of tf.while_loop or set_shape() on the loop variables

Here is my code:

    # (this block runs inside the decoder class's __init__;
    #  imports assumed: import tensorflow as tf / import numpy as np)

    ################# constants
    bach_size = 100
    layers = 1
    directions = 1
    hiddensize = 100
    self.hiddensize = hiddensize
    self.def_output_length = 2
    self.enc = encoder
    input_length = 484
    teacher_forcing = False
    with tf.variable_scope('self_outputinit') as scope:
        self.output_ini = tf.Variable(tf.zeros([1, self.hiddensize]), dtype='float32')  # my output segment

    ################# training val
    with tf.variable_scope('decoder_var') as scope:
        self.W1 = tf.Variable(tf.random_normal([self.hiddensize*2, input_length]), dtype='float32')
        self.W2 = tf.Variable(tf.random_normal([self.hiddensize*2, self.hiddensize]), dtype='float32')
        self.b1 = tf.Variable(tf.random_normal([1, input_length]), dtype='float32')
        self.b2 = tf.Variable(tf.random_normal([1, self.hiddensize]), dtype='float32')

    with tf.variable_scope("mygru") as scope:
        self.gru = tf.nn.rnn_cell.GRUCell(num_units = 100)



    ###################  inputs
    with tf.variable_scope('encoder_output') as scope:# shape = (100,484,100)
        self.input_v = encoder.getoutput()

    with tf.variable_scope('realsent') as scope: # shape = (100,seq,100)
        self.realsent = tf.placeholder(dtype='float32')

    with tf.variable_scope('decoder_res_seq') as scope: # shape = (100,seq)
        self.output_length = tf.placeholder(dtype='float32')

    with tf.variable_scope('encoder_state') as scope: #shape = (100,100)
        self.grustate = encoder.getstate()
        self.grustate = tf.reshape(self.grustate,shape=(100,1,100))



    ################## input concat ====>> (100,4) .. 4 = (484,100) + (seq,100) + seq + 100
    #with tf.variable_scope('concat_cost_input') as scope:
    #    self.concat_input = tf.stack([self.input_v,self.realsent,self.output_length,self.grustate],axis=1)

    ################## cost calculation
    with tf.variable_scope('my_cost_value') as scope:
        #self.cost = tf.Variable(0.0,dtype="float32")
        newar = np.arange(100, dtype="float32")  # batch indices 0..99

        self.lastcost = tf.scan(self._bach_calcost, newar)

    ################## training
    with tf.variable_scope('adamtrain') as scope:  # note: plain gradient descent despite the name
        self.adamtrain = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(self.lastcost)

    with tf.variable_scope('gradtrain') as scope:
        self.gradtrain = tf.train.GradientDescentOptimizer(learning_rate=10).minimize(self.lastcost)

    with tf.variable_scope('gradtrain') as scope:  # note: scope name 'gradtrain' is reused here
        self.gradtrain2 = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(self.lastcost)

    self.input_seg_v = tf.Variable(tf.zeros(shape=(484, 100)), dtype="float32")

################## for scan func in _bach_calcost
def _nt_atten(self, grustate_output, ignore):  # not teacher forcing

    with tf.variable_scope("split") as scope:
        pre_new_grustate, pre_new_output = tf.split(grustate_output, num_or_size_splits=2, axis=1)
        _ = ignore

    with tf.variable_scope("grucall") as scope:
        new_grustate, new_output =self.gru.call(pre_new_grustate, pre_new_output )

        #word = myprepdata.findwordsfromvec(output.eval())
        #print(word)
        #np_output = myprepdata.findvecfromwords(word)
        #output = tf.Variable(np_output, dtype="float32")

    with tf.variable_scope("atten") as scope:
        attn = tf.concat([new_grustate, new_output],1)#concat and linear
        attn = tf.add(tf.matmul(attn , self.W1),self.b1) # needs W initialize
        attn = tf.nn.softmax(tf.nn.relu(attn))

    with tf.variable_scope("bmm") as scope: #make 1 wordlike 1 484 * 484 100 == 1 100
        bmm = tf.matmul( attn,tf.squeeze(self.input_seg_v)) 

    with tf.variable_scope("comb") as scope:
        attn_com = tf.concat([bmm, self.output],1 ) # 1 100 + 1 100 = 1 200
        attn_com = tf.add(tf.matmul(attn_com,self.W2),self.b2) # 1 200 * 200 100 = 1 100
        self.output = tf.nn.relu(attn_com)

    with tf.variable_scope("concat") as scope:
        grustate_output = tf.concat([new_grustate, new_output],1) # 1 100 + 1 100 = 1 200
        grustate_output = tf.reshape(grustate_output, [1,200], name='grustate_output')

    return grustate_output




################# for scan func in __init__
def _bach_calcost(self, coste, conc_input):

    with tf.variable_scope('decoder_output_segment') as scope:
        self.output = self.output_ini
        newarv = tf.cast(conc_input,tf.int32)

    with tf.variable_scope('split_input') as scope:
        grustate = tf.gather(self.grustate, newarv)
        self.input_seg_v = tf.gather(self.input_v, newarv)
        output_length = tf.gather(self.output_length,newarv)
        realsent = tf.gather(self.realsent, newarv)

    #    (input_v, realsent, output_length, grustate) = conc_input

    with tf.variable_scope('encoder_concat') as scope:
        encoder_output_state = tf.concat([self.output, grustate],1) # 1*100 + 1*100 = 1*200

    with tf.variable_scope('makesent') as scope:
        self.last_status = tf.scan(self._nt_atten, output_length , initializer = encoder_output_state) #my full output

    with tf.variable_scope('sent_postprocess') as scope:
        pre_sentence = tf.squeeze(self.last_status)  # (1,seq,1,200) -> (seq,200)
        _, self.sentence = tf.split(pre_sentence, num_or_size_splits=2, axis=1)  # (seq,200) -> ignore (seq,100), acc (seq,100)

    with tf.variable_scope('calcost') as scope:
        self.precost = (realsent - self.sentence) * (realsent - self.sentence)
        newcost =  tf.reduce_mean(self.precost)

    return newcost



################# print batch cost
def calcost(self, sess, realsente, output_length, line):  # reduce dimension
    return sess.run([self.lastcost], feed_dict={self.realsent: realsente, self.output_length: output_length,
                                                self.enc.input: line})


################# for training
def adamtraining(self, sess, realsente, output_length, line):
    #summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./testgru")
    writer.add_graph(sess.graph)
    #s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
    #                                              self.enc.input:line})
    #writer.add_summary(s,0)

    return sess.run([self.adamtrain],feed_dict = {self.realsent: realsente, self.output_length:output_length,
                                                  self.enc.input:line})

def gradtraining(self,sess,realsente,output_length,line):
    #summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./testgru")
    writer.add_graph(sess.graph)
    #s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
    #                                              self.enc.input:line})
    #writer.add_summary(s,0)

    return sess.run([self.gradtrain],feed_dict = {self.realsent: realsente, self.output_length:output_length,
                                                  self.enc.input:line})

def gradtraining2(self, sess, realsente, output_length, line):
    #summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./testgru")
    writer.add_graph(sess.graph)
    #s = sess.run(summary,feed_dict = {self.realsent: realsente, self.output_length:output_length,
    #                                              self.enc.input:line})
    #writer.add_summary(s,0)

    return sess.run([self.gradtrain2],feed_dict = {self.realsent: realsente, self.output_length:output_length,
                                                  self.enc.input:line})


def hidden_init(self,sess):
    return sess.run(tf.global_variables_initializer())

I don't see an explicit while loop in your code to point at the exact spot, but this is the core issue (tf.scan is built on top of tf.while_loop, which is why the error message mentions one).
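
As a minimal sketch (a made-up toy graph, not your code): a tf.scan whose function returns a tensor with a different shape than the running accumulator fails at graph construction with the same kind of error, because scan carries the accumulator through its internal while loop.

    import tensorflow as tf  # TF 1.x graph-mode API assumed

    elems = tf.constant([1.0, 2.0, 3.0])

    # OK: the function returns the same shape as the accumulator every step.
    good = tf.scan(lambda acc, x: acc + x, elems, initializer=tf.constant(0.0))

    # BROKEN: the accumulator enters the internal while loop with shape (),
    # but the function returns shape (2,), so construction fails with
    # "The shape for scan/while/Merge_1:0 is not an invariant for the loop."
    try:
        bad = tf.scan(lambda acc, x: tf.stack([x, x]), elems,
                      initializer=tf.constant(0.0))
    except ValueError as e:
        print(e)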

When you use tf.while_loop, you provide a set of tensors as your loop_vars. The shapes of these tensors must not change while the loop executes. Usually the shapes do not change at all; when they do, it is typically a sign of a bug, for example accidentally reordering the output tensors of the body.
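
For instance (an illustrative sketch, not from your code), swapping the order of the body's return values makes each loop variable appear to change shape between iterations:

    import tensorflow as tf  # TF 1.x graph-mode API assumed

    c0 = tf.constant(0.0)        # counter, shape ()
    v0 = tf.zeros([1, 200])      # state,   shape (1, 200)

    # The body returns (state, counter) instead of (counter, state), so the
    # first loop variable enters with shape () but comes back as (1, 200),
    # and while_loop raises the same "not an invariant for the loop" error.
    try:
        tf.while_loop(lambda c, v: c < 3.0,
                      lambda c, v: (v, c + 1.0),   # swapped on purpose
                      loop_vars=(c0, v0))
    except ValueError as e:
        print(e)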

Advanced users can explicitly specify the shape_invariants they expect for the tensors in loop_vars. Shape invariants are discussed in some detail at https://www.tensorflow.org/api_docs/python/tf/while_loop.
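
Here is a minimal sketch of both escape hatches the error message mentions (all names below are illustrative): shape_invariants for a loop variable whose shape legitimately changes, and set_shape() to pin down a shape that TensorFlow cannot infer on its own.

    import tensorflow as tf  # TF 1.x graph-mode API assumed

    # A loop variable that grows along axis 0 each iteration: declare its
    # first dimension as unknown so the invariant check passes.
    i0 = tf.constant(0)
    acc0 = tf.zeros([1, 100])

    _, acc = tf.while_loop(
        lambda i, acc: i < 3,
        lambda i, acc: (i + 1, tf.concat([acc, tf.zeros([1, 100])], axis=0)),
        loop_vars=(i0, acc0),
        shape_invariants=(i0.get_shape(), tf.TensorShape([None, 100])))

    # Alternatively, when a shape is actually fixed but TensorFlow cannot
    # infer it, e.g. a placeholder created without a shape, pin it down:
    x = tf.placeholder(tf.float32)   # static shape is completely unknown
    x.set_shape([100, None, 100])    # rank and the known dims are now fixed

In your code specifically, self.realsent and self.output_length are placeholders created without any shape, so the cost that _bach_calcost returns has an unknown static shape, and the outer scan's accumulator goes from shape () to an unknown shape after one iteration, which matches what the error reports. Giving those placeholders explicit shapes is likely the first thing to try.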