TimeSeries 用例:如何在 VAE 网络(降噪器)之上插入 LSTM 网络(预测器)

TimeSeries use case : How to plug an LSTM network (predictor) on top of a VAE network (denoiser)

我一直在努力编写下图中的网络。

时间序列专用用例:

我正在努力构建这样一个网络,保留两个损失函数:

数据集包含 1677 个时间序列,每个时间序列有 61440 个时间步。我用滚动平均值对所有时间序列进行了下采样(以减少 61440 个特征的数量),并用长度为 200 的滑动窗口对它们进行重塑,得到形状为 (989300, 1, 200) 的输入,这就是送入 VAE 的样本的形状(共 989300 个序列)。网络的输出是序列的下一个时间状态。例如,给定一个长度为 200 的序列,LSTM 回归器部分预测第 201 个状态,即紧随该序列之后的值。

我的形状(Xtrain、Xtest、ytrain、ytest):

((989300, 1, 200), (286897, 1, 200), (989300,), (286897,))

这是我的代码。我知道它可能不是那么干净,我正在尝试让它先工作。

我的导入

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, TimeDistributed, Input, RepeatVector, LSTM

我的损失函数,结合了 VAE 损失和 LSTM 损失,带有 lambda 参数:

def vae_loss2_(input_x, decoder1, y_pred, z_log_sigma, z_mean, lambd):
    """Combine the VAE objective with a lambda-weighted MSE term.

    Args:
        input_x: Tensor used as the target of the binary cross-entropy.
        decoder1: Decoder output; used as the prediction of the binary
            cross-entropy and as the first argument of the MSE term.
        y_pred: Tensor used as the second argument of the MSE term.
        z_log_sigma: Latent tensor; ``exp(z_log_sigma)`` enters the KL term
            as the variance, i.e. it is treated as a log-variance here.
        z_mean: Latent mean tensor.
        lambd: Weight applied to the MSE term.

    Returns:
        ``(reconstruction + kl) + lambd * mse``. The reconstruction and KL
        terms are each reduced with ``K.sum`` over all of their elements.
    """
    # Reconstruction error, summed over every element.
    reconstruction_term = K.sum(K.binary_crossentropy(input_x, decoder1))

    # Closed-form KL divergence of a diagonal Gaussian N(z_mean, exp(z_log_sigma))
    # from a standard normal N(0, 1).
    kl_integrand = K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma
    kl_term = 0.5 * K.sum(kl_integrand)

    # NOTE(review): this compares the decoder output with y_pred; no
    # ground-truth target is involved in this term -- confirm this is intended.
    prediction_term = tf.keras.losses.MSE(decoder1, y_pred)

    vae_term = reconstruction_term + kl_term
    return vae_term + lambd * prediction_term

我的采样函数,VAE 使用它从潜在空间(latent space)中采样:

def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Args:
        args: Pair ``(z_mean, z_log_sigma)`` of tensors with the same shape.
            ``z_log_sigma`` is interpreted as a log-variance, consistent with
            the KL term in ``vae_loss2_``, which uses ``exp(z_log_sigma)`` as
            the variance.

    Returns:
        ``z_mean + exp(0.5 * z_log_sigma) * epsilon`` where ``epsilon`` is
        standard-normal noise with the same shape as ``z_mean``.
    """
    z_mean, z_log_sigma = args
    # One independent noise value per element of z_mean; for the current
    # latent size of 1 this is the same shape the hard-coded version produced.
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.)
    # Scale the noise by the standard deviation, not by the raw log-variance:
    # sigma = exp(0.5 * log(sigma^2)).
    return z_mean + K.exp(0.5 * z_log_sigma) * epsilon

最后,这是我所有的代码,包括两个网络:

latent_dim = 1
timesteps, features = 1, 200

# Settings shared by every hidden layer (encoder, decoder and the LSTM).
hidden_kwargs = dict(
    activation='relu',
    kernel_initializer='random_normal',
    bias_initializer='random_normal',
)

# One sample has shape (timesteps, features).
input_x = Input(shape=(timesteps, features))

# Encoder: Dense stack narrowing 150 -> 100 -> 50 -> 20.
h1 = input_x
for units in (150, 100, 50, 20):
    h1 = Dense(units, **hidden_kwargs)(h1)

# Latent parameters and the sampled latent code.
z_mean = Dense(latent_dim)(h1)
z_log_sigma = Dense(latent_dim)(h1)
z = Lambda(sampling)([z_mean, z_log_sigma])

# Decoder: mirror of the encoder, widening 20 -> 50 -> 100 -> 150, followed by
# a linear projection back to `features` values per timestep.
decoder1 = z
for units in (20, 50, 100, 150):
    decoder1 = Dense(units, **hidden_kwargs)(decoder1)
decoder1 = TimeDistributed(Dense(features))(decoder1)

# LSTM predictor stacked on the decoder output; return_sequences=True keeps
# the timestep axis, so the final Dense(1) emits one value per timestep.
lstm1 = LSTM(150, return_sequences=True, **hidden_kwargs)(decoder1)
lstm1 = Dense(1)(lstm1)

finalModel = Model(input_x, lstm1)

# The whole objective is attached through add_loss, with lambda = 0.2.
combined_loss = vae_loss2_(input_x, decoder1, lstm1, z_log_sigma, z_mean, 0.2)
finalModel.add_loss(combined_loss)

# NOTE(review): with loss=None no target-based loss is registered, so Keras
# expects no target data in fit(); passing ytrain_/ytest_ below is what raises
# "expected no data, but got: ..." (see the traceback after this listing).
finalModel.compile(loss=None, optimizer='adam')


history = finalModel.fit(
    Xtrain_,
    ytrain_,
    epochs=70,
    batch_size=2500,
    validation_data=(Xtest_, ytest_),
)

执行此代码会引发以下错误,因为 fit 步骤并不期望接收用于预测下一个时间步的 ytrain 和 ytest:


WARNING:tensorflow:Output dense_593 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to dense_593.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-207-47c2c28976f0> in <module>
     43 #a = np.load('atrain.npy')
     44 
---> 45 history = finalModel.fit(Xtrain_, ytrain_, epochs=70, batch_size = 2500, validation_data = (Xtest_,ytest_))

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    726         max_queue_size=max_queue_size,
    727         workers=workers,
--> 728         use_multiprocessing=use_multiprocessing)
    729 
    730   def evaluate(self,

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    222           validation_data=validation_data,
    223           validation_steps=validation_steps,
--> 224           distribution_strategy=strategy)
    225 
    226       total_samples = _get_total_number_of_samples(training_data_adapter)

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    545         max_queue_size=max_queue_size,
    546         workers=workers,
--> 547         use_multiprocessing=use_multiprocessing)
    548     val_adapter = None
    549     if validation_data:

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    592         batch_size=batch_size,
    593         check_steps=False,
--> 594         steps=steps)
    595   adapter = adapter_cls(
    596       x,

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
   2517           shapes=None,
   2518           check_batch_axis=False,  # Don't enforce the batch size.
-> 2519           exception_prefix='target')
   2520 
   2521       # Generate sample-wise weight values given the `sample_weight` and

/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    487       raise ValueError(
    488           'Error when checking model ' + exception_prefix + ': '
--> 489           'expected no data, but got:', data)
    490     return []
    491   if data is None:

ValueError: ('Error when checking model target: expected no data, but got:', array([0.49538032, 0.55329189, 0.47183994, ..., 0.84650205, 0.89713042,
       0.87897429]))

非常感谢您的帮助,

我解决了我的问题:使用 add_loss 函数时,由于 compile 中没有指定损失,模型的 fit 不期望接收任何 ytrain 或 ytest。要把 ytrain 和 ytest 传入 fit 方法,就必须在 compile 中使用 loss_fn(ytrue, ypred) 形式的损失函数,例如 return MSE(ytrue, ypred),就像经典的 keras.losses.MSE 一样。