senddex 教程中回调的值错误

Value Error from callbacks in sentdex tutorial

我已经完成了 Sentdex 关于 RNN 和加密货币的系列教程。对于我的一生,我无法弄清楚我哪里出了问题。我最好的猜测是我保存数据的方式有问题,但无论我尝试什么,我都会在我的行“callbacks=[tensorboard, checkpoint]”上得到相同的“ValueError”。任何帮助将不胜感激。

代码:

import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint


SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0
    
def preprocess_df(df):
    df = df.drop('future', 1)
    
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
            
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
            
    random.shuffle(sequential_data)
    
    buys = []
    sells = []
    
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
            
    random.shuffle(buys)
    random.shuffle(sells)
    
    lower = min(len(buys), len(sells))
    
    buys = buys[:lower]
    sells = sells[:lower]
    
    sequential_data = buys+sells
    random.shuffle(sequential_data)
    
    X = []
    y = []
    
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), y

main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]
for ratio in ratios:

    ratio = ratio.split('.csv')[0]
    print(ratio)
    dataset = f'C:/crypto_data/{ratio}.csv'  # get the full path to the file.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])

    df.rename(columns={"close":f"{ratio}_close", "volume":f"{ratio}_volume"}, inplace=True)

    df.set_index('time', inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]

    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)
        
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future']))

main_df.dropna(inplace=True)

times = sorted(main_df.index.values)
last_5pct = sorted(main_df.index.values)[-int(0.05*len(times))]

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="C:/logs/{}".format(NAME))

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("C:/models/{}.model".format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

model.save("C:/models/{}".format(NAME))

错误:

ValueError                                Traceback (most recent call last)
<ipython-input-118-2583585ee356> in <module>
    152     epochs=EPOCHS,
    153     validation_data=(validation_x, validation_y),
--> 154     callbacks=[tensorboard, checkpoint],
    155 )
    156 

~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    233           max_queue_size=max_queue_size,
    234           workers=workers,
--> 235           use_multiprocessing=use_multiprocessing)
    236 
    237       total_samples = _get_total_number_of_samples(training_data_adapter)

~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    531                      'at same time.')
    532 
--> 533   adapter_cls = data_adapter.select_data_adapter(x, y)
    534 
    535   # Handle validation_split, we want to split the data and get the training

~\anaconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\engine\data_adapter.py in select_data_adapter(x, y)
    996         "Failed to find data adapter that can handle "
    997         "input: {}, {}".format(
--> 998             _type_name(x), _type_name(y)))
    999   elif len(adapter_cls) > 1:
   1000     raise RuntimeError(

ValueError: Failed to find data adapter that can handle input: <class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'numpy.float64'>"})

编辑:

我发现提到了 sendtex tutorial 并且没有关于本教程的信息 - 但是您可以在 YouTube 上打开视频,它显示它是 2 years old 教程。也许 2 年前这段代码在旧版本的 tensorflow 中工作。


错误显示数据类型有问题

ValueError: Failed to find data adapter that can handle input: 
<class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'numpy.float64'>"})

--> 154     callbacks=[tensorboard, checkpoint],

这是函数的一部分 model.fit()

所以我得到 model.fit() 中的所有变量并检查 type

print('tensorboard:', type(tensorboard))
print('checkpoint:',  type(checkpoint))

print('train_x:',      type(train_x))
print('train_y:',      type(train_y))
print('validation_x:', type(validation_x))
print('validation_y:', type(validation_y))

这给出了

tensorboard: <class 'tensorflow.python.keras.callbacks.TensorBoard'>
checkpoint: <class 'tensorflow.python.keras.callbacks.ModelCheckpoint'>

train_x: <class 'numpy.ndarray'>
train_y: <class 'list'>
validation_x: <class 'numpy.ndarray'>
validation_y: <class 'list'>

你可以看到错误 <class 'numpy.ndarray'> for train_x, validation_x and <class 'list'> for train_y, validation_y

在互联网上挖掘我在 model.fit()

前两行找到了 Tensorflow 教程 Training and evaluation with the built-in methods
y_train = y_train.astype("float32")
y_test = y_test.astype("float32")

这意味着 y 值必须是 numpy.array 但在您的代码中 train_y 是正常的 list

model.fit() 得到 np.array(train_y) 而不是 train_y

时,代码终于可以工作了
history = model.fit(
    train_x, 
    
    np.array(train_y), # <--- instead of `train_y`

    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)