Getting error "ValueError: could not covert string to float " in integer sequence learning
Getting error "ValueError: could not covert string to float " in integer sequence learning
我是机器学习的新手,我正在尝试为整数序列模型开发一个简单的 RNN (Integer Sequence Learning - Kaggle Competition)
我的数据集取自 Kaggle 比赛,以下是我的代码:
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.preprocessing.sequence import pad_sequences
# convert an array of values into a dataset matrix
def create_dataset(dataset, window_size=1):
dataX, dataY = [], []
for i in range(len(dataset)-window_size-1):
a = dataset[i:(i+window_size), 0]
dataX.append(a)
dataY.append(dataset[i + window_size, 0])
return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
numpy.random.seed(7)
# loading data
dataframe = pd.read_csv('G:/Python/integer_sequencing/train.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values
#dataset = dataset.astype('float32')
train_size = int(len(dataset) * 0.67) #67 per cent used for training
test_size = len(dataset) - train_size # remaining used for testing
train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
#print(train_data.head(n=10))
# normalize the datasets
scaler_train = MinMaxScaler(feature_range=(0, 1))
scaler_test = MinMaxScaler(feature_range=(0, 1))
train = scaler_train.fit_transform(train)
test = scaler_test.fit_transform(test)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
print(trainX.shape)
print(trainY.shape)
我收到如下错误:
我该如何解决?请帮忙...
PS - 我以 Time series prediction 为例
问题是您正在为网络提供 string
序列。仔细检查输入数据的 dtype
并绝对确保它是一个带有 float
数字的 numpy 数组。
我是机器学习的新手,我正在尝试为整数序列模型开发一个简单的 RNN (Integer Sequence Learning - Kaggle Competition)
我的数据集取自 Kaggle 比赛,以下是我的代码:
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.preprocessing.sequence import pad_sequences
# convert an array of values into a dataset matrix
def create_dataset(dataset, window_size=1):
dataX, dataY = [], []
for i in range(len(dataset)-window_size-1):
a = dataset[i:(i+window_size), 0]
dataX.append(a)
dataY.append(dataset[i + window_size, 0])
return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
numpy.random.seed(7)
# loading data
dataframe = pd.read_csv('G:/Python/integer_sequencing/train.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values
#dataset = dataset.astype('float32')
train_size = int(len(dataset) * 0.67) #67 per cent used for training
test_size = len(dataset) - train_size # remaining used for testing
train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
#print(train_data.head(n=10))
# normalize the datasets
scaler_train = MinMaxScaler(feature_range=(0, 1))
scaler_test = MinMaxScaler(feature_range=(0, 1))
train = scaler_train.fit_transform(train)
test = scaler_test.fit_transform(test)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
print(trainX.shape)
print(trainY.shape)
我收到如下错误:
我该如何解决?请帮忙...
PS - 我以 Time series prediction 为例
问题是您正在为网络提供 string
序列。仔细检查输入数据的 dtype
并绝对确保它是一个带有 float
数字的 numpy 数组。