Python Keras ValueError: Data cardinality is ambiguous
I'm trying to build a model that can predict whether a credit card transaction is fraudulent. My dataset is available on Kaggle. Everything works fine until I fit my model, at which point I get this error:
ValueError: Data cardinality is ambiguous:
x sizes: 7433462
y sizes: 284807
Make sure all arrays contain the same number of samples.
Can anyone help me figure out what's wrong?
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
data = pd.read_csv("creditcard.csv")
trainLabels = data['Class']
labels = ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']
trainSamples = data[labels]
trainLabels = np.array(trainLabels)
trainSamples = np.array(trainSamples)
trainLabels = shuffle(trainLabels)
trainSamples = shuffle(trainSamples)
scaler = MinMaxScaler(feature_range = (0, 1))
scaledTrainSample = scaler.fit_transform(trainSamples.reshape(-1,1))
model = Sequential([
    Dense(units = 16, input_shape = (1, ), activation = 'relu'),
    Dense(units = 32, activation = 'relu'),
    Dense(units = 2, activation = 'softmax')
])
model.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
model.fit(x = scaledTrainSample, y = trainLabels, validation_split = 0.1, batch_size = 10, epochs = 300, verbose = 2)
The main problem with your code is that the model's input shape should be 30 rather than 1, since you have 30 features, and the output shape should be 1 rather than 2, since you have a single binary label (i.e. only two classes, 0 or 1). A few other mistakes are also corrected in the code below.
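As for the error message itself: trainSamples.reshape(-1, 1) flattens the 2-D feature matrix into one long column, so Keras counts one "sample" per value in x but one per row in y, and the two cardinalities no longer match. A minimal sketch of the effect (the shapes here are illustrative, not taken from your run):
import numpy as np
# stand-in for a (transactions, features) matrix
features = np.zeros((284807, 29))
flattened = features.reshape(-1, 1)  # every single value becomes its own "sample"
print(features.shape)   # (284807, 29) -> one row per transaction, matches the labels
print(flattened.shape)  # (8259403, 1) -> one row per value, no longer matches y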
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
tf.random.set_seed(0)
# import the data
df = pd.read_csv('creditcard.csv')
# extract the features and target
X = df.drop(labels=['Class'], axis=1).values
y = df['Class'].values
# count the number of classes
print(np.unique(y))
# [0 1]
# shuffle the data
X, y = shuffle(X, y, random_state=42)
# scale the features
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)
# build the model
model = Sequential([
    Dense(units=16, activation='relu', input_shape=(X.shape[1], )),
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid')
])
# fit the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x=X, y=y, validation_split=0.1, batch_size=256, epochs=3)
# Epoch 1/3
# 1002/1002 [==============================] - 1s 761us/step - loss: 0.1787 - accuracy: 0.9983 - val_loss: 0.0193 - val_accuracy: 0.9981
# Epoch 2/3
# 1002/1002 [==============================] - 1s 684us/step - loss: 0.0136 - accuracy: 0.9983 - val_loss: 0.0130 - val_accuracy: 0.9981
# Epoch 3/3
# 1002/1002 [==============================] - 1s 680us/step - loss: 0.0119 - accuracy: 0.9983 - val_loss: 0.0127 - val_accuracy: 0.9981
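Once trained, a quick sanity check on a few rows (reusing model and X from above; rounding the sigmoid outputs at the usual 0.5 threshold, which is a convention, not something the model enforces):
# round the sigmoid probabilities to hard 0/1 class predictions
probs = model.predict(X[:5])
print(probs.round().astype(int).flatten())
Keep in mind that this dataset is highly imbalanced (roughly 0.17% of transactions are fraud), so the ~99.8% accuracy above is close to what always predicting class 0 would score; precision/recall or AUC is a more informative metric here.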