如何用数据集拟合 keras 模型?
How to fit a keras model with a dataset?
我尝试用一组 csv 文件来拟合我的 keras 模型(我不想把这些文件全部加载到内存中再拼接起来)。我试图用“tf.data.experimental.make_csv_dataset”构建一个数据集(我认为它的工作方式类似于 MATLAB 的 datastore?),并用“next”和“iter”为我的模型提供数据,但我无法解决输入大小和/或输入类型的问题。我将不胜感激任何帮助。提前致谢。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Question's original attempt, kept verbatim: it reproduces the input
# size / input type problem described in the text above.
# Build a batched dataset from every CSV matching the glob pattern.
dataset = tf.data.experimental.make_csv_dataset(
"data/Testdata/*.csv",
batch_size=128,
field_delim=",",
num_epochs=1,
select_columns=['A', 'B', 'C'],
label_name='C')
# MLP Model
model = Sequential()
# NOTE(review): input_dim=5 does not match the data; with columns A, B, C
# selected and C used as the label, only two feature columns (A, B) remain.
model.add(Dense(1, input_dim=5))
model.add(Dense(5, activation='relu'))
model.add(Dense(1, activation='linear'))
model.summary()
model.compile(loss='mean_absolute_error', optimizer="adam", metrics=['mean_squared_error'])
# for batch in dataset:
# NOTE(review): this pulls a single batch only. Judging by the printed
# batches further down the page, X appears to be an OrderedDict of
# per-column tensors rather than one 2-D feature tensor.
X, y = next(iter(dataset))
res = model.fit(X, y, epochs=5)
您可以将数据集直接提供给 model.fit,只需做一些更改:
创建虚拟数据:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Build a small dummy frame (A and B hold the same values, C differs) and
# write it out twice so a "*.csv" glob has more than one file to match.
feature_values = [50.1, 1.23, 4.5, 4.3, 3.2]
df = pd.DataFrame(
    data={
        'A': list(feature_values),
        'B': list(feature_values),
        'C': [5.2, 3.1, 2.2, 1., 3.],
    }
)
for csv_name in ('data1.csv', 'data2.csv'):
    df.to_csv(csv_name, index=False)
预处理数据:
# Stream every CSV under /content in batches of 2 for a single pass;
# columns A and B come back as features and C is split off as the label.
csv_options = dict(
    batch_size=2,
    field_delim=",",
    num_epochs=1,
    select_columns=['A', 'B', 'C'],
    label_name='C',
)
dataset = tf.data.experimental.make_csv_dataset("/content/*.csv", **csv_options)
处理前:
# Inspect the raw batches: each element is a (features, label) pair.
for x in dataset:
    print(x)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([4.5 , 1.23], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([4.5 , 1.23], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3.1], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([50.1, 4.5], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([50.1, 4.5], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([5.2, 2.2], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 4.3, 50.1], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 4.3, 50.1], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1. , 5.2], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.23, 4.3 ], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.23, 4.3 ], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1, 1. ], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.2, 3.2], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.2, 3.2], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3., 3.], dtype=float32)>)
请注意,make_csv_dataset 的参数 shuffle 默认设置为 True。这就是您看到的输出顺序可能被打乱的原因。
预处理后输入数据具有来自 A 和 B 的 2 个特征:
# Merge the per-column feature tensors into a single (batch, 2) tensor so
# the dataset yields (features, label) pairs that Dense layers can consume.
# tf.stack on the last axis is equivalent to expand_dims + concat here.
dataset = dataset.map(
    lambda x, y: (tf.stack([x['A'], x['B']], axis=-1), y)
)
# Print the transformed batches to confirm the new feature shape.
for x in dataset:
    print(x)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.5 , 4.5 ],
[1.23, 1.23]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3.1], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.3, 4.3],
[4.3, 4.3]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 1.23, 1.23],
[50.1 , 50.1 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1, 5.2], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[50.1, 50.1],
[ 3.2, 3.2]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([5.2, 3. ], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.5, 4.5],
[3.2, 3.2]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3. ], dtype=float32)>)
训练您的模型:
# Small MLP regressor: two input features -> 1 -> 5 (ReLU) -> 1 (linear).
mlp_layers = [
    Dense(1, input_dim=2),
    Dense(5, activation='relu'),
    Dense(1, activation='linear'),
]
model = Sequential(mlp_layers)
model.summary()
model.compile(
    loss='mean_absolute_error',
    optimizer="adam",
    metrics=['mean_squared_error'],
)
# The dataset already yields (features, label) batches, so it is handed
# to fit directly instead of separate X / y arrays.
res = model.fit(dataset, epochs=5)
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_21 (Dense) (None, 1) 3
dense_22 (Dense) (None, 5) 10
dense_23 (Dense) (None, 1) 6
=================================================================
Total params: 19
Trainable params: 19
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
5/5 [==============================] - 1s 21ms/step - loss: 10.2060 - mean_squared_error: 247.2872
Epoch 2/5
5/5 [==============================] - 0s 10ms/step - loss: 10.0791 - mean_squared_error: 241.0892
Epoch 3/5
5/5 [==============================] - 0s 8ms/step - loss: 9.9328 - mean_squared_error: 233.3316
Epoch 4/5
5/5 [==============================] - 0s 6ms/step - loss: 9.7714 - mean_squared_error: 224.4764
Epoch 5/5
5/5 [==============================] - 0s 8ms/step - loss: 9.6863 - mean_squared_error: 221.0282
我尝试用一组 csv 文件来拟合我的 keras 模型(我不想把这些文件全部加载到内存中再拼接起来)。我试图用“tf.data.experimental.make_csv_dataset”构建一个数据集(我认为它的工作方式类似于 MATLAB 的 datastore?),并用“next”和“iter”为我的模型提供数据,但我无法解决输入大小和/或输入类型的问题。我将不胜感激任何帮助。提前致谢。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Question's original attempt, kept verbatim: it reproduces the input
# size / input type problem described in the text above.
# Build a batched dataset from every CSV matching the glob pattern.
dataset = tf.data.experimental.make_csv_dataset(
"data/Testdata/*.csv",
batch_size=128,
field_delim=",",
num_epochs=1,
select_columns=['A', 'B', 'C'],
label_name='C')
# MLP Model
model = Sequential()
# NOTE(review): input_dim=5 does not match the data; with columns A, B, C
# selected and C used as the label, only two feature columns (A, B) remain.
model.add(Dense(1, input_dim=5))
model.add(Dense(5, activation='relu'))
model.add(Dense(1, activation='linear'))
model.summary()
model.compile(loss='mean_absolute_error', optimizer="adam", metrics=['mean_squared_error'])
# for batch in dataset:
# NOTE(review): this pulls a single batch only. Judging by the printed
# batches further down the page, X appears to be an OrderedDict of
# per-column tensors rather than one 2-D feature tensor.
X, y = next(iter(dataset))
res = model.fit(X, y, epochs=5)
您可以将数据集直接提供给 model.fit,只需做一些更改:
创建虚拟数据:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Build a small dummy frame (A and B hold the same values, C differs) and
# write it out twice so a "*.csv" glob has more than one file to match.
feature_values = [50.1, 1.23, 4.5, 4.3, 3.2]
df = pd.DataFrame(
    data={
        'A': list(feature_values),
        'B': list(feature_values),
        'C': [5.2, 3.1, 2.2, 1., 3.],
    }
)
for csv_name in ('data1.csv', 'data2.csv'):
    df.to_csv(csv_name, index=False)
预处理数据:
# Stream every CSV under /content in batches of 2 for a single pass;
# columns A and B come back as features and C is split off as the label.
csv_options = dict(
    batch_size=2,
    field_delim=",",
    num_epochs=1,
    select_columns=['A', 'B', 'C'],
    label_name='C',
)
dataset = tf.data.experimental.make_csv_dataset("/content/*.csv", **csv_options)
处理前:
# Inspect the raw batches: each element is a (features, label) pair.
for x in dataset:
    print(x)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([4.5 , 1.23], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([4.5 , 1.23], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3.1], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([50.1, 4.5], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([50.1, 4.5], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([5.2, 2.2], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 4.3, 50.1], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 4.3, 50.1], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1. , 5.2], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.23, 4.3 ], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.23, 4.3 ], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1, 1. ], dtype=float32)>)
(OrderedDict([('A', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.2, 3.2], dtype=float32)>), ('B', <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.2, 3.2], dtype=float32)>)]), <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3., 3.], dtype=float32)>)
请注意,make_csv_dataset 的参数 shuffle 默认设置为 True。这就是您看到的输出顺序可能被打乱的原因。
预处理后输入数据具有来自 A 和 B 的 2 个特征:
# Merge the per-column feature tensors into a single (batch, 2) tensor so
# the dataset yields (features, label) pairs that Dense layers can consume.
# tf.stack on the last axis is equivalent to expand_dims + concat here.
dataset = dataset.map(
    lambda x, y: (tf.stack([x['A'], x['B']], axis=-1), y)
)
# Print the transformed batches to confirm the new feature shape.
for x in dataset:
    print(x)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.5 , 4.5 ],
[1.23, 1.23]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3.1], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.3, 4.3],
[4.3, 4.3]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 1.23, 1.23],
[50.1 , 50.1 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1, 5.2], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[50.1, 50.1],
[ 3.2, 3.2]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([5.2, 3. ], dtype=float32)>)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[4.5, 4.5],
[3.2, 3.2]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.2, 3. ], dtype=float32)>)
训练您的模型:
# Small MLP regressor: two input features -> 1 -> 5 (ReLU) -> 1 (linear).
mlp_layers = [
    Dense(1, input_dim=2),
    Dense(5, activation='relu'),
    Dense(1, activation='linear'),
]
model = Sequential(mlp_layers)
model.summary()
model.compile(
    loss='mean_absolute_error',
    optimizer="adam",
    metrics=['mean_squared_error'],
)
# The dataset already yields (features, label) batches, so it is handed
# to fit directly instead of separate X / y arrays.
res = model.fit(dataset, epochs=5)
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_21 (Dense) (None, 1) 3
dense_22 (Dense) (None, 5) 10
dense_23 (Dense) (None, 1) 6
=================================================================
Total params: 19
Trainable params: 19
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
5/5 [==============================] - 1s 21ms/step - loss: 10.2060 - mean_squared_error: 247.2872
Epoch 2/5
5/5 [==============================] - 0s 10ms/step - loss: 10.0791 - mean_squared_error: 241.0892
Epoch 3/5
5/5 [==============================] - 0s 8ms/step - loss: 9.9328 - mean_squared_error: 233.3316
Epoch 4/5
5/5 [==============================] - 0s 6ms/step - loss: 9.7714 - mean_squared_error: 224.4764
Epoch 5/5
5/5 [==============================] - 0s 8ms/step - loss: 9.6863 - mean_squared_error: 221.0282