在使用 TensorFlow 进行多变量线性回归的情况下,如何添加 CSV 日志记录机制?
How can I add a CSV logging mechanism to multivariable linear regression using TensorFlow?
假设,下面是我在Python中的多元线性回归源代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
def load_data_k(fname: str, yyy_index: int, **selection):
    """Load numeric rows from a text file and split them into train/validation tensors.

    Every non-blank line is split on whitespace. The field at position
    ``yyy_index`` is used as the target value and all fields after it are
    used as features; fields before ``yyy_index`` are ignored.

    NOTE(review): fields are split on whitespace, not on commas, although the
    caller passes a ``.csv`` file name -- confirm the actual file format.

    Args:
        fname: Path of the data file to read.
        yyy_index: Zero-based position of the target column within a row.
        **selection: Optional settings.
            top_n_lines: read only the first N lines (fewer if the file is
                shorter).
            random_n_lines: draw N lines at random without replacement;
                ignored when ``top_n_lines`` is given.
            validation_part: fraction of the rows held out for validation.
                Defaults to 1.0; any value above 0.9999 disables the split and
                the full data set is returned as both training and validation
                data.

    Returns:
        A tuple ``(tx, ty, vx, vy)`` of float32 tensors: training features,
        training targets, validation features and validation targets.

    Raises:
        ValueError: if ``random_n_lines`` is larger than the number of lines
            in the file, or a field cannot be parsed as a float.
        IndexError: if a row has no field at ``yyy_index`` or no rows were
            read at all.
    """
    from itertools import islice

    # The context manager closes the file even if reading fails.
    with open(fname) as file:
        if "top_n_lines" in selection:
            lines = list(islice(file, int(selection["top_n_lines"])))
        elif "random_n_lines" in selection:
            lines = random.sample(
                file.readlines(), int(selection["random_n_lines"])
            )
        else:
            lines = file.readlines()

    data_x, data_y = [], []
    for line in lines:
        row = line.split()
        if not row:
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        data_x.append([float(value) for value in row[yyy_index + 1:]])
        data_y.append(float(row[yyy_index]))

    num_rows = len(data_x)
    print("row size = ", len(data_x[0]))

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y = data_x, data_y
    else:
        n = int(num_rows * given_fraction)
        # Take the validation slice BEFORE truncating the training data, so
        # the two sets are disjoint: rows [0, n) validate, rows [n, end) train.
        valid_x, valid_y = data_x[:n], data_y[:n]
        data_x, data_y = data_x[n:], data_y[n:]

    print("size of x = ", len(data_x))
    print("size of y = ", len(data_y))

    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
    ty = tf.convert_to_tensor(data_y, dtype=tf.float32)
    vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
    vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)
    return tx, ty, vx, vy
# END of the function
# Read the samples from disk and split them into training / validation tensors.
train_x, train_y, validate_x, validate_y = load_data_k(
    fname="data_file.csv",
    yyy_index=6,
    random_n_lines=90000,
    validation_part=0.2,
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))

# A single hand-written sample with 40 feature values, used for one
# prediction once training has finished.
predict_data = np.array([[
    7.042, 5.781, 5.399, 5.373, 5.423, -9.118,
    5.488, 5.166, 4.852, 7.470, 6.452, 6.069,
    0, 0, 0, 1, 0, 1, 1, 3, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0,
]])

# Keras model: four Dense layers without an explicit activation, taking
# 40 inputs and producing a single output.
model = Sequential([
    Dense(1, input_dim=40),
    Dense(128),
    Dense(128),
    Dense(1),
])

# Adam optimizer built with 0.1 as its first argument; mean squared error loss.
adam_opt = Adam(0.1)
model.compile(optimizer=adam_opt, loss='mse')

# Note: validate_x / validate_y are loaded above but are not passed to fit().
history = model.fit(train_x, train_y, epochs=500)

prediction = model.predict(predict_data)
print(prediction)
我想为训练损失、验证损失、训练准确性和验证准确性添加 CSV 日志记录。
我该怎么做?
只需使用 tf.keras.callbacks.CSVLogger 回调,并在 compile 时通过 metrics 传入任何您想在训练期间记录的回归指标(准确率是分类指标,对回归问题没有意义,因此这里记录均方误差):
import tensorflow as tf
# Same four-layer Dense stack as in the question: 40 inputs, one output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_dim=40))
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(1))

adam_opt = tf.keras.optimizers.Adam(0.1)
model.compile(
    loss='mse',
    optimizer=adam_opt,
    # `metrics` is documented as a list of metrics, so pass a list rather
    # than a bare metric object. The metric name becomes the CSV column
    # name ("mean_squared_error" / "val_mean_squared_error").
    metrics=[tf.keras.metrics.MeanSquaredError(name="mean_squared_error")],
)

# Random stand-in data: 50 samples with 40 features each and one target.
train_x = tf.random.normal((50, 40))
train_y = tf.random.normal((50, 1))
val_x = tf.random.normal((50, 40))
val_y = tf.random.normal((50, 1))

# CSVLogger writes one row of results per epoch to the given file. Passing
# validation_data makes fit() also report the val_* columns.
csv_logger = tf.keras.callbacks.CSVLogger('model_training.csv')
history = model.fit(
    train_x,
    train_y,
    epochs=5,
    validation_data=(val_x, val_y),
    callbacks=[csv_logger],
)
生成的 model_training.csv 内容如下(此处以表格形式展示;实际文件默认以逗号分隔):
epoch loss mean_squared_error val_loss val_mean_squared_error
0 304.349060 304.349060 69.584991 69.584991
1 105.304787 105.304787 170.063126 170.063126
2 175.232788 175.232788 7.874812 7.874812
3 104.159607 104.159607 320.626556 320.626556
4 194.709763 194.709763 1.438866 1.438866
假设,下面是我在Python中的多元线性回归源代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
def load_data_k(fname: str, yyy_index: int, **selection):
    """Load numeric rows from a text file and split them into train/validation tensors.

    Every non-blank line is split on whitespace. The field at position
    ``yyy_index`` is used as the target value and all fields after it are
    used as features; fields before ``yyy_index`` are ignored.

    NOTE(review): fields are split on whitespace, not on commas, although the
    caller passes a ``.csv`` file name -- confirm the actual file format.

    Args:
        fname: Path of the data file to read.
        yyy_index: Zero-based position of the target column within a row.
        **selection: Optional settings.
            top_n_lines: read only the first N lines (fewer if the file is
                shorter).
            random_n_lines: draw N lines at random without replacement;
                ignored when ``top_n_lines`` is given.
            validation_part: fraction of the rows held out for validation.
                Defaults to 1.0; any value above 0.9999 disables the split and
                the full data set is returned as both training and validation
                data.

    Returns:
        A tuple ``(tx, ty, vx, vy)`` of float32 tensors: training features,
        training targets, validation features and validation targets.

    Raises:
        ValueError: if ``random_n_lines`` is larger than the number of lines
            in the file, or a field cannot be parsed as a float.
        IndexError: if a row has no field at ``yyy_index`` or no rows were
            read at all.
    """
    from itertools import islice

    # The context manager closes the file even if reading fails.
    with open(fname) as file:
        if "top_n_lines" in selection:
            lines = list(islice(file, int(selection["top_n_lines"])))
        elif "random_n_lines" in selection:
            lines = random.sample(
                file.readlines(), int(selection["random_n_lines"])
            )
        else:
            lines = file.readlines()

    data_x, data_y = [], []
    for line in lines:
        row = line.split()
        if not row:
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        data_x.append([float(value) for value in row[yyy_index + 1:]])
        data_y.append(float(row[yyy_index]))

    num_rows = len(data_x)
    print("row size = ", len(data_x[0]))

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y = data_x, data_y
    else:
        n = int(num_rows * given_fraction)
        # Take the validation slice BEFORE truncating the training data, so
        # the two sets are disjoint: rows [0, n) validate, rows [n, end) train.
        valid_x, valid_y = data_x[:n], data_y[:n]
        data_x, data_y = data_x[n:], data_y[n:]

    print("size of x = ", len(data_x))
    print("size of y = ", len(data_y))

    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
    ty = tf.convert_to_tensor(data_y, dtype=tf.float32)
    vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
    vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)
    return tx, ty, vx, vy
# END of the function
# Read the samples from disk and split them into training / validation tensors.
train_x, train_y, validate_x, validate_y = load_data_k(
    fname="data_file.csv",
    yyy_index=6,
    random_n_lines=90000,
    validation_part=0.2,
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))

# A single hand-written sample with 40 feature values, used for one
# prediction once training has finished.
predict_data = np.array([[
    7.042, 5.781, 5.399, 5.373, 5.423, -9.118,
    5.488, 5.166, 4.852, 7.470, 6.452, 6.069,
    0, 0, 0, 1, 0, 1, 1, 3, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0,
]])

# Keras model: four Dense layers without an explicit activation, taking
# 40 inputs and producing a single output.
model = Sequential([
    Dense(1, input_dim=40),
    Dense(128),
    Dense(128),
    Dense(1),
])

# Adam optimizer built with 0.1 as its first argument; mean squared error loss.
adam_opt = Adam(0.1)
model.compile(optimizer=adam_opt, loss='mse')

# Note: validate_x / validate_y are loaded above but are not passed to fit().
history = model.fit(train_x, train_y, epochs=500)

prediction = model.predict(predict_data)
print(prediction)
我想为训练损失、验证损失、训练准确性和验证准确性添加 CSV 日志记录。
我该怎么做?
只需使用 tf.keras.callbacks.CSVLogger 回调,并在 compile 时通过 metrics 传入任何您想在训练期间记录的回归指标(准确率是分类指标,对回归问题没有意义,因此这里记录均方误差):
import tensorflow as tf
# Same four-layer Dense stack as in the question: 40 inputs, one output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_dim=40))
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(1))

adam_opt = tf.keras.optimizers.Adam(0.1)
model.compile(
    loss='mse',
    optimizer=adam_opt,
    # `metrics` is documented as a list of metrics, so pass a list rather
    # than a bare metric object. The metric name becomes the CSV column
    # name ("mean_squared_error" / "val_mean_squared_error").
    metrics=[tf.keras.metrics.MeanSquaredError(name="mean_squared_error")],
)

# Random stand-in data: 50 samples with 40 features each and one target.
train_x = tf.random.normal((50, 40))
train_y = tf.random.normal((50, 1))
val_x = tf.random.normal((50, 40))
val_y = tf.random.normal((50, 1))

# CSVLogger writes one row of results per epoch to the given file. Passing
# validation_data makes fit() also report the val_* columns.
csv_logger = tf.keras.callbacks.CSVLogger('model_training.csv')
history = model.fit(
    train_x,
    train_y,
    epochs=5,
    validation_data=(val_x, val_y),
    callbacks=[csv_logger],
)
生成的 model_training.csv 内容如下(此处以表格形式展示;实际文件默认以逗号分隔):
epoch loss mean_squared_error val_loss val_mean_squared_error
0 304.349060 304.349060 69.584991 69.584991
1 105.304787 105.304787 170.063126 170.063126
2 175.232788 175.232788 7.874812 7.874812
3 104.159607 104.159607 320.626556 320.626556
4 194.709763 194.709763 1.438866 1.438866