TensorFlow Extended (TFX) 中的 Trainer 组件错误

Trainer Component Error in TensorFlow Extended (TFX)

这是我在学完教程之后,第一次尝试用自己的数据集构建 TFX 管道。希望能就我编写的转换代码得到一些建议,以便加深理解。感谢您抽出时间,提前致谢。

我已经完成了 ExampleGen、StatisticsGen、SchemaGen、ExampleValidator、Transform,但在 Trainer 组件中出现错误。

错误:

c:\lib\site-packages\tfx\orchestration\launcher\in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
     65         executor_context)  # type: ignore
     66 
---> 67     executor.Do(input_dict, output_dict, exec_properties)

c:\lib\site-packages\tfx\components\trainer\executor.py in Do(self, input_dict, output_dict, exec_properties)
    317 
    318     fn_args = self._GetFnArgs(input_dict, output_dict, exec_properties)
--> 319     trainer_fn = self._GetFn(exec_properties, 'trainer_fn')
    320 
    321     schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

c:\lib\site-packages\tfx\components\trainer\executor.py in _GetFn(self, exec_properties, fn_name)
    128     if has_module_file:
    129       return import_utils.import_func_from_source(
--> 130           exec_properties['module_file'], fn_name)
    131 
    132     fn_path_split = exec_properties[fn_name].split('.')

c:\lib\site-packages\tfx\utils\import_utils.py in import_func_from_source(source_path, fn_name)
     66       user_module = types.ModuleType(loader.name)
     67       loader.exec_module(user_module)
---> 68       return getattr(user_module, fn_name)
     69 
     70   except IOError:

AttributeError: module 'user_module' has no attribute 'trainer_fn'

代码:

def get_model(show_summary=True):
    """Build and compile a binary classifier over the one-hot features.

    One named input is created per feature ("A_xf" ... "H_xf"). The inputs
    are concatenated and passed through two ReLU dense layers (35 and 15
    units) to a single sigmoid output unit.

    Args:
        show_summary: When true, call ``model.summary()`` before returning.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    # Width of each one-hot encoded categorical feature. These must be plain
    # ints: a trailing comma after the value would turn it into a 1-tuple
    # and make the ``shape=(width,)`` arguments below malformed.
    feature_widths = (
        ("A", 4),
        ("B", 3),
        ("C", 2),
        ("D", 8),
        ("E", 12),
        ("F", 4),
        ("G", 16),
        ("H", 26),
    )

    model_inputs = [
        tf.keras.Input(shape=(width,), name=feature + "_xf")
        for feature, width in feature_widths
    ]

    # Join the named inputs into one 75-wide tensor so the dense stack is
    # actually connected to the inputs the model is built from (a separate,
    # unnamed Input(shape=(75,)) would leave the graph disconnected).
    merged = tf.keras.layers.concatenate(model_inputs)

    hidden = tf.keras.layers.Dense(35, activation="relu")(merged)
    hidden = tf.keras.layers.Dense(15, activation="relu")(hidden)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)

    model = tf.keras.Model(inputs=model_inputs, outputs=outputs)

    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    if show_summary:
        model.summary()

    return model

我们遇到了同样的问题,通过从 tf.keras 的顺序(Sequential)模型 API 迁移到函数式(Functional)API 解决了它。关于这几种模型 API 的区别,可以参阅此处(原文链接)。您会注意到,教程中的 wide and deep 模型就是用函数式 API 实现的。

例如,这里是训练器组件的 keras 模型示例:

def _build_functional_test_model():
    """Build and compile a Keras functional-API model for the trainer.

    The single input 'feature_string_xf' goes through DenseFeatures, an
    embedding with spatial dropout, four Conv1D/ReLU/Dropout stages, global
    max pooling, two Dense/ReLU/Dropout stages and a one-unit sigmoid output.

    Returns:
        The ``tf.keras.Model`` compiled with binary cross-entropy loss, the
        module-level ``_OPTIMIZER`` and the accuracy metric.
    """
    feature_name = 'feature_string_xf'

    columns = [
        tf.feature_column.numeric_column(
            feature_name,
            shape=_MAX_REQUEST_LEN,
            dtype=tf.dtypes.int64,
            default_value=0,
        )
    ]
    # The model takes a dict of named inputs keyed by feature name.
    inputs_by_name = {
        feature_name: tf.keras.layers.Input(
            name=feature_name, shape=_MAX_REQUEST_LEN, dtype=tf.int64
        )
    }

    net = tf.keras.layers.DenseFeatures(columns)(inputs_by_name)
    net = tf.keras.layers.Embedding(
        _N_UNIQUE_WORDS, _N_EMBED, input_length=_MAX_REQUEST_LEN
    )(net)
    net = tf.keras.layers.SpatialDropout1D(_DROPOUT_EMBEDDING)(net)

    # (filters, kernel_size) for each convolution stage, in order.
    # NOTE(review): the second stage reuses _N_CONV_1 and every stage uses
    # _DROPOUT_CONV_1 as its dropout rate -- confirm this is intentional.
    conv_stages = (
        (_N_CONV_1, _K_CONV_1),
        (_N_CONV_1, _K_CONV_2),
        (_N_CONV_3, _K_CONV_3),
        (_N_CONV_4, _K_CONV_4),
    )
    for filters, kernel_size in conv_stages:
        net = tf.keras.layers.Conv1D(
            filters,
            kernel_size,
            activation='linear',
            activity_regularizer=tf.keras.regularizers.l1(_REGULARIZE_L1_CONV),
        )(net)
        net = tf.keras.layers.Activation('relu')(net)
        net = tf.keras.layers.Dropout(_DROPOUT_CONV_1)(net)

    net = tf.keras.layers.GlobalMaxPooling1D()(net)

    # The dense stages carry no activity regularizer.
    # NOTE(review): the dropout rate here is _REGULARIZE_L1_DENSE_1, whose
    # name suggests an L1 coefficient -- confirm this is intentional.
    for units in (_N_DENSE_1, _N_DENSE_2):
        net = tf.keras.layers.Dense(units, activation='linear')(net)
        net = tf.keras.layers.Activation('relu')(net)
        net = tf.keras.layers.Dropout(_REGULARIZE_L1_DENSE_1)(net)

    prediction = tf.keras.layers.Dense(1, activation='sigmoid')(net)

    model = tf.keras.Model(inputs_by_name, prediction)
    model.compile(
        loss='binary_crossentropy',
        optimizer=_OPTIMIZER,
        metrics=['accuracy'],
    )
    return model

根据 Jason 的指导和评论,我修改了模型部分,因为 TFX 不支持顺序(Sequential)模型,但支持 Keras 函数式(Functional)API。

def get_model(show_summary=True):
    """Build and compile the functional-API binary classifier.

    Eight named inputs ("A_xf" ... "H_xf"), one per one-hot encoded
    feature, are concatenated and passed through two ReLU dense layers
    (50 and 25 units) to a single sigmoid output unit.

    Args:
        show_summary: When true, call ``model.summary()`` before returning.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    # Widths of the one-hot categorical features. Plain ints, with no
    # trailing commas: ``num_A = 4,`` would bind the tuple ``(4,)``.
    num_A = 4
    num_B = 3
    num_C = 2
    num_D = 8
    num_E = 12
    num_F = 4
    num_G = 16
    num_H = 26

    input_A = tf.keras.Input(shape=(num_A,), name="A_xf")
    input_B = tf.keras.Input(shape=(num_B,), name="B_xf")
    input_C = tf.keras.Input(shape=(num_C,), name="C_xf")
    input_D = tf.keras.Input(shape=(num_D,), name="D_xf")
    input_E = tf.keras.Input(shape=(num_E,), name="E_xf")
    input_F = tf.keras.Input(shape=(num_F,), name="F_xf")
    input_G = tf.keras.Input(shape=(num_G,), name="G_xf")
    input_H = tf.keras.Input(shape=(num_H,), name="H_xf")

    # One list serves both the concatenation and the Model inputs, so the
    # two can never drift apart.
    _inputs = [
        input_A,
        input_B,
        input_C,
        input_D,
        input_E,
        input_F,
        input_G,
        input_H,
    ]

    inputs_con = tf.keras.layers.concatenate(_inputs)

    dense_1 = tf.keras.layers.Dense(50, activation="relu")(inputs_con)
    dense_2 = tf.keras.layers.Dense(25, activation="relu")(dense_1)
    output = tf.keras.layers.Dense(1, activation="sigmoid")(dense_2)

    model = tf.keras.models.Model(_inputs, output)

    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    if show_summary:
        model.summary()

    return model