合并 TensorFlow 数据集的批次

Merging tensorflow dataset batches

请考虑以下代码:

import tensorflow as tf
import numpy as np
 
# Toy inputs for the first dataset: five time steps, row i is [i, i, i].
simple_features = np.array([[step] * 3 for step in range(1, 6)])

# Matching targets for the first dataset: row i is [-i, -i].
simple_labels = np.array([[-step] * 2 for step in range(1, 6)])

# Second dataset: same number of time steps, hand-picked values.
simple_features1 = np.array(
    [[1, 4, 1], [2, 2, 2], [3, 3, 3], [6, 4, 4], [5, 4, 5]]
)

simple_labels1 = np.array(
    [[8, -7], [-2, -2], [-3, 7], [-4, 9], [-5, -5]]
)

def print_dataset(ds):
    """Print each (features, labels) batch yielded by ``ds``.

    Every element of ``ds`` must unpack into two objects that expose a
    ``.numpy()`` method. Each batch is printed as a header line, a
    ``Feature:`` line, a ``Label:`` line and a trailing blank line.
    """
    for features, labels in ds:
        print("---Batch---")
        # Convert and print one tensor at a time, features first.
        for tag, tensor in (("Feature:", features), ("Label:", labels)):
            print(tag, tensor.numpy())
        print("")
        
# Window each (features, labels) pair with sequence_length=4 and batch_size=1,
# then print every batch of the resulting dataset.
ds1 = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=simple_features,
    targets=simple_labels,
    sequence_length=4,
    batch_size=1,
)
print_dataset(ds1)

ds2 = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=simple_features1,
    targets=simple_labels1,
    sequence_length=4,
    batch_size=1,
)
print_dataset(ds2)

以上代码会创建特征(features)和标签(labels)。我想按以下方式合并两个数据集中相互对应的批次。例如,ds1 的第一个批次显示如下:

---Batch---
Feature: [[[1 1 1]
  [2 2 2]
  [3 3 3]
  [4 4 4]]]
Label: [[-1 -1]]

……而 ds2 的第一个批次是这样的:

---Batch---
Feature: [[[1 4 1]
  [2 2 2]
  [3 3 3]
  [6 4 4]]]
Label: [[ 8 -7]]

ds1 的第一个批次和 ds2 的第一个批次应当合并在一起,得到如下输出:

---Batch---
Feature: [[[1 1 1 1 4 1]
  [2 2 2 2 2 2]
  [3 3 3 3 3 3]
  [4 4 4 6 4 4]]]
Label: [[-1 -1 8 -7]]

您可以先用 tf.data.Dataset.zip 将两个数据集按批次配对,再在 map 中用 tf.concat 沿最后一个轴拼接对应的特征和标签:

import tensorflow as tf
import numpy as np
 
# First dataset: five time steps; row n of the features is [n, n, n]
# and row n of the labels is [-n, -n].
simple_features = np.array([[n, n, n] for n in range(1, 6)])
simple_labels = np.array([[-n, -n] for n in range(1, 6)])

# Second dataset: five time steps with hand-picked values.
simple_features1 = np.array(
    [[1, 4, 1], [2, 2, 2], [3, 3, 3], [6, 4, 4], [5, 4, 5]]
)
simple_labels1 = np.array(
    [[8, -7], [-2, -2], [-3, 7], [-4, 9], [-5, -5]]
)

def print_dataset(ds):
    """Dump every batch of ``ds`` to standard output.

    ``ds`` is iterated once; each element must unpack into a pair of
    objects providing ``.numpy()``. The first is printed after
    ``Feature:``, the second after ``Label:``, and a blank line follows
    each batch.
    """
    for batch in ds:
        features, labels = batch
        print("---Batch---")
        print("Feature:", features.numpy())
        print("Label:", labels.numpy())
        print("")
        
# Build one windowed dataset per (features, labels) pair, using the same
# sequence_length and batch_size for both so their batches can be paired.
ds1 = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=simple_features,
    targets=simple_labels,
    sequence_length=4,
    batch_size=1,
)
ds2 = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=simple_features1,
    targets=simple_labels1,
    sequence_length=4,
    batch_size=1,
)

def merge(data1, data2):
    """Join two (features, labels) pairs along their last axis.

    Args:
        data1: A two-element (features, labels) pair.
        data2: A second two-element (features, labels) pair.

    Returns:
        A tuple ``(features, labels)`` where each entry is the
        ``tf.concat`` of the corresponding entries of ``data1`` and
        ``data2`` with ``axis=-1``.
    """
    features_a, labels_a = data1
    features_b, labels_b = data2
    merged_features = tf.concat([features_a, features_b], axis=-1)
    merged_labels = tf.concat([labels_a, labels_b], axis=-1)
    return merged_features, merged_labels

# Pair up the elements of ds1 and ds2, merge each pair, then print the result.
dataset = (
    tf.data.Dataset
    .zip((ds1, ds2))
    .map(merge)
)
print_dataset(dataset)
---Batch---
Feature: [[[1 1 1 1 4 1]
  [2 2 2 2 2 2]
  [3 3 3 3 3 3]
  [4 4 4 6 4 4]]]
Label: [[-1 -1  8 -7]]

---Batch---
Feature: [[[2 2 2 2 2 2]
  [3 3 3 3 3 3]
  [4 4 4 6 4 4]
  [5 5 5 5 4 5]]]
Label: [[-2 -2 -2 -2]]