将 Numpy 文件转换为 TFRecord,其中每行包含一个数字和一个可变长度列表
Converting a Numpy file to TFRecord where each row contains a number, and a variable length list
这是对这两个 SO 问题的跟进
第一个问题提到,TFRecord 可以使用 tf.VarLenFeature()
来处理变长数据。
不过,我仍然无法弄清楚如何将我的 Numpy 数组转换为 TFRecord 文件。下面是前 15 行的样子:
print(my_data[0:15])
[[1446549
list([491827, 30085, 1417541, 799563, 879302, 1997973, 1373049, 1460602, 2240973, 1172992, 1186011, 147536, 1958456, 3095889, 319954, 2191582, 1113354, 302626, 1985611, 1186704, 2231212, 2642148, 386962, 3072993, 1131255, 15085, 2714264, 1363205])]
[406529 list([900479, 660976, 1270383, 1287181])]
[1350274
list([207721, 676951, 1311781, 2712019, 1719660, 2969693, 37187, 2284531, 1253304, 1274866, 2815382, 1513583, 1339084, 1624616, 2967307, 1702118, 585261, 426595, 1444507, 1982792])]
[1163243
list([324383, 81509, 322474, 406941, 768416, 109067, 173425, 1478467, 573723, 1009159, 313463, 313924, 627680, 1072293, 1025620, 2325337, 2457705, 1505115, 2812547, 922812, 2152425, 2524196, 182325, 2912690, 1388620, 1484514, 1481728, 2616639, 2180765, 1544586, 1987272, 1557441, 453182, 892217, 1462085, 1892770, 1646735, 2521186, 2814552, 2983691, 3037096, 832554, 2807250, 2253333, 2595688, 2650475, 2525317, 2716592, 2573244, 2666514, 256757, 1135836, 1856208, 2605537, 1851963, 2381938, 1716883, 773842, 1877852, 2504806, 2208699, 1076111, 3058991, 3024546, 2010887, 2630915])]
[2491621 list([877803, 546802, 2855232, 2950610, 1378514, 285536])]
[2465968
list([626040, 1151291, 560715, 1153787, 893941, 3094902, 1239392, 2081948, 1677321, 1193880, 2326117, 2805797, 1715983, 1213177, 1476995, 2620772, 1242804, 2942330, 588938, 2338375, 2805378, 169015, 1766962, 562485, 1210404, 772334, 415148, 1293624, 527245, 587088, 665484, 449673, 315509])]
[886255
list([694445, 796232, 1151072, 2312348, 1773175, 1898319, 1696093, 91310, 719379, 2080422, 1352695, 2364846, 845154, 2476191, 537059, 1216854, 1529449, 284855, 1215830, 3041789, 1625939])]
[451113
list([2805707, 2727647, 742706, 1727139, 2585759, 822759, 1099617])]
[1529600
list([1755946, 2110553, 1056110, 426876, 2448684, 396996, 1498300, 756831, 2181288, 1159493])]
[596550 list([1610895, 2579387, 3081786, 2000733, 2142308])]
[1631548
list([1576412, 849908, 2705650, 2291675, 751733, 1911747, 1496204])]
[3001784 list([327334, 1197547, 2515733])]
[308747
list([8344, 80684, 996504, 2250076, 1905654, 863587, 2235560, 2676079, 1826, 685487, 1481871, 588465, 1126662, 2458841, 2481927])]
[731288 list([2793620, 1115724, 1406934])]
[1523219
list([12825, 1128776, 1761080, 1486798, 2689369, 1040645, 3012606])]]
上面的输出读起来可能有点费劲,下面单独列出其中较短的一行作为示例:
[406529 list([900479, 660976, 1270383, 1287181])]
每一行都包含一个数字和一个数字列表,而各行列表的长度并不相同。
我不知道该如何把它转换为 TFRecord 文件。任何帮助或提示都将不胜感激。
我假设你想把数字和列表分别作为两个独立的特征(feature)写入。
import tensorflow as tf
# Serialize every row of my_data as one tf.train.Example and write it to
# test.tfrecords. Each row is read as row[0] (a single number) and row[1]
# (a variable-length list of numbers).
# The `with` block closes the writer even if building a record raises.
# NOTE(review): on TensorFlow 2.x this class is exposed as
# tf.io.TFRecordWriter -- confirm against the installed version.
with tf.python_io.TFRecordWriter('test.tfrecords') as writer:
    for row in my_data:
        example = tf.train.Example(features=tf.train.Features(feature={
            # The scalar is wrapped in a one-element list because an
            # Int64List value is always a sequence.
            'num_value': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[row[0]])),
            # The list is stored as-is; its length may differ per record.
            'list_value': tf.train.Feature(
                int64_list=tf.train.Int64List(value=row[1])),
        }))
        writer.write(example.SerializeToString())
# Read data back from the TFRecord file: decode and print the first two
# records to check that they round-trip.
# NOTE(review): tf.python_io.tf_record_iterator is the TF 1.x name; on
# TensorFlow 2.x it lives under tf.compat.v1.io -- confirm before upgrading.
record_iterator = tf.python_io.tf_record_iterator('test.tfrecords')
# zip() with range(2) stops after two records, and also stops cleanly
# (instead of raising StopIteration) when the file holds fewer than two.
for _, serialized_example in zip(range(2), record_iterator):
    example = tf.train.Example()
    example.ParseFromString(serialized_example)
    # 'num_value' was written as a one-element list, so take element 0.
    num_value = example.features.feature['num_value'].int64_list.value[0]
    # 'list_value' keeps the full variable-length sequence.
    list_value = example.features.feature['list_value'].int64_list.value
    print(num_value, list_value)
#print
1446549 [491827, 30085, 1417541, 799563, 879302, 1997973, 1373049, 1460602, 2240973, 1172992, 1186011, 147536, 1958456, 3095889, 319954, 2191582, 1113354, 302626, 1985611, 1186704, 2231212, 2642148, 386962, 3072993, 1131255, 15085, 2714264, 1363205]
406529 [900479, 660976, 1270383, 1287181]
这是对这两个 SO 问题的跟进
第一个问题提到,TFRecord 可以使用 tf.VarLenFeature() 来处理变长数据。
不过,我仍然无法弄清楚如何将我的 Numpy 数组转换为 TFRecord 文件。下面是前 15 行的样子:
print(my_data[0:15])
[[1446549
list([491827, 30085, 1417541, 799563, 879302, 1997973, 1373049, 1460602, 2240973, 1172992, 1186011, 147536, 1958456, 3095889, 319954, 2191582, 1113354, 302626, 1985611, 1186704, 2231212, 2642148, 386962, 3072993, 1131255, 15085, 2714264, 1363205])]
[406529 list([900479, 660976, 1270383, 1287181])]
[1350274
list([207721, 676951, 1311781, 2712019, 1719660, 2969693, 37187, 2284531, 1253304, 1274866, 2815382, 1513583, 1339084, 1624616, 2967307, 1702118, 585261, 426595, 1444507, 1982792])]
[1163243
list([324383, 81509, 322474, 406941, 768416, 109067, 173425, 1478467, 573723, 1009159, 313463, 313924, 627680, 1072293, 1025620, 2325337, 2457705, 1505115, 2812547, 922812, 2152425, 2524196, 182325, 2912690, 1388620, 1484514, 1481728, 2616639, 2180765, 1544586, 1987272, 1557441, 453182, 892217, 1462085, 1892770, 1646735, 2521186, 2814552, 2983691, 3037096, 832554, 2807250, 2253333, 2595688, 2650475, 2525317, 2716592, 2573244, 2666514, 256757, 1135836, 1856208, 2605537, 1851963, 2381938, 1716883, 773842, 1877852, 2504806, 2208699, 1076111, 3058991, 3024546, 2010887, 2630915])]
[2491621 list([877803, 546802, 2855232, 2950610, 1378514, 285536])]
[2465968
list([626040, 1151291, 560715, 1153787, 893941, 3094902, 1239392, 2081948, 1677321, 1193880, 2326117, 2805797, 1715983, 1213177, 1476995, 2620772, 1242804, 2942330, 588938, 2338375, 2805378, 169015, 1766962, 562485, 1210404, 772334, 415148, 1293624, 527245, 587088, 665484, 449673, 315509])]
[886255
list([694445, 796232, 1151072, 2312348, 1773175, 1898319, 1696093, 91310, 719379, 2080422, 1352695, 2364846, 845154, 2476191, 537059, 1216854, 1529449, 284855, 1215830, 3041789, 1625939])]
[451113
list([2805707, 2727647, 742706, 1727139, 2585759, 822759, 1099617])]
[1529600
list([1755946, 2110553, 1056110, 426876, 2448684, 396996, 1498300, 756831, 2181288, 1159493])]
[596550 list([1610895, 2579387, 3081786, 2000733, 2142308])]
[1631548
list([1576412, 849908, 2705650, 2291675, 751733, 1911747, 1496204])]
[3001784 list([327334, 1197547, 2515733])]
[308747
list([8344, 80684, 996504, 2250076, 1905654, 863587, 2235560, 2676079, 1826, 685487, 1481871, 588465, 1126662, 2458841, 2481927])]
[731288 list([2793620, 1115724, 1406934])]
[1523219
list([12825, 1128776, 1761080, 1486798, 2689369, 1040645, 3012606])]]
上面的输出读起来可能有点费劲,下面单独列出其中较短的一行作为示例:
[406529 list([900479, 660976, 1270383, 1287181])]
每一行都包含一个数字和一个数字列表,而各行列表的长度并不相同。
我不知道该如何把它转换为 TFRecord 文件。任何帮助或提示都将不胜感激。
我假设你想把数字和列表分别作为两个独立的特征(feature)写入。
import tensorflow as tf
# Serialize every row of my_data as one tf.train.Example and write it to
# test.tfrecords. Each row is read as row[0] (a single number) and row[1]
# (a variable-length list of numbers).
# The `with` block closes the writer even if building a record raises.
# NOTE(review): on TensorFlow 2.x this class is exposed as
# tf.io.TFRecordWriter -- confirm against the installed version.
with tf.python_io.TFRecordWriter('test.tfrecords') as writer:
    for row in my_data:
        example = tf.train.Example(features=tf.train.Features(feature={
            # The scalar is wrapped in a one-element list because an
            # Int64List value is always a sequence.
            'num_value': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[row[0]])),
            # The list is stored as-is; its length may differ per record.
            'list_value': tf.train.Feature(
                int64_list=tf.train.Int64List(value=row[1])),
        }))
        writer.write(example.SerializeToString())
# Read data back from the TFRecord file: decode and print the first two
# records to check that they round-trip.
# NOTE(review): tf.python_io.tf_record_iterator is the TF 1.x name; on
# TensorFlow 2.x it lives under tf.compat.v1.io -- confirm before upgrading.
record_iterator = tf.python_io.tf_record_iterator('test.tfrecords')
# zip() with range(2) stops after two records, and also stops cleanly
# (instead of raising StopIteration) when the file holds fewer than two.
for _, serialized_example in zip(range(2), record_iterator):
    example = tf.train.Example()
    example.ParseFromString(serialized_example)
    # 'num_value' was written as a one-element list, so take element 0.
    num_value = example.features.feature['num_value'].int64_list.value[0]
    # 'list_value' keeps the full variable-length sequence.
    list_value = example.features.feature['list_value'].int64_list.value
    print(num_value, list_value)
#print
1446549 [491827, 30085, 1417541, 799563, 879302, 1997973, 1373049, 1460602, 2240973, 1172992, 1186011, 147536, 1958456, 3095889, 319954, 2191582, 1113354, 302626, 1985611, 1186704, 2231212, 2642148, 386962, 3072993, 1131255, 15085, 2714264, 1363205]
406529 [900479, 660976, 1270383, 1287181]