迭代数据框或 pytable 中的记录的正确语法是什么?
What is the correct syntax for iterating over records in a dataframe or pytable?
import pandas as pd
from pandas import DataFrame
import tables as pytb
# Compare several ways of counting the records whose 'symbol' field is b"foo".
# The file is opened read-only; the `with` statement closes it on exit, so no
# explicit h5file.close() is needed afterwards.
with pytb.open_file('debug_counts.h5', mode='r') as h5file:
    table = h5file.get_node('/tbl_main')
    foo_condition = '(symbol == b"foo")'

    # Row count as reported by the table itself.
    print("number of rows in table =", table.nrows)

    # 1) Walk every row of the table, counting all rows and the rows whose
    #    'symbol' field equals b"foo".
    all_count = 0
    foo_count = 0
    for row in table:
        all_count += 1
        if row['symbol'] == b"foo":
            foo_count += 1
    print("table all records count =", all_count)
    print("table foo records count =", foo_count)

    # 2) Load only the matching rows into a DataFrame.
    #    NOTE: DataFrame.size is the total number of cells (rows * columns),
    #    not the number of records; len(df) gives the record count.
    df = pd.DataFrame.from_records(table.read_where(foo_condition))
    print("dataframe size =", df.size)

    # iterrows() yields one (index, row) pair per record, so this counts rows.
    df_count = 0
    for _index, _row in df.iterrows():
        df_count += 1
    print("dataframe records count =", df_count)

    # 3) Iterate only over the rows that satisfy the condition.
    where_count = 0
    for _record in table.where(foo_condition):
        where_count += 1
    print("table.where records count =", where_count)
输出:
runfile('G:/$HDF5/debug_counts.py', wdir='G:/$HDF5')
number of rows in table = 2826254
table all records count = 2826254
table foo records count = 37920
dataframe size = 985920
dataframe records count = 37920
table.where records count = 37920
较大的数字都是正确的。37920 这个数字不正确,或者至少不是我想要的。我怎样才能得到我期望的输出(985920,而不是 37920)?37920 又是从哪里来的?
37920 是满足条件的记录(行)数;而 DataFrame.size 返回的是元素总数,即行数 × 列数,这里 985920 = 37920 × 26,说明该表应有 26 列。下面是我用来获取行数和数组大小的代码。我无法测试你的例子,如有无意的拼写错误,敬请谅解。
import tables as pytb
# Report how many rows match the condition, and the dtype/shape/size of the
# array holding those rows. The `with` statement closes the file on exit.
with pytb.open_file('debug_counts.h5', mode='r') as h5file:
    table = h5file.get_node('/tbl_main')

    # get_where_list returns the coordinates of the matching rows, so its
    # length is the number of rows that satisfy the condition.
    sym_list = table.get_where_list('(symbol == b"foo")')
    print("table.get_where_list retrieved =", len(sym_list), "rows")

    # read_where loads the matching rows themselves into an array.
    sym_array = table.read_where('(symbol == b"foo")')
    print("table.read_where array dtype=", sym_array.dtype)
    print("table.read_where array shape=", sym_array.shape)
    print("table.read_where array size =", sym_array.size)
import pandas as pd
from pandas import DataFrame
import tables as pytb
# Compare several ways of counting the records whose 'symbol' field is b"foo".
# The file is opened read-only; the `with` statement closes it on exit, so no
# explicit h5file.close() is needed afterwards.
with pytb.open_file('debug_counts.h5', mode='r') as h5file:
    table = h5file.get_node('/tbl_main')
    foo_condition = '(symbol == b"foo")'

    # Row count as reported by the table itself.
    print("number of rows in table =", table.nrows)

    # 1) Walk every row of the table, counting all rows and the rows whose
    #    'symbol' field equals b"foo".
    all_count = 0
    foo_count = 0
    for row in table:
        all_count += 1
        if row['symbol'] == b"foo":
            foo_count += 1
    print("table all records count =", all_count)
    print("table foo records count =", foo_count)

    # 2) Load only the matching rows into a DataFrame.
    #    NOTE: DataFrame.size is the total number of cells (rows * columns),
    #    not the number of records; len(df) gives the record count.
    df = pd.DataFrame.from_records(table.read_where(foo_condition))
    print("dataframe size =", df.size)

    # iterrows() yields one (index, row) pair per record, so this counts rows.
    df_count = 0
    for _index, _row in df.iterrows():
        df_count += 1
    print("dataframe records count =", df_count)

    # 3) Iterate only over the rows that satisfy the condition.
    where_count = 0
    for _record in table.where(foo_condition):
        where_count += 1
    print("table.where records count =", where_count)
输出:
runfile('G:/$HDF5/debug_counts.py', wdir='G:/$HDF5')
number of rows in table = 2826254
table all records count = 2826254
table foo records count = 37920
dataframe size = 985920
dataframe records count = 37920
table.where records count = 37920
较大的数字都是正确的。37920 这个数字不正确,或者至少不是我想要的。我怎样才能得到我期望的输出(985920,而不是 37920)?37920 又是从哪里来的?
37920 是满足条件的记录(行)数;而 DataFrame.size 返回的是元素总数,即行数 × 列数,这里 985920 = 37920 × 26,说明该表应有 26 列。下面是我用来获取行数和数组大小的代码。我无法测试你的例子,如有无意的拼写错误,敬请谅解。
import tables as pytb
# Report how many rows match the condition, and the dtype/shape/size of the
# array holding those rows. The `with` statement closes the file on exit.
with pytb.open_file('debug_counts.h5', mode='r') as h5file:
    table = h5file.get_node('/tbl_main')

    # get_where_list returns the coordinates of the matching rows, so its
    # length is the number of rows that satisfy the condition.
    sym_list = table.get_where_list('(symbol == b"foo")')
    print("table.get_where_list retrieved =", len(sym_list), "rows")

    # read_where loads the matching rows themselves into an array.
    sym_array = table.read_where('(symbol == b"foo")')
    print("table.read_where array dtype=", sym_array.dtype)
    print("table.read_where array shape=", sym_array.shape)
    print("table.read_where array size =", sym_array.size)