Python 嵌套循环中缺少数据
Missing data in Python nested loop
我正在处理一个多维数据数组,其中包含每个个体的各种数据点。我编写了一个嵌套循环,用来对整个数据集逐个体计算指标,但在重新整理结果之后,数据点出现了丢失:最初有 253 个个体,最终只得到 182 个个体的计算指标。代码可以正常运行,但我不知道数据是在哪一步丢失的。
data_array -- containing 253 individuals, each with several subcategories
# NOTE(review): indentation was lost in this listing, so the loop nesting
# shown here cannot be relied on -- confirm against the original script.
#
# Accumulators filled by the loops below and later zipped into DataFrame
# rows: ids, time deltas (seconds) and x/y/z position deltas.
mos0_ids=[]
mos0_dt = []
mos0_x_dpos = []
mos0_y_dpos = []
mos0_z_dpos = []
# NOTE(review): range(0,252) yields the 252 indices 0..251, so if data_array
# really holds 253 individuals the last one is never visited.
for i in range (0,252):
mos0=data_array[i]
# The id is read from row 0, column 0; columns 1, 2, 3, 4 and 6 are pulled
# out as time, x, y, z and speed.
mos0_id= mos0[0][0]
mos0_time=mos0[:,1]
mos0_x_pos=mos0[:,2]
mos0_y_pos=mos0[:,3]
mos0_z_pos=mos0[:,4]
# mos0_speed is assigned but not used anywhere in this snippet.
mos0_speed=mos0[:,6]
# NOTE(review): len(mos0_id) is the length of the id value itself, not the
# number of samples, so the id is appended that many times per individual
# no matter how many rows it has. Presumably range(len(mos0_time)) was
# intended -- confirm.
for j in range(0,len(mos0_id)):
mos0_ids.append(mos0_id)
# Time delta in seconds between sample k and sample k-1.
# NOTE(review): at k == 0 the index k-1 is -1, which reads the LAST sample,
# so the first delta compares the first timestamp with the last one.
for k in range(0,len(mos0_time)):
first_mov_time=mos0_time[k]
last_mov_time=mos0_time[k-1]
first_movement = dt.datetime.strptime(first_mov_time, '%Y-%m-%d %H:%M:%S.%f')
last_movement = dt.datetime.strptime(last_mov_time, '%Y-%m-%d %H:%M:%S.%f')
x = first_movement - last_movement
total_seconds = x.total_seconds()
mos0_dt.append(total_seconds)
# Consecutive differences of the x positions (same l-1 == -1 wrap-around
# on the first iteration as in the time loop).
for l in range(0,len(mos0_x_pos)):
first_mov_pos=mos0_x_pos[l]
last_mov_pos=mos0_x_pos[l-1]
x = first_mov_pos - last_mov_pos
mos0_x_dpos.append(x)
# Consecutive differences of the y positions.
for m in range(0,len(mos0_y_pos)):
first_mov_pos=mos0_y_pos[m]
last_mov_pos=mos0_y_pos[m-1]
x = first_mov_pos - last_mov_pos
mos0_y_dpos.append(x)
# Consecutive differences of the z positions.
for n in range(0,len(mos0_z_pos)):
first_mov_pos=mos0_z_pos[n]
last_mov_pos=mos0_z_pos[n-1]
x = first_mov_pos - last_mov_pos
mos0_z_dpos.append(x)
# Bare expression statements: they have no effect when run as a script.
mos0_ids
mos0_dt
mos0_x_dpos
mos0_y_dpos
mos0_z_dpos
# NOTE(review): zip() stops at the shortest input, so if mos0_ids and the
# delta lists differ in length the surplus entries are silently dropped and
# ids no longer line up with the rows they belong to.
time_pos=list(zip(mos0_ids, mos0_dt, mos0_x_dpos, mos0_y_dpos, mos0_z_dpos))
time_pos=pd.DataFrame(time_pos,columns=['mos_id','dtime', 'x_position', 'y_position','z_position']) # transform into a dataframe
# Per-axis velocity: position delta divided by time delta.
time_pos['x_velocity'] = time_pos['x_position']/time_pos['dtime']
time_pos['y_velocity'] = time_pos['y_position']/time_pos['dtime']
time_pos['z_velocity'] = time_pos['z_position']/time_pos['dtime']
# NOTE(review): "acceleration" here is velocity divided by dtime, not the
# change in velocity between consecutive samples.
time_pos['x_acceleration'] = time_pos['x_velocity']/time_pos['dtime']
time_pos['y_acceleration'] = time_pos['y_velocity']/time_pos['dtime']
time_pos['z_acceleration'] = time_pos['z_velocity']/time_pos['dtime']
# time_pos is rebound from the DataFrame to its GroupBy object keyed on
# 'mos_id', then to an object-dtype NumPy array built from that GroupBy.
time_pos=time_pos.groupby('mos_id')
time_pos = np.array(time_pos, dtype=object)
time_pos
编辑:
我重新安排了包含 for i in range (0,253)
和缩进的代码,如下所示:
# NOTE(review): indentation was lost in this listing, so the loop nesting
# shown here cannot be relied on -- confirm against the original script.
# NOTE(review): the accumulator lists (mos0_ids, mos0_dt, ...) are not
# created in this snippet; if they are not reset before running it, new
# values are appended after whatever they already contain.
# NOTE(review): the hard-coded 253 assumes swarm_data_array has at least 253
# entries; range(len(swarm_data_array)) would not depend on that count.
for i in range (0,253):
mos0=swarm_data_array[i]
# The id is read from row 0, column 0; columns 1, 2, 3, 4 and 6 are pulled
# out as time, x, y, z and speed.
mos0_id= mos0[0][0]
mos0_time=mos0[:,1]
mos0_x_pos=mos0[:,2]
mos0_y_pos=mos0[:,3]
mos0_z_pos=mos0[:,4]
# mos0_speed is assigned but not used anywhere in this snippet.
mos0_speed=mos0[:,6]
# NOTE(review): len(mos0_id) is the length of the id value itself, not the
# number of samples, so every individual contributes the same number of id
# entries regardless of how many rows it has. Presumably
# range(len(mos0_time)) was intended -- confirm.
for j in range(len(mos0_id)):
mos0_ids.append(mos0_id)
# Time delta in seconds between sample k and sample k-1.
# NOTE(review): at k == 0 the index k-1 is -1, which reads the LAST sample,
# so the first delta compares the first timestamp with the last one.
for k in range(len(mos0_time)):
first_mov_time=mos0_time[k]
last_mov_time=mos0_time[k-1]
first_movement = dt.datetime.strptime(first_mov_time, '%Y-%m-%d %H:%M:%S.%f')
last_movement = dt.datetime.strptime(last_mov_time, '%Y-%m-%d %H:%M:%S.%f')
x = first_movement - last_movement
total_seconds = x.total_seconds()
mos0_dt.append(total_seconds)
# Consecutive differences of the x positions (same l-1 == -1 wrap-around
# on the first iteration as in the time loop).
for l in range(len(mos0_x_pos)):
first_mov_pos=mos0_x_pos[l]
last_mov_pos=mos0_x_pos[l-1]
x = first_mov_pos - last_mov_pos
mos0_x_dpos.append(x)
# Consecutive differences of the y positions.
for m in range(len(mos0_y_pos)):
first_mov_pos=mos0_y_pos[m]
last_mov_pos=mos0_y_pos[m-1]
x = first_mov_pos - last_mov_pos
mos0_y_dpos.append(x)
# Consecutive differences of the z positions.
for n in range(len(mos0_z_pos)):
first_mov_pos=mos0_z_pos[n]
last_mov_pos=mos0_z_pos[n-1]
x = first_mov_pos - last_mov_pos
mos0_z_dpos.append(x)
# Bare expression statements: they have no effect when run as a script.
mos0_ids
mos0_dt
mos0_x_dpos
mos0_y_dpos
mos0_z_dpos
# NOTE(review): zip() stops at the shortest input and pairs entries purely
# by position, so if mos0_ids has a different number of entries per
# individual than the delta lists, ids are attached to the wrong rows and
# the surplus entries are silently dropped.
time_pos=list(zip(mos0_ids, mos0_dt, mos0_x_dpos, mos0_y_dpos, mos0_z_dpos))
time_pos=pd.DataFrame(time_pos,columns=['mos_id','dtime', 'x_position', 'y_position','z_position']) # transform into a dataframe
# Per-axis velocity: position delta divided by time delta.
time_pos['x_velocity'] = time_pos['x_position']/time_pos['dtime']
time_pos['y_velocity'] = time_pos['y_position']/time_pos['dtime']
time_pos['z_velocity'] = time_pos['z_position']/time_pos['dtime']
# NOTE(review): "acceleration" here is velocity divided by dtime, not the
# change in velocity between consecutive samples.
time_pos['x_acceleration'] = time_pos['x_velocity']/time_pos['dtime']
time_pos['y_acceleration'] = time_pos['y_velocity']/time_pos['dtime']
time_pos['z_acceleration'] = time_pos['z_velocity']/time_pos['dtime']
# time_pos is rebound from the DataFrame to its GroupBy object keyed on
# 'mos_id'.
time_pos=time_pos.groupby('mos_id')
现在的问题是,在我用 GroupBy 对数据分组并调用 .describe() 之后,每个组的计数(count)都恒为 26,这是不正确的,因为有些组的数据点本应比其他组多。这会不会是嵌套循环中某个部分的错误造成的?
您可能错过了 range() 的一个“特定”行为。
您的第一个循环 range(0,252) 只会产生 252 个值(0 到 251),而不是 253 个。
在控制台中试试这个:
len(range(0,252))
-> 252
由于数据是嵌套数组(矩阵),每个个体的每一行/列都要参与多次计算,因此我推测少遍历一个个体就会丢失大量数据。
解决方案:
for i in range(0, 253):
或 for i in range(len(data_array)):(不要写成 len(data_array) + 1,否则最后一次迭代会索引越界并引发 IndexError)
我假设您提供的所有 for
循环都发生了同样的情况
我正在处理一个多维数据数组,其中包含每个个体的各种数据点。我编写了一个嵌套循环,用来对整个数据集逐个体计算指标,但在重新整理结果之后,数据点出现了丢失:最初有 253 个个体,最终只得到 182 个个体的计算指标。代码可以正常运行,但我不知道数据是在哪一步丢失的。
data_array -- containing 253 individuals, each with several subcategories
# NOTE(review): indentation was lost in this listing, so the loop nesting
# shown here cannot be relied on -- confirm against the original script.
#
# Accumulators filled by the loops below and later zipped into DataFrame
# rows: ids, time deltas (seconds) and x/y/z position deltas.
mos0_ids=[]
mos0_dt = []
mos0_x_dpos = []
mos0_y_dpos = []
mos0_z_dpos = []
# NOTE(review): range(0,252) yields the 252 indices 0..251, so if data_array
# really holds 253 individuals the last one is never visited.
for i in range (0,252):
mos0=data_array[i]
# The id is read from row 0, column 0; columns 1, 2, 3, 4 and 6 are pulled
# out as time, x, y, z and speed.
mos0_id= mos0[0][0]
mos0_time=mos0[:,1]
mos0_x_pos=mos0[:,2]
mos0_y_pos=mos0[:,3]
mos0_z_pos=mos0[:,4]
# mos0_speed is assigned but not used anywhere in this snippet.
mos0_speed=mos0[:,6]
# NOTE(review): len(mos0_id) is the length of the id value itself, not the
# number of samples, so the id is appended that many times per individual
# no matter how many rows it has. Presumably range(len(mos0_time)) was
# intended -- confirm.
for j in range(0,len(mos0_id)):
mos0_ids.append(mos0_id)
# Time delta in seconds between sample k and sample k-1.
# NOTE(review): at k == 0 the index k-1 is -1, which reads the LAST sample,
# so the first delta compares the first timestamp with the last one.
for k in range(0,len(mos0_time)):
first_mov_time=mos0_time[k]
last_mov_time=mos0_time[k-1]
first_movement = dt.datetime.strptime(first_mov_time, '%Y-%m-%d %H:%M:%S.%f')
last_movement = dt.datetime.strptime(last_mov_time, '%Y-%m-%d %H:%M:%S.%f')
x = first_movement - last_movement
total_seconds = x.total_seconds()
mos0_dt.append(total_seconds)
# Consecutive differences of the x positions (same l-1 == -1 wrap-around
# on the first iteration as in the time loop).
for l in range(0,len(mos0_x_pos)):
first_mov_pos=mos0_x_pos[l]
last_mov_pos=mos0_x_pos[l-1]
x = first_mov_pos - last_mov_pos
mos0_x_dpos.append(x)
# Consecutive differences of the y positions.
for m in range(0,len(mos0_y_pos)):
first_mov_pos=mos0_y_pos[m]
last_mov_pos=mos0_y_pos[m-1]
x = first_mov_pos - last_mov_pos
mos0_y_dpos.append(x)
# Consecutive differences of the z positions.
for n in range(0,len(mos0_z_pos)):
first_mov_pos=mos0_z_pos[n]
last_mov_pos=mos0_z_pos[n-1]
x = first_mov_pos - last_mov_pos
mos0_z_dpos.append(x)
# Bare expression statements: they have no effect when run as a script.
mos0_ids
mos0_dt
mos0_x_dpos
mos0_y_dpos
mos0_z_dpos
# NOTE(review): zip() stops at the shortest input, so if mos0_ids and the
# delta lists differ in length the surplus entries are silently dropped and
# ids no longer line up with the rows they belong to.
time_pos=list(zip(mos0_ids, mos0_dt, mos0_x_dpos, mos0_y_dpos, mos0_z_dpos))
time_pos=pd.DataFrame(time_pos,columns=['mos_id','dtime', 'x_position', 'y_position','z_position']) # transform into a dataframe
# Per-axis velocity: position delta divided by time delta.
time_pos['x_velocity'] = time_pos['x_position']/time_pos['dtime']
time_pos['y_velocity'] = time_pos['y_position']/time_pos['dtime']
time_pos['z_velocity'] = time_pos['z_position']/time_pos['dtime']
# NOTE(review): "acceleration" here is velocity divided by dtime, not the
# change in velocity between consecutive samples.
time_pos['x_acceleration'] = time_pos['x_velocity']/time_pos['dtime']
time_pos['y_acceleration'] = time_pos['y_velocity']/time_pos['dtime']
time_pos['z_acceleration'] = time_pos['z_velocity']/time_pos['dtime']
# time_pos is rebound from the DataFrame to its GroupBy object keyed on
# 'mos_id', then to an object-dtype NumPy array built from that GroupBy.
time_pos=time_pos.groupby('mos_id')
time_pos = np.array(time_pos, dtype=object)
time_pos
编辑:
我重新安排了包含 for i in range (0,253)
和缩进的代码,如下所示:
# NOTE(review): indentation was lost in this listing, so the loop nesting
# shown here cannot be relied on -- confirm against the original script.
# NOTE(review): the accumulator lists (mos0_ids, mos0_dt, ...) are not
# created in this snippet; if they are not reset before running it, new
# values are appended after whatever they already contain.
# NOTE(review): the hard-coded 253 assumes swarm_data_array has at least 253
# entries; range(len(swarm_data_array)) would not depend on that count.
for i in range (0,253):
mos0=swarm_data_array[i]
# The id is read from row 0, column 0; columns 1, 2, 3, 4 and 6 are pulled
# out as time, x, y, z and speed.
mos0_id= mos0[0][0]
mos0_time=mos0[:,1]
mos0_x_pos=mos0[:,2]
mos0_y_pos=mos0[:,3]
mos0_z_pos=mos0[:,4]
# mos0_speed is assigned but not used anywhere in this snippet.
mos0_speed=mos0[:,6]
# NOTE(review): len(mos0_id) is the length of the id value itself, not the
# number of samples, so every individual contributes the same number of id
# entries regardless of how many rows it has. Presumably
# range(len(mos0_time)) was intended -- confirm.
for j in range(len(mos0_id)):
mos0_ids.append(mos0_id)
# Time delta in seconds between sample k and sample k-1.
# NOTE(review): at k == 0 the index k-1 is -1, which reads the LAST sample,
# so the first delta compares the first timestamp with the last one.
for k in range(len(mos0_time)):
first_mov_time=mos0_time[k]
last_mov_time=mos0_time[k-1]
first_movement = dt.datetime.strptime(first_mov_time, '%Y-%m-%d %H:%M:%S.%f')
last_movement = dt.datetime.strptime(last_mov_time, '%Y-%m-%d %H:%M:%S.%f')
x = first_movement - last_movement
total_seconds = x.total_seconds()
mos0_dt.append(total_seconds)
# Consecutive differences of the x positions (same l-1 == -1 wrap-around
# on the first iteration as in the time loop).
for l in range(len(mos0_x_pos)):
first_mov_pos=mos0_x_pos[l]
last_mov_pos=mos0_x_pos[l-1]
x = first_mov_pos - last_mov_pos
mos0_x_dpos.append(x)
# Consecutive differences of the y positions.
for m in range(len(mos0_y_pos)):
first_mov_pos=mos0_y_pos[m]
last_mov_pos=mos0_y_pos[m-1]
x = first_mov_pos - last_mov_pos
mos0_y_dpos.append(x)
# Consecutive differences of the z positions.
for n in range(len(mos0_z_pos)):
first_mov_pos=mos0_z_pos[n]
last_mov_pos=mos0_z_pos[n-1]
x = first_mov_pos - last_mov_pos
mos0_z_dpos.append(x)
# Bare expression statements: they have no effect when run as a script.
mos0_ids
mos0_dt
mos0_x_dpos
mos0_y_dpos
mos0_z_dpos
# NOTE(review): zip() stops at the shortest input and pairs entries purely
# by position, so if mos0_ids has a different number of entries per
# individual than the delta lists, ids are attached to the wrong rows and
# the surplus entries are silently dropped.
time_pos=list(zip(mos0_ids, mos0_dt, mos0_x_dpos, mos0_y_dpos, mos0_z_dpos))
time_pos=pd.DataFrame(time_pos,columns=['mos_id','dtime', 'x_position', 'y_position','z_position']) # transform into a dataframe
# Per-axis velocity: position delta divided by time delta.
time_pos['x_velocity'] = time_pos['x_position']/time_pos['dtime']
time_pos['y_velocity'] = time_pos['y_position']/time_pos['dtime']
time_pos['z_velocity'] = time_pos['z_position']/time_pos['dtime']
# NOTE(review): "acceleration" here is velocity divided by dtime, not the
# change in velocity between consecutive samples.
time_pos['x_acceleration'] = time_pos['x_velocity']/time_pos['dtime']
time_pos['y_acceleration'] = time_pos['y_velocity']/time_pos['dtime']
time_pos['z_acceleration'] = time_pos['z_velocity']/time_pos['dtime']
# time_pos is rebound from the DataFrame to its GroupBy object keyed on
# 'mos_id'.
time_pos=time_pos.groupby('mos_id')
现在的问题是,在我用 GroupBy 对数据分组并调用 .describe() 之后,每个组的计数(count)都恒为 26,这是不正确的,因为有些组的数据点本应比其他组多。这会不会是嵌套循环中某个部分的错误造成的?
您可能错过了 range() 的一个“特定”行为。
您的第一个循环 range(0,252) 只会产生 252 个值(0 到 251),而不是 253 个。
在控制台中试试这个:
len(range(0,252))
-> 252
由于数据是嵌套数组(矩阵),每个个体的每一行/列都要参与多次计算,因此我推测少遍历一个个体就会丢失大量数据。
解决方案:
for i in range(0, 253):
或 for i in range(len(data_array)):(不要写成 len(data_array) + 1,否则最后一次迭代会索引越界并引发 IndexError)
我假设您提供的所有 for
循环都发生了同样的情况