以 3 为一组分配列值 - Python
Assign Column values in groups of 3 - Python
我的目标是在 pandas `df` 中把地点(Place)分配给人员。具体来说,我使用下面的 `df` 计算每一行当前“在用”的地点数量(即 `Places On` 列)。我想根据这些值,以每 3 个为一组进行分配。
例如,当前地点数不超过 3 的行应分配给 `P1`,地点数为 4–6 的行应分配给 `P2`,依此类推(与下方预期输出一致:`Places On` 为 3 时 `P` 为 1,为 4 时 `P` 为 2)。
注意:一次出现的地点总数最多可达 20 个,因此分配的组数需要为此提供支持。
这是我的尝试。
import pandas as pd
import numpy as np

d = {
    'Time': [
        '8:03:00', '8:07:00', '8:10:00', '8:23:00', '8:27:00',
        '8:30:00', '8:37:00', '8:40:00', '8:48:00',
    ],
    'Place': [
        'House 1', 'House 2', 'House 3', 'House 4', 'House 5',
        'House 1', 'House 2', 'House 3', 'House 4',
    ],
}

df = pd.DataFrame(data=d)

# For every row, count how many later rows share its Place; 0 therefore
# marks the last appearance of that Place.
df['u'] = df[::-1].groupby('Place')['Place'].cumcount()

# Running number of places "on": +1 the first time a Place is seen, -1 on
# the row that follows a Place's last appearance.
ids = [1]
seen = {df['Place'].iloc[0]}
# Seed from row 0 so a Place that only appears on the first row is also
# subtracted on the following row.
dec = bool(df['u'].iloc[0] == 0)
for val, u in zip(df['Place'].iloc[1:], df['u'].iloc[1:]):
    ids.append(ids[-1] + (val not in seen) - dec)
    seen.add(val)
    dec = bool(u == 0)

df['Places On'] = ids
df = df.drop(columns='u')


def g(gps):
    """Return a copy of ``gps`` with a ``P`` column numbering its places.

    The distinct values of ``gps['Place']`` are numbered in order of
    appearance and bucketed three per group (1, 1, 1, 2, 2, 2, ...).
    """
    gps = gps.copy()  # never write into the slice handed over by groupby
    places = gps['Place'].unique()
    group_of = dict(zip(places, np.arange(len(places)) // 3 + 1))
    gps['P'] = gps['Place'].map(group_of)
    return gps


# Iterate over the groups instead of ``groupby(...).apply(g)``: the result
# of ``apply`` (index layout, whether the grouping column is passed to
# ``g``) depends on the pandas version, while iteration always yields the
# full sub-frame.  ``sort_index`` restores the original row order.
#
# NOTE: each group holds exactly one Place, so ``g`` assigns P == 1 to
# every row.  This is the behaviour the attempt is reporting, not the
# desired grouping by ``Places On``.
df = pd.concat(
    [g(group) for _, group in df.groupby('Place', sort=False)]
).sort_index()
输出:
Time Place Places On P
0 8:03:00 House 1 1 1
1 8:07:00 House 2 2 1
2 8:10:00 House 3 3 1
3 8:23:00 House 4 4 1
4 8:27:00 House 5 5 1
5 8:30:00 House 1 4 1
6 8:37:00 House 2 3 1
7 8:40:00 House 3 2 1
8 8:48:00 House 4 1 1
预期输出:
Time Place Places On P
0 8:03:00 House 1 1 1
1 8:07:00 House 2 2 1
2 8:10:00 House 3 3 1
3 8:23:00 House 4 4 2
4 8:27:00 House 5 5 2
5 8:30:00 House 1 4 2
6 8:37:00 House 2 3 1
7 8:40:00 House 3 2 1
8 8:48:00 House 4 1 1
import pandas as pd
import numpy as np

GROUP_SIZE = 3  # how many consecutive "Places On" values share one group


def assign_groups(counts, group_size=GROUP_SIZE):
    """Map 1-based counts to 1-based group numbers.

    With the default size of 3: 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, and so on,
    so any number of simultaneous places is supported.  ``counts`` may be
    a single integer or a pandas Series of integers.
    """
    return (counts - 1) // group_size + 1


d = {
    'Time': [
        '8:03:00', '8:07:00', '8:10:00', '8:23:00', '8:27:00',
        '8:30:00', '8:37:00', '8:40:00', '8:48:00',
    ],
    'Place': [
        'House 1', 'House 2', 'House 3', 'House 4', 'House 5',
        'House 1', 'House 2', 'House 3', 'House 4',
    ],
}

df = pd.DataFrame(data=d)

# For every row, count how many later rows share its Place; 0 therefore
# marks the last appearance of that Place.
df['u'] = df[::-1].groupby('Place')['Place'].cumcount()

# Running number of places "on": +1 the first time a Place is seen, -1 on
# the row that follows a Place's last appearance.
ids = [1]
seen = {df['Place'].iloc[0]}
# Seed from row 0 so a Place that only appears on the first row is also
# subtracted on the following row.
dec = bool(df['u'].iloc[0] == 0)
for val, u in zip(df['Place'].iloc[1:], df['u'].iloc[1:]):
    ids.append(ids[-1] + (val not in seen) - dec)
    seen.add(val)
    dec = bool(u == 0)

df['Places On'] = ids
df = df.drop(columns='u')

# Derive P directly from 'Places On' in one vectorised step.  This avoids
# the chained assignment ``df['P'][i] = ...`` (which does not write back
# under pandas copy-on-write) and is not limited to two groups.  The
# per-Place groupby step is left out because the P column it produced was
# overwritten in full by this assignment.
df['P'] = assign_groups(df['Places On'])

print(df)
这样就会根据 `df['Places On']` 的值来分配 `P`。
我的目标是在 pandas `df` 中把地点(Place)分配给人员。具体来说,我使用下面的 `df` 计算每一行当前“在用”的地点数量(即 `Places On` 列)。我想根据这些值,以每 3 个为一组进行分配。
例如,当前地点数不超过 3 的行应分配给 `P1`,地点数为 4–6 的行应分配给 `P2`,依此类推(与下方预期输出一致:`Places On` 为 3 时 `P` 为 1,为 4 时 `P` 为 2)。
注意:一次出现的地点总数最多可达 20 个,因此分配的组数需要为此提供支持。
这是我的尝试。
import pandas as pd
import numpy as np

d = {
    'Time': [
        '8:03:00', '8:07:00', '8:10:00', '8:23:00', '8:27:00',
        '8:30:00', '8:37:00', '8:40:00', '8:48:00',
    ],
    'Place': [
        'House 1', 'House 2', 'House 3', 'House 4', 'House 5',
        'House 1', 'House 2', 'House 3', 'House 4',
    ],
}

df = pd.DataFrame(data=d)

# For every row, count how many later rows share its Place; 0 therefore
# marks the last appearance of that Place.
df['u'] = df[::-1].groupby('Place')['Place'].cumcount()

# Running number of places "on": +1 the first time a Place is seen, -1 on
# the row that follows a Place's last appearance.
ids = [1]
seen = {df['Place'].iloc[0]}
# Seed from row 0 so a Place that only appears on the first row is also
# subtracted on the following row.
dec = bool(df['u'].iloc[0] == 0)
for val, u in zip(df['Place'].iloc[1:], df['u'].iloc[1:]):
    ids.append(ids[-1] + (val not in seen) - dec)
    seen.add(val)
    dec = bool(u == 0)

df['Places On'] = ids
df = df.drop(columns='u')


def g(gps):
    """Return a copy of ``gps`` with a ``P`` column numbering its places.

    The distinct values of ``gps['Place']`` are numbered in order of
    appearance and bucketed three per group (1, 1, 1, 2, 2, 2, ...).
    """
    gps = gps.copy()  # never write into the slice handed over by groupby
    places = gps['Place'].unique()
    group_of = dict(zip(places, np.arange(len(places)) // 3 + 1))
    gps['P'] = gps['Place'].map(group_of)
    return gps


# Iterate over the groups instead of ``groupby(...).apply(g)``: the result
# of ``apply`` (index layout, whether the grouping column is passed to
# ``g``) depends on the pandas version, while iteration always yields the
# full sub-frame.  ``sort_index`` restores the original row order.
#
# NOTE: each group holds exactly one Place, so ``g`` assigns P == 1 to
# every row.  This is the behaviour the attempt is reporting, not the
# desired grouping by ``Places On``.
df = pd.concat(
    [g(group) for _, group in df.groupby('Place', sort=False)]
).sort_index()
输出:
Time Place Places On P
0 8:03:00 House 1 1 1
1 8:07:00 House 2 2 1
2 8:10:00 House 3 3 1
3 8:23:00 House 4 4 1
4 8:27:00 House 5 5 1
5 8:30:00 House 1 4 1
6 8:37:00 House 2 3 1
7 8:40:00 House 3 2 1
8 8:48:00 House 4 1 1
预期输出:
Time Place Places On P
0 8:03:00 House 1 1 1
1 8:07:00 House 2 2 1
2 8:10:00 House 3 3 1
3 8:23:00 House 4 4 2
4 8:27:00 House 5 5 2
5 8:30:00 House 1 4 2
6 8:37:00 House 2 3 1
7 8:40:00 House 3 2 1
8 8:48:00 House 4 1 1
import pandas as pd
import numpy as np

GROUP_SIZE = 3  # how many consecutive "Places On" values share one group


def assign_groups(counts, group_size=GROUP_SIZE):
    """Map 1-based counts to 1-based group numbers.

    With the default size of 3: 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, and so on,
    so any number of simultaneous places is supported.  ``counts`` may be
    a single integer or a pandas Series of integers.
    """
    return (counts - 1) // group_size + 1


d = {
    'Time': [
        '8:03:00', '8:07:00', '8:10:00', '8:23:00', '8:27:00',
        '8:30:00', '8:37:00', '8:40:00', '8:48:00',
    ],
    'Place': [
        'House 1', 'House 2', 'House 3', 'House 4', 'House 5',
        'House 1', 'House 2', 'House 3', 'House 4',
    ],
}

df = pd.DataFrame(data=d)

# For every row, count how many later rows share its Place; 0 therefore
# marks the last appearance of that Place.
df['u'] = df[::-1].groupby('Place')['Place'].cumcount()

# Running number of places "on": +1 the first time a Place is seen, -1 on
# the row that follows a Place's last appearance.
ids = [1]
seen = {df['Place'].iloc[0]}
# Seed from row 0 so a Place that only appears on the first row is also
# subtracted on the following row.
dec = bool(df['u'].iloc[0] == 0)
for val, u in zip(df['Place'].iloc[1:], df['u'].iloc[1:]):
    ids.append(ids[-1] + (val not in seen) - dec)
    seen.add(val)
    dec = bool(u == 0)

df['Places On'] = ids
df = df.drop(columns='u')

# Derive P directly from 'Places On' in one vectorised step.  This avoids
# the chained assignment ``df['P'][i] = ...`` (which does not write back
# under pandas copy-on-write) and is not limited to two groups.  The
# per-Place groupby step is left out because the P column it produced was
# overwritten in full by this assignment.
df['P'] = assign_groups(df['Places On'])

print(df)
这样就会根据 `df['Places On']` 的值来分配 `P`。