每次列中字符串更改时的布尔标记,按组
Boolean marker every time a string changes in a column, by group
我有以下数据框:
data = {'unit': {59: 'unit1',
662: 'unit1',
680: 'unit1',
725: 'unit1',
709: 'unit1',
703: 'unit1',
653: 'unit1',
807: 'unit4',
825: 'unit4',
778: 'unit4',
816: 'unit4',
822: 'unit4',
849: 'unit4',
820: 'unit4',
754: 'unit4',
1031: 'unit3',
1094: 'unit2',
1008: 'unit2',
1089: 'unit2',
1044: 'unit5'},
'Date_job': {59: datetime.date(2021, 6, 7),
662: datetime.date(2021, 6, 14),
680: datetime.date(2021, 7, 5),
725: datetime.date(2021, 7, 26),
709: datetime.date(2021, 8, 30),
703: datetime.date(2021, 10, 11),
653: datetime.date(2021, 10, 18),
807: datetime.date(2021, 7, 19),
825: datetime.date(2021, 7, 26),
778: datetime.date(2021, 8, 23),
816: datetime.date(2021, 8, 30),
822: datetime.date(2021, 9, 6),
849: datetime.date(2021, 9, 27),
820: datetime.date(2021, 10, 4),
754: datetime.date(2021, 10, 18),
1031: datetime.date(2021, 9, 6),
1094: datetime.date(2021, 7, 26),
1008: datetime.date(2021, 8, 9),
1089: datetime.date(2021, 10, 4),
1044: datetime.date(2021, 6, 14)},
'Vector': {59: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
662: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|5:1/8.0',
'B|6:1/5.0',
'B|7:1/5.0'],
680: ['A|14:1/9.0',
'A|14:1/4.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
725: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
709: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
703: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
653: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
807: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|4:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
825: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
778: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
816: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
822: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0'],
849: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
820: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/5.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
754: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
1031: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/10.0',
'A|5:1/2.0',
'A|6:1/12.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/6.0',
'A|9:1/2.0'],
1094: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/4.0',
'A|8:1/4.0',
'A|8:1/3.0',
'A|9:1/2.0'],
1008: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/4.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/9.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1089: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/2.0',
'A|6:1/11.0',
'A|6:1/6.0',
'A|7:1/9.0',
'A|7:1/3.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1044: ['A|10:1/6.0',
'A|10:1/6.0',
'A|5:1/4.0',
'A|5:1/4.0',
'A|6:1/10.0',
'A|6:1/9.0',
'A|6:1/9.0',
'A|7:1/8.0',
'A|7:1/8.0',
'A|8:1/3.0']}}
看起来像这样:
unit Date_job Vector
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0]
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0]
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0]
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0]
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0]
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0]
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0]
最后一列是向量,这里写成列表。如您所见,对于同一单位,此向量会随时间变化。我希望能够在每个单位内标记相邻行之间的变化。我知道如何对整个数据框执行此操作,但不知道如何将 groupby 应用于此。
以下是我知道的方法:
# Flag a row when its Vector differs from the Vector on the previous row.
# The comparison runs over the whole frame; no grouping by unit is applied.
# NOTE(review): fill_value receives a one-row Series rather than a scalar -
# confirm pandas fills the first row as intended.
df2["VectorChanged"] = df2["Vector"].shift(1, fill_value=df2["Vector"].head(1)) != df2["Vector"]
但是,正如你所见,当单位发生变化时,它也会把向量标记为已更改(这本身没错,但并不适用,因为比较的已经不是同一个单位):
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] True
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] True
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] True
预期输出为:
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] False
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] False
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] False
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] False
即每个组的第一行为 False,
因为没有可比较的东西,即没有变化。
如有任何帮助,我们将不胜感激。
可以像这样尝试使用 groupby 和 transform:
# Within each unit, compare every Vector with the previous row's Vector.
# bfill() copies the second row's value into the empty first slot, so the
# first row of a unit is compared with itself and comes out False.
vectors_by_unit = df.groupby("unit")["Vector"]
df["Vector Changed"] = vectors_by_unit.transform(lambda x: x != x.shift().bfill())
# A unit with a single row has nothing to back-fill from, so its only row
# is set to False explicitly.
is_single_row_unit = vectors_by_unit.transform("count") == 1
df["Vector Changed"] = df["Vector Changed"].mask(is_single_row_unit, False)
>>> df
unit Date_job Vector Vector Changed
59 unit1 2021-06-07 ... False
662 unit1 2021-06-14 ... True
680 unit1 2021-07-05 ... True
725 unit1 2021-07-26 ... True
709 unit1 2021-08-30 ... False
703 unit1 2021-10-11 ... True
653 unit1 2021-10-18 ... False
807 unit4 2021-07-19 ... False
825 unit4 2021-07-26 ... True
778 unit4 2021-08-23 ... True
816 unit4 2021-08-30 ... True
822 unit4 2021-09-06 ... True
849 unit4 2021-09-27 ... True
820 unit4 2021-10-04 ... True
754 unit4 2021-10-18 ... True
1031 unit3 2021-09-06 ... False
1094 unit2 2021-07-26 ... False
1008 unit2 2021-08-09 ... True
1089 unit2 2021-10-04 ... True
1044 unit5 2021-06-14 ... False
将向量与移位(shift)后的值进行比较,然后把每个 unit 的第一行设置为 False。
如果性能很重要,则没有必要使用 groupby:
# Compare each Vector with the one on the previous row of the frame, then
# force False wherever a unit value appears for the first time.
# NOTE(review): the previous row is taken regardless of unit, so this
# presumably relies on all rows of a unit being adjacent - confirm.
previous_vector = df["Vector"].shift()
unit_seen_before = df['unit'].duplicated()
df["Vector Changed"] = previous_vector.ne(df['Vector']).where(unit_seen_before, False)
编辑:
如果 unit 列中同一单位可能分成多个不相邻的段出现,则按连续重复的段来分组:
# g numbers each run of consecutive identical unit values: a new run starts
# on every row whose unit differs from the row above, and cumsum turns
# those starts into run ids.
unit_differs_from_previous = df["unit"].shift().ne(df['unit'])
g = unit_differs_from_previous.cumsum()
# m is True where the Vector differs from the Vector on the previous row.
m = df["Vector"].shift().ne(df['Vector'])
# Keep m only on rows that are not the first of their run; first rows get False.
not_first_in_run = g.duplicated()
df["Vector Changed"] = m.where(not_first_in_run, False)
print(df)
unit Date_job Vector \
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/1...
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
822 unit1 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
849 unit1 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
820 unit1 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 4 groups
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0...
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0...
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0,...
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0,...
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0,...
Vector Changed
59 False
662 True
680 True
725 True
709 False
703 True
653 False
807 False
825 True
778 True
816 True
822 False
849 True
820 True
754 False
1031 False
1094 False
1008 True
1089 True
1044 False
我有以下数据框:
data = {'unit': {59: 'unit1',
662: 'unit1',
680: 'unit1',
725: 'unit1',
709: 'unit1',
703: 'unit1',
653: 'unit1',
807: 'unit4',
825: 'unit4',
778: 'unit4',
816: 'unit4',
822: 'unit4',
849: 'unit4',
820: 'unit4',
754: 'unit4',
1031: 'unit3',
1094: 'unit2',
1008: 'unit2',
1089: 'unit2',
1044: 'unit5'},
'Date_job': {59: datetime.date(2021, 6, 7),
662: datetime.date(2021, 6, 14),
680: datetime.date(2021, 7, 5),
725: datetime.date(2021, 7, 26),
709: datetime.date(2021, 8, 30),
703: datetime.date(2021, 10, 11),
653: datetime.date(2021, 10, 18),
807: datetime.date(2021, 7, 19),
825: datetime.date(2021, 7, 26),
778: datetime.date(2021, 8, 23),
816: datetime.date(2021, 8, 30),
822: datetime.date(2021, 9, 6),
849: datetime.date(2021, 9, 27),
820: datetime.date(2021, 10, 4),
754: datetime.date(2021, 10, 18),
1031: datetime.date(2021, 9, 6),
1094: datetime.date(2021, 7, 26),
1008: datetime.date(2021, 8, 9),
1089: datetime.date(2021, 10, 4),
1044: datetime.date(2021, 6, 14)},
'Vector': {59: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
662: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|5:1/8.0',
'B|6:1/5.0',
'B|7:1/5.0'],
680: ['A|14:1/9.0',
'A|14:1/4.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
725: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
709: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
703: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
653: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
807: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|4:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
825: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
778: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
816: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
822: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0'],
849: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
820: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/5.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
754: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
1031: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/10.0',
'A|5:1/2.0',
'A|6:1/12.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/6.0',
'A|9:1/2.0'],
1094: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/4.0',
'A|8:1/4.0',
'A|8:1/3.0',
'A|9:1/2.0'],
1008: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/4.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/9.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1089: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/2.0',
'A|6:1/11.0',
'A|6:1/6.0',
'A|7:1/9.0',
'A|7:1/3.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1044: ['A|10:1/6.0',
'A|10:1/6.0',
'A|5:1/4.0',
'A|5:1/4.0',
'A|6:1/10.0',
'A|6:1/9.0',
'A|6:1/9.0',
'A|7:1/8.0',
'A|7:1/8.0',
'A|8:1/3.0']}}
看起来像这样:
unit Date_job Vector
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0]
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0]
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0]
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0]
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0]
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0]
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0]
最后一列是向量,这里写成列表。如您所见,对于同一单位,此向量会随时间变化。我希望能够在每个单位内标记相邻行之间的变化。我知道如何对整个数据框执行此操作,但不知道如何将 groupby 应用于此。
以下是我知道的方法:
# Flag a row when its Vector differs from the Vector on the previous row.
# The comparison runs over the whole frame; no grouping by unit is applied.
# NOTE(review): fill_value receives a one-row Series rather than a scalar -
# confirm pandas fills the first row as intended.
df2["VectorChanged"] = df2["Vector"].shift(1, fill_value=df2["Vector"].head(1)) != df2["Vector"]
但是,正如你所见,当单位发生变化时,它也会把向量标记为已更改(这本身没错,但并不适用,因为比较的已经不是同一个单位):
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] True
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] True
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] True
预期输出为:
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] False
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] False
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] False
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] False
即每个组的第一行为 False,
因为没有可比较的东西,即没有变化。
如有任何帮助,我们将不胜感激。
可以像这样尝试使用 groupby 和 transform:
# Within each unit, compare every Vector with the previous row's Vector.
# bfill() copies the second row's value into the empty first slot, so the
# first row of a unit is compared with itself and comes out False.
vectors_by_unit = df.groupby("unit")["Vector"]
df["Vector Changed"] = vectors_by_unit.transform(lambda x: x != x.shift().bfill())
# A unit with a single row has nothing to back-fill from, so its only row
# is set to False explicitly.
is_single_row_unit = vectors_by_unit.transform("count") == 1
df["Vector Changed"] = df["Vector Changed"].mask(is_single_row_unit, False)
>>> df
unit Date_job Vector Vector Changed
59 unit1 2021-06-07 ... False
662 unit1 2021-06-14 ... True
680 unit1 2021-07-05 ... True
725 unit1 2021-07-26 ... True
709 unit1 2021-08-30 ... False
703 unit1 2021-10-11 ... True
653 unit1 2021-10-18 ... False
807 unit4 2021-07-19 ... False
825 unit4 2021-07-26 ... True
778 unit4 2021-08-23 ... True
816 unit4 2021-08-30 ... True
822 unit4 2021-09-06 ... True
849 unit4 2021-09-27 ... True
820 unit4 2021-10-04 ... True
754 unit4 2021-10-18 ... True
1031 unit3 2021-09-06 ... False
1094 unit2 2021-07-26 ... False
1008 unit2 2021-08-09 ... True
1089 unit2 2021-10-04 ... True
1044 unit5 2021-06-14 ... False
将向量与移位(shift)后的值进行比较,然后把每个 unit 的第一行设置为 False。
如果性能很重要,则没有必要使用 groupby:
# Compare each Vector with the one on the previous row of the frame, then
# force False wherever a unit value appears for the first time.
# NOTE(review): the previous row is taken regardless of unit, so this
# presumably relies on all rows of a unit being adjacent - confirm.
previous_vector = df["Vector"].shift()
unit_seen_before = df['unit'].duplicated()
df["Vector Changed"] = previous_vector.ne(df['Vector']).where(unit_seen_before, False)
编辑:
如果 unit 列中同一单位可能分成多个不相邻的段出现,则按连续重复的段来分组:
# g numbers each run of consecutive identical unit values: a new run starts
# on every row whose unit differs from the row above, and cumsum turns
# those starts into run ids.
unit_differs_from_previous = df["unit"].shift().ne(df['unit'])
g = unit_differs_from_previous.cumsum()
# m is True where the Vector differs from the Vector on the previous row.
m = df["Vector"].shift().ne(df['Vector'])
# Keep m only on rows that are not the first of their run; first rows get False.
not_first_in_run = g.duplicated()
df["Vector Changed"] = m.where(not_first_in_run, False)
print(df)
unit Date_job Vector \
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/1...
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
822 unit1 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
849 unit1 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
820 unit1 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 4 groups
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0...
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0...
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0,...
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0,...
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0,...
Vector Changed
59 False
662 True
680 True
725 True
709 False
703 True
653 False
807 False
825 True
778 True
816 True
822 False
849 True
820 True
754 False
1031 False
1094 False
1008 True
1089 True
1044 False