向 R 数据框中的重复行条目添加前缀
Adding prefix to the duplicated row entries in R dataframe
我在 RStudio 中使用以下命令创建了如下数据框:
# Build the sample data frame used in the question: 41 rows, with the
# serial number in SlN0. The assignments below index into Sample_df, so
# it has to exist before First_Val can be added.
Sample_df <- data.frame(SlN0 = 1:41)
Sample_df$First_Val <- NA_character_
# WE NOW FILL IN THE VALUES AS NEEDED
lo_rows <- c(2, 3, 5, 10, 14, 19, 20, 21, 25, 30)
Sample_df$First_Val[lo_rows] <- paste0("[LO]_", lo_rows)
## WE NOW FILL IN THE INTERIM VALUES HERE
Sample_df$First_Val[1] <- "A1"
Sample_df$First_Val[19] <- "A19" # overwrites the "[LO]_19" set above
Sample_df$First_Val[3] <- NA # reset so row 3 is filled by the last step
Sample_df$First_Val[4] <- "A4"
Sample_df$First_Val[c(6, 7, 8, 9)] <- c("B", "C", "D", "A2")
Sample_df$First_Val[c(11, 12, 13)] <- c("P", "Q", "A3")
Sample_df$First_Val[c(15, 16, 17, 18)] <- c("B", "C", "D", "D")
Sample_df$First_Val[c(22, 23, 24)] <- c("P", "Q", "A_5")
Sample_df$First_Val[c(26, 27, 28, 29)] <- c("M", "N", "P", "A_6")
# Twelve rows are still NA at this point (row 3 and rows 31 to 41), so all
# twelve values are listed. Supplying only eleven would make R recycle the
# first value ("M") into row 41 and emit a length-mismatch warning.
Sample_df$First_Val[is.na(Sample_df$First_Val)] <- c(
  "M", "B", "O", "M", "AQII", "B", "o", "L", "I", "U", "[LO]", "M"
)
数据框如下所示
SlN0 First_Val
1 A1
2 [LO]_2
3 M
4 A4
5 [LO]_5
6 B
7 C
8 D
9 A2
10 [LO]_10
11 P
12 Q
13 A3
14 [LO]_14
15 B
16 C
17 D
18 D
19 A19
20 [LO]_20
21 [LO]_21
22 P
23 Q
24 A_5
25 [LO]_25
26 M
27 N
28 P
29 A_6
30 [LO]_30
31 B
32 O
33 M
34 AQII
35 B
36 o
37 L
38 I
39 U
40 [LO]
41 M
以下是预期输出
SlN0 First_Val
1 A1
2 [LO]_2
3 A1_[LO]_2_M
4 A4
5 [LO]_5
6 A4_[LO]_5_B
7 A4_[LO]_5_C
8 A4_[LO]_5_D
9 A2
10 [LO]_10
11 A2_[LO]_10_P
12 A2_[LO]_10_Q
13 A3
14 [LO]_14
15 A3_[LO]_14_B
16 A3_[LO]_14_C
17 A3_[LO]_14_D
18 A3_[LO]_14_D
19 A19
20 [LO]_20
21 [LO]_21
22 A19_[LO]_20_P
23 A19_[LO]_20_Q
24 A_5
25 [LO]_25
26 A_5_[LO]_25_M
27 A_5_[LO]_25_N
28 A_5_[LO]_25_P
29 A_6
30 [LO]_30
31 A_6_[LO]_30_B
32 A_6_[LO]_30_O
33 A_6_[LO]_30_M
34 AQII
35 AQII_B
36 o
37 L
38 I
39 U
40 [LO]
41 [LO]_M
First_Val 列的变化规则如下。
如果 First_Val 列中两个 [LO] 值之间存在重复的条目,就把紧接在它前面的两个值作为前缀(用 paste 拼接)加到该条目上。例如,First_Val 列的第 3 行(M)会变为 A1_[LO]_2_M。
如果一个条目没有重复值,例如 o、L、I 等 - 这些值保持不变,因为它们是唯一值。
如果在一系列重复项之前有多个 [LO] 值,则将最新的 [LO] 值与前面的值一起附加。示例 - 第 22,23 行。因此,前面有一个唯一值的重复条目将与该唯一值一起粘贴。如果在重复值之前有 2 个唯一值,我们将这两个值都粘贴到重复值中。
希望有人能帮忙。
可以分步来做。我不清楚 AQII 适用的规则是什么,但可以先从下面的做法入手:
library(tidyverse)
library(stringr)
# Parse the sample data from an inline, whitespace-delimited table.
# The first line of the text supplies the column names (SlN0, First_Val);
# First_Val is kept as character rather than converted to a factor.
d <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "SlN0 First_Val
1 A1
2 [LO]_2
3 M
4 A4
5 [LO]_5
6 B
7 C
8 D
9 A2
10 [LO]_10
11 P
12 Q
13 A3
14 [LO]_14
15 B
16 C
17 D
18 D
19 A19
20 [LO]_20
21 [LO]_21
22 P
23 Q
24 A_5
25 [LO]_25
26 M
27 N
28 P
29 A_6
30 [LO]_30
31 B
32 O
33 M
34 AQII
35 B
36 o
37 L
38 I
39 U
40 [LO]
41 M"
)
# Derive the prefixed labels step by step:
#   1. a_var / lo_var copy First_Val on rows that match the "A" / "[LO"
#      patterns and are NA everywhere else.
#   2. flag_a / flag_lo record which rows were genuine matches, before the
#      NA gaps are filled in.
#   3. fill() carries the most recent a_var / lo_var value down the column.
#   4. want keeps matched rows as they are and prefixes every other row.
# The backslashes in the patterns are doubled because "\d" and "\[" are
# not valid escapes inside an R string literal.
# Note: "A[_]?\\d*" matches any value containing a capital "A", which is
# why "AQII" is treated as an A-row.
# Note: a_var and lo_var are joined without a separator, so a result looks
# like "A1[LO]_2_M".
d %>%
  mutate(
    a_var = if_else(
      str_detect(First_Val, "A[_]?\\d*"), First_Val, NA_character_
    ),
    lo_var = if_else(
      str_detect(First_Val, "\\[LO\\]*"), First_Val, NA_character_
    )
  ) %>%
  mutate(
    flag_a = if_else(!is.na(a_var), 1L, 0L),
    flag_lo = if_else(!is.na(lo_var), 1L, 0L)
  ) %>%
  fill(a_var, lo_var) %>%
  mutate(want = case_when(
    flag_a == 1L ~ a_var,
    flag_lo == 1L ~ lo_var,
    TRUE ~ str_c(a_var, lo_var, "_", First_Val)
  ))
# SlN0 First_Val a_var lo_var flag_a flag_lo want
# 1 1 A1 A1 <NA> 1 0 A1
# 2 2 [LO]_2 A1 [LO]_2 0 1 [LO]_2
# 3 3 M A1 [LO]_2 0 0 A1[LO]_2_M
# 4 4 A4 A4 [LO]_2 1 0 A4
# 5 5 [LO]_5 A4 [LO]_5 0 1 [LO]_5
# 6 6 B A4 [LO]_5 0 0 A4[LO]_5_B
# 7 7 C A4 [LO]_5 0 0 A4[LO]_5_C
# 8 8 D A4 [LO]_5 0 0 A4[LO]_5_D
# 9 9 A2 A2 [LO]_5 1 0 A2
# 10 10 [LO]_10 A2 [LO]_10 0 1 [LO]_10
# 11 11 P A2 [LO]_10 0 0 A2[LO]_10_P
# 12 12 Q A2 [LO]_10 0 0 A2[LO]_10_Q
# 13 13 A3 A3 [LO]_10 1 0 A3
# 14 14 [LO]_14 A3 [LO]_14 0 1 [LO]_14
# 15 15 B A3 [LO]_14 0 0 A3[LO]_14_B
# 16 16 C A3 [LO]_14 0 0 A3[LO]_14_C
# 17 17 D A3 [LO]_14 0 0 A3[LO]_14_D
# 18 18 D A3 [LO]_14 0 0 A3[LO]_14_D
# 19 19 A19 A19 [LO]_14 1 0 A19
# 20 20 [LO]_20 A19 [LO]_20 0 1 [LO]_20
# 21 21 [LO]_21 A19 [LO]_21 0 1 [LO]_21
# 22 22 P A19 [LO]_21 0 0 A19[LO]_21_P
# 23 23 Q A19 [LO]_21 0 0 A19[LO]_21_Q
# 24 24 A_5 A_5 [LO]_21 1 0 A_5
# 25 25 [LO]_25 A_5 [LO]_25 0 1 [LO]_25
# 26 26 M A_5 [LO]_25 0 0 A_5[LO]_25_M
# 27 27 N A_5 [LO]_25 0 0 A_5[LO]_25_N
# 28 28 P A_5 [LO]_25 0 0 A_5[LO]_25_P
# 29 29 A_6 A_6 [LO]_25 1 0 A_6
# 30 30 [LO]_30 A_6 [LO]_30 0 1 [LO]_30
# 31 31 B A_6 [LO]_30 0 0 A_6[LO]_30_B
# 32 32 O A_6 [LO]_30 0 0 A_6[LO]_30_O
# 33 33 M A_6 [LO]_30 0 0 A_6[LO]_30_M
# 34 34 AQII AQII [LO]_30 1 0 AQII
# 35 35 B AQII [LO]_30 0 0 AQII[LO]_30_B
# 36 36 o AQII [LO]_30 0 0 AQII[LO]_30_o
# 37 37 L AQII [LO]_30 0 0 AQII[LO]_30_L
# 38 38 I AQII [LO]_30 0 0 AQII[LO]_30_I
# 39 39 U AQII [LO]_30 0 0 AQII[LO]_30_U
# 40 40 [LO] AQII [LO] 0 1 [LO]
# 41 41 M AQII [LO] 0 0 AQII[LO]_M
我在 RStudio 中使用以下命令创建了如下数据框:
# Build the sample data frame used in the question: 41 rows, with the
# serial number in SlN0. The assignments below index into Sample_df, so
# it has to exist before First_Val can be added.
Sample_df <- data.frame(SlN0 = 1:41)
Sample_df$First_Val <- NA_character_
# WE NOW FILL IN THE VALUES AS NEEDED
lo_rows <- c(2, 3, 5, 10, 14, 19, 20, 21, 25, 30)
Sample_df$First_Val[lo_rows] <- paste0("[LO]_", lo_rows)
## WE NOW FILL IN THE INTERIM VALUES HERE
Sample_df$First_Val[1] <- "A1"
Sample_df$First_Val[19] <- "A19" # overwrites the "[LO]_19" set above
Sample_df$First_Val[3] <- NA # reset so row 3 is filled by the last step
Sample_df$First_Val[4] <- "A4"
Sample_df$First_Val[c(6, 7, 8, 9)] <- c("B", "C", "D", "A2")
Sample_df$First_Val[c(11, 12, 13)] <- c("P", "Q", "A3")
Sample_df$First_Val[c(15, 16, 17, 18)] <- c("B", "C", "D", "D")
Sample_df$First_Val[c(22, 23, 24)] <- c("P", "Q", "A_5")
Sample_df$First_Val[c(26, 27, 28, 29)] <- c("M", "N", "P", "A_6")
# Twelve rows are still NA at this point (row 3 and rows 31 to 41), so all
# twelve values are listed. Supplying only eleven would make R recycle the
# first value ("M") into row 41 and emit a length-mismatch warning.
Sample_df$First_Val[is.na(Sample_df$First_Val)] <- c(
  "M", "B", "O", "M", "AQII", "B", "o", "L", "I", "U", "[LO]", "M"
)
数据框如下所示
SlN0 First_Val
1 A1
2 [LO]_2
3 M
4 A4
5 [LO]_5
6 B
7 C
8 D
9 A2
10 [LO]_10
11 P
12 Q
13 A3
14 [LO]_14
15 B
16 C
17 D
18 D
19 A19
20 [LO]_20
21 [LO]_21
22 P
23 Q
24 A_5
25 [LO]_25
26 M
27 N
28 P
29 A_6
30 [LO]_30
31 B
32 O
33 M
34 AQII
35 B
36 o
37 L
38 I
39 U
40 [LO]
41 M
以下是预期输出
SlN0 First_Val
1 A1
2 [LO]_2
3 A1_[LO]_2_M
4 A4
5 [LO]_5
6 A4_[LO]_5_B
7 A4_[LO]_5_C
8 A4_[LO]_5_D
9 A2
10 [LO]_10
11 A2_[LO]_10_P
12 A2_[LO]_10_Q
13 A3
14 [LO]_14
15 A3_[LO]_14_B
16 A3_[LO]_14_C
17 A3_[LO]_14_D
18 A3_[LO]_14_D
19 A19
20 [LO]_20
21 [LO]_21
22 A19_[LO]_20_P
23 A19_[LO]_20_Q
24 A_5
25 [LO]_25
26 A_5_[LO]_25_M
27 A_5_[LO]_25_N
28 A_5_[LO]_25_P
29 A_6
30 [LO]_30
31 A_6_[LO]_30_B
32 A_6_[LO]_30_O
33 A_6_[LO]_30_M
34 AQII
35 AQII_B
36 o
37 L
38 I
39 U
40 [LO]
41 [LO]_M
First_Val 列的变化规则如下。
如果 First_Val 列中两个 [LO] 值之间存在重复的条目,就把紧接在它前面的两个值作为前缀(用 paste 拼接)加到该条目上。例如,First_Val 列的第 3 行(M)会变为 A1_[LO]_2_M。
如果一个条目没有重复值,例如 o、L、I 等 - 这些值保持不变,因为它们是唯一值。
如果在一系列重复项之前有多个 [LO] 值,则将最新的 [LO] 值与前面的值一起附加。示例 - 第 22,23 行。因此,前面有一个唯一值的重复条目将与该唯一值一起粘贴。如果在重复值之前有 2 个唯一值,我们将这两个值都粘贴到重复值中。
希望有人能帮忙。
可以分步来做。我不清楚 AQII 适用的规则是什么,但可以先从下面的做法入手:
library(tidyverse)
library(stringr)
# Parse the sample data from an inline, whitespace-delimited table.
# The first line of the text supplies the column names (SlN0, First_Val);
# First_Val is kept as character rather than converted to a factor.
d <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "SlN0 First_Val
1 A1
2 [LO]_2
3 M
4 A4
5 [LO]_5
6 B
7 C
8 D
9 A2
10 [LO]_10
11 P
12 Q
13 A3
14 [LO]_14
15 B
16 C
17 D
18 D
19 A19
20 [LO]_20
21 [LO]_21
22 P
23 Q
24 A_5
25 [LO]_25
26 M
27 N
28 P
29 A_6
30 [LO]_30
31 B
32 O
33 M
34 AQII
35 B
36 o
37 L
38 I
39 U
40 [LO]
41 M"
)
# Derive the prefixed labels step by step:
#   1. a_var / lo_var copy First_Val on rows that match the "A" / "[LO"
#      patterns and are NA everywhere else.
#   2. flag_a / flag_lo record which rows were genuine matches, before the
#      NA gaps are filled in.
#   3. fill() carries the most recent a_var / lo_var value down the column.
#   4. want keeps matched rows as they are and prefixes every other row.
# The backslashes in the patterns are doubled because "\d" and "\[" are
# not valid escapes inside an R string literal.
# Note: "A[_]?\\d*" matches any value containing a capital "A", which is
# why "AQII" is treated as an A-row.
# Note: a_var and lo_var are joined without a separator, so a result looks
# like "A1[LO]_2_M".
d %>%
  mutate(
    a_var = if_else(
      str_detect(First_Val, "A[_]?\\d*"), First_Val, NA_character_
    ),
    lo_var = if_else(
      str_detect(First_Val, "\\[LO\\]*"), First_Val, NA_character_
    )
  ) %>%
  mutate(
    flag_a = if_else(!is.na(a_var), 1L, 0L),
    flag_lo = if_else(!is.na(lo_var), 1L, 0L)
  ) %>%
  fill(a_var, lo_var) %>%
  mutate(want = case_when(
    flag_a == 1L ~ a_var,
    flag_lo == 1L ~ lo_var,
    TRUE ~ str_c(a_var, lo_var, "_", First_Val)
  ))
# SlN0 First_Val a_var lo_var flag_a flag_lo want
# 1 1 A1 A1 <NA> 1 0 A1
# 2 2 [LO]_2 A1 [LO]_2 0 1 [LO]_2
# 3 3 M A1 [LO]_2 0 0 A1[LO]_2_M
# 4 4 A4 A4 [LO]_2 1 0 A4
# 5 5 [LO]_5 A4 [LO]_5 0 1 [LO]_5
# 6 6 B A4 [LO]_5 0 0 A4[LO]_5_B
# 7 7 C A4 [LO]_5 0 0 A4[LO]_5_C
# 8 8 D A4 [LO]_5 0 0 A4[LO]_5_D
# 9 9 A2 A2 [LO]_5 1 0 A2
# 10 10 [LO]_10 A2 [LO]_10 0 1 [LO]_10
# 11 11 P A2 [LO]_10 0 0 A2[LO]_10_P
# 12 12 Q A2 [LO]_10 0 0 A2[LO]_10_Q
# 13 13 A3 A3 [LO]_10 1 0 A3
# 14 14 [LO]_14 A3 [LO]_14 0 1 [LO]_14
# 15 15 B A3 [LO]_14 0 0 A3[LO]_14_B
# 16 16 C A3 [LO]_14 0 0 A3[LO]_14_C
# 17 17 D A3 [LO]_14 0 0 A3[LO]_14_D
# 18 18 D A3 [LO]_14 0 0 A3[LO]_14_D
# 19 19 A19 A19 [LO]_14 1 0 A19
# 20 20 [LO]_20 A19 [LO]_20 0 1 [LO]_20
# 21 21 [LO]_21 A19 [LO]_21 0 1 [LO]_21
# 22 22 P A19 [LO]_21 0 0 A19[LO]_21_P
# 23 23 Q A19 [LO]_21 0 0 A19[LO]_21_Q
# 24 24 A_5 A_5 [LO]_21 1 0 A_5
# 25 25 [LO]_25 A_5 [LO]_25 0 1 [LO]_25
# 26 26 M A_5 [LO]_25 0 0 A_5[LO]_25_M
# 27 27 N A_5 [LO]_25 0 0 A_5[LO]_25_N
# 28 28 P A_5 [LO]_25 0 0 A_5[LO]_25_P
# 29 29 A_6 A_6 [LO]_25 1 0 A_6
# 30 30 [LO]_30 A_6 [LO]_30 0 1 [LO]_30
# 31 31 B A_6 [LO]_30 0 0 A_6[LO]_30_B
# 32 32 O A_6 [LO]_30 0 0 A_6[LO]_30_O
# 33 33 M A_6 [LO]_30 0 0 A_6[LO]_30_M
# 34 34 AQII AQII [LO]_30 1 0 AQII
# 35 35 B AQII [LO]_30 0 0 AQII[LO]_30_B
# 36 36 o AQII [LO]_30 0 0 AQII[LO]_30_o
# 37 37 L AQII [LO]_30 0 0 AQII[LO]_30_L
# 38 38 I AQII [LO]_30 0 0 AQII[LO]_30_I
# 39 39 U AQII [LO]_30 0 0 AQII[LO]_30_U
# 40 40 [LO] AQII [LO] 0 1 [LO]
# 41 41 M AQII [LO] 0 0 AQII[LO]_M