向 R 数据框中的重复行条目添加前缀

Adding prefix to the duplicated row entries in R dataframe

我使用以下命令在 RStudio 中创建了如下数据框

  # Build the example data frame from scratch so the snippet is reproducible:
  # 41 rows numbered by SlN0, with First_Val initially missing everywhere.
  Sample_df <- data.frame(SlN0 = seq_len(41L))
  Sample_df$First_Val <- NA_character_

  # Rows that hold a "[LO]_<row number>" marker. Rows 3 and 19 are not listed
  # because they end up with other values (row 3 is filled in the last step,
  # row 19 becomes "A19").
  lo_rows <- c(2, 5, 10, 14, 20, 21, 25, 30)
  Sample_df$First_Val[lo_rows] <- paste0("[LO]_", lo_rows)

  # Interim values between the markers.
  Sample_df$First_Val[c(1, 4, 19)] <- c("A1", "A4", "A19")
  Sample_df$First_Val[6:9] <- c("B", "C", "D", "A2")
  Sample_df$First_Val[11:13] <- c("P", "Q", "A3")
  Sample_df$First_Val[15:18] <- c("B", "C", "D", "D")
  Sample_df$First_Val[22:24] <- c("P", "Q", "A_5")
  Sample_df$First_Val[26:29] <- c("M", "N", "P", "A_6")

  # Fill every row that is still missing: rows 3 and 31-41, twelve in total.
  # All twelve values are written out so the assignment does not depend on
  # recycling a shorter vector (which raises a "not a multiple of replacement
  # length" warning).
  Sample_df$First_Val[is.na(Sample_df$First_Val)] <- c(
    "M", "B", "O", "M", "AQII", "B", "o", "L", "I", "U", "[LO]", "M"
  )

数据框如下所示

     SlN0 First_Val
      1        A1
      2    [LO]_2
      3         M
      4        A4
      5    [LO]_5
      6         B
      7         C
      8         D
      9        A2
     10   [LO]_10
     11         P
     12         Q
     13        A3
     14   [LO]_14
     15         B
     16         C
     17         D
     18         D
     19       A19
     20   [LO]_20
     21   [LO]_21
     22         P
     23         Q
     24       A_5
     25   [LO]_25
     26         M
     27         N
     28         P
     29       A_6
     30   [LO]_30
     31         B
     32         O
     33         M
     34      AQII
     35         B
     36         o
     37         L
     38         I
     39         U
     40      [LO]
     41         M

以下是预期输出

     SlN0     First_Val
      1            A1
      2        [LO]_2
      3   A1_[LO]_2_M
      4            A4
      5        [LO]_5
       6   A4_[LO]_5_B
       7   A4_[LO]_5_C
       8   A4_[LO]_5_D
       9            A2
      10       [LO]_10
      11  A2_[LO]_10_P
      12  A2_[LO]_10_Q
      13            A3
      14       [LO]_14
      15  A3_[LO]_14_B
      16  A3_[LO]_14_C
      17  A3_[LO]_14_D
      18  A3_[LO]_14_D
      19           A19
      20       [LO]_20
      21       [LO]_21
      22 A19_[LO]_20_P
      23 A19_[LO]_20_Q
      24           A_5
      25       [LO]_25
      26 A_5_[LO]_25_M
      27 A_5_[LO]_25_N
      28 A_5_[LO]_25_P
      29           A_6
      30       [LO]_30
      31 A_6_[LO]_30_B
      32 A_6_[LO]_30_O
      33 A_6_[LO]_30_M
      34          AQII
      35         AQII_B
      36             o
      37  L
      38  I
      39  U
      40          [LO]
      41          [LO]_M

First_Val栏变化如下。

如果 First_Val 列中 2 个 [LO] 值之间存在重复的条目,我们会把紧接在前面的 2 个值作为前缀附加上去(使用 paste)。因此,First_Val 列中的第 3 个条目 (M) 更改为 A1_[LO]_2_M。

如果一个条目没有重复值,例如 o、L、I 等 - 这些值保持不变,因为它们是唯一值。

如果在一系列重复项之前有多个 [LO] 值,则将最新的 [LO] 值与前面的值一起附加。示例 - 第 22,23 行。因此,前面有一个唯一值的重复条目将与该唯一值一起粘贴。如果在重复值之前有 2 个唯一值,我们将这两个值都粘贴到重复值中。

希望有人能帮忙。

我们一步一步来。我不清楚 AQII 对应的规则是什么,但可以从下面这种方式入手:

library(tidyverse)
library(stringr)

# Parse the example table from an inline, whitespace-separated text block.
# The first line supplies the column names; First_Val is kept as character.
d <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "SlN0 First_Val
  1        A1
            2    [LO]_2
            3         M
            4        A4
            5    [LO]_5
            6         B
            7         C
            8         D
            9        A2
            10   [LO]_10
            11         P
            12         Q
            13        A3
            14   [LO]_14
            15         B
            16         C
            17         D
            18         D
            19       A19
            20   [LO]_20
            21   [LO]_21
            22         P
            23         Q
            24       A_5
            25   [LO]_25
            26         M
            27         N
            28         P
            29       A_6
            30   [LO]_30
            31         B
            32         O
            33         M
            34      AQII
            35         B
            36         o
            37         L
            38         I
            39         U
            40      [LO]
            41         M"
)


# Label each row as an "A" anchor, a "[LO]" marker, or a plain value, carry the
# most recent anchor and marker downwards, and prefix plain values with both.
d %>%
  mutate(
    # Backslashes are doubled because "\d" and "\[" are not valid escapes in
    # an R string literal. Since "_" and the digits are optional, the first
    # pattern matches any value containing "A" (including "AQII"); the second
    # matches any value containing "[LO".
    a_var = if_else(
      str_detect(First_Val, "A[_]?\\d*"), First_Val, NA_character_
    ),
    lo_var = if_else(
      str_detect(First_Val, "\\[LO\\]*"), First_Val, NA_character_
    )
  ) %>%
  # Record which rows are themselves anchors/markers before fill() overwrites
  # the NAs in a_var and lo_var.
  mutate(
    flag_a = if_else(!is.na(a_var), 1L, 0L),
    flag_lo = if_else(!is.na(lo_var), 1L, 0L)
  ) %>%
  # Carry the last seen anchor and marker down to the following rows.
  fill(a_var, lo_var) %>%
  mutate(want = case_when(
    flag_a == 1L ~ a_var,
    flag_lo == 1L ~ lo_var,
    # NOTE(review): a_var and lo_var are joined with no separator, giving
    # e.g. "A1[LO]_2_M"; only First_Val is preceded by "_".
    TRUE ~ str_c(a_var, lo_var, "_", First_Val)
  ))

# SlN0 First_Val a_var  lo_var flag_a flag_lo          want
# 1     1        A1    A1    <NA>      1       0            A1
# 2     2    [LO]_2    A1  [LO]_2      0       1        [LO]_2
# 3     3         M    A1  [LO]_2      0       0    A1[LO]_2_M
# 4     4        A4    A4  [LO]_2      1       0            A4
# 5     5    [LO]_5    A4  [LO]_5      0       1        [LO]_5
# 6     6         B    A4  [LO]_5      0       0    A4[LO]_5_B
# 7     7         C    A4  [LO]_5      0       0    A4[LO]_5_C
# 8     8         D    A4  [LO]_5      0       0    A4[LO]_5_D
# 9     9        A2    A2  [LO]_5      1       0            A2
# 10   10   [LO]_10    A2 [LO]_10      0       1       [LO]_10
# 11   11         P    A2 [LO]_10      0       0   A2[LO]_10_P
# 12   12         Q    A2 [LO]_10      0       0   A2[LO]_10_Q
# 13   13        A3    A3 [LO]_10      1       0            A3
# 14   14   [LO]_14    A3 [LO]_14      0       1       [LO]_14
# 15   15         B    A3 [LO]_14      0       0   A3[LO]_14_B
# 16   16         C    A3 [LO]_14      0       0   A3[LO]_14_C
# 17   17         D    A3 [LO]_14      0       0   A3[LO]_14_D
# 18   18         D    A3 [LO]_14      0       0   A3[LO]_14_D
# 19   19       A19   A19 [LO]_14      1       0           A19
# 20   20   [LO]_20   A19 [LO]_20      0       1       [LO]_20
# 21   21   [LO]_21   A19 [LO]_21      0       1       [LO]_21
# 22   22         P   A19 [LO]_21      0       0  A19[LO]_21_P
# 23   23         Q   A19 [LO]_21      0       0  A19[LO]_21_Q
# 24   24       A_5   A_5 [LO]_21      1       0           A_5
# 25   25   [LO]_25   A_5 [LO]_25      0       1       [LO]_25
# 26   26         M   A_5 [LO]_25      0       0  A_5[LO]_25_M
# 27   27         N   A_5 [LO]_25      0       0  A_5[LO]_25_N
# 28   28         P   A_5 [LO]_25      0       0  A_5[LO]_25_P
# 29   29       A_6   A_6 [LO]_25      1       0           A_6
# 30   30   [LO]_30   A_6 [LO]_30      0       1       [LO]_30
# 31   31         B   A_6 [LO]_30      0       0  A_6[LO]_30_B
# 32   32         O   A_6 [LO]_30      0       0  A_6[LO]_30_O
# 33   33         M   A_6 [LO]_30      0       0  A_6[LO]_30_M
# 34   34      AQII  AQII [LO]_30      1       0          AQII
# 35   35         B  AQII [LO]_30      0       0 AQII[LO]_30_B
# 36   36         o  AQII [LO]_30      0       0 AQII[LO]_30_o
# 37   37         L  AQII [LO]_30      0       0 AQII[LO]_30_L
# 38   38         I  AQII [LO]_30      0       0 AQII[LO]_30_I
# 39   39         U  AQII [LO]_30      0       0 AQII[LO]_30_U
# 40   40      [LO]  AQII    [LO]      0       1          [LO]
# 41   41         M  AQII    [LO]      0       0    AQII[LO]_M