将数据从 R 数据框中的特定行的一列复制到另一列

Copy data from one column to another column from a specific row in R dataframe

我有一个数据框(维度为 1134 × 14),想在 R 中把其中一列的部分数据值复制到同一数据框的另一列,以便进一步重塑数据用于绘图。具体来说,我想复制第 371 行至第 1134 行的数据值:一组从 Aggregate 列复制到 Cluster 列,另一组从 sub_aggregate 列复制到 Cluster_location 列。

请问如何从特定行开始复制数据?

谢谢,

图菲克

下面提供了输入和预期输出数据的示例:

Input

dput(df_1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19", 
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36", 
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"), 
    Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3", 
    "M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "", 
    ""), Group_A = c(95.23809524, -63.15789474, 33.33333333, 
    0, -23.33333333, -26.31578947, -20.83333333, 17.39130435, 
    -67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0, 
    36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0, 
    0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28", 
    "A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "", 
    "", "", "", "", "", "", "", ""), Cluster_location = c(1L, 
    1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Function = c("Biological processes", "Molecular functions", 
    "cellular components", "cellular components", "cellular components", 
    "cellular components", "Biological processes", "Biological processes", 
    "TBD", "", "", "", "", "", "", "", "", ""), Aggregate = c("", 
    "", "", "", "", "", "", "", "", "A8", "A8", "A8", "A8", "A8", 
    "A8", "A8", "A8", "A8"), sub_aggregate = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 
    9L)), class = "data.frame", row.names = c(NA, -18L))

Expected Output

dput(df_2)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19", 
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36", 
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"), 
    Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3", 
    "M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "", 
    ""), Group_A = c(95.23809524, -63.15789474, 33.33333333, 
    0, -23.33333333, -26.31578947, -20.83333333, 17.39130435, 
    -67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0, 
    36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0, 
    0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28", 
    "A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "A8", 
    "A8", "A8", "A8", "A8", "A8", "A8", "A8", "A8"), Cluster_location = c(1L, 
    1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L, 
    40L, 41L, 42L, 9L), Function = c("Biological processes", 
    "Molecular functions", "cellular components", "cellular components", 
    "cellular components", "cellular components", "Biological processes", 
    "Biological processes", "TBD", "", "", "", "", "", "", "", 
    "", ""), Aggregate = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), sub_aggregate = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA)), class = "data.frame", row.names = c(NA, -18L))

#################################

编辑:运行下面建议的代码之后,得到的结果如下(Cluster 列中出现了多余的 "NA" 字符串):

dput(df_n_v1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19", 
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36", 
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"), 
    Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3", 
    "M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "", 
    ""), Group_A = c(95.23809524, -63.15789474, 33.33333333, 
    0, -23.33333333, -26.31578947, -20.83333333, 17.39130435, 
    -67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0, 
    36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0, 
    0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28NA", 
    "A34NA", "A38NA", "A1NA", "A37NA", "A37NA", "A37NA", "A1NA", 
    "A35NA", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8", 
    "NAA8", "NAA8", "NAA8"), Cluster_location = c(1L, 1L, 2L, 
    19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L, 40L, 
    41L, 42L, 9L), Function = c("Biological processes", "Molecular functions", 
    "cellular components", "cellular components", "cellular components", 
    "cellular components", "Biological processes", "Biological processes", 
    "TBD", "", "", "", "", "", "", "", "", "")), class = "data.frame", row.names = c(NA, 
-18L))

您可以使用 tidyr 包中的 unite() 函数:

library(tidyr)
library(dplyr)
library(stringr)

# Merge Aggregate into Cluster and sub_aggregate into Cluster_location.
#
# Assumes that in every row at most one column of each pair holds a value and
# the other is NA or "" (true for the sample data; verify on the full data).
# If both columns of a pair were filled, their values would be pasted together.
#
# * na.rm = TRUE makes unite() drop missing values before pasting, so an NA is
#   never turned into the literal text "NA" (e.g. "A28NA" or "NAA8"). This
#   argument requires tidyr >= 1.0.0.
# * unite() always produces a character column, so Cluster_location is
#   converted back to integer afterwards; rows where both inputs were NA
#   become "" and then NA_integer_.
# * unite() removes the input columns Aggregate and sub_aggregate by default
#   (remove = TRUE), so they are absent from df_2.
df_2 <- df_1 %>%
  unite(Cluster, Cluster, Aggregate, sep = "", na.rm = TRUE) %>%
  unite(
    Cluster_location, Cluster_location, sub_aggregate,
    sep = "", na.rm = TRUE
  ) %>%
  dplyr::mutate(Cluster_location = as.integer(Cluster_location))