在 R 数据框中,从特定行开始将一列的数据复制到另一列
Copy data from one column to another column from a specific row in R dataframe
我有一个数据框(维度为 1134 × 14),我想把其中一列的部分数据复制到同一数据框的另一列,以便进一步重塑数据用于绘图。具体来说,我想复制第 371 行到第 1134 行的数据:把 Aggregate 列的值复制到 Cluster 列,把 sub_aggregate 列的值复制到 Cluster_location 列。
请帮助我如何从特定行复制数据。
谢谢,
图菲克
下面提供了输入和预期输出数据的示例:
Input
dput(df_1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28",
"A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "",
"", "", "", "", "", "", "", ""), Cluster_location = c(1L,
1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Function = c("Biological processes", "Molecular functions",
"cellular components", "cellular components", "cellular components",
"cellular components", "Biological processes", "Biological processes",
"TBD", "", "", "", "", "", "", "", "", ""), Aggregate = c("",
"", "", "", "", "", "", "", "", "A8", "A8", "A8", "A8", "A8",
"A8", "A8", "A8", "A8"), sub_aggregate = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
9L)), class = "data.frame", row.names = c(NA, -18L))
Expected Output
dput(df_2)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28",
"A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "A8",
"A8", "A8", "A8", "A8", "A8", "A8", "A8", "A8"), Cluster_location = c(1L,
1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 9L), Function = c("Biological processes",
"Molecular functions", "cellular components", "cellular components",
"cellular components", "cellular components", "Biological processes",
"Biological processes", "TBD", "", "", "", "", "", "", "",
"", ""), Aggregate = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), sub_aggregate = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), class = "data.frame", row.names = c(NA, -18L))
#################################
编辑:运行下面建议的代码之后,得到的结果如下(Cluster 列中混入了 "NA" 字符串):
dput(df_n_v1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28NA",
"A34NA", "A38NA", "A1NA", "A37NA", "A37NA", "A37NA", "A1NA",
"A35NA", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8",
"NAA8", "NAA8", "NAA8"), Cluster_location = c(1L, 1L, 2L,
19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 9L), Function = c("Biological processes", "Molecular functions",
"cellular components", "cellular components", "cellular components",
"cellular components", "Biological processes", "Biological processes",
"TBD", "", "", "", "", "", "", "", "", "")), class = "data.frame", row.names = c(NA,
-18L))
您可以使用 tidyr 包中的 unite() 函数来合并这两组列:
library(tidyr)
library(dplyr)
library(stringr)
# Fold each source column into its target column: Aggregate -> Cluster and
# sub_aggregate -> Cluster_location. unite() pastes the inputs together and
# drops the source columns. In the sample df_1 each row carries a value in at
# most one column of a pair, so the pasted string is simply that value.
# na.rm = TRUE removes missing values before pasting, which prevents literal
# "NA" text such as "A28NA" / "NAA8" from appearing in the result.
df_2 <- df_1 %>%
  unite(Cluster, Cluster, Aggregate, sep = "", na.rm = TRUE) %>%
  unite(
    Cluster_location, Cluster_location, sub_aggregate,
    sep = "", na.rm = TRUE
  ) %>%
  # unite() returns character; restore the integer type of the inputs.
  # Rows where both inputs were NA come back as "" and convert to NA.
  dplyr::mutate(Cluster_location = as.integer(Cluster_location))
我有一个数据框(维度为 1134 × 14),我想把其中一列的部分数据复制到同一数据框的另一列,以便进一步重塑数据用于绘图。具体来说,我想复制第 371 行到第 1134 行的数据:把 Aggregate 列的值复制到 Cluster 列,把 sub_aggregate 列的值复制到 Cluster_location 列。
请帮助我如何从特定行复制数据。
谢谢,
图菲克
下面提供了输入和预期输出数据的示例:
Input
dput(df_1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28",
"A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "",
"", "", "", "", "", "", "", ""), Cluster_location = c(1L,
1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Function = c("Biological processes", "Molecular functions",
"cellular components", "cellular components", "cellular components",
"cellular components", "Biological processes", "Biological processes",
"TBD", "", "", "", "", "", "", "", "", ""), Aggregate = c("",
"", "", "", "", "", "", "", "", "A8", "A8", "A8", "A8", "A8",
"A8", "A8", "A8", "A8"), sub_aggregate = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
9L)), class = "data.frame", row.names = c(NA, -18L))
Expected Output
dput(df_2)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28",
"A34", "A38", "A1", "A37", "A37", "A37", "A1", "A35", "A8",
"A8", "A8", "A8", "A8", "A8", "A8", "A8", "A8"), Cluster_location = c(1L,
1L, 2L, 19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L,
40L, 41L, 42L, 9L), Function = c("Biological processes",
"Molecular functions", "cellular components", "cellular components",
"cellular components", "cellular components", "Biological processes",
"Biological processes", "TBD", "", "", "", "", "", "", "",
"", ""), Aggregate = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), sub_aggregate = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), class = "data.frame", row.names = c(NA, -18L))
#################################
编辑:运行下面建议的代码之后,得到的结果如下(Cluster 列中混入了 "NA" 字符串):
dput(df_n_v1)
structure(list(position = c("A28.1", "A34.1", "A38.2", "A1.19",
"A37.10", "A37.2", "A37.11", "A1.32", "A35.6", "A8.35", "A8.36",
"A8.37", "A8.38", "A8.39", "A8.40", "A8.41", "A8.42", "A8.9"),
Set = c("M10.1", "M10.3", "M10.4", "M11.1", "M11.2", "M11.3",
"M11.4", "M12.1", "M12.10", "", "", "", "", "", "", "", "",
""), Group_A = c(95.23809524, -63.15789474, 33.33333333,
0, -23.33333333, -26.31578947, -20.83333333, 17.39130435,
-67.34693878, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_B = c(0,
36.84210526, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_C = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Group_D = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA,
NA, NA), Group_E = c(76.19047619, -26.31578947, 0, 0, 0,
0, 0, 19.56521739, -34.69387755, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Group_F = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, NA, NA, NA, NA, NA, NA, NA, NA, NA), Cluster = c("A28NA",
"A34NA", "A38NA", "A1NA", "A37NA", "A37NA", "A37NA", "A1NA",
"A35NA", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8", "NAA8",
"NAA8", "NAA8", "NAA8"), Cluster_location = c(1L, 1L, 2L,
19L, 10L, 2L, 11L, 32L, 6L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 9L), Function = c("Biological processes", "Molecular functions",
"cellular components", "cellular components", "cellular components",
"cellular components", "Biological processes", "Biological processes",
"TBD", "", "", "", "", "", "", "", "", "")), class = "data.frame", row.names = c(NA,
-18L))
您可以使用 tidyr 包中的 unite() 函数来合并这两组列:
library(tidyr)
library(dplyr)
library(stringr)
# Fold each source column into its target column: Aggregate -> Cluster and
# sub_aggregate -> Cluster_location. unite() pastes the inputs together and
# drops the source columns. In the sample df_1 each row carries a value in at
# most one column of a pair, so the pasted string is simply that value.
# na.rm = TRUE removes missing values before pasting, which prevents literal
# "NA" text such as "A28NA" / "NAA8" from appearing in the result.
df_2 <- df_1 %>%
  unite(Cluster, Cluster, Aggregate, sep = "", na.rm = TRUE) %>%
  unite(
    Cluster_location, Cluster_location, sub_aggregate,
    sep = "", na.rm = TRUE
  ) %>%
  # unite() returns character; restore the integer type of the inputs.
  # Rows where both inputs were NA come back as "" and convert to NA.
  dplyr::mutate(Cluster_location = as.integer(Cluster_location))