从R中的数据框中随机获取一些行后如何获取其余行
How to get the rest of the rows after taking some rows randomly from a dataframe in R
我有 2 个数据框 df_1 和 df_2。现在我需要从 df_1 中随机选取一些行,然后把 df_1 中其余的行(即未被随机选中的行)与 df_2 合并。
我正在使用这个代码
# Seed the RNG so the random draw below is repeatable.
set.seed(9999)
# Placeholder from the question: the real assignment of df_1 is elided.
df_1 <- # the whole dataset
# sample_n() is not defined in this snippet -- presumably dplyr::sample_n();
# TODO confirm which package is attached.
test_dataset1 <- sample_n(df_1, 10)
# NOTE(review): `%in%` treats a data frame as a list of columns, so this
# compares whole columns rather than rows, and single-bracket `df_1[i]`
# selects columns. No column of df_1 matches a sampled column, so every
# column is kept and all rows of df_1 come back unchanged.
train_part_1 <- df_1[which(!df_1 %in% test_dataset1)] # Not working
# Stack the (intended) leftover rows of df_1 underneath df_2.
train_1 <- rbind(df_2, train_part_1)
但是,当我尝试提取未被随机选中的行时,我的代码不起作用:得到的数据与 df_1 完全相同,也就是仍然是 20 行(同一个数据集)。
编辑:实际上,我需要生成 3 个 test 数据集和 3 个 train 数据集。那么,如何使用 set.seed() 函数,使每次运行都得到相同的数据集以便复现?
可重现数据(仅df_1):
# Data literal for df_1 (appears to be dput() output): a 20-row data frame
# whose three columns (nodeA, nodeB, scr) are all factors. Note that scr
# stores numeric-looking strings as factor levels, not numbers.
structure(list(nodeA = structure(c(4L, 2L, 1L, 1L, 1L, 4L, 1L,
9L, 3L, 4L, 2L, 8L, 2L, 1L, 5L, 7L, 3L, 6L, 2L, 1L), .Label = c("ID00309",
"ID00361", "ID00541", "ID00570", "ID00615", "ID00696", "ID00762",
"ID01200", "ID05109"), class = "factor"), nodeB = structure(c(8L,
3L, 3L, 1L, 2L, 7L, 9L, 8L, 8L, 6L, 9L, 7L, 4L, 4L, 6L, 9L, 6L,
7L, 5L, 5L), .Label = c("ID00361", "ID00541", "ID00570", "ID00615",
"ID00696", "ID01200", "ID05109", "ID11641", "ID11691"), class = "factor"),
scr = structure(20:1, .Label = c("1.85284606048794", "1.90444166064472",
"1.90762235378507", "1.94364188077133", "1.95883206119256",
"2.08440437841349", "2.26408172709962", "2.3223132020942",
"2.46120775935034", "2.49647215035727", "2.50432367561777",
"2.57541320006514", "2.65099330092281", "2.75209155741549",
"2.93717640337986", "2.99596628688011", "3.21209741517806",
"3.21997803385465", "3.48788394772132", "3.81389707587156"
), class = "factor")), class = "data.frame", row.names = c(NA,
-20L))
先用随机行号抽取样本,再用负索引 `-` 取得其余的行(即未被抽中的行):
# Rebuild the example data from the question: 20 rows, three factor columns.
# Note that `scr` stores numeric-looking strings as factor levels, not numbers.
df_1 <- structure(
  list(
    nodeA = structure(
      c(4L, 2L, 1L, 1L, 1L, 4L, 1L, 9L, 3L, 4L,
        2L, 8L, 2L, 1L, 5L, 7L, 3L, 6L, 2L, 1L),
      .Label = c("ID00309", "ID00361", "ID00541",
                 "ID00570", "ID00615", "ID00696",
                 "ID00762", "ID01200", "ID05109"),
      class = "factor"
    ),
    nodeB = structure(
      c(8L, 3L, 3L, 1L, 2L, 7L, 9L, 8L, 8L, 6L,
        9L, 7L, 4L, 4L, 6L, 9L, 6L, 7L, 5L, 5L),
      .Label = c("ID00361", "ID00541", "ID00570",
                 "ID00615", "ID00696", "ID01200",
                 "ID05109", "ID11641", "ID11691"),
      class = "factor"
    ),
    scr = structure(
      20:1,
      .Label = c("1.85284606048794", "1.90444166064472",
                 "1.90762235378507", "1.94364188077133",
                 "1.95883206119256", "2.08440437841349",
                 "2.26408172709962", "2.3223132020942",
                 "2.46120775935034", "2.49647215035727",
                 "2.50432367561777", "2.57541320006514",
                 "2.65099330092281", "2.75209155741549",
                 "2.93717640337986", "2.99596628688011",
                 "3.21209741517806", "3.21997803385465",
                 "3.48788394772132", "3.81389707587156"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -20L)
)

# Fix the RNG state first so the same row numbers are drawn on every run.
set.seed(9999)

# Draw 10 distinct row positions out of 1..nrow(df_1).
Selected <- sample.int(nrow(df_1), 10)

# Data frames are indexed as [row, column]: the row positions go before the
# comma and the empty column slot keeps every column. drop = FALSE guarantees
# a data frame even if df_1 had only a single column.
test_dataset1 <- df_1[Selected, , drop = FALSE]

# A negative index removes those rows, leaving the complement of the sample.
# Caveat: this relies on Selected being non-empty; a zero-length negative
# index selects no rows at all instead of every row.
train_part_1 <- df_1[-Selected, , drop = FALSE]
# Auto-print the sampled rows. The `#>` lines are the recorded output; the
# leading number on each line is the row name carried over from df_1.
test_dataset1
#> nodeA nodeB scr
#> 6 ID00570 ID05109 2.93717640337986
#> 9 ID00541 ID11641 2.57541320006514
#> 19 ID00361 ID00696 1.90444166064472
#> 3 ID00309 ID00570 3.21997803385465
#> 10 ID00570 ID01200 2.50432367561777
#> 2 ID00361 ID00570 3.48788394772132
#> 20 ID00309 ID00696 1.85284606048794
#> 8 ID05109 ID11641 2.65099330092281
#> 12 ID01200 ID05109 2.46120775935034
#> 18 ID00696 ID05109 1.90762235378507
# Auto-print the remaining rows. Their row names are exactly the ones missing
# from the sample printed above, still in ascending order.
train_part_1
#> nodeA nodeB scr
#> 1 ID00570 ID11641 3.81389707587156
#> 4 ID00309 ID00361 3.21209741517806
#> 5 ID00309 ID00541 2.99596628688011
#> 7 ID00309 ID11691 2.75209155741549
#> 11 ID00361 ID11691 2.49647215035727
#> 13 ID00361 ID00615 2.3223132020942
#> 14 ID00309 ID00615 2.26408172709962
#> 15 ID00615 ID01200 2.08440437841349
#> 16 ID00762 ID11691 1.95883206119256
#> 17 ID00541 ID01200 1.94364188077133
由 reprex package (v1.0.0)
于 2021 年 3 月 14 日创建
我有 2 个数据框 df_1 和 df_2。现在我需要从 df_1 中随机选取一些行,然后把 df_1 中其余的行(即未被随机选中的行)与 df_2 合并。
我正在使用这个代码
# Seed the RNG so the random draw below is repeatable.
set.seed(9999)
# Placeholder from the question: the real assignment of df_1 is elided.
df_1 <- # the whole dataset
# sample_n() is not defined in this snippet -- presumably dplyr::sample_n();
# TODO confirm which package is attached.
test_dataset1 <- sample_n(df_1, 10)
# NOTE(review): `%in%` treats a data frame as a list of columns, so this
# compares whole columns rather than rows, and single-bracket `df_1[i]`
# selects columns. No column of df_1 matches a sampled column, so every
# column is kept and all rows of df_1 come back unchanged.
train_part_1 <- df_1[which(!df_1 %in% test_dataset1)] # Not working
# Stack the (intended) leftover rows of df_1 underneath df_2.
train_1 <- rbind(df_2, train_part_1)
但是,当我尝试提取未被随机选中的行时,我的代码不起作用:得到的数据与 df_1 完全相同,也就是仍然是 20 行(同一个数据集)。
编辑:实际上,我需要生成 3 个 test 数据集和 3 个 train 数据集。那么,如何使用 set.seed() 函数,使每次运行都得到相同的数据集以便复现?
可重现数据(仅df_1):
# Data literal for df_1 (appears to be dput() output): a 20-row data frame
# whose three columns (nodeA, nodeB, scr) are all factors. Note that scr
# stores numeric-looking strings as factor levels, not numbers.
structure(list(nodeA = structure(c(4L, 2L, 1L, 1L, 1L, 4L, 1L,
9L, 3L, 4L, 2L, 8L, 2L, 1L, 5L, 7L, 3L, 6L, 2L, 1L), .Label = c("ID00309",
"ID00361", "ID00541", "ID00570", "ID00615", "ID00696", "ID00762",
"ID01200", "ID05109"), class = "factor"), nodeB = structure(c(8L,
3L, 3L, 1L, 2L, 7L, 9L, 8L, 8L, 6L, 9L, 7L, 4L, 4L, 6L, 9L, 6L,
7L, 5L, 5L), .Label = c("ID00361", "ID00541", "ID00570", "ID00615",
"ID00696", "ID01200", "ID05109", "ID11641", "ID11691"), class = "factor"),
scr = structure(20:1, .Label = c("1.85284606048794", "1.90444166064472",
"1.90762235378507", "1.94364188077133", "1.95883206119256",
"2.08440437841349", "2.26408172709962", "2.3223132020942",
"2.46120775935034", "2.49647215035727", "2.50432367561777",
"2.57541320006514", "2.65099330092281", "2.75209155741549",
"2.93717640337986", "2.99596628688011", "3.21209741517806",
"3.21997803385465", "3.48788394772132", "3.81389707587156"
), class = "factor")), class = "data.frame", row.names = c(NA,
-20L))
先用随机行号抽取样本,再用负索引 `-` 取得其余的行(即未被抽中的行):
# Rebuild the example data from the question: 20 rows, three factor columns.
# Note that `scr` stores numeric-looking strings as factor levels, not numbers.
df_1 <- structure(
  list(
    nodeA = structure(
      c(4L, 2L, 1L, 1L, 1L, 4L, 1L, 9L, 3L, 4L,
        2L, 8L, 2L, 1L, 5L, 7L, 3L, 6L, 2L, 1L),
      .Label = c("ID00309", "ID00361", "ID00541",
                 "ID00570", "ID00615", "ID00696",
                 "ID00762", "ID01200", "ID05109"),
      class = "factor"
    ),
    nodeB = structure(
      c(8L, 3L, 3L, 1L, 2L, 7L, 9L, 8L, 8L, 6L,
        9L, 7L, 4L, 4L, 6L, 9L, 6L, 7L, 5L, 5L),
      .Label = c("ID00361", "ID00541", "ID00570",
                 "ID00615", "ID00696", "ID01200",
                 "ID05109", "ID11641", "ID11691"),
      class = "factor"
    ),
    scr = structure(
      20:1,
      .Label = c("1.85284606048794", "1.90444166064472",
                 "1.90762235378507", "1.94364188077133",
                 "1.95883206119256", "2.08440437841349",
                 "2.26408172709962", "2.3223132020942",
                 "2.46120775935034", "2.49647215035727",
                 "2.50432367561777", "2.57541320006514",
                 "2.65099330092281", "2.75209155741549",
                 "2.93717640337986", "2.99596628688011",
                 "3.21209741517806", "3.21997803385465",
                 "3.48788394772132", "3.81389707587156"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -20L)
)

# Fix the RNG state first so the same row numbers are drawn on every run.
set.seed(9999)

# Draw 10 distinct row positions out of 1..nrow(df_1).
Selected <- sample.int(nrow(df_1), 10)

# Data frames are indexed as [row, column]: the row positions go before the
# comma and the empty column slot keeps every column. drop = FALSE guarantees
# a data frame even if df_1 had only a single column.
test_dataset1 <- df_1[Selected, , drop = FALSE]

# A negative index removes those rows, leaving the complement of the sample.
# Caveat: this relies on Selected being non-empty; a zero-length negative
# index selects no rows at all instead of every row.
train_part_1 <- df_1[-Selected, , drop = FALSE]
# Auto-print the sampled rows. The `#>` lines are the recorded output; the
# leading number on each line is the row name carried over from df_1.
test_dataset1
#> nodeA nodeB scr
#> 6 ID00570 ID05109 2.93717640337986
#> 9 ID00541 ID11641 2.57541320006514
#> 19 ID00361 ID00696 1.90444166064472
#> 3 ID00309 ID00570 3.21997803385465
#> 10 ID00570 ID01200 2.50432367561777
#> 2 ID00361 ID00570 3.48788394772132
#> 20 ID00309 ID00696 1.85284606048794
#> 8 ID05109 ID11641 2.65099330092281
#> 12 ID01200 ID05109 2.46120775935034
#> 18 ID00696 ID05109 1.90762235378507
# Auto-print the remaining rows. Their row names are exactly the ones missing
# from the sample printed above, still in ascending order.
train_part_1
#> nodeA nodeB scr
#> 1 ID00570 ID11641 3.81389707587156
#> 4 ID00309 ID00361 3.21209741517806
#> 5 ID00309 ID00541 2.99596628688011
#> 7 ID00309 ID11691 2.75209155741549
#> 11 ID00361 ID11691 2.49647215035727
#> 13 ID00361 ID00615 2.3223132020942
#> 14 ID00309 ID00615 2.26408172709962
#> 15 ID00615 ID01200 2.08440437841349
#> 16 ID00762 ID11691 1.95883206119256
#> 17 ID00541 ID01200 1.94364188077133
由 reprex package (v1.0.0)
于 2021 年 3 月 14 日创建