R:用 0 随机替换数据框的元素
R: Randomly Replacing Elements of a Data Frame with 0
我正在使用 R 编程语言。
假设我有以下数据框:
# Every cell of the example holds the same comma-separated string.
var_1 <- var_2 <- var_3 <- var_4 <- var_5 <- "1,2,3,4,5,6,7,8,9,10"
my_data <- data.frame(var_1, var_2, var_3, var_4, var_5)
# Append 100 copies of the first row: 101 identical rows in total.
my_data <- rbind(my_data, my_data[rep(1, 100), ])
# Renumber the rows 1..n.
rownames(my_data) <- seq_len(nrow(my_data))
数据如下所示:
head(my_data)
var_1 var_2 var_3 var_4 var_5
1 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
2 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
3 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
4 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
5 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
6 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
我的问题:我想用 0 随机替换此数据框中的元素 - 例如,最终结果应该是这样的(为简洁起见,我只是显示第一行):
# desired result
var_1 var_2 var_3 var_4 var_5
1 1,0,3,0,5,6,0,0,9,10 1,2,0,4,5,0,0,8,9,0 1,0,3,0,0,0,0,8,9,0 1,2,3,4,0,6,7,0,0,10 1,2,0,4,5,0,7,8,0,10
我尝试使用以下代码执行此操作:
# Attempt from the question: for each column, draw as.integer(0.5 * nrow)
# row indices at random (with replacement) and assign 0 to those cells.
# Each cell is one whole string such as "1,2,...,10", so the assignment
# overwrites the entire string instead of individual comma-separated values.
my_data$var_1[sample(nrow(my_data),as.integer(0.5*nrow(my_data)) , replace = TRUE)] <- 0
my_data$var_2[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_3[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_4[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_5[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
但这样做会把被选中的单元格整体替换为 0(而不是只替换该单元格字符串中的部分元素):
head(my_data)
var_1 var_2 var_3 var_4 var_5
1 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0 0 0
2 0 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0
3 0 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
4 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10
5 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
6 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0
有人可以告诉我我做错了什么以及如何得到想要的结果吗?
谢谢!
这可能是实现目标的一种方式:
在您的示例中,一列中的每个单元格都是一个整体的字符串(长度为 1 的向量),因此替换时整个 "1,2,...,10" 都会变成 0。
为了避免这种情况,一种可能的方法是先用 tidyr 包的 separate_rows 把字符串拆分成多行,完成计算后再用 toString 把它们合并回去:
library(dplyr)
library(tidyr)
# Zero out random pieces of every comma-separated cell.
# Each original row is tagged with an id *before* splitting, so the pieces
# are regrouped by row identity. (Grouping on the cell contents, e.g.
# cumsum(var_1 == 1), only works when every string starts with "1" and
# contains no other "1".)
my_data %>%
  mutate(id_Group = row_number()) %>%
  # One output row per delimited piece of the original strings.
  separate_rows(starts_with("var")) %>%
  group_by(id_Group) %>%
  # Positions are drawn with replacement, so at most ceiling(0.4 * n())
  # pieces per column and row are overwritten with "0".
  mutate(across(starts_with("var"), ~ replace(
    .x,
    sample(row_number(), size = ceiling(0.4 * n()), replace = TRUE),
    "0"
  ))) %>%
  # Collapse the pieces of each original row back into one string per column.
  summarise(across(starts_with("var"), toString), .groups = "drop") %>%
  select(-id_Group)
var_1 var_2 var_3 var_4 var_5
<chr> <chr> <chr> <chr> <chr>
1 1, 2, 0, 4, 0, 6, 7, 0, 9, 10 1, 2, 3, 0, 5, 0, 0, 8, 9, 0 0, 0, 3,~ 1, 2~ 0, 0~
2 1, 2, 3, 4, 0, 6, 0, 8, 9, 0 1, 2, 3, 0, 0, 6, 7, 8, 9, 0 1, 2, 3,~ 1, 0~ 1, 0~
3 1, 0, 3, 4, 5, 0, 7, 8, 9, 0 1, 2, 3, 0, 5, 0, 0, 0, 9, 10 1, 2, 0,~ 1, 2~ 1, 2~
4 1, 0, 0, 4, 5, 0, 7, 8, 9, 0 1, 2, 3, 4, 5, 0, 7, 0, 0, 10 0, 0, 0,~ 0, 0~ 0, 0~
5 0, 0, 3, 4, 0, 6, 7, 8, 9, 10 0, 0, 3, 4, 5, 6, 0, 8, 9, 10 1, 0, 3,~ 1, 2~ 1, 2~
6 1, 0, 0, 4, 5, 6, 0, 8, 9, 10 1, 2, 3, 0, 0, 6, 7, 8, 9, 0 1, 0, 3,~ 1, 2~ 1, 2~
7 1, 0, 3, 4, 5, 0, 0, 8, 0, 10 0, 2, 3, 0, 5, 6, 7, 8, 0, 0 1, 2, 3,~ 1, 2~ 1, 0~
8 1, 2, 0, 4, 0, 0, 7, 8, 9, 10 1, 2, 3, 0, 5, 0, 7, 8, 0, 0 1, 0, 3,~ 1, 0~ 0, 2~
9 1, 2, 3, 0, 5, 0, 0, 0, 9, 10 1, 2, 3, 4, 0, 0, 0, 8, 9, 0 1, 2, 0,~ 0, 0~ 0, 0~
10 1, 2, 3, 0, 5, 6, 7, 8, 9, 0 1, 2, 0, 4, 5, 6, 7, 8, 0, 10 1, 0, 3,~ 1, 2~ 1, 2~
# ... with 91 more rows
这是一个方法。
创建一个函数,在每个字符串拆分出的元素中随机抽取 n 个(默认为 1)并替换为 0,然后用 lapply 把该函数应用到需要修改的列上。在发布的示例中,所有列都会被修改。
# Use a wide console line so the printed rows are not wrapped.
options(width = 205)
# Every cell of the example holds the same comma-separated string.
var_1 <- var_2 <- var_3 <- var_4 <- var_5 <- "1,2,3,4,5,6,7,8,9,10"
my_data <- data.frame(var_1, var_2, var_3, var_4, var_5)
# Append 100 copies of the first row: 101 identical rows in total.
my_data <- rbind(my_data, my_data[rep(1, 100), ])
# Renumber the rows 1..n.
rownames(my_data) <- seq_len(nrow(my_data))
# Second, untouched copy for the n = 4 example further down.
my_data1 <- my_data
#' Randomly overwrite delimited values inside strings with "0"
#'
#' Each element of `x` is split on `split`, up to `n` of the resulting pieces
#' are chosen at random (without replacement) and set to "0", and the pieces
#' are pasted back together with the same separator.
#'
#' @param x Character vector of delimited strings.
#' @param n Number of pieces to zero out per string; capped at the number of
#'   pieces the string actually has. Defaults to 1.
#' @param split Separator. It is matched literally (not as a regular
#'   expression) so that separators such as "|" or "." split and re-join
#'   consistently.
#' @return Character vector of the same length as `x`; `NA` inputs stay `NA`.
randReplace <- function(x, n = 1L, split = ",") {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  # fixed = TRUE: the same literal separator is used for splitting and for
  # pasting, so regex metacharacters cannot corrupt the round trip.
  pieces <- strsplit(x, split = split, fixed = TRUE)
  # vapply() always yields a character vector, including for zero-length x.
  vapply(pieces, function(.y) {
    # strsplit() maps a missing string to a single NA piece; keep it missing
    # instead of turning it into "0".
    if (length(.y) == 1L && is.na(.y)) {
      return(NA_character_)
    }
    m <- min(n, length(.y))
    # Sample positions, not values, so a one-piece string is handled safely.
    .y[sample(seq_along(.y), m)] <- "0"
    paste(.y, collapse = split)
  }, character(1))
}
my_data[] <- lapply(my_data, randReplace)
head(my_data)
#> var_1 var_2 var_3 var_4 var_5
#> 1 1,2,3,4,5,0,7,8,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,0,7,8,9,10 1,2,3,4,0,6,7,8,9,10 1,2,3,0,5,6,7,8,9,10
#> 2 1,0,3,4,5,6,7,8,9,10 1,2,0,4,5,6,7,8,9,10 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,0,7,8,9,10 0,2,3,4,5,6,7,8,9,10
#> 3 1,2,3,4,0,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,0,8,9,10 1,2,3,4,0,6,7,8,9,10
#> 4 1,2,0,4,5,6,7,8,9,10 1,2,3,4,0,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10 1,2,3,4,5,6,0,8,9,10 1,0,3,4,5,6,7,8,9,10
#> 5 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,6,7,8,0,10 1,2,3,0,5,6,7,8,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10
#> 6 1,2,3,4,5,6,0,8,9,10 1,2,3,4,5,6,7,8,9,0 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,0 1,0,3,4,5,6,7,8,9,10
my_data1[] <- lapply(my_data1, randReplace, n = 4)
head(my_data1)
#> var_1 var_2 var_3 var_4 var_5
#> 1 1,2,3,4,0,0,0,8,9,0 0,2,0,0,5,6,7,8,9,0 1,0,0,4,0,6,0,8,9,10 0,0,3,4,0,0,7,8,9,10 1,0,3,4,5,6,0,8,0,0
#> 2 1,0,3,4,5,6,0,8,0,0 1,0,3,0,0,6,7,8,9,0 0,2,3,4,5,0,0,8,0,10 1,0,0,4,5,0,7,8,0,10 0,0,3,4,0,6,7,8,9,0
#> 3 0,2,3,4,5,6,7,0,0,0 0,2,0,0,5,6,7,8,0,10 1,2,0,4,5,6,0,0,9,0 0,2,0,0,5,0,7,8,9,10 0,2,0,4,5,0,7,8,9,0
#> 4 1,2,3,0,0,6,0,8,0,10 1,0,3,4,5,0,7,8,0,0 1,2,0,0,5,0,7,0,9,10 1,0,3,0,5,6,0,8,9,0 0,2,0,4,0,6,0,8,9,10
#> 5 1,2,3,4,0,0,7,0,9,0 0,0,0,0,5,6,7,8,9,10 0,0,3,4,5,6,0,0,9,10 0,2,3,4,0,0,7,0,9,10 1,0,0,0,5,6,7,8,0,10
#> 6 1,2,0,4,0,0,0,8,9,10 0,2,3,0,0,0,7,8,9,10 0,2,0,4,0,6,7,8,0,10 1,2,3,4,5,0,0,0,0,10 0,2,0,4,0,6,7,8,9,0
由 reprex 包 (v2.0.1) 于 2022-04-10 创建
这个版本使用 Map,允许您通过概率向量 pnul 为每一列分别指定元素变为 0 的比例。将拆分后字符串的 length 乘以 pnul 中对应的元素,即可得到要用 sample 抽取并置为 0 的元素个数。您也可以把 pnul 设为标量,让所有列使用相同的概率。
# Share of elements to zero out in each column: one entry per column of
# my_data, or a single value applied to every column.
pnul <- c(.0, .2, .5, .8, 1)
# Map() would otherwise recycle a wrong-length pnul silently.
stopifnot(
  length(pnul) %in% c(1L, ncol(my_data)),
  all(pnul >= 0 & pnul <= 1)
)
res <- Map(\(x, a) {
  S <- strsplit(x, ",", fixed = TRUE)
  # vapply() guarantees a character vector even when the column has no rows.
  vapply(S, \(s) {
    # Number of elements to replace is floor(length * share). The small
    # tolerance keeps products such as 100 * 0.29 (28.999...) from dropping
    # one element to floating-point error.
    k <- floor(length(s) * a + sqrt(.Machine$double.eps))
    s[sample(seq_along(s), k)] <- "0"
    paste(s, collapse = ",")
  }, character(1))
}, my_data, pnul) |> as.data.frame()
head(res)
# var_1 var_2 var_3 var_4 var_5
# 1 1,2,3,4,5,6,7,8,9,10 0,0,3,4,5,6,7,8,9,10 1,2,0,4,0,0,7,8,0,0 0,0,0,0,0,0,0,8,9,0 0,0,0,0,0,0,0,0,0,0
# 2 1,2,3,4,5,6,7,8,9,10 1,0,3,4,5,6,7,8,9,0 1,0,3,0,5,0,0,0,9,10 0,0,0,0,0,0,7,8,0,0 0,0,0,0,0,0,0,0,0,0
# 3 1,2,3,4,5,6,7,8,9,10 1,0,0,4,5,6,7,8,9,10 1,0,0,0,0,6,7,0,9,10 0,0,0,0,5,0,0,0,0,10 0,0,0,0,0,0,0,0,0,0
# 4 1,2,3,4,5,6,7,8,9,10 1,2,3,0,5,6,7,0,9,10 0,0,3,0,5,0,7,0,9,10 0,0,0,4,0,0,7,0,0,0 0,0,0,0,0,0,0,0,0,0
# 5 1,2,3,4,5,6,7,8,9,10 1,0,3,4,5,6,7,8,9,0 0,2,0,4,5,0,7,0,0,10 1,0,0,0,0,0,0,8,0,0 0,0,0,0,0,0,0,0,0,0
# 6 1,2,3,4,5,6,7,8,9,10 0,2,3,4,5,6,0,8,9,10 1,2,3,0,5,0,7,0,0,0 0,0,0,4,5,0,0,0,0,0 0,0,0,0,0,0,0,0,0,0
我正在使用 R 编程语言。
假设我有以下数据框:
# Every cell of the example holds the same comma-separated string.
var_1 <- var_2 <- var_3 <- var_4 <- var_5 <- "1,2,3,4,5,6,7,8,9,10"
my_data <- data.frame(var_1, var_2, var_3, var_4, var_5)
# Append 100 copies of the first row: 101 identical rows in total.
my_data <- rbind(my_data, my_data[rep(1, 100), ])
# Renumber the rows 1..n.
rownames(my_data) <- seq_len(nrow(my_data))
数据如下所示:
head(my_data)
var_1 var_2 var_3 var_4 var_5
1 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
2 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
3 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
4 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
5 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
6 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
我的问题:我想用 0 随机替换此数据框中的元素 - 例如,最终结果应该是这样的(为简洁起见,我只是显示第一行):
# desired result
var_1 var_2 var_3 var_4 var_5
1 1,0,3,0,5,6,0,0,9,10 1,2,0,4,5,0,0,8,9,0 1,0,3,0,0,0,0,8,9,0 1,2,3,4,0,6,7,0,0,10 1,2,0,4,5,0,7,8,0,10
我尝试使用以下代码执行此操作:
# Attempt from the question: for each column, draw as.integer(0.5 * nrow)
# row indices at random (with replacement) and assign 0 to those cells.
# Each cell is one whole string such as "1,2,...,10", so the assignment
# overwrites the entire string instead of individual comma-separated values.
my_data$var_1[sample(nrow(my_data),as.integer(0.5*nrow(my_data)) , replace = TRUE)] <- 0
my_data$var_2[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_3[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_4[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
my_data$var_5[sample(nrow(my_data),as.integer(0.5*nrow(my_data)), replace = TRUE)] <- 0
但这样做会把被选中的单元格整体替换为 0(而不是只替换该单元格字符串中的部分元素):
head(my_data)
var_1 var_2 var_3 var_4 var_5
1 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0 0 0
2 0 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0
3 0 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
4 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10
5 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10
6 1,2,3,4,5,6,7,8,9,10 0 1,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,10 0
有人可以告诉我我做错了什么以及如何得到想要的结果吗?
谢谢!
这可能是实现目标的一种方式:
在您的示例中,一列中的每个单元格都是一个整体的字符串(长度为 1 的向量),因此替换时整个 "1,2,...,10" 都会变成 0。
为了避免这种情况,一种可能的方法是先用 tidyr 包的 separate_rows 把字符串拆分成多行,完成计算后再用 toString 把它们合并回去:
library(dplyr)
library(tidyr)
# Zero out random pieces of every comma-separated cell.
# Each original row is tagged with an id *before* splitting, so the pieces
# are regrouped by row identity. (Grouping on the cell contents, e.g.
# cumsum(var_1 == 1), only works when every string starts with "1" and
# contains no other "1".)
my_data %>%
  mutate(id_Group = row_number()) %>%
  # One output row per delimited piece of the original strings.
  separate_rows(starts_with("var")) %>%
  group_by(id_Group) %>%
  # Positions are drawn with replacement, so at most ceiling(0.4 * n())
  # pieces per column and row are overwritten with "0".
  mutate(across(starts_with("var"), ~ replace(
    .x,
    sample(row_number(), size = ceiling(0.4 * n()), replace = TRUE),
    "0"
  ))) %>%
  # Collapse the pieces of each original row back into one string per column.
  summarise(across(starts_with("var"), toString), .groups = "drop") %>%
  select(-id_Group)
var_1 var_2 var_3 var_4 var_5
<chr> <chr> <chr> <chr> <chr>
1 1, 2, 0, 4, 0, 6, 7, 0, 9, 10 1, 2, 3, 0, 5, 0, 0, 8, 9, 0 0, 0, 3,~ 1, 2~ 0, 0~
2 1, 2, 3, 4, 0, 6, 0, 8, 9, 0 1, 2, 3, 0, 0, 6, 7, 8, 9, 0 1, 2, 3,~ 1, 0~ 1, 0~
3 1, 0, 3, 4, 5, 0, 7, 8, 9, 0 1, 2, 3, 0, 5, 0, 0, 0, 9, 10 1, 2, 0,~ 1, 2~ 1, 2~
4 1, 0, 0, 4, 5, 0, 7, 8, 9, 0 1, 2, 3, 4, 5, 0, 7, 0, 0, 10 0, 0, 0,~ 0, 0~ 0, 0~
5 0, 0, 3, 4, 0, 6, 7, 8, 9, 10 0, 0, 3, 4, 5, 6, 0, 8, 9, 10 1, 0, 3,~ 1, 2~ 1, 2~
6 1, 0, 0, 4, 5, 6, 0, 8, 9, 10 1, 2, 3, 0, 0, 6, 7, 8, 9, 0 1, 0, 3,~ 1, 2~ 1, 2~
7 1, 0, 3, 4, 5, 0, 0, 8, 0, 10 0, 2, 3, 0, 5, 6, 7, 8, 0, 0 1, 2, 3,~ 1, 2~ 1, 0~
8 1, 2, 0, 4, 0, 0, 7, 8, 9, 10 1, 2, 3, 0, 5, 0, 7, 8, 0, 0 1, 0, 3,~ 1, 0~ 0, 2~
9 1, 2, 3, 0, 5, 0, 0, 0, 9, 10 1, 2, 3, 4, 0, 0, 0, 8, 9, 0 1, 2, 0,~ 0, 0~ 0, 0~
10 1, 2, 3, 0, 5, 6, 7, 8, 9, 0 1, 2, 0, 4, 5, 6, 7, 8, 0, 10 1, 0, 3,~ 1, 2~ 1, 2~
# ... with 91 more rows
这是一个方法。
创建一个函数,在每个字符串拆分出的元素中随机抽取 n 个(默认为 1)并替换为 0,然后用 lapply 把该函数应用到需要修改的列上。在发布的示例中,所有列都会被修改。
# Use a wide console line so the printed rows are not wrapped.
options(width = 205)
# Every cell of the example holds the same comma-separated string.
var_1 <- var_2 <- var_3 <- var_4 <- var_5 <- "1,2,3,4,5,6,7,8,9,10"
my_data <- data.frame(var_1, var_2, var_3, var_4, var_5)
# Append 100 copies of the first row: 101 identical rows in total.
my_data <- rbind(my_data, my_data[rep(1, 100), ])
# Renumber the rows 1..n.
rownames(my_data) <- seq_len(nrow(my_data))
# Second, untouched copy for the n = 4 example further down.
my_data1 <- my_data
#' Randomly overwrite delimited values inside strings with "0"
#'
#' Each element of `x` is split on `split`, up to `n` of the resulting pieces
#' are chosen at random (without replacement) and set to "0", and the pieces
#' are pasted back together with the same separator.
#'
#' @param x Character vector of delimited strings.
#' @param n Number of pieces to zero out per string; capped at the number of
#'   pieces the string actually has. Defaults to 1.
#' @param split Separator. It is matched literally (not as a regular
#'   expression) so that separators such as "|" or "." split and re-join
#'   consistently.
#' @return Character vector of the same length as `x`; `NA` inputs stay `NA`.
randReplace <- function(x, n = 1L, split = ",") {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  # fixed = TRUE: the same literal separator is used for splitting and for
  # pasting, so regex metacharacters cannot corrupt the round trip.
  pieces <- strsplit(x, split = split, fixed = TRUE)
  # vapply() always yields a character vector, including for zero-length x.
  vapply(pieces, function(.y) {
    # strsplit() maps a missing string to a single NA piece; keep it missing
    # instead of turning it into "0".
    if (length(.y) == 1L && is.na(.y)) {
      return(NA_character_)
    }
    m <- min(n, length(.y))
    # Sample positions, not values, so a one-piece string is handled safely.
    .y[sample(seq_along(.y), m)] <- "0"
    paste(.y, collapse = split)
  }, character(1))
}
my_data[] <- lapply(my_data, randReplace)
head(my_data)
#> var_1 var_2 var_3 var_4 var_5
#> 1 1,2,3,4,5,0,7,8,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,0,7,8,9,10 1,2,3,4,0,6,7,8,9,10 1,2,3,0,5,6,7,8,9,10
#> 2 1,0,3,4,5,6,7,8,9,10 1,2,0,4,5,6,7,8,9,10 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,0,7,8,9,10 0,2,3,4,5,6,7,8,9,10
#> 3 1,2,3,4,0,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,0,8,9,10 1,2,3,4,0,6,7,8,9,10
#> 4 1,2,0,4,5,6,7,8,9,10 1,2,3,4,0,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10 1,2,3,4,5,6,0,8,9,10 1,0,3,4,5,6,7,8,9,10
#> 5 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,6,7,8,0,10 1,2,3,0,5,6,7,8,9,10 0,2,3,4,5,6,7,8,9,10 1,2,3,4,5,6,7,0,9,10
#> 6 1,2,3,4,5,6,0,8,9,10 1,2,3,4,5,6,7,8,9,0 1,2,3,0,5,6,7,8,9,10 1,2,3,4,5,6,7,8,9,0 1,0,3,4,5,6,7,8,9,10
my_data1[] <- lapply(my_data1, randReplace, n = 4)
head(my_data1)
#> var_1 var_2 var_3 var_4 var_5
#> 1 1,2,3,4,0,0,0,8,9,0 0,2,0,0,5,6,7,8,9,0 1,0,0,4,0,6,0,8,9,10 0,0,3,4,0,0,7,8,9,10 1,0,3,4,5,6,0,8,0,0
#> 2 1,0,3,4,5,6,0,8,0,0 1,0,3,0,0,6,7,8,9,0 0,2,3,4,5,0,0,8,0,10 1,0,0,4,5,0,7,8,0,10 0,0,3,4,0,6,7,8,9,0
#> 3 0,2,3,4,5,6,7,0,0,0 0,2,0,0,5,6,7,8,0,10 1,2,0,4,5,6,0,0,9,0 0,2,0,0,5,0,7,8,9,10 0,2,0,4,5,0,7,8,9,0
#> 4 1,2,3,0,0,6,0,8,0,10 1,0,3,4,5,0,7,8,0,0 1,2,0,0,5,0,7,0,9,10 1,0,3,0,5,6,0,8,9,0 0,2,0,4,0,6,0,8,9,10
#> 5 1,2,3,4,0,0,7,0,9,0 0,0,0,0,5,6,7,8,9,10 0,0,3,4,5,6,0,0,9,10 0,2,3,4,0,0,7,0,9,10 1,0,0,0,5,6,7,8,0,10
#> 6 1,2,0,4,0,0,0,8,9,10 0,2,3,0,0,0,7,8,9,10 0,2,0,4,0,6,7,8,0,10 1,2,3,4,5,0,0,0,0,10 0,2,0,4,0,6,7,8,9,0
由 reprex 包 (v2.0.1) 于 2022-04-10 创建
这个版本使用 Map,允许您通过概率向量 pnul 为每一列分别指定元素变为 0 的比例。将拆分后字符串的 length 乘以 pnul 中对应的元素,即可得到要用 sample 抽取并置为 0 的元素个数。您也可以把 pnul 设为标量,让所有列使用相同的概率。
# Share of elements to zero out in each column: one entry per column of
# my_data, or a single value applied to every column.
pnul <- c(.0, .2, .5, .8, 1)
# Map() would otherwise recycle a wrong-length pnul silently.
stopifnot(
  length(pnul) %in% c(1L, ncol(my_data)),
  all(pnul >= 0 & pnul <= 1)
)
res <- Map(\(x, a) {
  S <- strsplit(x, ",", fixed = TRUE)
  # vapply() guarantees a character vector even when the column has no rows.
  vapply(S, \(s) {
    # Number of elements to replace is floor(length * share). The small
    # tolerance keeps products such as 100 * 0.29 (28.999...) from dropping
    # one element to floating-point error.
    k <- floor(length(s) * a + sqrt(.Machine$double.eps))
    s[sample(seq_along(s), k)] <- "0"
    paste(s, collapse = ",")
  }, character(1))
}, my_data, pnul) |> as.data.frame()
head(res)
# var_1 var_2 var_3 var_4 var_5
# 1 1,2,3,4,5,6,7,8,9,10 0,0,3,4,5,6,7,8,9,10 1,2,0,4,0,0,7,8,0,0 0,0,0,0,0,0,0,8,9,0 0,0,0,0,0,0,0,0,0,0
# 2 1,2,3,4,5,6,7,8,9,10 1,0,3,4,5,6,7,8,9,0 1,0,3,0,5,0,0,0,9,10 0,0,0,0,0,0,7,8,0,0 0,0,0,0,0,0,0,0,0,0
# 3 1,2,3,4,5,6,7,8,9,10 1,0,0,4,5,6,7,8,9,10 1,0,0,0,0,6,7,0,9,10 0,0,0,0,5,0,0,0,0,10 0,0,0,0,0,0,0,0,0,0
# 4 1,2,3,4,5,6,7,8,9,10 1,2,3,0,5,6,7,0,9,10 0,0,3,0,5,0,7,0,9,10 0,0,0,4,0,0,7,0,0,0 0,0,0,0,0,0,0,0,0,0
# 5 1,2,3,4,5,6,7,8,9,10 1,0,3,4,5,6,7,8,9,0 0,2,0,4,5,0,7,0,0,10 1,0,0,0,0,0,0,8,0,0 0,0,0,0,0,0,0,0,0,0
# 6 1,2,3,4,5,6,7,8,9,10 0,2,3,4,5,6,0,8,9,10 1,2,3,0,5,0,7,0,0,0 0,0,0,4,5,0,0,0,0,0 0,0,0,0,0,0,0,0,0,0