R 中具有最大条件的样本
Sample in R with a maximum condition
为了评估我的学生的岩相学,我需要用 2 种不同的岩石制作盒子,学生需要认出它们。我想写一个 R 脚本来为每个盒子分配石头。
我是这样做的:
rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
for (i in 1:24) {
samp <- sample(rocks,2)
if (i==1) {
box <- samp
} else {
box <- rbind(box,samp)
}
}
效果很好
box
[,1] [,2]
"granite" "gneiss"
"marl" "chalk"
"granite" "gneiss"
"pumice" "marl"
"chalk" "granite"
"gneiss" "granite"
"pumice" "chalk"
"gneiss" "marl"
"gneiss" "pumice"
"gneiss" "pumice"
"pumice" "gneiss"
"pumice" "marl"
"marl" "chalk"
"granite" "gneiss"
"granite" "chalk"
"gneiss" "granite"
"pumice" "granite"
"pumice" "chalk"
"pumice" "granite"
"gneiss" "pumice"
"pumice" "granite"
"granite" "marl"
"marl" "gneiss"
"pumice" "gneiss"
但是,对于岩石“粉笔”和“泥灰岩”,我只有 3 个样品。因此,我生成的dataframe box是不可行的。
如何让我的脚本只生成 3 个“chalk”和“marl”样本?
提前致谢,
不是最优雅的方式,但应该这样做:
rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
nb.chalk = 0
nb.marl = 0
for (i in 1:24) {
# bad.sample: when 3 rocks from chalk and marl were already attributed but still sampled
bad.sample = TRUE
while (bad.sample){
samp <- sample(rocks,2)
bad.sample = (nb.chalk == 3 & 'chalk' %in% samp) | (nb.marl == 3 & 'marl' %in% samp)
}
# count the numbers of chalk and marl already obtained
if ('chalk' %in% samp) nb.chalk = nb.chalk +1
if ('marl' %in% samp) nb.marl = nb.marl +1
if (i==1) {
box <- samp
} else {
box <- rbind(box,samp)
}
}
这是一个没有 for 循环的 tidyverse
解决方案:
library(tidyverse)
rox <- bind_rows(
tibble(rox = c("marl","chalk")) %>%
split(.$rox) %>%
map_df(~.x %>% slice(rep(row_number(), 3))),
tibble(rox = c("granite","pumice","gneiss")) %>%
split(.$rox) %>%
map_df(~.x %>% slice(rep(row_number(), 10)))
)
box <- rox %>%
sample_n(24, replace = F) %>%
bind_cols(rox %>%
sample_n(24, replace = F)) %>%
rename(box_1 = 1, box_2 = 2)
table(box$box_1)
#>
#> chalk gneiss granite pumice
#> 1 9 7 7
table(box$box_2)
#>
#> chalk gneiss granite marl pumice
#> 1 8 8 1 6
另一种方法是先清点可用的岩石,然后将 non-matching 对分配给洗牌的学生。
这确保了约束岩石得到充分利用。
library(tidyverse)
# Set rocks available
rocks_available <- tribble(
~rock, ~constraint,
"granite", 14,
"pumice", 14,
"gneiss", 14,
"marl", 3,
"chalk", 3
) |>
uncount(constraint)
# Assign shuffled rocks
boxed <- rocks_available |>
group_by(rock) |>
mutate(rock_id = row_number()) |>
arrange(rock_id) |>
ungroup() |>
select(-rock_id) |>
mutate(student = rep(1:24, each = 2),
pick = if_else(row_number() %% 2 == 1, "one", "two")) |>
pivot_wider(names_from = pick, names_prefix = "pick_", values_from = rock) |>
mutate(student = permute::shuffle(student))
# Peak in the boxes
boxed |> head()
#> # A tibble: 6 × 3
#> student pick_one pick_two
#> <int> <chr> <chr>
#> 1 20 granite pumice
#> 2 6 gneiss marl
#> 3 22 chalk granite
#> 4 9 pumice gneiss
#> 5 19 marl chalk
#> 6 24 granite pumice
# Verify rocks boxed
boxed |>
pivot_longer(-student) |>
count(value)
#> # A tibble: 5 × 2
#> value n
#> <chr> <int>
#> 1 chalk 3
#> 2 gneiss 14
#> 3 granite 14
#> 4 marl 3
#> 5 pumice 14
由 reprex package (v2.0.1)
于 2022-06-04 创建
为了评估我的学生的岩相学,我需要用 2 种不同的岩石制作盒子,学生需要认出它们。我想写一个 R 脚本来为每个盒子分配石头。
我是这样做的:
rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
for (i in 1:24) {
samp <- sample(rocks,2)
if (i==1) {
box <- samp
} else {
box <- rbind(box,samp)
}
}
效果很好
box
[,1] [,2]
"granite" "gneiss"
"marl" "chalk"
"granite" "gneiss"
"pumice" "marl"
"chalk" "granite"
"gneiss" "granite"
"pumice" "chalk"
"gneiss" "marl"
"gneiss" "pumice"
"gneiss" "pumice"
"pumice" "gneiss"
"pumice" "marl"
"marl" "chalk"
"granite" "gneiss"
"granite" "chalk"
"gneiss" "granite"
"pumice" "granite"
"pumice" "chalk"
"pumice" "granite"
"gneiss" "pumice"
"pumice" "granite"
"granite" "marl"
"marl" "gneiss"
"pumice" "gneiss"
但是,对于岩石“粉笔”和“泥灰岩”,我只有 3 个样品。因此,我生成的dataframe box是不可行的。
如何让我的脚本只生成 3 个“chalk”和“marl”样本?
提前致谢,
不是最优雅的方式,但应该这样做:
rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
nb.chalk = 0
nb.marl = 0
for (i in 1:24) {
# bad.sample: when 3 rocks from chalk and marl were already attributed but still sampled
bad.sample = TRUE
while (bad.sample){
samp <- sample(rocks,2)
bad.sample = (nb.chalk == 3 & 'chalk' %in% samp) | (nb.marl == 3 & 'marl' %in% samp)
}
# count the numbers of chalk and marl already obtained
if ('chalk' %in% samp) nb.chalk = nb.chalk +1
if ('marl' %in% samp) nb.marl = nb.marl +1
if (i==1) {
box <- samp
} else {
box <- rbind(box,samp)
}
}
这是一个没有 for 循环的 tidyverse
解决方案:
library(tidyverse)
rox <- bind_rows(
tibble(rox = c("marl","chalk")) %>%
split(.$rox) %>%
map_df(~.x %>% slice(rep(row_number(), 3))),
tibble(rox = c("granite","pumice","gneiss")) %>%
split(.$rox) %>%
map_df(~.x %>% slice(rep(row_number(), 10)))
)
box <- rox %>%
sample_n(24, replace = F) %>%
bind_cols(rox %>%
sample_n(24, replace = F)) %>%
rename(box_1 = 1, box_2 = 2)
table(box$box_1)
#>
#> chalk gneiss granite pumice
#> 1 9 7 7
table(box$box_2)
#>
#> chalk gneiss granite marl pumice
#> 1 8 8 1 6
另一种方法是先清点可用的岩石,然后将 non-matching 对分配给洗牌的学生。
这确保了约束岩石得到充分利用。
library(tidyverse)
# Set rocks available
rocks_available <- tribble(
~rock, ~constraint,
"granite", 14,
"pumice", 14,
"gneiss", 14,
"marl", 3,
"chalk", 3
) |>
uncount(constraint)
# Assign shuffled rocks
boxed <- rocks_available |>
group_by(rock) |>
mutate(rock_id = row_number()) |>
arrange(rock_id) |>
ungroup() |>
select(-rock_id) |>
mutate(student = rep(1:24, each = 2),
pick = if_else(row_number() %% 2 == 1, "one", "two")) |>
pivot_wider(names_from = pick, names_prefix = "pick_", values_from = rock) |>
mutate(student = permute::shuffle(student))
# Peak in the boxes
boxed |> head()
#> # A tibble: 6 × 3
#> student pick_one pick_two
#> <int> <chr> <chr>
#> 1 20 granite pumice
#> 2 6 gneiss marl
#> 3 22 chalk granite
#> 4 9 pumice gneiss
#> 5 19 marl chalk
#> 6 24 granite pumice
# Verify rocks boxed
boxed |>
pivot_longer(-student) |>
count(value)
#> # A tibble: 5 × 2
#> value n
#> <chr> <int>
#> 1 chalk 3
#> 2 gneiss 14
#> 3 granite 14
#> 4 marl 3
#> 5 pumice 14
由 reprex package (v2.0.1)
于 2022-06-04 创建