R 中具有最大条件的样本

Sample in R with a maximum condition

为了评估我的学生的岩相学,我需要用 2 种不同的岩石制作盒子,学生需要认出它们。我想写一个 R 脚本来为每个盒子分配石头。

我是这样做的:

rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
for (i in 1:24) {
    samp <- sample(rocks,2)
    
    if (i==1) {
       box <- samp
    } else {
      box <- rbind(box,samp)
    }
}

效果很好

box
[,1]      [,2]     
"granite" "gneiss" 
"marl"    "chalk"  
"granite" "gneiss" 
"pumice"  "marl"
"chalk"   "granite"
"gneiss"  "granite"
"pumice"  "chalk"  
"gneiss"  "marl"   
"gneiss"  "pumice" 
"gneiss"  "pumice" 
"pumice"  "gneiss" 
"pumice"  "marl"
"marl"    "chalk"
"granite" "gneiss" 
"granite" "chalk" 
"gneiss"  "granite"
"pumice"  "granite"
"pumice"  "chalk"
"pumice"  "granite"
"gneiss"  "pumice" 
"pumice"  "granite"
"granite" "marl" 
"marl"    "gneiss" 
"pumice"  "gneiss" 

但是,对于岩石“粉笔”和“泥灰岩”,我只有 3 个样品。因此,我生成的dataframe box是不可行的。

如何让我的脚本只生成 3 个“chalk”和“marl”样本?

提前致谢,

不是最优雅的方式,但应该这样做:

rocks <- c("granite","pumice","gneiss","marl","chalk")
samp <- NULL
nb.chalk = 0
nb.marl = 0
for (i in 1:24) {
  # bad.sample: when 3 rocks from chalk and marl were already attributed but still sampled
  bad.sample = TRUE
  while (bad.sample){
    samp <- sample(rocks,2)
    bad.sample = (nb.chalk == 3 & 'chalk' %in% samp) | (nb.marl == 3 & 'marl'  %in% samp)
  }
  # count the numbers of chalk and marl already obtained
  if ('chalk'  %in% samp) nb.chalk = nb.chalk +1
  if ('marl'  %in% samp) nb.marl = nb.marl +1
  
  if (i==1) {
    box <- samp
  } else {
    box <- rbind(box,samp)
  }
}

这是一个没有 for 循环的 tidyverse 解决方案:

library(tidyverse)

rox <- bind_rows(
tibble(rox = c("marl","chalk")) %>% 
  split(.$rox) %>% 
  map_df(~.x %>% slice(rep(row_number(), 3))),

tibble(rox = c("granite","pumice","gneiss")) %>% 
  split(.$rox) %>% 
  map_df(~.x %>% slice(rep(row_number(), 10)))
)

box <- rox %>% 
  sample_n(24, replace = F) %>% 
  bind_cols(rox %>% 
              sample_n(24, replace = F)) %>% 
  rename(box_1 = 1, box_2 = 2)

table(box$box_1)
#> 
#>   chalk  gneiss granite  pumice 
#>       1       9       7       7
table(box$box_2)
#> 
#>   chalk  gneiss granite    marl  pumice 
#>       1       8       8       1       6

另一种方法是先清点可用的岩石,然后将 non-matching 对分配给洗牌的学生。

这确保了约束岩石得到充分利用。

library(tidyverse)

# Set rocks available
rocks_available <- tribble(
  ~rock, ~constraint,
  "granite", 14,
  "pumice", 14,
  "gneiss", 14,
  "marl", 3,
  "chalk", 3
) |> 
  uncount(constraint)

# Assign shuffled rocks
boxed <- rocks_available |> 
  group_by(rock) |> 
  mutate(rock_id = row_number()) |> 
  arrange(rock_id) |> 
  ungroup() |> 
  select(-rock_id) |> 
  mutate(student = rep(1:24, each = 2),
         pick = if_else(row_number() %% 2 == 1, "one", "two")) |> 
  pivot_wider(names_from = pick, names_prefix = "pick_", values_from = rock) |> 
  mutate(student = permute::shuffle(student))

# Peak in the boxes
boxed |> head()
#> # A tibble: 6 × 3
#>   student pick_one pick_two
#>     <int> <chr>    <chr>   
#> 1      20 granite  pumice  
#> 2       6 gneiss   marl    
#> 3      22 chalk    granite 
#> 4       9 pumice   gneiss  
#> 5      19 marl     chalk   
#> 6      24 granite  pumice
  
# Verify rocks boxed
boxed |> 
  pivot_longer(-student) |> 
  count(value)
#> # A tibble: 5 × 2
#>   value       n
#>   <chr>   <int>
#> 1 chalk       3
#> 2 gneiss     14
#> 3 granite    14
#> 4 marl        3
#> 5 pumice     14

reprex package (v2.0.1)

于 2022-06-04 创建