当存在 NA 时,如何根据另一个数据框对数据框取子集

Subsetting a dataframe from another dataframe when we have an NA

我有一个包含我全部数据的数据框 (df1)

                    Measures                          Format
1            space and shape     Constructed Response Expert
2            space and shape     Constructed Response Manual
3            space and shape     Constructed Response Expert
4            space and shape          Simple Multiple Choice
5            space and shape Constructed Response Auto-coded
6   change and relationships     Constructed Response Expert
7   change and relationships     Constructed Response Expert
8   change and relationships     Constructed Response Expert
9   change and relationships         Complex Multiple Choice
10  change and relationships         Complex Multiple Choice
11           space and shape         Complex Multiple Choice
12           space and shape          Simple Multiple Choice
13           space and shape     Constructed Response Expert
14           space and shape     Constructed Response Expert
15      uncertainty and data         Complex Multiple Choice
16                  quantity     Constructed Response Manual
17      uncertainty and data          Simple Multiple Choice
18      uncertainty and data         Complex Multiple Choice
19      uncertainty and data          Simple Multiple Choice
20                  quantity     Constructed Response Manual
21  change and relationships     Constructed Response Manual
22  change and relationships     Constructed Response Expert
23           space and shape          Simple Multiple Choice
24           space and shape     Constructed Response Expert
25           space and shape Constructed Response Auto-coded
26                  quantity     Constructed Response Manual
27                  quantity         Complex Multiple Choice
28                  quantity     Constructed Response Manual
29                  quantity          Simple Multiple Choice
30                  quantity          Simple Multiple Choice
31      uncertainty and data          Simple Multiple Choice
32  change and relationships          Simple Multiple Choice
33                  quantity         Complex Multiple Choice
34                  quantity          Simple Multiple Choice
35      uncertainty and data Constructed Response Auto-coded
36  change and relationships     Constructed Response Expert
37      uncertainty and data     Constructed Response Manual
38                  quantity     Constructed Response Manual
39  change and relationships     Constructed Response Expert
40  change and relationships     Constructed Response Manual
41                  quantity         Complex Multiple Choice
42                  quantity     Constructed Response Expert
43                  quantity          Simple Multiple Choice
44                  quantity     Constructed Response Expert
45                  quantity     Constructed Response Manual
46                  quantity          Simple Multiple Choice
47  change and relationships     Constructed Response Expert
48      uncertainty and data          Simple Multiple Choice
49  change and relationships     Constructed Response Manual
50      uncertainty and data          Simple Multiple Choice
51      uncertainty and data          Simple Multiple Choice
52      uncertainty and data          Simple Multiple Choice
53                  quantity     Constructed Response Manual
54                  quantity     Constructed Response Manual
55                  quantity          Simple Multiple Choice
56           space and shape          Simple Multiple Choice
57  change and relationships     Constructed Response Expert
58                  quantity     Constructed Response Manual
59           space and shape     Constructed Response Manual
60           space and shape          Simple Multiple Choice
61  change and relationships     Constructed Response Manual
62  change and relationships     Constructed Response Expert
63      uncertainty and data          Simple Multiple Choice
64      uncertainty and data          Simple Multiple Choice
65                  quantity          Simple Multiple Choice
66  change and relationships     Constructed Response Expert
67                  quantity     Constructed Response Manual
68  change and relationships          Simple Multiple Choice
69           space and shape     Constructed Response Expert
70                  quantity          Simple Multiple Choice
71                  quantity     Constructed Response Manual
72                  quantity     Constructed Response Expert
73           space and shape         Complex Multiple Choice
74           space and shape         Complex Multiple Choice
75           space and shape     Constructed Response Expert
76      uncertainty and data     Constructed Response Expert
77      uncertainty and data     Constructed Response Manual
78      uncertainty and data     Constructed Response Expert
79  change and relationships     Constructed Response Manual
80  change and relationships     Constructed Response Expert
81  change and relationships     Constructed Response Expert
82      uncertainty and data     Constructed Response Manual
83      uncertainty and data     Constructed Response Expert
84      uncertainty and data     Constructed Response Expert
85  change and relationships          Simple Multiple Choice
86  change and relationships          Simple Multiple Choice
87  change and relationships     Constructed Response Manual
88  change and relationships     Constructed Response Expert
89  change and relationships          Simple Multiple Choice
90      uncertainty and data     Constructed Response Expert
91           space and shape     Constructed Response Manual
92           space and shape         Complex Multiple Choice
93      uncertainty and data     Constructed Response Manual
94      uncertainty and data     Constructed Response Manual
95      uncertainty and data         Complex Multiple Choice
96      uncertainty and data          Simple Multiple Choice
97      uncertainty and data          Simple Multiple Choice
98                  quantity          Simple Multiple Choice
99                  quantity     Constructed Response Manual
100          space and shape          Simple Multiple Choice
101          space and shape     Constructed Response Expert
102          space and shape     Constructed Response Manual
103          space and shape     Constructed Response Manual
104 change and relationships     Constructed Response Expert
105          space and shape     Constructed Response Manual
106          space and shape     Constructed Response Expert
107                 quantity          Simple Multiple Choice
108 change and relationships     Constructed Response Manual
109 change and relationships         Complex Multiple Choice

我有另一个数据框 df2(注意它有一个 'Number' 列),我用它从 df1 中提取子集。'Number' 列表示我想从原始数据集 (df1) 中提取多少行该类型(Measures 与 Format 的组合)的数据。

         Measures                          Format Number
1 space and shape     Constructed Response Expert      2
2 space and shape     Constructed Response Manual      1
4 space and shape          Simple Multiple Choice      2
5 space and shape Constructed Response Auto-coded      1
6           asdaf                           asfas      0

我使用下面的代码来做到这一点

library(tidyverse)

# Keep, for every (Measures, Format) combination requested in df2, the first
# `Number` matching rows of df1.
inner_join(df1, df2, by = c("Measures", "Format")) %>%
  group_by(Measures, Format) %>%
  # Row indices are passed positionally. seq_len() yields an empty index when
  # Number is 0, whereas 1:0 would be c(1, 0) and still keep the first row.
  slice(seq_len(min(Number))) %>%
  ungroup()

但是假设我的 df2 看起来像下面这样(注意其中有一个 NA)。在这种情况下,我想获得 4 行 Measures 为 'space and shape' 的数据,Format 可以是任意值。当然,我不希望结果中出现重复的行:第 2、4、5 行同样请求了 'space and shape',所以第 1 行(任意 Format)抽到的行不应与这些行抽到的行重复。

         Measures                          Format Number
1 space and shape                            <NA>      4
2 space and shape     Constructed Response Manual      1
4 space and shape          Simple Multiple Choice      2
5 space and shape Constructed Response Auto-coded      1
6           asdaf                           asfas      0

我该怎么做?

数据如下:

df1

# Full item pool (df1): a plain data.frame with 109 rows and two character
# columns, Measures and Format. (Measures, Format) combinations repeat, so
# rows are not unique.
df1<-structure(list(Measures = c("space and shape", "space and shape", 
    "space and shape", "space and shape", "space and shape", "change and relationships", 
    "change and relationships", "change and relationships", "change and relationships", 
    "change and relationships", "space and shape", "space and shape", 
    "space and shape", "space and shape", "uncertainty and data", 
    "quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data", 
    "quantity", "change and relationships", "change and relationships", 
    "space and shape", "space and shape", "space and shape", "quantity", 
    "quantity", "quantity", "quantity", "quantity", "uncertainty and data", 
    "change and relationships", "quantity", "quantity", "uncertainty and data", 
    "change and relationships", "uncertainty and data", "quantity", 
    "change and relationships", "change and relationships", "quantity", 
    "quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships", 
    "uncertainty and data", "change and relationships", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "quantity", "quantity", 
    "quantity", "space and shape", "change and relationships", "quantity", 
    "space and shape", "space and shape", "change and relationships", 
    "change and relationships", "uncertainty and data", "uncertainty and data", 
    "quantity", "change and relationships", "quantity", "change and relationships", 
    "space and shape", "quantity", "quantity", "quantity", "space and shape", 
    "space and shape", "space and shape", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "change and relationships", 
    "change and relationships", "change and relationships", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "change and relationships", 
    "change and relationships", "change and relationships", "change and relationships", 
    "change and relationships", "uncertainty and data", "space and shape", 
    "space and shape", "uncertainty and data", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "uncertainty and data", 
    "quantity", "quantity", "space and shape", "space and shape", 
    "space and shape", "space and shape", "change and relationships", 
    "space and shape", "space and shape", "quantity", "change and relationships", 
    "change and relationships"), Format = c("Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Auto-coded", 
    "Constructed Response Expert", "Constructed Response Expert", 
    "Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual", 
    "Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Manual", 
    "Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Auto-coded", "Constructed Response Manual", 
    "Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
    "Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Expert", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
    "Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
    )), row.names = c(NA, -109L), class = "data.frame")

df2(无 NA)

# Request table without wildcards: each row asks for `Number` rows of df1
# having exactly this Measures/Format combination.
df2 <- data.frame(
  Measures = c(
    "space and shape",
    "space and shape",
    "space and shape",
    "space and shape",
    "asdaf"
  ),
  Format = c(
    "Constructed Response Expert",
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded",
    "asfas"
  ),
  Number = c(2, 1, 2, 1, 0),
  row.names = c("1", "2", "4", "5", "6"),
  stringsAsFactors = FALSE
)

df2(含 NA)

# Request table with a wildcard: the NA Format in the first row means
# "any format" for that Measures value.
df2 <- data.frame(
  Measures = c(
    "space and shape",
    "space and shape",
    "space and shape",
    "space and shape",
    "asdaf"
  ),
  Format = c(
    NA,
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded",
    "asfas"
  ),
  Number = c(4, 1, 2, 1, 0),
  row.names = c("1", "2", "4", "5", "6"),
  stringsAsFactors = FALSE
)

下面是一个预期输出的例子(具体抽到哪些行可以不同)。第 1 行请求 4 行 'space and shape' 的数据,它们可以是任意 Format(因为我在 Format 中填了 NA):

library(tidyverse)

# Full item pool (df1): a plain data.frame with 109 rows and two character
# columns, Measures and Format. (Measures, Format) combinations repeat, so
# rows are not unique.
df1 <- structure(list(Measures = c(
  "space and shape", "space and shape",
  "space and shape", "space and shape", "space and shape", "change and relationships",
  "change and relationships", "change and relationships", "change and relationships",
  "change and relationships", "space and shape", "space and shape",
  "space and shape", "space and shape", "uncertainty and data",
  "quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data",
  "quantity", "change and relationships", "change and relationships",
  "space and shape", "space and shape", "space and shape", "quantity",
  "quantity", "quantity", "quantity", "quantity", "uncertainty and data",
  "change and relationships", "quantity", "quantity", "uncertainty and data",
  "change and relationships", "uncertainty and data", "quantity",
  "change and relationships", "change and relationships", "quantity",
  "quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships",
  "uncertainty and data", "change and relationships", "uncertainty and data",
  "uncertainty and data", "uncertainty and data", "quantity", "quantity",
  "quantity", "space and shape", "change and relationships", "quantity",
  "space and shape", "space and shape", "change and relationships",
  "change and relationships", "uncertainty and data", "uncertainty and data",
  "quantity", "change and relationships", "quantity", "change and relationships",
  "space and shape", "quantity", "quantity", "quantity", "space and shape",
  "space and shape", "space and shape", "uncertainty and data",
  "uncertainty and data", "uncertainty and data", "change and relationships",
  "change and relationships", "change and relationships", "uncertainty and data",
  "uncertainty and data", "uncertainty and data", "change and relationships",
  "change and relationships", "change and relationships", "change and relationships",
  "change and relationships", "uncertainty and data", "space and shape",
  "space and shape", "uncertainty and data", "uncertainty and data",
  "uncertainty and data", "uncertainty and data", "uncertainty and data",
  "quantity", "quantity", "space and shape", "space and shape",
  "space and shape", "space and shape", "change and relationships",
  "space and shape", "space and shape", "quantity", "change and relationships",
  "change and relationships"
), Format = c(
  "Constructed Response Expert",
  "Constructed Response Manual", "Constructed Response Expert",
  "Simple Multiple Choice", "Constructed Response Auto-coded",
  "Constructed Response Expert", "Constructed Response Expert",
  "Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice",
  "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
  "Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual",
  "Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice",
  "Constructed Response Manual", "Constructed Response Manual",
  "Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert",
  "Constructed Response Auto-coded", "Constructed Response Manual",
  "Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
  "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
  "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded",
  "Constructed Response Expert", "Constructed Response Manual",
  "Constructed Response Manual", "Constructed Response Expert",
  "Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert",
  "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
  "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
  "Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice",
  "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
  "Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
  "Constructed Response Manual", "Constructed Response Manual",
  "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert",
  "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
  "Constructed Response Expert", "Constructed Response Manual",
  "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
  "Constructed Response Manual", "Constructed Response Expert",
  "Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert",
  "Constructed Response Expert", "Constructed Response Manual",
  "Constructed Response Expert", "Constructed Response Manual",
  "Constructed Response Expert", "Constructed Response Expert",
  "Constructed Response Manual", "Constructed Response Expert",
  "Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice",
  "Constructed Response Manual", "Constructed Response Expert",
  "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
  "Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
  "Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
  "Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
  "Constructed Response Expert", "Constructed Response Manual",
  "Constructed Response Manual", "Constructed Response Expert",
  "Constructed Response Manual", "Constructed Response Expert",
  "Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
)), row.names = c(NA, -109L), class = "data.frame")

# Request table with a wildcard: the NA Format in the first row means
# "any format" for that Measures value.
df2 <- data.frame(
  Measures = c(
    "space and shape",
    "space and shape",
    "space and shape",
    "space and shape",
    "asdaf"
  ),
  Format = c(
    NA,
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded",
    "asfas"
  ),
  Number = c(4, 1, 2, 1, 0),
  row.names = c("1", "2", "4", "5", "6"),
  stringsAsFactors = FALSE
)


set.seed(1337)

# Draw the rows requested by each df2 row from df1 without ever returning the
# same df1 row twice.
#
# Each df2 row asks for `Number` rows of df1 with the given `Measures`; an NA
# `Format` means "any format". Requests are served in df2 order, and every row
# that has been drawn is removed from the pool before the next request is
# served, so a wildcard request cannot overlap with format-specific requests.
# NOTE: because of that ordering, an early wildcard request can use up rows
# that a later, more specific request needs; a warning reports any shortfall.
available <- df1 %>%
  # df1 contains repeated (Measures, Format) rows, so a surrogate key is
  # needed to tell individual rows apart.
  mutate(.row_id = row_number())
drawn_per_request <- vector("list", nrow(df2))

# seq_len() keeps the loop empty when df2 has no rows (seq(0) is c(1, 0)).
for (i in seq_len(nrow(df2))) {
  req_measure <- df2$Measures[[i]]
  req_format <- df2$Format[[i]]
  req_number <- df2$Number[[i]]

  candidates <- available %>%
    filter(
      Measures == req_measure,
      # NA format is a wildcard: `TRUE | NA` is TRUE, so every format passes.
      is.na(req_format) | Format == req_format
    )

  if (nrow(candidates) < req_number) {
    warning(
      "Request ", i, " asked for ", req_number, " row(s) but only ",
      nrow(candidates), " unused row(s) match; returning those.",
      call. = FALSE
    )
  }

  # slice_sample() samples without replacement and returns at most the
  # number of rows available.
  drawn <- candidates %>%
    slice_sample(n = req_number)

  drawn_per_request[[i]] <- drawn %>%
    mutate(Number = req_number)

  # Remove what was just drawn so later requests cannot pick it again.
  available <- available %>%
    filter(!.row_id %in% drawn$.row_id)
}

drawn_per_request %>%
  bind_rows() %>%
  # any_of() tolerates the empty-df2 case, where no .row_id column exists.
  select(-any_of(".row_id"))
#>          Measures                          Format Number
#> 1 space and shape          Simple Multiple Choice      4
#> 2 space and shape     Constructed Response Expert      4
#> 3 space and shape         Complex Multiple Choice      4
#> 4 space and shape         Complex Multiple Choice      4
#> 5 space and shape     Constructed Response Manual      1
#> 6 space and shape          Simple Multiple Choice      2
#> 7 space and shape          Simple Multiple Choice      2
#> 8 space and shape Constructed Response Auto-coded      1

由 reprex 包 (v2.0.0) 于 2022-05-03 创建