调用函数中其他列的控制流程

Question

我想在函数中根据给定条件引用数据框的其他列。

本质上，我想把一个数据框从长格式转换为宽格式，并按条件处理转换后产生的缺失值：如果某一列的值为 NA，而同一行的另一列有值，就把这个 NA 替换为一个特定的数字。

不过，填充的值必须因列而异。也就是说，如果某一行中 2010 为 NA 而 2019 有值，则返回 16；反之，如果同一行中 2019 为 NA 而 2010 有值，则返回 17。

我尝试过的：

# A tibble: 26 x 4
   year  locality_id landcover  pland
   <chr> <chr>           <int>  <dbl>
 1 2010  L2228604           10 0.0645
 2 2010  L2228604           13 0.935 
 3 2010  L452817             8 0.0968
 4 2010  L452817             9 0.0323
 5 2010  L452817            12 0.613 
 6 2010  L452817            13 0.194 
 7 2010  L452817            14 0.0645
 8 2010  L596267             0 0.194 
 9 2010  L596267             9 0.0323
10 2010  L596267            11 0.0645
# ... with 16 more rows

# Map missing values to 16 and everything else to 17.
#
# The earlier test `x == 'NA'` compared against the *string* "NA": for a real
# missing value the comparison itself yields NA and `if` stops with an error,
# and a scalar `if` cannot process a whole column. `is.na()` is the correct
# test for missingness and is vectorised, so the function now works
# element-wise.
#
# @param x A vector (or scalar) to inspect.
# @return A numeric vector the same length as `x`: 16 where `x` is missing
#   (or is the literal string "NA", kept for backward compatibility),
#   17 otherwise.
p <- function(x){
    # `%in%` never returns NA, so the mask is always TRUE/FALSE.
    is_missing <- is.na(x) | x %in% "NA"
    out <- rep(17, length(x))
    out[is_missing] <- 16
    out
}

#wrong outcome
# A tibble: 26 x 4
   locality_id  pland `2010` `2019`
   <chr>        <dbl>  <dbl>  <dbl>
 1 L2228604    0.0645     17     NA
 2 L2228604    0.935      17     NA
 3 L452817     0.0968     17     NA
 4 L452817     0.0323     17     NA
 5 L452817     0.613      17     NA
 6 L452817     0.194      17     NA
 7 L452817     0.0645     17     NA
 8 L596267     0.194      17     NA
 9 L596267     0.0323     17     NA
10 L596267     0.0645     17     NA
# ... with 16 more rows

我期待的是：

# A tibble: 26 x 4
   locality_id  pland `2010` `2019`
   <chr>        <dbl>  <int>  <int>
 1 L2228604    0.0645     10     17
 2 L2228604    0.935      13     17
 3 L452817     0.0968      8     17
 4 L452817     0.0323      9     17
 5 L9185766 0.54838710    16      8
 6 L9185766 0.19354839    16      9
 7 L9185766 0.03225806    16     13
 8 L9185766 0.16129032    16     14
 9 L9234578 1.00000000    16     12

可重现代码：

# Reproducible sample data in dput() form: a 26-row tibble with the columns
# year (character), locality_id (character), landcover (integer) and
# pland (double). The expression is not assigned here; bind it to a name
# before use.
structure(list(year = c("2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019"), locality_id = c("L2228604", 
"L2228604", "L452817", "L452817", "L452817", "L452817", "L452817", 
"L596267", "L596267", "L596267", "L596267", "L152650", "L910180", 
"L910180", "L910180", "L4791597", "L4791597", "L9149985", "L9149985", 
"L9149985", "L9185766", "L9185766", "L9185766", "L9185766", "L9185766", 
"L9234578"), landcover = c(10L, 13L, 8L, 9L, 12L, 13L, 14L, 0L, 
9L, 11L, 13L, 13L, 0L, 8L, 9L, 5L, 8L, 10L, 11L, 12L, 4L, 8L, 
9L, 13L, 14L, 12L), pland = c(0.0645161290322581, 0.935483870967742, 
0.0967741935483871, 0.032258064516129, 0.612903225806452, 0.193548387096774, 
0.0645161290322581, 0.193548387096774, 0.032258064516129, 0.0645161290322581, 
0.709677419354839, 1, 0.4375, 0.34375, 0.03125, 0.566666666666667, 
0.0333333333333333, 0.1, 0.0333333333333333, 0.866666666666667, 
0.0645161290322581, 0.548387096774194, 0.193548387096774, 0.032258064516129, 
0.161290322580645, 1)), row.names = c(NA, -26L), class = c("tbl_df", 
"tbl", "data.frame"))

Answer 1

有一个名为 libr 的包，它提供的 datastep() 函数可以轻松完成这件事。datastep() 函数允许您逐行遍历数据，并根据各列的值做出判断；您还可以在遍历过程中即时创建新列。

这是针对您的具体问题的代码。我把您的样本数据放在了一个名为 dat 的 tibble 中：

library(libr)


# Step through `dat` row by row: the column for the row's own year receives
# the observed landcover code, while the other year column receives its fixed
# placeholder (17 for 2019, 16 for 2010). Only the four columns listed in
# `keep` are retained in `final`.
final <- datastep(
  dat,
  keep = c("locality_id", "pland", "2010", "2019"),
  {
    if (year == "2010") {
      # Row observed in 2010: 2019 has no observation for it.
      `2010` <- landcover
      `2019` <- 17
    } else if (year == "2019") {
      # Row observed in 2019: 2010 has no observation for it.
      `2010` <- 16
      `2019` <- landcover
    }
  }
)

上面得到的 final 包含以下结果：

# locality_id      pland 2010 2019
# 1     L2228604 0.06451613   10   17
# 2     L2228604 0.93548387   13   17
# 3      L452817 0.09677419    8   17
# 4      L452817 0.03225806    9   17
# 5      L452817 0.61290323   12   17
# 6      L452817 0.19354839   13   17
# 7      L452817 0.06451613   14   17
# 8      L596267 0.19354839    0   17
# 9      L596267 0.03225806    9   17
# 10     L596267 0.06451613   11   17
# 11     L596267 0.70967742   13   17
# 12     L152650 1.00000000   13   17
# 13     L910180 0.43750000    0   17
# 14     L910180 0.34375000    8   17
# 15     L910180 0.03125000    9   17
# 16    L4791597 0.56666667   16    5
# 17    L4791597 0.03333333   16    8
# 18    L9149985 0.10000000   16   10
# 19    L9149985 0.03333333   16   11
# 20    L9149985 0.86666667   16   12
# 21    L9185766 0.06451613   16    4
# 22    L9185766 0.54838710   16    8
# 23    L9185766 0.19354839   16    9
# 24    L9185766 0.03225806   16   13
# 25    L9185766 0.16129032   16   14
# 26    L9234578 1.00000000   16   12

Answer 2

您可以先把数据转换为宽格式，然后根据列名替换各列中的 NA 值。

library(dplyr)
library(tidyr)

# Reshape to one column per year, then fill each year's gaps with that
# column's own placeholder: 17 for `2019`, 16 for every other selected column.
df %>%
  pivot_wider(names_from = year, values_from = landcover) %>%
  mutate(across(`2010`:`2019`, function(col) {
    # cur_column() gives the name of the column currently being processed.
    fill_value <- if (cur_column() == '2019') 17 else 16
    replace_na(col, fill_value)
  }))


#   locality_id  pland `2010` `2019`
#   <chr>        <dbl>  <dbl>  <dbl>
# 1 L2228604    0.0645     10     17
# 2 L2228604    0.935      13     17
# 3 L452817     0.0968      8     17
# 4 L452817     0.0323      9     17
# 5 L452817     0.613      12     17
# 6 L452817     0.194      13     17
# 7 L452817     0.0645     14     17
# 8 L596267     0.194       0     17
# 9 L596267     0.0323      9     17
#10 L596267     0.0645     11     17
# … with 16 more rows

调用函数中其他列的控制流程

control flow for calling other columns in function

pivot

r

function

control-flow