调用函数中其他列的控制流程
control flow for calling other columns in function
我正在尝试在给定条件的情况下连接到函数中的其他列。
本质上,我想让一个数据框在给定条件的情况下从长变宽,其中一列中的那些值是 NA
相对于在同一行中具有值的另一列,将 NAs
转换成特定的数字。
尽管分配的值必须特定于列。因此,如果 2010
具有 NAs
而 2019
具有值,则 return 16
否则,如果 2019
在同一行中具有 NA, 2010
有值,return 17
.
我尝试过的:
# A tibble: 26 x 4
year locality_id landcover pland
<chr> <chr> <int> <dbl>
1 2010 L2228604 10 0.0645
2 2010 L2228604 13 0.935
3 2010 L452817 8 0.0968
4 2010 L452817 9 0.0323
5 2010 L452817 12 0.613
6 2010 L452817 13 0.194
7 2010 L452817 14 0.0645
8 2010 L596267 0 0.194
9 2010 L596267 9 0.0323
10 2010 L596267 11 0.0645
# ... with 16 more rows
#' Map missing values to 16 and everything else to 17.
#'
#' @param x A vector (or scalar). Elements that are `NA`, or the literal
#'   string "NA", count as missing.
#' @return A numeric vector the same length as `x`: 16 where the element is
#'   missing, 17 otherwise.
p <- function(x) {
  # `x == "NA"` alone never detects a real NA: the comparison itself yields
  # NA, which makes `if ()` fail. is.na() is the reliable test; the string
  # comparison is kept so the literal "NA" is still recognized as before.
  is_missing <- is.na(x) | x == "NA"
  # ifelse() is vectorized, so the function also works on whole columns.
  ifelse(is_missing, 16, 17)
}
#wrong outcome
# A tibble: 26 x 4
locality_id pland `2010` `2019`
<chr> <dbl> <dbl> <dbl>
1 L2228604 0.0645 17 NA
2 L2228604 0.935 17 NA
3 L452817 0.0968 17 NA
4 L452817 0.0323 17 NA
5 L452817 0.613 17 NA
6 L452817 0.194 17 NA
7 L452817 0.0645 17 NA
8 L596267 0.194 17 NA
9 L596267 0.0323 17 NA
10 L596267 0.0645 17 NA
# ... with 16 more rows
我期待的是:
# A tibble: 26 x 4
locality_id pland `2010` `2019`
<chr> <dbl> <int> <int>
1 L2228604 0.0645 10 17
2 L2228604 0.935 13 17
3 L452817 0.0968 8 17
4 L452817 0.0323 9 17
5 L9185766 0.54838710 16 8
6 L9185766 0.19354839 16 9
7 L9185766 0.03225806 16 13
8 L9185766 0.16129032 16 14
9 L9234578 1.00000000 16 12
可重现代码:
# Reproducible sample data (appears to be dput() output): a 26-row tibble in
# long format with columns year (character), locality_id (character),
# landcover (integer) and pland (double).
structure(list(year = c("2010", "2010", "2010", "2010", "2010",
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010",
"2010", "2010", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019"), locality_id = c("L2228604",
"L2228604", "L452817", "L452817", "L452817", "L452817", "L452817",
"L596267", "L596267", "L596267", "L596267", "L152650", "L910180",
"L910180", "L910180", "L4791597", "L4791597", "L9149985", "L9149985",
"L9149985", "L9185766", "L9185766", "L9185766", "L9185766", "L9185766",
"L9234578"), landcover = c(10L, 13L, 8L, 9L, 12L, 13L, 14L, 0L,
9L, 11L, 13L, 13L, 0L, 8L, 9L, 5L, 8L, 10L, 11L, 12L, 4L, 8L,
9L, 13L, 14L, 12L), pland = c(0.0645161290322581, 0.935483870967742,
0.0967741935483871, 0.032258064516129, 0.612903225806452, 0.193548387096774,
0.0645161290322581, 0.193548387096774, 0.032258064516129, 0.0645161290322581,
0.709677419354839, 1, 0.4375, 0.34375, 0.03125, 0.566666666666667,
0.0333333333333333, 0.1, 0.0333333333333333, 0.866666666666667,
0.0645161290322581, 0.548387096774194, 0.193548387096774, 0.032258064516129,
0.161290322580645, 1)), row.names = c(NA, -26L), class = c("tbl_df",
"tbl", "data.frame"))
有一个名为 libr 的包,它提供了 datastep()
函数,可以轻松完成此操作。 datastep()
函数允许您逐行遍历数据,并根据列的值做出决策。您还可以即时创建新列。
这是针对您的特定问题的一些代码。我将您的样本数据放在一个名为 dat 的 tibble 中:
library(libr)
# Walk over `dat` one row at a time; each row keeps its own landcover value
# under its own year and gets the fixed filler value under the other year.
final <- datastep(
  dat,
  keep = c("locality_id", "pland", "2010", "2019"),
  {
    if (year == "2010") {
      # 2010 row: no 2019 observation, so 2019 gets 17.
      `2010` <- landcover
      `2019` <- 17
    } else if (year == "2019") {
      # 2019 row: no 2010 observation, so 2010 gets 16.
      `2010` <- 16
      `2019` <- landcover
    }
  }
)
上面的 tibble final
包含以下结果:
# locality_id pland 2010 2019
# 1 L2228604 0.06451613 10 17
# 2 L2228604 0.93548387 13 17
# 3 L452817 0.09677419 8 17
# 4 L452817 0.03225806 9 17
# 5 L452817 0.61290323 12 17
# 6 L452817 0.19354839 13 17
# 7 L452817 0.06451613 14 17
# 8 L596267 0.19354839 0 17
# 9 L596267 0.03225806 9 17
# 10 L596267 0.06451613 11 17
# 11 L596267 0.70967742 13 17
# 12 L152650 1.00000000 13 17
# 13 L910180 0.43750000 0 17
# 14 L910180 0.34375000 8 17
# 15 L910180 0.03125000 9 17
# 16 L4791597 0.56666667 16 5
# 17 L4791597 0.03333333 16 8
# 18 L9149985 0.10000000 16 10
# 19 L9149985 0.03333333 16 11
# 20 L9149985 0.86666667 16 12
# 21 L9185766 0.06451613 16 4
# 22 L9185766 0.54838710 16 8
# 23 L9185766 0.19354839 16 9
# 24 L9185766 0.03225806 16 13
# 25 L9185766 0.16129032 16 14
# 26 L9234578 1.00000000 16 12
您可以获得宽格式的数据并根据列名替换 NA
值。
library(dplyr)
library(tidyr)
# Reshape to one column per year, then fill the gaps that pivoting leaves
# behind with a value that depends on which year column is being processed.
df %>%
  pivot_wider(names_from = year, values_from = landcover) %>%
  mutate(
    across(
      `2010`:`2019`,
      function(col) {
        # 2019 gets 17; every other column in the range gets 16.
        fill <- if (cur_column() == "2019") 17 else 16
        replace_na(col, fill)
      }
    )
  )
# locality_id pland `2010` `2019`
# <chr> <dbl> <dbl> <dbl>
# 1 L2228604 0.0645 10 17
# 2 L2228604 0.935 13 17
# 3 L452817 0.0968 8 17
# 4 L452817 0.0323 9 17
# 5 L452817 0.613 12 17
# 6 L452817 0.194 13 17
# 7 L452817 0.0645 14 17
# 8 L596267 0.194 0 17
# 9 L596267 0.0323 9 17
#10 L596267 0.0645 11 17
# … with 16 more rows
我正在尝试在给定条件的情况下连接到函数中的其他列。
本质上,我想让一个数据框在给定条件的情况下从长变宽,其中一列中的那些值是 NA
相对于在同一行中具有值的另一列,将 NAs
转换成特定的数字。
尽管分配的值必须特定于列。因此,如果 2010
具有 NAs
而 2019
具有值,则 return 16
否则,如果 2019
在同一行中具有 NA, 2010
有值,return 17
.
我尝试过的:
# A tibble: 26 x 4
year locality_id landcover pland
<chr> <chr> <int> <dbl>
1 2010 L2228604 10 0.0645
2 2010 L2228604 13 0.935
3 2010 L452817 8 0.0968
4 2010 L452817 9 0.0323
5 2010 L452817 12 0.613
6 2010 L452817 13 0.194
7 2010 L452817 14 0.0645
8 2010 L596267 0 0.194
9 2010 L596267 9 0.0323
10 2010 L596267 11 0.0645
# ... with 16 more rows
#' Map missing values to 16 and everything else to 17.
#'
#' @param x A vector (or scalar). Elements that are `NA`, or the literal
#'   string "NA", count as missing.
#' @return A numeric vector the same length as `x`: 16 where the element is
#'   missing, 17 otherwise.
p <- function(x) {
  # `x == "NA"` alone never detects a real NA: the comparison itself yields
  # NA, which makes `if ()` fail. is.na() is the reliable test; the string
  # comparison is kept so the literal "NA" is still recognized as before.
  is_missing <- is.na(x) | x == "NA"
  # ifelse() is vectorized, so the function also works on whole columns.
  ifelse(is_missing, 16, 17)
}
#wrong outcome
# A tibble: 26 x 4
locality_id pland `2010` `2019`
<chr> <dbl> <dbl> <dbl>
1 L2228604 0.0645 17 NA
2 L2228604 0.935 17 NA
3 L452817 0.0968 17 NA
4 L452817 0.0323 17 NA
5 L452817 0.613 17 NA
6 L452817 0.194 17 NA
7 L452817 0.0645 17 NA
8 L596267 0.194 17 NA
9 L596267 0.0323 17 NA
10 L596267 0.0645 17 NA
# ... with 16 more rows
我期待的是:
# A tibble: 26 x 4
locality_id pland `2010` `2019`
<chr> <dbl> <int> <int>
1 L2228604 0.0645 10 17
2 L2228604 0.935 13 17
3 L452817 0.0968 8 17
4 L452817 0.0323 9 17
5 L9185766 0.54838710 16 8
6 L9185766 0.19354839 16 9
7 L9185766 0.03225806 16 13
8 L9185766 0.16129032 16 14
9 L9234578 1.00000000 16 12
可重现代码:
# Reproducible sample data (appears to be dput() output): a 26-row tibble in
# long format with columns year (character), locality_id (character),
# landcover (integer) and pland (double).
structure(list(year = c("2010", "2010", "2010", "2010", "2010",
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010",
"2010", "2010", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019"), locality_id = c("L2228604",
"L2228604", "L452817", "L452817", "L452817", "L452817", "L452817",
"L596267", "L596267", "L596267", "L596267", "L152650", "L910180",
"L910180", "L910180", "L4791597", "L4791597", "L9149985", "L9149985",
"L9149985", "L9185766", "L9185766", "L9185766", "L9185766", "L9185766",
"L9234578"), landcover = c(10L, 13L, 8L, 9L, 12L, 13L, 14L, 0L,
9L, 11L, 13L, 13L, 0L, 8L, 9L, 5L, 8L, 10L, 11L, 12L, 4L, 8L,
9L, 13L, 14L, 12L), pland = c(0.0645161290322581, 0.935483870967742,
0.0967741935483871, 0.032258064516129, 0.612903225806452, 0.193548387096774,
0.0645161290322581, 0.193548387096774, 0.032258064516129, 0.0645161290322581,
0.709677419354839, 1, 0.4375, 0.34375, 0.03125, 0.566666666666667,
0.0333333333333333, 0.1, 0.0333333333333333, 0.866666666666667,
0.0645161290322581, 0.548387096774194, 0.193548387096774, 0.032258064516129,
0.161290322580645, 1)), row.names = c(NA, -26L), class = c("tbl_df",
"tbl", "data.frame"))
有一个名为 libr 的包,它提供了 datastep()
函数,可以轻松完成此操作。 datastep()
函数允许您逐行遍历数据,并根据列的值做出决策。您还可以即时创建新列。
这是针对您的特定问题的一些代码。我将您的样本数据放在一个名为 dat 的 tibble 中:
library(libr)
# Walk over `dat` one row at a time; each row keeps its own landcover value
# under its own year and gets the fixed filler value under the other year.
final <- datastep(
  dat,
  keep = c("locality_id", "pland", "2010", "2019"),
  {
    if (year == "2010") {
      # 2010 row: no 2019 observation, so 2019 gets 17.
      `2010` <- landcover
      `2019` <- 17
    } else if (year == "2019") {
      # 2019 row: no 2010 observation, so 2010 gets 16.
      `2010` <- 16
      `2019` <- landcover
    }
  }
)
上面的 tibble final
包含以下结果:
# locality_id pland 2010 2019
# 1 L2228604 0.06451613 10 17
# 2 L2228604 0.93548387 13 17
# 3 L452817 0.09677419 8 17
# 4 L452817 0.03225806 9 17
# 5 L452817 0.61290323 12 17
# 6 L452817 0.19354839 13 17
# 7 L452817 0.06451613 14 17
# 8 L596267 0.19354839 0 17
# 9 L596267 0.03225806 9 17
# 10 L596267 0.06451613 11 17
# 11 L596267 0.70967742 13 17
# 12 L152650 1.00000000 13 17
# 13 L910180 0.43750000 0 17
# 14 L910180 0.34375000 8 17
# 15 L910180 0.03125000 9 17
# 16 L4791597 0.56666667 16 5
# 17 L4791597 0.03333333 16 8
# 18 L9149985 0.10000000 16 10
# 19 L9149985 0.03333333 16 11
# 20 L9149985 0.86666667 16 12
# 21 L9185766 0.06451613 16 4
# 22 L9185766 0.54838710 16 8
# 23 L9185766 0.19354839 16 9
# 24 L9185766 0.03225806 16 13
# 25 L9185766 0.16129032 16 14
# 26 L9234578 1.00000000 16 12
您可以获得宽格式的数据并根据列名替换 NA
值。
library(dplyr)
library(tidyr)
# Reshape to one column per year, then fill the gaps that pivoting leaves
# behind with a value that depends on which year column is being processed.
df %>%
  pivot_wider(names_from = year, values_from = landcover) %>%
  mutate(
    across(
      `2010`:`2019`,
      function(col) {
        # 2019 gets 17; every other column in the range gets 16.
        fill <- if (cur_column() == "2019") 17 else 16
        replace_na(col, fill)
      }
    )
  )
# locality_id pland `2010` `2019`
# <chr> <dbl> <dbl> <dbl>
# 1 L2228604 0.0645 10 17
# 2 L2228604 0.935 13 17
# 3 L452817 0.0968 8 17
# 4 L452817 0.0323 9 17
# 5 L452817 0.613 12 17
# 6 L452817 0.194 13 17
# 7 L452817 0.0645 14 17
# 8 L596267 0.194 0 17
# 9 L596267 0.0323 9 17
#10 L596267 0.0645 11 17
# … with 16 more rows