为什么我的 lead 函数会生成 NA 值?
Why does my lead-function generate NA values?
我需要构建一张参考表(table),用来跟踪在给定的当前日期我们的学生应该已经获得多少学分。表中每个录取轮次的每门课程各占一行。
我想创建一个 finished 变量:每个录取轮次的最后一门课程取值为 1,其他所有行取值为 0(这样我就可以处理那些本应已经完成其学习项目的学生)。
我写
# Asker's original attempt, kept verbatim. It writes `=` where the comparison
# operator `==` is intended, and it calls lead() without a `default`, so the
# row that has no following value gets NA.
ekon_program<-ekon_program%>%mutate(finished=ifelse(lead(kull)=kull,0,1))
其中 kull 是我的录取轮次(admission round)变量:在当前录取轮次的最后一门课程之后的那一行,它的值会加 1。奇怪的是,每个录取轮次的最后一门课程现在被编码为 NA,而其他所有行都被编码为 0。
我可以通过将所有 NA 值转换为 1 来轻松纠正此问题,但为什么首先会发生这种情况?
数据摘录:
# Sample data as a structure() literal (the form dput() produces): a 20-row
# grouped tibble ("grouped_df") whose grouping column is start_date, giving two
# groups (rows 1-14 and rows 15-20). kull is 1 for the first 14 rows and 2 for
# the remaining 6, so it is constant within each group.
ekon_program <- structure(list(sd = structure(c(17042, 17042, 17042, 17042, 17042,
17042, 17042, 17042, 17042, 17042, 17042, 17042, 17042, 17042,
17406, 17406, 17406, 17406, 17406, 17406), class = "Date"), points_ekon = c(15,
15, 15, 15, 7.5, 7.5, 15, 7.5, 7.5, 15, 15, 15, 30, 0, 15, 15,
15, 15, 7.5, 7.5), summer_break_ekon = c(0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1), weeks_course = c(10,
10, 10, 10, 5, 5, 10, 5, 5, 10, 10, 10, 20, 0, 10, 10, 10, 10,
5, 5), points_expected = c(0, 15, 30, 45, 60, 67.5, 75, 90, 97.5,
105, 120, 135, 150, 180, 0, 15, 30, 45, 60, 67.5), order = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L,
2L, 3L, 4L, 5L, 6L), starttermin = c(1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), kull = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2), start_date = structure(c(17041,
17041, 17041, 17041, 17041, 17041, 17041, 17041, 17041, 17041,
17041, 17041, 17041, 17041, 17405, 17405, 17405, 17405, 17405,
17405), class = "Date"), start_date_points = structure(c(17041,
17132, 17202, 17272, 17342, 17461, 17496, 17566, 17601, 17636,
17706, 17860, 17930, 18070, 17405, 17496, 17566, 17636, 17706,
17825), class = "Date"), end_date_points = structure(c(17131,
17201, 17271, 17341, 17460, 17495, 17565, 17600, 17635, 17705,
17859, 17929, 18069, 18069, 17495, 17565, 17635, 17705, 17824,
17859), class = "Date"), finished_date = structure(c(18070, 18070,
18070, 18070, 18070, 18070, 18070, 18070, 18070, 18070, 18070,
18070, 18070, 18070, 18434, 18434, 18434, 18434, 18434, 18434
), class = "Date")), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -20L), groups = structure(list(
start_date = structure(c(17041, 17405), class = "Date"),
.rows = list(1:14, 15:20)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
第一个问题是代码里写的是 `=` 而不是 `==`。其次,`lead` 默认(by default)会在末尾产生一个 NA;由于数据是按 start_date 分组的,`lead` 会在每个组内分别计算,所以每个录取轮次的最后一行都会得到 NA。如果需要不同的结果,可以修改 `default` 参数。另外,我们不需要用 `ifelse` 来做强制转换,用 `as.integer` 就可以完成:
library(dplyr)

# Flag the last course of every admission round: `finished` is 1 on the final
# row of each run of `kull` and 0 on every other row.
#
# `ekon_program` is a grouped data frame (grouped by `start_date`), so `lead()`
# is evaluated separately inside each group and has no "next" value for the
# group's last row. Using `default = last(kull)` there would compare the last
# row with itself and yield 0 for every row, so the default is `Inf`, a value
# `kull` never takes: the comparison is then TRUE exactly where an admission
# round ends, whether or not the data is grouped.
# NOTE(review): `Inf` relies on `kull` being a double column, as in the sample
# data; for an integer column use a sentinel integer instead.
ekon_program %>%
  mutate(finished = as.integer(lead(kull, default = Inf) != kull))
我需要构建一张参考表(table),用来跟踪在给定的当前日期我们的学生应该已经获得多少学分。表中每个录取轮次的每门课程各占一行。
我想创建一个 finished 变量:每个录取轮次的最后一门课程取值为 1,其他所有行取值为 0(这样我就可以处理那些本应已经完成其学习项目的学生)。
我写
# Asker's original attempt, kept verbatim. It writes `=` where the comparison
# operator `==` is intended, and it calls lead() without a `default`, so the
# row that has no following value gets NA.
ekon_program<-ekon_program%>%mutate(finished=ifelse(lead(kull)=kull,0,1))
其中 kull 是我的录取轮次(admission round)变量:在当前录取轮次的最后一门课程之后的那一行,它的值会加 1。奇怪的是,每个录取轮次的最后一门课程现在被编码为 NA,而其他所有行都被编码为 0。
我可以通过将所有 NA 值转换为 1 来轻松纠正此问题,但为什么首先会发生这种情况?
数据摘录:
# Sample data as a structure() literal (the form dput() produces): a 20-row
# grouped tibble ("grouped_df") whose grouping column is start_date, giving two
# groups (rows 1-14 and rows 15-20). kull is 1 for the first 14 rows and 2 for
# the remaining 6, so it is constant within each group.
ekon_program <- structure(list(sd = structure(c(17042, 17042, 17042, 17042, 17042,
17042, 17042, 17042, 17042, 17042, 17042, 17042, 17042, 17042,
17406, 17406, 17406, 17406, 17406, 17406), class = "Date"), points_ekon = c(15,
15, 15, 15, 7.5, 7.5, 15, 7.5, 7.5, 15, 15, 15, 30, 0, 15, 15,
15, 15, 7.5, 7.5), summer_break_ekon = c(0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1), weeks_course = c(10,
10, 10, 10, 5, 5, 10, 5, 5, 10, 10, 10, 20, 0, 10, 10, 10, 10,
5, 5), points_expected = c(0, 15, 30, 45, 60, 67.5, 75, 90, 97.5,
105, 120, 135, 150, 180, 0, 15, 30, 45, 60, 67.5), order = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L,
2L, 3L, 4L, 5L, 6L), starttermin = c(1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), kull = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2), start_date = structure(c(17041,
17041, 17041, 17041, 17041, 17041, 17041, 17041, 17041, 17041,
17041, 17041, 17041, 17041, 17405, 17405, 17405, 17405, 17405,
17405), class = "Date"), start_date_points = structure(c(17041,
17132, 17202, 17272, 17342, 17461, 17496, 17566, 17601, 17636,
17706, 17860, 17930, 18070, 17405, 17496, 17566, 17636, 17706,
17825), class = "Date"), end_date_points = structure(c(17131,
17201, 17271, 17341, 17460, 17495, 17565, 17600, 17635, 17705,
17859, 17929, 18069, 18069, 17495, 17565, 17635, 17705, 17824,
17859), class = "Date"), finished_date = structure(c(18070, 18070,
18070, 18070, 18070, 18070, 18070, 18070, 18070, 18070, 18070,
18070, 18070, 18070, 18434, 18434, 18434, 18434, 18434, 18434
), class = "Date")), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -20L), groups = structure(list(
start_date = structure(c(17041, 17405), class = "Date"),
.rows = list(1:14, 15:20)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
第一个问题是代码里写的是 `=` 而不是 `==`。其次,`lead` 默认(by default)会在末尾产生一个 NA;由于数据是按 start_date 分组的,`lead` 会在每个组内分别计算,所以每个录取轮次的最后一行都会得到 NA。如果需要不同的结果,可以修改 `default` 参数。另外,我们不需要用 `ifelse` 来做强制转换,用 `as.integer` 就可以完成:
library(dplyr)

# Flag the last course of every admission round: `finished` is 1 on the final
# row of each run of `kull` and 0 on every other row.
#
# `ekon_program` is a grouped data frame (grouped by `start_date`), so `lead()`
# is evaluated separately inside each group and has no "next" value for the
# group's last row. Using `default = last(kull)` there would compare the last
# row with itself and yield 0 for every row, so the default is `Inf`, a value
# `kull` never takes: the comparison is then TRUE exactly where an admission
# round ends, whether or not the data is grouped.
# NOTE(review): `Inf` relies on `kull` being a double column, as in the sample
# data; for an integer column use a sentinel integer instead.
ekon_program %>%
  mutate(finished = as.integer(lead(kull, default = Inf) != kull))