对于整个数据集的循环
For loops on whole data sets
我想在 for 循环中对 R 里的多个数据框执行 tidyverse
操作。我希望每个数据框都经过循环处理，并返回“清理后”的数据框。我知道自己遗漏了某个简单的细节。这是我的代码:
# Question snippet (kept exactly as asked; it does not work as written).
# Builds three sample data frames of 30 rows each with the columns X1,
# txt_student_id, txt_college_name and txt_registration_status_code. All
# values except X1 are random draws, so the contents differ on every run.
AY14<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
AY15<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
AY16<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
# Unnamed list holding the three data frames in order.
Dat_list <- list(AY14,AY15,AY16)
# NOTE(review): `i` takes the value of each data frame in turn, not its
# position in the list.
for(i in Dat_list){
# NOTE(review): `Dat_list[i]` therefore uses a data frame as the subscript,
# which does not address a list slot; a positional index with [[ ]] is needed
# to write the result back.
Dat_list[i]<- i %>%
# Keep one row per (txt_student_id, txt_college_name) pair, retaining all columns.
distinct(txt_student_id, txt_college_name,.keep_all = T) %>%
# Keep only the two "First-time" registration statuses.
filter(txt_registration_status_code %in% list("First-time, Freshman","First-time, Transfer")) %>%
# NOTE(review): txt_student_ssn is not among the columns created above, so
# this select() cannot succeed on these data frames.
select(txt_student_ssn,txt_college_name,txt_registration_status_code)
}
试试这个。在循环中最好用数字索引进行遍历。另外，对于列表，应使用 [[ ]]
来提取或替换单个元素。代码如下:
library(dplyr)

# Data ----
# Build one synthetic academic-year table: 30 rows with a row counter (X1), a
# randomly drawn student id (duplicates are possible), a college name and a
# registration status. The draws are random, so the contents differ on every
# run unless a seed is set beforehand.
make_sample_year <- function() {
  data.frame(
    X1 = seq(1, 30, by = 1),
    txt_student_id = sample(seq(300, 329, by = 1), size = 30, replace = TRUE),
    txt_college_name = sample(
      c("A", "B", "C", "D"),
      size = 30, replace = TRUE, prob = c(.5, .3, .15, .05)
    ),
    txt_registration_status_code = sample(
      c("First-time, Freshman", "First-time, Transfer", "Continuing"),
      size = 30, replace = TRUE, prob = c(.4, .1, .5)
    )
  )
}

AY14 <- make_sample_year()
AY15 <- make_sample_year()
AY16 <- make_sample_year()

# List ----
Dat_list <- list(AY14, AY15, AY16)

# Loop ----
# Registration statuses to keep, as a plain character vector for %in%.
first_time_codes <- c("First-time, Freshman", "First-time, Transfer")

# Iterate by position so each cleaned table can be written back into its own
# slot with [[ ]]. seq_along() also copes with an empty list, where
# 1:length(x) would yield c(1, 0).
for (i in seq_along(Dat_list)) {
  Dat_list[[i]] <- Dat_list[[i]] %>%
    # One row per student/college pair, keeping the remaining columns.
    distinct(txt_student_id, txt_college_name, .keep_all = TRUE) %>%
    filter(txt_registration_status_code %in% first_time_codes) %>%
    select(txt_college_name, txt_registration_status_code)
}
这是一个使用 lapply
的版本。
# lapply() version of the same clean-up: returns a new list with one cleaned
# data frame per element of Dat_list. It needs the original columns
# (txt_student_id in particular), so apply it to the raw list rather than to
# a list that has already been reduced to the two selected columns.
Dat_list <- lapply(Dat_list, function(x) {
  x %>%
    # One row per student/college pair, keeping the remaining columns.
    distinct(txt_student_id, txt_college_name, .keep_all = TRUE) %>%
    filter(
      txt_registration_status_code %in%
        c("First-time, Freshman", "First-time, Transfer")
    ) %>%
    select(txt_college_name, txt_registration_status_code)
})
我想在 for 循环中对 R 里的多个数据框执行 tidyverse
操作。我希望每个数据框都经过循环处理，并返回“清理后”的数据框。我知道自己遗漏了某个简单的细节。这是我的代码:
# Question snippet (kept exactly as asked; it does not work as written).
# Builds three sample data frames of 30 rows each with the columns X1,
# txt_student_id, txt_college_name and txt_registration_status_code. All
# values except X1 are random draws, so the contents differ on every run.
AY14<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
AY15<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
AY16<- data.frame(X1=seq(1,30,by=1), txt_student_id=sample(seq(300,329,by=1),size=30,replace=T), txt_college_name= sample(c("A","B","C","D"),size=30,replace=T,prob=c(.5,.3,.15,.05)), txt_registration_status_code=sample(c("First-time, Freshman","First-time, Transfer","Continuing"),size=30,replace=T, prob=c(.4,.1,.5)))
# Unnamed list holding the three data frames in order.
Dat_list <- list(AY14,AY15,AY16)
# NOTE(review): `i` takes the value of each data frame in turn, not its
# position in the list.
for(i in Dat_list){
# NOTE(review): `Dat_list[i]` therefore uses a data frame as the subscript,
# which does not address a list slot; a positional index with [[ ]] is needed
# to write the result back.
Dat_list[i]<- i %>%
# Keep one row per (txt_student_id, txt_college_name) pair, retaining all columns.
distinct(txt_student_id, txt_college_name,.keep_all = T) %>%
# Keep only the two "First-time" registration statuses.
filter(txt_registration_status_code %in% list("First-time, Freshman","First-time, Transfer")) %>%
# NOTE(review): txt_student_ssn is not among the columns created above, so
# this select() cannot succeed on these data frames.
select(txt_student_ssn,txt_college_name,txt_registration_status_code)
}
试试这个。在循环中最好用数字索引进行遍历。另外，对于列表，应使用 [[ ]]
来提取或替换单个元素。代码如下:
library(dplyr)

# Data ----
# Build one synthetic academic-year table: 30 rows with a row counter (X1), a
# randomly drawn student id (duplicates are possible), a college name and a
# registration status. The draws are random, so the contents differ on every
# run unless a seed is set beforehand.
make_sample_year <- function() {
  data.frame(
    X1 = seq(1, 30, by = 1),
    txt_student_id = sample(seq(300, 329, by = 1), size = 30, replace = TRUE),
    txt_college_name = sample(
      c("A", "B", "C", "D"),
      size = 30, replace = TRUE, prob = c(.5, .3, .15, .05)
    ),
    txt_registration_status_code = sample(
      c("First-time, Freshman", "First-time, Transfer", "Continuing"),
      size = 30, replace = TRUE, prob = c(.4, .1, .5)
    )
  )
}

AY14 <- make_sample_year()
AY15 <- make_sample_year()
AY16 <- make_sample_year()

# List ----
Dat_list <- list(AY14, AY15, AY16)

# Loop ----
# Registration statuses to keep, as a plain character vector for %in%.
first_time_codes <- c("First-time, Freshman", "First-time, Transfer")

# Iterate by position so each cleaned table can be written back into its own
# slot with [[ ]]. seq_along() also copes with an empty list, where
# 1:length(x) would yield c(1, 0).
for (i in seq_along(Dat_list)) {
  Dat_list[[i]] <- Dat_list[[i]] %>%
    # One row per student/college pair, keeping the remaining columns.
    distinct(txt_student_id, txt_college_name, .keep_all = TRUE) %>%
    filter(txt_registration_status_code %in% first_time_codes) %>%
    select(txt_college_name, txt_registration_status_code)
}
这是一个使用 lapply
的版本。
# lapply() version of the same clean-up: returns a new list with one cleaned
# data frame per element of Dat_list. It needs the original columns
# (txt_student_id in particular), so apply it to the raw list rather than to
# a list that has already been reduced to the two selected columns.
Dat_list <- lapply(Dat_list, function(x) {
  x %>%
    # One row per student/college pair, keeping the remaining columns.
    distinct(txt_student_id, txt_college_name, .keep_all = TRUE) %>%
    filter(
      txt_registration_status_code %in%
        c("First-time, Freshman", "First-time, Transfer")
    ) %>%
    select(txt_college_name, txt_registration_status_code)
})