R 编程:rbind 打乱了输入顺序
R programming: rbind messes up the order of the input
我有 R 函数
- 接受目录名称(此目录包含编号的 .csv 文件)
- 接受代表文件的一组特定数字
- 将上述文件组合成一个数据框
- 打印出特定 ID 的出现次数
函数
library(plyr)
# Count the NA-free rows per ID across a selection of CSV files.
#
# directory: path to a folder containing the .csv files.
# id:        positions, within the list.files() listing of `directory`,
#            of the files to read (defaults to the first five).
#
# Returns a data frame with columns "id" and "Occurrences". Rows come back
# in whatever order count() produces, not in the order given by `id`.
complete <- function(directory, id = 1:5) {
  csv_paths <- list.files(directory, full.names = TRUE)[id]
  tables <- lapply(csv_paths, read.csv)
  # Stack every file into one table, then drop rows that contain any NA.
  complete_rows <- as.data.frame(na.omit(do.call(rbind, tables)))
  new_output <- count(complete_rows, "ID")
  colnames(new_output) <- c("id", "Occurrences")
  new_output
}
问题
如果我这样调用函数:
complete("diet_data",c(4,3,2))
我得到这个输出
id Occurrences
1 2 30
2 3 17
3 4 30
但我希望保留 id 顺序,即 4、3、2,以便输出为
id Occurrences
1 4 30
2 3 17
3 2 30
如何做到这一点?而且事先无法知道 id 会是 1:5、5:3 还是其他顺序。
这个函数的示例数据框看起来像这样
structure(list(ID = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), Age = c(40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L,
35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L,
35L, 35L, 35L, 35L), Weight = c(188L, 188L, 188L, 188L, 189L,
189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 192L, 192L, 192L, 192L,
192L, 192L, 192L, 175L, 175L, 175L, 175L, 175L, 175L, 175L, 175L,
175L, 175L, 175L, 175L, 175L, 175L, 175L, 175L, 177L, 210L, 209L,
209L, 209L, 209L, 209L, 209L, 208L, 208L, 208L, 208L, 208L, 208L,
207L, 206L, 206L, 206L, 205L, 205L, 205L, 205L, 204L, 204L, 204L,
203L, 203L, 202L, 202L, 202L, 201L), Day = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L)), .Names = c("ID", "Age", "Weight",
"Day"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 90L), class = "data.frame", na.action = structure(47:59, .Names = c("47",
"48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58",
"59"), class = "omit"))
您可以尝试在函数末尾添加这一行
# Use these lines as the end of complete(), in place of the bare `new_output`.
# The columns were already renamed to "id"/"Occurrences", so the lookup has to
# use `new_output$id`; `new_output$ID` is NULL and match() would return only NA.
# match(id, ...) yields, for each requested id in order, the row that holds it.
# NOTE(review): assumes the file positions in `id` equal the ID values stored
# in those files, as in the example data; an id with no complete rows gives an
# NA row.
sorted_output <- new_output[match(id, new_output$id), ]
# Renumber the rows 1..n so the printed result matches the desired output.
rownames(sorted_output) <- NULL
# Return the reordered table visibly (a trailing assignment returns invisibly).
sorted_output
我有 R 函数
- 接受目录名称(此目录包含编号的 .csv 文件)
- 接受代表文件的一组特定数字
- 将上述文件组合成一个数据框
- 打印出特定 ID 的出现次数
函数
library(plyr)
# Count the NA-free rows per ID across a selection of CSV files.
#
# directory: path to a folder containing the .csv files.
# id:        positions, within the list.files() listing of `directory`,
#            of the files to read (defaults to the first five).
#
# Returns a data frame with columns "id" and "Occurrences". Rows come back
# in whatever order count() produces, not in the order given by `id`.
complete <- function(directory, id = 1:5) {
  csv_paths <- list.files(directory, full.names = TRUE)[id]
  tables <- lapply(csv_paths, read.csv)
  # Stack every file into one table, then drop rows that contain any NA.
  complete_rows <- as.data.frame(na.omit(do.call(rbind, tables)))
  new_output <- count(complete_rows, "ID")
  colnames(new_output) <- c("id", "Occurrences")
  new_output
}
问题
如果我这样调用函数:complete("diet_data",c(4,3,2))
我得到这个输出
id Occurrences
1 2 30
2 3 17
3 4 30
但我希望保留 id 顺序,即 4、3、2,以便输出为
id Occurrences
1 4 30
2 3 17
3 2 30
如何做到这一点?而且事先无法知道 id 会是 1:5、5:3 还是其他顺序。
这个函数的示例数据框看起来像这样
structure(list(ID = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), Age = c(40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L,
35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L,
35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 35L,
35L, 35L, 35L, 35L), Weight = c(188L, 188L, 188L, 188L, 189L,
189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 190L, 190L,
190L, 190L, 190L, 190L, 190L, 190L, 190L, 192L, 192L, 192L, 192L,
192L, 192L, 192L, 175L, 175L, 175L, 175L, 175L, 175L, 175L, 175L,
175L, 175L, 175L, 175L, 175L, 175L, 175L, 175L, 177L, 210L, 209L,
209L, 209L, 209L, 209L, 209L, 208L, 208L, 208L, 208L, 208L, 208L,
207L, 206L, 206L, 206L, 205L, 205L, 205L, 205L, 204L, 204L, 204L,
203L, 203L, 202L, 202L, 202L, 201L), Day = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L)), .Names = c("ID", "Age", "Weight",
"Day"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 90L), class = "data.frame", na.action = structure(47:59, .Names = c("47",
"48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58",
"59"), class = "omit"))
您可以尝试在函数末尾添加这一行
# Use these lines as the end of complete(), in place of the bare `new_output`.
# The columns were already renamed to "id"/"Occurrences", so the lookup has to
# use `new_output$id`; `new_output$ID` is NULL and match() would return only NA.
# match(id, ...) yields, for each requested id in order, the row that holds it.
# NOTE(review): assumes the file positions in `id` equal the ID values stored
# in those files, as in the example data; an id with no complete rows gives an
# NA row.
sorted_output <- new_output[match(id, new_output$id), ]
# Renumber the rows 1..n so the printed result matches the desired output.
rownames(sorted_output) <- NULL
# Return the reordered table visibly (a trailing assignment returns invisibly).
sorted_output