排序数据集并在创建列表时保持顺序
Ordering dataset and maintain order when creating list
我试图在创建列表时保持数据集的特定顺序。
我想做的:按列对数据集进行排序,并在创建列表时保持此顺序。应该很简单,但我发现的所有解决方案都无法正常工作。
数据(抱歉数据是以这种结构给出的,我会改进制作 reprex 的方式)
# Sample data (dput output): a 40-row data frame with factor columns
# Fac_Map, Calendar, S_Residency and Period, numeric columns Sum_A and
# Sum_S, and an integer column Order giving the desired sequence.
# Note that the Fac_Map levels are stored alphabetically ("Fac_1", "Fac_10",
# "Fac_2", ...), and that "Fac_3_ProblemOrder" occurs with two different
# Order values (1 for the Non-std rows, 7 for the Std rows).
data <- structure(list(Fac_Map = structure(c(1L, 1L, 1L, 1L, 3L, 3L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 2L,
2L, 2L, 2L), .Label = c("Fac_1", "Fac_10", "Fac_2", "Fac_3_ProblemOrder",
"Fac_4", "Fac_5", "Fac_6", "Fac_7", "Fac_8_ProblemOrder", "Fac_9"
), class = "factor"), Calendar = structure(c(2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("Non-std", "Std"), class = "factor"),
S_Residency = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L), .Label = c("Int", "Loc"), class = "factor"), Period = structure(c(1L,
2L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2020 P1",
"2020 P2", "2020 S1"), class = "factor"), Sum_A = c(92.2,
91.7, 90.2, 88.6, 96.4, 91.4, 96.4, 91.4, 87.3, 95.3, 82.5,
89.1, 89, 90.1, 87.4, 88.9, 85.1, 89.6, 89.7, 88.1, 87.1,
91.1, 88.2, 87.9, 90.8, 97.9, 91, 88.8, 86.4, 89.5, 86.3,
86.4, 86.7, 90.8, 84.6, 86.8, 82.1, 86.8, 78.4, 80.7), Sum_S = c(75.9,
75.6, 75, 73.5, 78.6, 70.5, 78.6, 70.5, 69, 86, 65.9, 72.6,
69, 69.3, 71.5, 73.8, 65.4, 70.6, 70.6, 70.3, 68, 73, 68.4,
69.1, 69.7, 80.9, 70.7, 68, 66.3, 69.9, 67.6, 68, 65.5, 68.6,
63, 64.6, 60.4, 68.6, 59.1, 63.5), Order = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L)), class = "data.frame", row.names = c(NA,
-40L))
我尝试过的、只能部分起作用的代码:
# Reorder attempts (the asker's non-working code, kept as posted)
# 1: assuming this is dplyr's arrange(), it expects the data frame as its
#    first argument; here a factor column is passed first instead, so this
#    call does not sort `data`.
arrange(data$Fac_Map, data,data$Order)
# 2: `Order` is already an integer column in the sample data, so the
#    conversion is not what fixes the ordering. The sorted data frame is only
#    printed, never assigned back, and sorting on Fac_Map follows its level
#    order, which is alphabetical ("Fac_10" before "Fac_2").
data$Order <- as.numeric(as.character(data$Order))
data[order(data$Fac_Map,data$Order),]
# 3: reorder() returns a re-levelled factor, but the result is discarded, so
#    data$Fac_Map keeps its original levels.
reorder(data$Fac_Map, data$Order)
# Create list
# The list is not in the desired order: split() orders its groups by the
# factor levels of the grouping columns, not by the row order of `data`, and
# without drop = TRUE it also keeps empty level combinations.
Fac <- split(data, list(data$Fac_Map, data$Calendar))
Fac
当我创建列表时,顺序没有被正确保留。有什么想法吗?
我们可以在 `arrange` 步骤之后将这些列转换为 `factor`,并将其 `levels` 指定为 `unique` 值。然后执行 `split` 或 `group_split`。注意 'Fac_Map' 中同时包含字符串和数字,因此先用 `mixedsort` 对 'Fac_Map' 的 `levels` 排序,再通过 `match` 与之匹配来执行 `arrange`,可能会很有用。
library(dplyr)
# Sort rows by the natural (mixed alphanumeric) order of the Fac_Map levels,
# then by Order. Re-level the grouping columns in order of first appearance so
# that group_split() returns the groups in that same sequence.
outlst <- data %>%
  arrange(match(Fac_Map, gtools::mixedsort(levels(Fac_Map))), Order) %>%
  mutate(
    across(
      c(Fac_Map, Order, Calendar),
      function(col) factor(col, levels = unique(col))
    )
  ) %>%
  group_split(Fac_Map, Calendar)
`list` 中 'Fac_Map' 值的顺序为:
library(purrr)
# Report, for every group in the list, the Fac_Map value of its first row.
map_chr(outlst, function(grp) {
  first_row <- slice(grp, 1)
  as.character(pull(first_row, Fac_Map))
})
#[1] "Fac_1" "Fac_2" "Fac_3_ProblemOrder" "Fac_3_ProblemOrder" "Fac_4"
#[6] "Fac_5" "Fac_6" "Fac_7" "Fac_8_ProblemOrder" "Fac_9"
#[11] "Fac_10"
或者使用 base R:先用 `order` 按 'Order' 列排序,然后将要拆分的列转换为 `factor`,并把 `levels` 设为其 `unique` 值。
# Sort the rows by Order, then re-level each grouping column in order of
# first appearance so that split() emits the groups in that sequence.
data <- data[order(data$Order), ]
data$Fac_Map <- factor(data$Fac_Map, levels = unique(data$Fac_Map))
data$Calendar <- factor(data$Calendar, levels = unique(data$Calendar))
# drop = TRUE discards Fac_Map/Calendar combinations that have no rows.
split(data, list(data$Fac_Map, data$Calendar), drop = TRUE)
我试图在创建列表时保持数据集的特定顺序。
我想做的:按列对数据集进行排序,并在创建列表时保持此顺序。应该很简单,但我发现的所有解决方案都无法正常工作。
数据(抱歉数据是以这种结构给出的,我会改进制作 reprex 的方式)
# Sample data (dput output): a 40-row data frame with factor columns
# Fac_Map, Calendar, S_Residency and Period, numeric columns Sum_A and
# Sum_S, and an integer column Order giving the desired sequence.
# Note that the Fac_Map levels are stored alphabetically ("Fac_1", "Fac_10",
# "Fac_2", ...), and that "Fac_3_ProblemOrder" occurs with two different
# Order values (1 for the Non-std rows, 7 for the Std rows).
data <- structure(list(Fac_Map = structure(c(1L, 1L, 1L, 1L, 3L, 3L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 2L,
2L, 2L, 2L), .Label = c("Fac_1", "Fac_10", "Fac_2", "Fac_3_ProblemOrder",
"Fac_4", "Fac_5", "Fac_6", "Fac_7", "Fac_8_ProblemOrder", "Fac_9"
), class = "factor"), Calendar = structure(c(2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("Non-std", "Std"), class = "factor"),
S_Residency = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L), .Label = c("Int", "Loc"), class = "factor"), Period = structure(c(1L,
2L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2020 P1",
"2020 P2", "2020 S1"), class = "factor"), Sum_A = c(92.2,
91.7, 90.2, 88.6, 96.4, 91.4, 96.4, 91.4, 87.3, 95.3, 82.5,
89.1, 89, 90.1, 87.4, 88.9, 85.1, 89.6, 89.7, 88.1, 87.1,
91.1, 88.2, 87.9, 90.8, 97.9, 91, 88.8, 86.4, 89.5, 86.3,
86.4, 86.7, 90.8, 84.6, 86.8, 82.1, 86.8, 78.4, 80.7), Sum_S = c(75.9,
75.6, 75, 73.5, 78.6, 70.5, 78.6, 70.5, 69, 86, 65.9, 72.6,
69, 69.3, 71.5, 73.8, 65.4, 70.6, 70.6, 70.3, 68, 73, 68.4,
69.1, 69.7, 80.9, 70.7, 68, 66.3, 69.9, 67.6, 68, 65.5, 68.6,
63, 64.6, 60.4, 68.6, 59.1, 63.5), Order = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L)), class = "data.frame", row.names = c(NA,
-40L))
我尝试过的、只能部分起作用的代码:
# Reorder attempts (the asker's non-working code, kept as posted)
# 1: assuming this is dplyr's arrange(), it expects the data frame as its
#    first argument; here a factor column is passed first instead, so this
#    call does not sort `data`.
arrange(data$Fac_Map, data,data$Order)
# 2: `Order` is already an integer column in the sample data, so the
#    conversion is not what fixes the ordering. The sorted data frame is only
#    printed, never assigned back, and sorting on Fac_Map follows its level
#    order, which is alphabetical ("Fac_10" before "Fac_2").
data$Order <- as.numeric(as.character(data$Order))
data[order(data$Fac_Map,data$Order),]
# 3: reorder() returns a re-levelled factor, but the result is discarded, so
#    data$Fac_Map keeps its original levels.
reorder(data$Fac_Map, data$Order)
# Create list
# The list is not in the desired order: split() orders its groups by the
# factor levels of the grouping columns, not by the row order of `data`, and
# without drop = TRUE it also keeps empty level combinations.
Fac <- split(data, list(data$Fac_Map, data$Calendar))
Fac
当我创建列表时,顺序没有被正确保留。有什么想法吗?
我们可以在 `arrange` 步骤之后将这些列转换为 `factor`,并将其 `levels` 指定为 `unique` 值。然后执行 `split` 或 `group_split`。注意 'Fac_Map' 中同时包含字符串和数字,因此先用 `mixedsort` 对 'Fac_Map' 的 `levels` 排序,再通过 `match` 与之匹配来执行 `arrange`,可能会很有用。
library(dplyr)
# Sort rows by the natural (mixed alphanumeric) order of the Fac_Map levels,
# then by Order. Re-level the grouping columns in order of first appearance so
# that group_split() returns the groups in that same sequence.
outlst <- data %>%
  arrange(match(Fac_Map, gtools::mixedsort(levels(Fac_Map))), Order) %>%
  mutate(
    across(
      c(Fac_Map, Order, Calendar),
      function(col) factor(col, levels = unique(col))
    )
  ) %>%
  group_split(Fac_Map, Calendar)
`list` 中 'Fac_Map' 值的顺序为:
library(purrr)
# Report, for every group in the list, the Fac_Map value of its first row.
map_chr(outlst, function(grp) {
  first_row <- slice(grp, 1)
  as.character(pull(first_row, Fac_Map))
})
#[1] "Fac_1" "Fac_2" "Fac_3_ProblemOrder" "Fac_3_ProblemOrder" "Fac_4"
#[6] "Fac_5" "Fac_6" "Fac_7" "Fac_8_ProblemOrder" "Fac_9"
#[11] "Fac_10"
或者使用 base R:先用 `order` 按 'Order' 列排序,然后将要拆分的列转换为 `factor`,并把 `levels` 设为其 `unique` 值。
# Sort the rows by Order, then re-level each grouping column in order of
# first appearance so that split() emits the groups in that sequence.
data <- data[order(data$Order), ]
data$Fac_Map <- factor(data$Fac_Map, levels = unique(data$Fac_Map))
data$Calendar <- factor(data$Calendar, levels = unique(data$Calendar))
# drop = TRUE discards Fac_Map/Calendar combinations that have no rows.
split(data, list(data$Fac_Map, data$Calendar), drop = TRUE)