R,使用字符对象标记因子列
R: Label factor columns using character objects
我有一个 data.frame 包含需要标记的列。
# Example data: 50 rows (row.names = c(NA, -50L)) and three character
# columns q1, q2, q3 whose values are numeric codes stored as strings
# (q1 holds "4".."7", q2 holds "1"/"2", q3 holds "2"/"3").
# The object carries the tibble classes ("tbl_df", "tbl", "data.frame").
# The `na.action` attribute of class "omit" lists 14 row indices; it is
# presumably left over from an earlier na.omit() call on a larger data
# set -- TODO confirm.
df <- structure(list(q1 = c("5", "6", "5", "5", "7", "5", "5", "5",
"5", "6", "5", "6", "6", "6", "7", "6", "5", "6", "5", "6", "6",
"5", "7", "5", "6", "6", "5", "6", "6", "5", "5", "5", "5", "5",
"5", "5", "4", "5", "5", "4", "4", "5", "4", "4", "5", "4", "5",
"5", "4", "5"), q2 = c("2", "2", "1", "1", "2", "1", "1", "2",
"1", "1", "1", "2", "1", "1", "2", "1", "2", "1", "2", "2", "2",
"1", "2", "2", "2", "2", "2", "1", "1", "2", "2", "2", "2", "2",
"2", "1", "2", "1", "1", "1", "1", "2", "1", "1", "1", "1", "2",
"2", "1", "2"), q3 = c("3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2")), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"), na.action = structure(c(`71` = 71L, `78` = 78L,
`96` = 96L, `250` = 250L, `393` = 393L, `488` = 488L, `644` = 644L,
`847` = 847L, `862` = 862L, `1083` = 1083L, `1120` = 1120L, `1149` = 1149L,
`1322` = 1322L, `1357` = 1357L), class = "omit"))
每列需要不同的标签。这些标签位于单独的对象中,如下所示。
# Label vectors, one per column of `df`, named Q<n>_Label for column q<n>.
# Presumably position i holds the label for code i (e.g. code "1" in q2
# is "Female") -- confirm against the codebook.
Q1_Label <- c("12 y", "13 y", "14 y", "15 y", "16 y", "17 y", "18 y")
Q2_Label <- c("Female", "Male")
# The closing parenthesis was missing here, which made the snippet unparsable.
Q3_Label <- c("9th", "10th", "11th", "12th", "Ung")
如何在尽可能少的代码行中使用字符对象来标记数据框列?
下面是我尝试执行此操作的代码,但在 sapply 的函数内部我无法获取当前列的名称。
在此先感谢您的帮助。
# Asker's original attempt, kept exactly as posted; it does not work.
# sapply(X = df, ...) hands each column's *values* to FUN as `x`, not the
# column's name. Unless the column vector itself carries names, names(x)
# is NULL, so paste() yields just "_Label" and get() cannot find it.
# Separately, levels = 1:length(table(x)) counts the distinct observed
# values instead of using the codes themselves (q1 holds "4".."7" but
# would get levels 1:4).
a_df <- sapply(X = df, FUN = function(x) factor(x,
levels = 1:length(table(x)),
labels = get(paste(toupper(names(x)), "_Label", sep = "")) # This line is where I get the problem
))
# Convert each column's codes to labels using the matching Q<n>_Label vector.
# Iterating over the column *names* (not the columns themselves) lets FUN
# build the name of the label object, e.g. "q1" -> "Q1_Label".
# Code i is mapped to the i-th label, so codes that never occur in the data
# still keep their slot.
# Note: sapply() simplifies the per-column factors into a single character
# matrix (one column per variable), which is what the printed output shows;
# use `df[] <- lapply(...)` instead if factor columns are needed.
sapply(
  X = colnames(df),
  FUN = function(x) {
    # Look the label vector up once and reuse it for both levels and labels.
    lab <- get(paste0(toupper(x), "_Label"))
    factor(df[[x]], levels = seq_along(lab), labels = lab)
  }
)
输出
q1 q2 q3
[1,] "16 y" "Male" "11th"
[2,] "17 y" "Male" "11th"
[3,] "16 y" "Female" "11th"
[4,] "16 y" "Female" "11th"
[5,] "18 y" "Male" "11th"
[6,] "16 y" "Female" "11th"
[7,] "16 y" "Female" "11th"
[8,] "16 y" "Male" "11th"
[9,] "16 y" "Female" "11th"
[10,] "17 y" "Female" "11th"
[11,] "16 y" "Female" "11th"
[12,] "17 y" "Male" "11th"
[13,] "17 y" "Female" "11th"
[14,] "17 y" "Female" "11th"
[15,] "18 y" "Male" "11th"
[16,] "17 y" "Female" "11th"
[17,] "16 y" "Male" "11th"
[18,] "17 y" "Female" "11th"
[19,] "16 y" "Male" "11th"
[20,] "17 y" "Male" "11th"
[21,] "17 y" "Male" "11th"
[22,] "16 y" "Female" "11th"
[23,] "18 y" "Male" "11th"
[24,] "16 y" "Male" "11th"
[25,] "17 y" "Male" "11th"
[26,] "17 y" "Male" "11th"
[27,] "16 y" "Male" "11th"
[28,] "17 y" "Female" "11th"
[29,] "17 y" "Female" "11th"
[30,] "16 y" "Male" "11th"
[31,] "16 y" "Male" "11th"
[32,] "16 y" "Male" "11th"
[33,] "16 y" "Male" "11th"
[34,] "16 y" "Male" "11th"
[35,] "16 y" "Male" "11th"
[36,] "16 y" "Female" "10th"
[37,] "15 y" "Male" "10th"
[38,] "16 y" "Female" "10th"
[39,] "16 y" "Female" "10th"
[40,] "15 y" "Female" "10th"
[41,] "15 y" "Female" "10th"
[42,] "16 y" "Male" "10th"
[43,] "15 y" "Female" "10th"
[44,] "15 y" "Female" "10th"
[45,] "16 y" "Female" "10th"
[46,] "15 y" "Female" "10th"
[47,] "16 y" "Male" "10th"
[48,] "16 y" "Male" "10th"
[49,] "15 y" "Female" "10th"
[50,] "16 y" "Male" "10th"
我们可以用 mget 将所有标签对象放在一个列表中,然后用 Map 和 factor 逐列更改列中的值。
# Replace every column of `df` with a factor labelled from the matching
# Q<n>_Label vector. mget(ls(pattern = ...)) collects the label vectors
# into a named list; ls() returns the names sorted alphabetically, so this
# relies on that order lining up with the column order of `df`.
# factor() is called without `levels`, so labels are assigned by position
# to the sorted distinct values that actually occur in the column (the
# smallest observed code gets the first label), not by the numeric value
# of the code.
df[] <- Map(
  function(x, y) factor(x, labels = y[seq_along(unique(x))]),
  df,
  # The backslash must be doubled inside an R string literal; "\d" alone
  # is rejected by the parser as an unrecognized escape.
  mget(ls(pattern = "Q\\d+_Label"))
)
df
# A tibble: 50 x 3
# q1 q2 q3
# <fct> <fct> <fct>
# 1 13 y Male 10th
# 2 14 y Male 10th
# 3 13 y Female 10th
# 4 13 y Female 10th
# 5 15 y Male 10th
# 6 13 y Female 10th
# 7 13 y Female 10th
# 8 13 y Male 10th
# 9 13 y Female 10th
#10 14 y Female 10th
# … with 40 more rows
我有一个 data.frame 包含需要标记的列。
# Example data: 50 rows (row.names = c(NA, -50L)) and three character
# columns q1, q2, q3 whose values are numeric codes stored as strings
# (q1 holds "4".."7", q2 holds "1"/"2", q3 holds "2"/"3").
# The object carries the tibble classes ("tbl_df", "tbl", "data.frame").
# The `na.action` attribute of class "omit" lists 14 row indices; it is
# presumably left over from an earlier na.omit() call on a larger data
# set -- TODO confirm.
df <- structure(list(q1 = c("5", "6", "5", "5", "7", "5", "5", "5",
"5", "6", "5", "6", "6", "6", "7", "6", "5", "6", "5", "6", "6",
"5", "7", "5", "6", "6", "5", "6", "6", "5", "5", "5", "5", "5",
"5", "5", "4", "5", "5", "4", "4", "5", "4", "4", "5", "4", "5",
"5", "4", "5"), q2 = c("2", "2", "1", "1", "2", "1", "1", "2",
"1", "1", "1", "2", "1", "1", "2", "1", "2", "1", "2", "2", "2",
"1", "2", "2", "2", "2", "2", "1", "1", "2", "2", "2", "2", "2",
"2", "1", "2", "1", "1", "1", "1", "2", "1", "1", "1", "1", "2",
"2", "1", "2"), q3 = c("3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2")), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"), na.action = structure(c(`71` = 71L, `78` = 78L,
`96` = 96L, `250` = 250L, `393` = 393L, `488` = 488L, `644` = 644L,
`847` = 847L, `862` = 862L, `1083` = 1083L, `1120` = 1120L, `1149` = 1149L,
`1322` = 1322L, `1357` = 1357L), class = "omit"))
每列需要不同的标签。这些标签位于单独的对象中,如下所示。
# Label vectors, one per column of `df`, named Q<n>_Label for column q<n>.
# Presumably position i holds the label for code i (e.g. code "1" in q2
# is "Female") -- confirm against the codebook.
Q1_Label <- c("12 y", "13 y", "14 y", "15 y", "16 y", "17 y", "18 y")
Q2_Label <- c("Female", "Male")
# The closing parenthesis was missing here, which made the snippet unparsable.
Q3_Label <- c("9th", "10th", "11th", "12th", "Ung")
如何在尽可能少的代码行中使用字符对象来标记数据框列?
下面是我尝试执行此操作的代码,但在 sapply 的函数内部我无法获取当前列的名称。
在此先感谢您的帮助。
# Asker's original attempt, kept exactly as posted; it does not work.
# sapply(X = df, ...) hands each column's *values* to FUN as `x`, not the
# column's name. Unless the column vector itself carries names, names(x)
# is NULL, so paste() yields just "_Label" and get() cannot find it.
# Separately, levels = 1:length(table(x)) counts the distinct observed
# values instead of using the codes themselves (q1 holds "4".."7" but
# would get levels 1:4).
a_df <- sapply(X = df, FUN = function(x) factor(x,
levels = 1:length(table(x)),
labels = get(paste(toupper(names(x)), "_Label", sep = "")) # This line is where I get the problem
))
# Convert each column's codes to labels using the matching Q<n>_Label vector.
# Iterating over the column *names* (not the columns themselves) lets FUN
# build the name of the label object, e.g. "q1" -> "Q1_Label".
# Code i is mapped to the i-th label, so codes that never occur in the data
# still keep their slot.
# Note: sapply() simplifies the per-column factors into a single character
# matrix (one column per variable), which is what the printed output shows;
# use `df[] <- lapply(...)` instead if factor columns are needed.
sapply(
  X = colnames(df),
  FUN = function(x) {
    # Look the label vector up once and reuse it for both levels and labels.
    lab <- get(paste0(toupper(x), "_Label"))
    factor(df[[x]], levels = seq_along(lab), labels = lab)
  }
)
输出
q1 q2 q3
[1,] "16 y" "Male" "11th"
[2,] "17 y" "Male" "11th"
[3,] "16 y" "Female" "11th"
[4,] "16 y" "Female" "11th"
[5,] "18 y" "Male" "11th"
[6,] "16 y" "Female" "11th"
[7,] "16 y" "Female" "11th"
[8,] "16 y" "Male" "11th"
[9,] "16 y" "Female" "11th"
[10,] "17 y" "Female" "11th"
[11,] "16 y" "Female" "11th"
[12,] "17 y" "Male" "11th"
[13,] "17 y" "Female" "11th"
[14,] "17 y" "Female" "11th"
[15,] "18 y" "Male" "11th"
[16,] "17 y" "Female" "11th"
[17,] "16 y" "Male" "11th"
[18,] "17 y" "Female" "11th"
[19,] "16 y" "Male" "11th"
[20,] "17 y" "Male" "11th"
[21,] "17 y" "Male" "11th"
[22,] "16 y" "Female" "11th"
[23,] "18 y" "Male" "11th"
[24,] "16 y" "Male" "11th"
[25,] "17 y" "Male" "11th"
[26,] "17 y" "Male" "11th"
[27,] "16 y" "Male" "11th"
[28,] "17 y" "Female" "11th"
[29,] "17 y" "Female" "11th"
[30,] "16 y" "Male" "11th"
[31,] "16 y" "Male" "11th"
[32,] "16 y" "Male" "11th"
[33,] "16 y" "Male" "11th"
[34,] "16 y" "Male" "11th"
[35,] "16 y" "Male" "11th"
[36,] "16 y" "Female" "10th"
[37,] "15 y" "Male" "10th"
[38,] "16 y" "Female" "10th"
[39,] "16 y" "Female" "10th"
[40,] "15 y" "Female" "10th"
[41,] "15 y" "Female" "10th"
[42,] "16 y" "Male" "10th"
[43,] "15 y" "Female" "10th"
[44,] "15 y" "Female" "10th"
[45,] "16 y" "Female" "10th"
[46,] "15 y" "Female" "10th"
[47,] "16 y" "Male" "10th"
[48,] "16 y" "Male" "10th"
[49,] "15 y" "Female" "10th"
[50,] "16 y" "Male" "10th"
我们可以用 mget 将所有标签对象放在一个列表中,然后用 Map 和 factor 逐列更改列中的值。
# Replace every column of `df` with a factor labelled from the matching
# Q<n>_Label vector. mget(ls(pattern = ...)) collects the label vectors
# into a named list; ls() returns the names sorted alphabetically, so this
# relies on that order lining up with the column order of `df`.
# factor() is called without `levels`, so labels are assigned by position
# to the sorted distinct values that actually occur in the column (the
# smallest observed code gets the first label), not by the numeric value
# of the code.
df[] <- Map(
  function(x, y) factor(x, labels = y[seq_along(unique(x))]),
  df,
  # The backslash must be doubled inside an R string literal; "\d" alone
  # is rejected by the parser as an unrecognized escape.
  mget(ls(pattern = "Q\\d+_Label"))
)
df
# A tibble: 50 x 3
# q1 q2 q3
# <fct> <fct> <fct>
# 1 13 y Male 10th
# 2 14 y Male 10th
# 3 13 y Female 10th
# 4 13 y Female 10th
# 5 15 y Male 10th
# 6 13 y Female 10th
# 7 13 y Female 10th
# 8 13 y Male 10th
# 9 13 y Female 10th
#10 14 y Female 10th
# … with 40 more rows