我应该如何在 R 中高效地格式化 gt table 的行?
How should I format across rows of a gt table efficiently in R?
如果我想高效地格式化 gt table 的行,是否有比下面所示方法更好的做法?
有些行是字符,因此不需要格式化,
有些是需要一位小数的数字,
有些是需要两位小数的数字,
有些是需要保留两位小数的百分比。无论做什么,理想情况下都应该推广到其他可能的格式。
我创建了一个数据框来创建格式化规范,但每种格式都需要在管道中使用单独的命令。
library(dplyr)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom. top_n() is called without an ordering
# column, so dplyr chooses the ranking column itself.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")

# Transpose the data so that every original column becomes a row. The
# original column names are stored in an extra `row_labels` column, which
# is appended after the transposed data columns.
row_labels <- names(gtcars_8)
gtcars_8_t <- gtcars_8 %>%
  as.matrix() %>%
  t() %>%
  as.data.frame()
gtcars_8_t[["row_labels"]] <- row_labels

# Names of the first eight columns; used later as the formatting targets.
my_column_names <- names(gtcars_8_t)[seq_len(8)]
# Formatting specification: one entry per displayed table row.
#   row      - row label (name of the original gtcars column)
#   type     - "c" character (left as is), "n" number, "p" percent
#   decimals - number of decimal places (not used for "c" rows)
# The label column is named explicitly: as.data.frame() on a bare vector
# would give it the awkward name `row_labels[1:10]`.
format_specs <- data.frame(row = row_labels[1:10])
format_specs$type <- c("c", "c", "n", "c", "c", "n", "n", "n", "n", "p")
format_specs$decimals <- c(0, 0, 0, 0, 0, 1, 2, 2, 1, 2)
format_specs
# Render the first ten transposed rows as a plain, unformatted gt table.
gt(slice(gtcars_8_t, 1:10))
# gt table with the formats hard-coded (desired output).
# The target columns are selected with all_of() because my_column_names is
# an external character vector; wrapping it in vars() is deprecated in
# current gt releases.
gtcars_8_t %>%
  slice(1:10) %>%
  gt() %>%
  cols_move_to_start("row_labels") %>%
  # rows where type = n and decimals = 1
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "n" & format_specs$decimals == 1),
    fns = function(x) {
      formatC(as.numeric(x), digits = 1, format = "f")
    }
  ) %>%
  # rows where type = n and decimals = 2
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "n" & format_specs$decimals == 2),
    fns = function(x) {
      formatC(as.numeric(x), digits = 2, format = "f")
    }
  ) %>%
  # rows where type = p and decimals = 2 (a literal "%" is appended)
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "p" & format_specs$decimals == 2),
    fns = function(x) {
      paste0(formatC(as.numeric(x), digits = 2, format = "f"), "%")
    }
  )
虽然不完全相同,但在 gt 中应用格式似乎比人们最初预期的要复杂一些。
实现此目的的通用方法是编写一个包装函数,它遍历 format_specs
数据框并逐行应用格式规则。对于循环部分,我使用了 purrr::reduce,
但一个简单的 for 循环应该同样有效:
library(dplyr)
library(purrr)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")
#> Selecting by msrp

# Transpose the data so that every original column becomes a row. The
# original column names are stored in an extra `row_labels` column.
row_labels <- names(gtcars_8)
gtcars_8_t <- gtcars_8 %>%
  as.matrix() %>%
  t() %>%
  as.data.frame()
gtcars_8_t[["row_labels"]] <- row_labels

# Names of the first eight columns; used later as the formatting targets.
my_column_names <- names(gtcars_8_t)[seq_len(8)]

# Formatting specification, one entry per displayed row. The label column
# is named `row` so that it can be matched against the table's row labels.
format_specs <- data.frame(row = row_labels[seq_len(10)])
format_specs[["type"]] <- c("c", "c", "n", "c", "c", "n", "n", "n", "n", "p")
format_specs[["decimals"]] <- c(0, 0, 0, 0, 0, 1, 2, 2, 1, 2)
# Apply per-row formatting rules to a gt table.
#
# Arguments:
#   data      a gt table; its underlying data must contain the column named
#             by `label_col`, holding one label per row.
#   cols      column selection, forwarded unchanged to fmt(columns = ).
#   row_spec  data frame with one entry per row to consider:
#               row      - label matched against `label_col`
#               type     - "n" (number) or "p" (percent); any other value,
#                          e.g. "c", leaves the row unformatted
#               decimals - number of decimal places
#   label_col name of the label column in the table data
#             (defaults to "row_labels").
#
# Returns the gt table with one fmt() rule added per formatted row.
myfmt <- function(data, cols, row_spec, label_col = "row_labels") {
  # One formatter per supported type; each takes the raw cell values and the
  # number of decimals and returns formatted strings.
  formatters <- list(
    n = function(values, decimals) {
      scales::number(as.numeric(values),
                     accuracy = 10^(-decimals), big.mark = "")
    },
    p = function(values, decimals) {
      scales::percent(as.numeric(values),
                      scale = 1, accuracy = 10^(-decimals))
    }
  )

  # Keep only rows that have a formatter. Without this, rows of other types
  # would register a formatting function that returns NULL.
  # as.character() guards against factor columns.
  has_formatter <- as.character(row_spec$type) %in% names(formatters)
  row_spec <- row_spec[has_formatter, , drop = FALSE]

  # Iterate by index rather than by label, so duplicated labels cannot
  # produce a multi-row lookup.
  reduce(seq_len(nrow(row_spec)), function(tbl, i) {
    label <- as.character(row_spec$row[[i]])
    decimals <- row_spec$decimals[[i]]
    format_one <- formatters[[as.character(row_spec$type[[i]])]]
    fmt(tbl, columns = cols,
        rows = which(tbl[["_data"]][[label_col]] == label),
        fns = function(values) format_one(values, decimals))
  }, .init = data)
}
# Render the first ten transposed rows with the label column first, then
# apply the per-row formats described by format_specs.
myfmt(
  cols_move_to_start(gt(slice(gtcars_8_t, 1:10)), "row_labels"),
  vars(my_column_names),
  format_specs
)
由 reprex package (v0.3.0) 于 2020 年 6 月 12 日创建
结果表格:
用更少的几行代码就实现了。
library(tidyverse)
library(scales)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")
#> Selecting by msrp
# Transpose gtcars_8 for display: add a row-number column, stack all
# columns into name/value pairs, then spread the row numbers back out as
# columns before passing the result to gt().
gtcars_8 %>%
  rownames_to_column() %>%
  # mutate(hp_rpm = scales::number(hp_rpm)) %>% example formatting change
  # Every column is converted to character first so that columns of
  # different types can share the single value column.
  # across() replaces the superseded mutate_all().
  mutate(across(everything(), as.character)) %>%
  pivot_longer(-rowname) %>%
  pivot_wider(names_from = rowname) %>%
  gt()
如果我想高效地格式化 gt table 的行,是否有比下面所示方法更好的做法?
有些行是字符,因此不需要格式化, 有些是需要一位小数的数字, 有些是需要两位小数的数字, 有些是需要保留两位小数的百分比。无论做什么,理想情况下都应该推广到其他可能的格式。
我创建了一个数据框来创建格式化规范,但每种格式都需要在管道中使用单独的命令。
library(dplyr)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom. top_n() is called without an ordering
# column, so dplyr chooses the ranking column itself.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")

# Transpose the data so that every original column becomes a row. The
# original column names are stored in an extra `row_labels` column, which
# is appended after the transposed data columns.
row_labels <- names(gtcars_8)
gtcars_8_t <- gtcars_8 %>%
  as.matrix() %>%
  t() %>%
  as.data.frame()
gtcars_8_t[["row_labels"]] <- row_labels

# Names of the first eight columns; used later as the formatting targets.
my_column_names <- names(gtcars_8_t)[seq_len(8)]
# Formatting specification: one entry per displayed table row.
#   row      - row label (name of the original gtcars column)
#   type     - "c" character (left as is), "n" number, "p" percent
#   decimals - number of decimal places (not used for "c" rows)
# The label column is named explicitly: as.data.frame() on a bare vector
# would give it the awkward name `row_labels[1:10]`.
format_specs <- data.frame(row = row_labels[1:10])
format_specs$type <- c("c", "c", "n", "c", "c", "n", "n", "n", "n", "p")
format_specs$decimals <- c(0, 0, 0, 0, 0, 1, 2, 2, 1, 2)
format_specs
# Render the first ten transposed rows as a plain, unformatted gt table.
gt(slice(gtcars_8_t, 1:10))
# gt table with the formats hard-coded (desired output).
# The target columns are selected with all_of() because my_column_names is
# an external character vector; wrapping it in vars() is deprecated in
# current gt releases.
gtcars_8_t %>%
  slice(1:10) %>%
  gt() %>%
  cols_move_to_start("row_labels") %>%
  # rows where type = n and decimals = 1
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "n" & format_specs$decimals == 1),
    fns = function(x) {
      formatC(as.numeric(x), digits = 1, format = "f")
    }
  ) %>%
  # rows where type = n and decimals = 2
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "n" & format_specs$decimals == 2),
    fns = function(x) {
      formatC(as.numeric(x), digits = 2, format = "f")
    }
  ) %>%
  # rows where type = p and decimals = 2 (a literal "%" is appended)
  fmt(
    columns = all_of(my_column_names),
    rows = which(format_specs$type == "p" & format_specs$decimals == 2),
    fns = function(x) {
      paste0(formatC(as.numeric(x), digits = 2, format = "f"), "%")
    }
  )
虽然不完全相同,但在 gt 中应用格式似乎比人们最初预期的要复杂一些。
实现此目的的通用方法是编写一个包装函数,它遍历 format_specs
数据框并逐行应用格式规则。对于循环部分,我使用了 purrr::reduce,
但一个简单的 for 循环应该同样有效:
library(dplyr)
library(purrr)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")
#> Selecting by msrp

# Transpose the data so that every original column becomes a row. The
# original column names are stored in an extra `row_labels` column.
row_labels <- names(gtcars_8)
gtcars_8_t <- gtcars_8 %>%
  as.matrix() %>%
  t() %>%
  as.data.frame()
gtcars_8_t[["row_labels"]] <- row_labels

# Names of the first eight columns; used later as the formatting targets.
my_column_names <- names(gtcars_8_t)[seq_len(8)]

# Formatting specification, one entry per displayed row. The label column
# is named `row` so that it can be matched against the table's row labels.
format_specs <- data.frame(row = row_labels[seq_len(10)])
format_specs[["type"]] <- c("c", "c", "n", "c", "c", "n", "n", "n", "n", "p")
format_specs[["decimals"]] <- c(0, 0, 0, 0, 0, 1, 2, 2, 1, 2)
# Apply per-row formatting rules to a gt table.
#
# Arguments:
#   data      a gt table; its underlying data must contain the column named
#             by `label_col`, holding one label per row.
#   cols      column selection, forwarded unchanged to fmt(columns = ).
#   row_spec  data frame with one entry per row to consider:
#               row      - label matched against `label_col`
#               type     - "n" (number) or "p" (percent); any other value,
#                          e.g. "c", leaves the row unformatted
#               decimals - number of decimal places
#   label_col name of the label column in the table data
#             (defaults to "row_labels").
#
# Returns the gt table with one fmt() rule added per formatted row.
myfmt <- function(data, cols, row_spec, label_col = "row_labels") {
  # One formatter per supported type; each takes the raw cell values and the
  # number of decimals and returns formatted strings.
  formatters <- list(
    n = function(values, decimals) {
      scales::number(as.numeric(values),
                     accuracy = 10^(-decimals), big.mark = "")
    },
    p = function(values, decimals) {
      scales::percent(as.numeric(values),
                      scale = 1, accuracy = 10^(-decimals))
    }
  )

  # Keep only rows that have a formatter. Without this, rows of other types
  # would register a formatting function that returns NULL.
  # as.character() guards against factor columns.
  has_formatter <- as.character(row_spec$type) %in% names(formatters)
  row_spec <- row_spec[has_formatter, , drop = FALSE]

  # Iterate by index rather than by label, so duplicated labels cannot
  # produce a multi-row lookup.
  reduce(seq_len(nrow(row_spec)), function(tbl, i) {
    label <- as.character(row_spec$row[[i]])
    decimals <- row_spec$decimals[[i]]
    format_one <- formatters[[as.character(row_spec$type[[i]])]]
    fmt(tbl, columns = cols,
        rows = which(tbl[["_data"]][[label_col]] == label),
        fns = function(values) format_one(values, decimals))
  }, .init = data)
}
# Render the first ten transposed rows with the label column first, then
# apply the per-row formats described by format_specs.
myfmt(
  cols_move_to_start(gt(slice(gtcars_8_t, 1:10)), "row_labels"),
  vars(my_column_names),
  format_specs
)
由 reprex package (v0.3.0) 于 2020 年 6 月 12 日创建
结果表格:
用更少的几行代码就实现了。
library(tidyverse)
library(scales)
library(gt)
# Build a small demo dataset: the top two rows per country of origin,
# excluding the United Kingdom.
gtcars_8 <- gtcars %>%
  dplyr::group_by(ctry_origin) %>%
  dplyr::top_n(2) %>%
  dplyr::ungroup() %>%
  dplyr::filter(ctry_origin != "United Kingdom")
#> Selecting by msrp
# Transpose gtcars_8 for display: add a row-number column, stack all
# columns into name/value pairs, then spread the row numbers back out as
# columns before passing the result to gt().
gtcars_8 %>%
  rownames_to_column() %>%
  # mutate(hp_rpm = scales::number(hp_rpm)) %>% example formatting change
  # Every column is converted to character first so that columns of
  # different types can share the single value column.
  # across() replaces the superseded mutate_all().
  mutate(across(everything(), as.character)) %>%
  pivot_longer(-rowname) %>%
  pivot_wider(names_from = rowname) %>%
  gt()