如何重新格式化数据框以计算 R 中的基本值(平均值、SD、SE 等)?
How to reformat dataframe in order to calculate basic values (mean, SD, SE, etc.) in R?
我有一个名为 'my_data' 的数据框,看起来像这样:
Injury BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB Acute.BBB Chronic.BBB
1 Moderate 0.35 1.10 0.60 1.84 0.150 21 11 18
2 Severe 0.42 1.47 0.86 3.04 0.420 21 3 14
3 Moderate 0.45 1.02 0.65 1.80 0.150 21 11 18
4 Moderate 0.42 0.97 0.70 1.76 0.000 21 8 17
5 Severe 0.40 1.55 0.80 3.12 0.370 21 4 11
6 Moderate 0.37 0.96 0.65 1.65 0.240 21 9 14
7 Severe 0.32 1.64 0.75 3.34 0.400 21 2 12
8 Severe 0.40 1.44 0.90 3.24 0.298 21 1 9
9 Moderate 0.47 1.01 0.82 1.77 0.140 21 7 17
10 Moderate 0.41 0.90 0.67 1.51 0.190 21 9 15
11 Moderate 0.38 0.86 0.57 1.78 0.100 21 10 17
12 Severe 0.39 1.59 0.70 3.27 0.360 21 2 12
我想将损伤分组(Moderate 和 Severe)整理成列,每列都带有其相关的测量值(时间延迟、面积缺损等)。此外还有三个时间点(BL、Acute、Chronic),我也想把它们纳入最终的数据框。
我运行了下面的代码来计算基本统计量,但它没有按损伤组(Moderate 与 Severe)分组:
# Per-column descriptive statistics for the eight measurement columns
# (columns 2 to 9) of my_data, pooled over both injury groups.
# The result is a numeric matrix: one column per measurement, one row per
# statistic. Row labels are kept exactly as before; the two quantile rows
# print as "Upper Quantile.75%" and "LowerQuartile.25%" because quantile()
# appends its own name to the label.
#
# Missing values are dropped once, up front, so every statistic is computed
# on the same observations. If only mean() used na.rm = TRUE, a single NA
# would make sd/median/min/max return NA, make quantile() stop with an
# error, and still be counted in "n" and in the standard error.
updated_df <- vapply(
  my_data[, 2:9],
  function(column) {
    x <- column[!is.na(column)]
    c(
      "Mean" = mean(x),
      "Stand dev" = sd(x),
      "Stand Err" = sd(x) / sqrt(length(x)),
      "Median" = median(x),
      "CoeffofVariation" = sd(x) / mean(x),
      "Minimum" = min(x),
      "Maximun" = max(x),
      "Upper Quantile" = quantile(x, 0.75),
      "LowerQuartile" = quantile(x, 0.25),
      "n" = length(x)
    )
  },
  # Ten double-valued statistics per column; vapply enforces this shape.
  FUN.VALUE = numeric(10)
)
如何运行上面的统计计算,同时按损伤类型(Moderate 与 Severe)分组并考虑时间点?
在当前设置下,一个选项是按 Injury 列用 split 拆分数据,再用 lapply 遍历得到的 list,并对每一组应用同样的代码:
# Split the data by injury group, then compute the same descriptive
# statistics for every measurement column (columns 2 to 9) within each group.
# lst1: named list of data frames, one per level of Injury.
# lst2: named list of numeric matrices (statistics in rows, measurements in
#       columns), one per level of Injury.
lst1 <- split(my_data, my_data$Injury, drop = TRUE)
lst2 <- lapply(lst1, function(dat) {
  vapply(
    dat[, 2:9],
    function(column) {
      # Drop missing values once so that all statistics, including n and
      # the standard error, are based on the same observations; without
      # this, quantile() errors and sd/median/min/max return NA on any NA.
      x <- column[!is.na(column)]
      c(
        "Mean" = mean(x),
        "Stand dev" = sd(x),
        "Stand Err" = sd(x) / sqrt(length(x)),
        "Median" = median(x),
        "CoeffofVariation" = sd(x) / mean(x),
        "Minimum" = min(x),
        "Maximun" = max(x),
        "Upper Quantile" = quantile(x, 0.75),
        "LowerQuartile" = quantile(x, 0.25),
        "n" = length(x)
      )
    },
    # Ten double-valued statistics per column; vapply enforces this shape.
    FUN.VALUE = numeric(10)
  )
})
-输出
lst2
#$Moderate
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.40714286 0.97428571 0.66571429 1.73000000 0.13857143 21
# Stand dev 0.04347961 0.07955232 0.08059304 0.11313708 0.07515064 0
# Stand Err 0.01643375 0.03006795 0.03046131 0.04276180 0.02840427 0
# Median 0.41000000 0.97000000 0.65000000 1.77000000 0.15000000 21
# CoeffofVariation 0.10679202 0.08165194 0.12106251 0.06539716 0.54232422 0
# Minimum 0.35000000 0.86000000 0.57000000 1.51000000 0.00000000 21
# Maximun 0.47000000 1.10000000 0.82000000 1.84000000 0.24000000 21
# Upper Quantile.75% 0.43500000 1.01500000 0.68500000 1.79000000 0.17000000 21
# LowerQuartile.25% 0.37500000 0.93000000 0.62500000 1.70500000 0.12000000 21
# n 7.00000000 7.00000000 7.00000000 7.00000000 7.00000000 7
# Acute.BBB Chronic.BBB
# Mean 9.2857143 16.5714286
# Stand dev 1.4960265 1.5118579
# Stand Err 0.5654449 0.5714286
# Median 9.0000000 17.0000000
# CoeffofVariation 0.1611105 0.0912328
# Minimum 7.0000000 14.0000000
# Maximun 11.0000000 18.0000000
# Upper Quantile.75% 10.5000000 17.5000000
# LowerQuartile.25% 8.5000000 16.0000000
# n 7.0000000 7.0000000
# $Severe
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.38600000 1.53800000 0.80200000 3.20200000 0.36960000 21
# Stand dev 0.03847077 0.08288546 0.08074652 0.12049896 0.04659184 0
# Stand Err 0.01720465 0.03706751 0.03611094 0.05388877 0.02083651 0
# Median 0.40000000 1.55000000 0.80000000 3.24000000 0.37000000 21
# CoeffofVariation 0.09966520 0.05389172 0.10068144 0.03763241 0.12606019 0
# Minimum 0.32000000 1.44000000 0.70000000 3.04000000 0.29800000 21
# Maximun 0.42000000 1.64000000 0.90000000 3.34000000 0.42000000 21
# Upper Quantile.75% 0.40000000 1.59000000 0.86000000 3.27000000 0.40000000 21
# LowerQuartile.25% 0.39000000 1.47000000 0.75000000 3.12000000 0.36000000 21
# n 5.00000000 5.00000000 5.00000000 5.00000000 5.00000000 5
# Acute.BBB Chronic.BBB
# Mean 2.4000000 11.6000000
# Stand dev 1.1401754 1.8165902
# Stand Err 0.5099020 0.8124038
# Median 2.0000000 12.0000000
# CoeffofVariation 0.4750731 0.1566026
# Minimum 1.0000000 9.0000000
# Maximun 4.0000000 14.0000000
# Upper Quantile.75% 3.0000000 12.0000000
# LowerQuartile.25% 2.0000000 11.0000000
# n 5.0000000 5.0000000
我们可以通过 rbind 将各组结果合并为单个数据集:
# Stack the per-group summary matrices in lst2 into one data frame.
# Each matrix becomes a data frame with an Injury column holding the group
# name and an rn column holding the statistic label (the matrix row names);
# row names are reset before the pieces are row-bound.
out <- do.call(rbind, lapply(names(lst2), function(group) {
  stats <- lst2[[group]]
  piece <- data.frame(Injury = group, stats, stringsAsFactors = FALSE)
  piece <- transform(piece, rn = row.names(stats))
  row.names(piece) <- NULL
  piece
}))
数据
# Example data: 12 observations with the injury group and eight measurements
# (time delay, area deficit and BBB values at the BL / Acute / Chronic
# time points). Built as a plain data.frame with character row names.
my_data <- structure(
  list(
    Injury = c(
      "Moderate", "Severe", "Moderate", "Moderate", "Severe", "Moderate",
      "Severe", "Severe", "Moderate", "Moderate", "Moderate", "Severe"
    ),
    BL.Time.Delay = c(
      0.35, 0.42, 0.45, 0.42, 0.4, 0.37, 0.32, 0.4, 0.47, 0.41, 0.38, 0.39
    ),
    Acute.Time.Delay = c(
      1.1, 1.47, 1.02, 0.97, 1.55, 0.96, 1.64, 1.44, 1.01, 0.9, 0.86, 1.59
    ),
    Chronic.Time.Delay = c(
      0.6, 0.86, 0.65, 0.7, 0.8, 0.65, 0.75, 0.9, 0.82, 0.67, 0.57, 0.7
    ),
    Acute.Area.Def = c(
      1.84, 3.04, 1.8, 1.76, 3.12, 1.65, 3.34, 3.24, 1.77, 1.51, 1.78, 3.27
    ),
    Chronic.Area.Def = c(
      0.15, 0.42, 0.15, 0, 0.37, 0.24, 0.4, 0.298, 0.14, 0.19, 0.1, 0.36
    ),
    BL.BBB = rep(21L, 12L),
    Acute.BBB = c(11L, 3L, 11L, 8L, 4L, 9L, 2L, 1L, 7L, 9L, 10L, 2L),
    Chronic.BBB = c(
      18L, 14L, 18L, 17L, 11L, 14L, 12L, 9L, 17L, 15L, 17L, 12L
    )
  ),
  class = "data.frame",
  row.names = as.character(1:12)
)
类似下面的做法应该可行;pivot_wider 和 pivot_longer 足够灵活,即使有更多分组变量也可以用同样的方式处理:
# Mean of every measurement per injury group, one output column per group.
# Requires dplyr and tidyr to be attached (library(dplyr); library(tidyr)).
# The pipeline starts from my_data, the data frame defined in this post:
# `df` is never assigned here, and an unassigned `df` resolves to the
# F-distribution density function stats::df, which cannot be piped into
# mutate().
my_data %>%
  # A row id keeps each observation on its own row once Injury is spread
  # into columns by pivot_wider().
  mutate(id = seq_along(Injury)) %>%
  pivot_longer(
    BL.Time.Delay:Chronic.BBB,
    names_to = "var",
    values_to = "value"
  ) %>%
  pivot_wider(id_cols = c(id, var), names_from = Injury, values_from = value) %>%
  group_by(var) %>%
  # Each row has a value in only one of the two group columns, so NAs must
  # be removed; TRUE is spelled out because T can be reassigned.
  summarise(
    Moderate = mean(Moderate, na.rm = TRUE),
    Severe = mean(Severe, na.rm = TRUE)
  )
或者,这将为您提供更长格式的输出
# Mean of every measurement per injury group in long format: one row per
# (var, Injury) combination.
# Requires dplyr and tidyr to be attached (library(dplyr); library(tidyr)).
# The pipeline starts from my_data, the data frame defined in this post:
# `df` is never assigned here, and an unassigned `df` resolves to the
# F-distribution density function stats::df.
my_data %>%
  pivot_longer(
    BL.Time.Delay:Chronic.BBB,
    names_to = "var",
    values_to = "value"
  ) %>%
  # No pivot_wider() step is needed: grouping by Injury directly keeps the
  # groups in rows instead of columns.
  group_by(var, Injury) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping variable; drop the
  # remaining grouping by var so later steps see a plain tibble.
  ungroup()
我有一个名为 'my_data' 的数据框,看起来像这样:
Injury BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB Acute.BBB Chronic.BBB
1 Moderate 0.35 1.10 0.60 1.84 0.150 21 11 18
2 Severe 0.42 1.47 0.86 3.04 0.420 21 3 14
3 Moderate 0.45 1.02 0.65 1.80 0.150 21 11 18
4 Moderate 0.42 0.97 0.70 1.76 0.000 21 8 17
5 Severe 0.40 1.55 0.80 3.12 0.370 21 4 11
6 Moderate 0.37 0.96 0.65 1.65 0.240 21 9 14
7 Severe 0.32 1.64 0.75 3.34 0.400 21 2 12
8 Severe 0.40 1.44 0.90 3.24 0.298 21 1 9
9 Moderate 0.47 1.01 0.82 1.77 0.140 21 7 17
10 Moderate 0.41 0.90 0.67 1.51 0.190 21 9 15
11 Moderate 0.38 0.86 0.57 1.78 0.100 21 10 17
12 Severe 0.39 1.59 0.70 3.27 0.360 21 2 12
我想将损伤分组(Moderate 和 Severe)整理成列,每列都带有其相关的测量值(时间延迟、面积缺损等)。此外还有三个时间点(BL、Acute、Chronic),我也想把它们纳入最终的数据框。
我运行了下面的代码来计算基本统计量,但它没有按损伤组(Moderate 与 Severe)分组:
# Per-column descriptive statistics for the eight measurement columns
# (columns 2 to 9) of my_data, pooled over both injury groups.
# The result is a numeric matrix: one column per measurement, one row per
# statistic. Row labels are kept exactly as before; the two quantile rows
# print as "Upper Quantile.75%" and "LowerQuartile.25%" because quantile()
# appends its own name to the label.
#
# Missing values are dropped once, up front, so every statistic is computed
# on the same observations. If only mean() used na.rm = TRUE, a single NA
# would make sd/median/min/max return NA, make quantile() stop with an
# error, and still be counted in "n" and in the standard error.
updated_df <- vapply(
  my_data[, 2:9],
  function(column) {
    x <- column[!is.na(column)]
    c(
      "Mean" = mean(x),
      "Stand dev" = sd(x),
      "Stand Err" = sd(x) / sqrt(length(x)),
      "Median" = median(x),
      "CoeffofVariation" = sd(x) / mean(x),
      "Minimum" = min(x),
      "Maximun" = max(x),
      "Upper Quantile" = quantile(x, 0.75),
      "LowerQuartile" = quantile(x, 0.25),
      "n" = length(x)
    )
  },
  # Ten double-valued statistics per column; vapply enforces this shape.
  FUN.VALUE = numeric(10)
)
如何运行上面的统计计算,同时按损伤类型(Moderate 与 Severe)分组并考虑时间点?
在当前设置下,一个选项是按 Injury 列用 split 拆分数据,再用 lapply 遍历得到的 list,并对每一组应用同样的代码:
# Split the data by injury group, then compute the same descriptive
# statistics for every measurement column (columns 2 to 9) within each group.
# lst1: named list of data frames, one per level of Injury.
# lst2: named list of numeric matrices (statistics in rows, measurements in
#       columns), one per level of Injury.
lst1 <- split(my_data, my_data$Injury, drop = TRUE)
lst2 <- lapply(lst1, function(dat) {
  vapply(
    dat[, 2:9],
    function(column) {
      # Drop missing values once so that all statistics, including n and
      # the standard error, are based on the same observations; without
      # this, quantile() errors and sd/median/min/max return NA on any NA.
      x <- column[!is.na(column)]
      c(
        "Mean" = mean(x),
        "Stand dev" = sd(x),
        "Stand Err" = sd(x) / sqrt(length(x)),
        "Median" = median(x),
        "CoeffofVariation" = sd(x) / mean(x),
        "Minimum" = min(x),
        "Maximun" = max(x),
        "Upper Quantile" = quantile(x, 0.75),
        "LowerQuartile" = quantile(x, 0.25),
        "n" = length(x)
      )
    },
    # Ten double-valued statistics per column; vapply enforces this shape.
    FUN.VALUE = numeric(10)
  )
})
-输出
lst2
#$Moderate
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.40714286 0.97428571 0.66571429 1.73000000 0.13857143 21
# Stand dev 0.04347961 0.07955232 0.08059304 0.11313708 0.07515064 0
# Stand Err 0.01643375 0.03006795 0.03046131 0.04276180 0.02840427 0
# Median 0.41000000 0.97000000 0.65000000 1.77000000 0.15000000 21
# CoeffofVariation 0.10679202 0.08165194 0.12106251 0.06539716 0.54232422 0
# Minimum 0.35000000 0.86000000 0.57000000 1.51000000 0.00000000 21
# Maximun 0.47000000 1.10000000 0.82000000 1.84000000 0.24000000 21
# Upper Quantile.75% 0.43500000 1.01500000 0.68500000 1.79000000 0.17000000 21
# LowerQuartile.25% 0.37500000 0.93000000 0.62500000 1.70500000 0.12000000 21
# n 7.00000000 7.00000000 7.00000000 7.00000000 7.00000000 7
# Acute.BBB Chronic.BBB
# Mean 9.2857143 16.5714286
# Stand dev 1.4960265 1.5118579
# Stand Err 0.5654449 0.5714286
# Median 9.0000000 17.0000000
# CoeffofVariation 0.1611105 0.0912328
# Minimum 7.0000000 14.0000000
# Maximun 11.0000000 18.0000000
# Upper Quantile.75% 10.5000000 17.5000000
# LowerQuartile.25% 8.5000000 16.0000000
# n 7.0000000 7.0000000
# $Severe
# BL.Time.Delay Acute.Time.Delay Chronic.Time.Delay Acute.Area.Def Chronic.Area.Def BL.BBB
# Mean 0.38600000 1.53800000 0.80200000 3.20200000 0.36960000 21
# Stand dev 0.03847077 0.08288546 0.08074652 0.12049896 0.04659184 0
# Stand Err 0.01720465 0.03706751 0.03611094 0.05388877 0.02083651 0
# Median 0.40000000 1.55000000 0.80000000 3.24000000 0.37000000 21
# CoeffofVariation 0.09966520 0.05389172 0.10068144 0.03763241 0.12606019 0
# Minimum 0.32000000 1.44000000 0.70000000 3.04000000 0.29800000 21
# Maximun 0.42000000 1.64000000 0.90000000 3.34000000 0.42000000 21
# Upper Quantile.75% 0.40000000 1.59000000 0.86000000 3.27000000 0.40000000 21
# LowerQuartile.25% 0.39000000 1.47000000 0.75000000 3.12000000 0.36000000 21
# n 5.00000000 5.00000000 5.00000000 5.00000000 5.00000000 5
# Acute.BBB Chronic.BBB
# Mean 2.4000000 11.6000000
# Stand dev 1.1401754 1.8165902
# Stand Err 0.5099020 0.8124038
# Median 2.0000000 12.0000000
# CoeffofVariation 0.4750731 0.1566026
# Minimum 1.0000000 9.0000000
# Maximun 4.0000000 14.0000000
# Upper Quantile.75% 3.0000000 12.0000000
# LowerQuartile.25% 2.0000000 11.0000000
# n 5.0000000 5.0000000
我们可以通过 rbind 将各组结果合并为单个数据集:
# Stack the per-group summary matrices in lst2 into one data frame.
# Each matrix becomes a data frame with an Injury column holding the group
# name and an rn column holding the statistic label (the matrix row names);
# row names are reset before the pieces are row-bound.
out <- do.call(rbind, lapply(names(lst2), function(group) {
  stats <- lst2[[group]]
  piece <- data.frame(Injury = group, stats, stringsAsFactors = FALSE)
  piece <- transform(piece, rn = row.names(stats))
  row.names(piece) <- NULL
  piece
}))
数据
# Example data: 12 observations with the injury group and eight measurements
# (time delay, area deficit and BBB values at the BL / Acute / Chronic
# time points). Built as a plain data.frame with character row names.
my_data <- structure(
  list(
    Injury = c(
      "Moderate", "Severe", "Moderate", "Moderate", "Severe", "Moderate",
      "Severe", "Severe", "Moderate", "Moderate", "Moderate", "Severe"
    ),
    BL.Time.Delay = c(
      0.35, 0.42, 0.45, 0.42, 0.4, 0.37, 0.32, 0.4, 0.47, 0.41, 0.38, 0.39
    ),
    Acute.Time.Delay = c(
      1.1, 1.47, 1.02, 0.97, 1.55, 0.96, 1.64, 1.44, 1.01, 0.9, 0.86, 1.59
    ),
    Chronic.Time.Delay = c(
      0.6, 0.86, 0.65, 0.7, 0.8, 0.65, 0.75, 0.9, 0.82, 0.67, 0.57, 0.7
    ),
    Acute.Area.Def = c(
      1.84, 3.04, 1.8, 1.76, 3.12, 1.65, 3.34, 3.24, 1.77, 1.51, 1.78, 3.27
    ),
    Chronic.Area.Def = c(
      0.15, 0.42, 0.15, 0, 0.37, 0.24, 0.4, 0.298, 0.14, 0.19, 0.1, 0.36
    ),
    BL.BBB = rep(21L, 12L),
    Acute.BBB = c(11L, 3L, 11L, 8L, 4L, 9L, 2L, 1L, 7L, 9L, 10L, 2L),
    Chronic.BBB = c(
      18L, 14L, 18L, 17L, 11L, 14L, 12L, 9L, 17L, 15L, 17L, 12L
    )
  ),
  class = "data.frame",
  row.names = as.character(1:12)
)
类似下面的做法应该可行;pivot_wider 和 pivot_longer 足够灵活,即使有更多分组变量也可以用同样的方式处理:
# Mean of every measurement per injury group, one output column per group.
# Requires dplyr and tidyr to be attached (library(dplyr); library(tidyr)).
# The pipeline starts from my_data, the data frame defined in this post:
# `df` is never assigned here, and an unassigned `df` resolves to the
# F-distribution density function stats::df, which cannot be piped into
# mutate().
my_data %>%
  # A row id keeps each observation on its own row once Injury is spread
  # into columns by pivot_wider().
  mutate(id = seq_along(Injury)) %>%
  pivot_longer(
    BL.Time.Delay:Chronic.BBB,
    names_to = "var",
    values_to = "value"
  ) %>%
  pivot_wider(id_cols = c(id, var), names_from = Injury, values_from = value) %>%
  group_by(var) %>%
  # Each row has a value in only one of the two group columns, so NAs must
  # be removed; TRUE is spelled out because T can be reassigned.
  summarise(
    Moderate = mean(Moderate, na.rm = TRUE),
    Severe = mean(Severe, na.rm = TRUE)
  )
或者,这将为您提供更长格式的输出
# Mean of every measurement per injury group in long format: one row per
# (var, Injury) combination.
# Requires dplyr and tidyr to be attached (library(dplyr); library(tidyr)).
# The pipeline starts from my_data, the data frame defined in this post:
# `df` is never assigned here, and an unassigned `df` resolves to the
# F-distribution density function stats::df.
my_data %>%
  pivot_longer(
    BL.Time.Delay:Chronic.BBB,
    names_to = "var",
    values_to = "value"
  ) %>%
  # No pivot_wider() step is needed: grouping by Injury directly keeps the
  # groups in rows instead of columns.
  group_by(var, Injury) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping variable; drop the
  # remaining grouping by var so later steps see a plain tibble.
  ungroup()