如何根据R中的组计算两行之间的百分比
how to calculate the percentage between two rows based on a group in R
我有一个数据框(data frame),其中包含 6 个地点在 2005 年和 2018 年的土地利用数据。我想计算每个地点从 2005 年到 2018 年的百分比变化。
# Example data: land-use values for six places (F01-F06), each observed in
# 2005 and 2018 (12 rows, ordered by place and then by year).
# Veg, Agri, Past, Urb, SoloExp and Hidro are the land-use classes
# (presumably areas; units are not stated). Urb is NA for F06 and Hidro is NA
# for F03 and F04. `total` holds the same value on every row.
df<-structure(list(place = c("F01", "F01", "F02", "F02", "F03", "F03",
"F04", "F04", "F05", "F05", "F06", "F06"), year = c(2005, 2018, 2005,
2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018), Veg =
c(12281.5824712026, 12292.2267477317, 7254.98919713131,
7488.9138055415, 864.182200710528, 941.602680778032, 549.510775817472, 584.104674537216, 5577.10195081334, 5688.28474549675, 1244.96456185886, 1306.41862713264), Agri = c(113.178596532624, 1376.68748390712, 85.2373706436, 1048.71071335262, 0, 46.236076173504, 0, 46.236076173504, 85.2373706436, 1002.47463717912, 1.413692976528,
228.851945376768 ), Past = c(9190.16856517738, 7855.55923692456, 5029.33750161394, 3776.9718412309, 983.015569149264, 800.981808818688, 710.255983089744, 572.213021852304, 3726.66100294858, 2700.40306039963, 879.982298683488, 597.410020198656), Urb = c(146.026168634304, 200.910719487744, 146.026168634304,
200.910719487744, 141.119822421648, 194.840155529712, 141.119822421648, 194.840155529712, 4.906346212656, 6.070563958032, NA, NA), SoloExp = c(61.12143163224, 67.940421283728, 61.12143163224,
62.451966198384, 50.144521461552, 54.801392443056, 49.146620536944, 52.639273773072, 9.895850835696, 7.650573755328, 6.320039189184, 1.164217745376), Hidro = c(9.230583552624, 7.983207396864, 9.230583552624, 7.983207396864, NA, NA, NA, NA, 7.401098524176, 6.320039189184, 5.654771906112, 4.490554160736), total = c(691953.981181971, 691953.981181971, 691953.981181971,
691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971)), row.names = c(NA, -12L), class = "data.frame")
我尝试使用 `lead` 函数计算 2005 年和 2018 年之间的差异,但没有成功:
# Attempt from the question (assumes dplyr and tidyr are attached): reshape to
# long form, compute the change with lead(), then reshape back to wide form.
df2<-df%>%
select(-c(total))%>%
# Missing values become 0 before any arithmetic is done.
replace(is.na(.), 0)%>%
pivot_longer(cols = c(3:8),
names_to = 'classe',
values_to = 'area')%>%
group_by(place, classe)%>%
# lead(area) is the next row within the group, so the first row of each group
# gets the change towards the following row, while the last row of each group
# has no successor and gets NA.
mutate(percent=(((((area)-lead(area))/area)*100)*-1))%>%
# NOTE(review): `area` and `year` are still columns at this point, so
# pivot_wider() presumably treats them as identifier columns and cannot
# collapse the data to one row per place -- likely why this attempt did not
# work; confirm by running it.
pivot_wider(names_from = 'classe',
values_from = 'percent')%>%
select(-c(area, year))
例如,对于 `Veg` 类别,我希望得到:
place Veg
F01 0.09
F02 3.22
F03 8.96
F04 6.30
F05 1.99
F06 4.94
如果您需要所有参数的百分比,这是一种解决方案
library(dplyr)
library(tidyr)

# Percentage change from 2005 to 2018 for every land-use class, with one row
# per place and one column per class.
df_new <- df %>%
  select(-total) %>%
  # Treat missing values as 0, as in the question's own attempt. A class that
  # is 0 in both years therefore yields NaN, and a class that is 0 only in
  # 2005 yields Inf.
  replace(is.na(.), 0) %>%
  # Select the class columns by exclusion rather than by position (3:8), so
  # the reshape does not depend on the column order.
  pivot_longer(
    cols = -c(place, year),
    names_to = "classe",
    values_to = "area"
  ) %>%
  # One column per year, so both values of a place/class pair share a row.
  pivot_wider(names_from = year, values_from = area) %>%
  # Multiply by 100: the question asks for percentages (e.g. 3.22 for
  # F02/Veg), not proportions (0.0322).
  mutate(percent = 100 * (`2018` - `2005`) / `2005`) %>%
  select(-`2018`, -`2005`) %>%
  pivot_wider(names_from = "classe", values_from = "percent")
df_new
#> # A tibble: 6 × 7
#> place Veg Agri Past Urb SoloExp Hidro
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 F01 0.0867 1116. -14.5 37.6 11.2 -13.5
#> 2 F02 3.22 1130. -24.9 37.6 2.18 -13.5
#> 3 F03 8.96 Inf -18.5 38.1 9.29 NaN
#> 4 F04 6.30 Inf -19.4 38.1 7.11 NaN
#> 5 F05 1.99 1076. -27.5 23.7 -22.7 -14.6
#> 6 F06 4.94 16088. -32.1 NaN -81.6 -20.6
由 reprex 包 (v2.0.1) 创建于 2022-01-09
另一个可能的解决方案:
library(tidyverse)

# Percentage change between the earliest and the latest year of each place.
df %>%
  # first()/last() depend on row order, so sort by year explicitly instead of
  # relying on the input already being ordered 2005 -> 2018.
  arrange(place, year) %>%
  group_by(place) %>%
  # Applied to every column except `year` (and the grouping column), so the
  # constant `total` column is included and comes out as 0.
  summarise(across(-year, ~ 100 * (last(.x) / first(.x) - 1)))
#> # A tibble: 6 × 8
#> place Veg Agri Past Urb SoloExp Hidro total
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 F01 0.0867 1116. -14.5 37.6 11.2 -13.5 0
#> 2 F02 3.22 1130. -24.9 37.6 2.18 -13.5 0
#> 3 F03 8.96 Inf -18.5 38.1 9.29 NA 0
#> 4 F04 6.30 Inf -19.4 38.1 7.11 NA 0
#> 5 F05 1.99 1076. -27.5 23.7 -22.7 -14.6 0
#> 6 F06 4.94 16088. -32.1 NA -81.6 -20.6 0
我有一个数据框(data frame),其中包含 6 个地点在 2005 年和 2018 年的土地利用数据。我想计算每个地点从 2005 年到 2018 年的百分比变化。
# Example data: land-use values for six places (F01-F06), each observed in
# 2005 and 2018 (12 rows, ordered by place and then by year).
# Veg, Agri, Past, Urb, SoloExp and Hidro are the land-use classes
# (presumably areas; units are not stated). Urb is NA for F06 and Hidro is NA
# for F03 and F04. `total` holds the same value on every row.
df<-structure(list(place = c("F01", "F01", "F02", "F02", "F03", "F03", "F04", "F04", "F05", "F05", "F06", "F06"), year = c(2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018), Veg = c(12281.5824712026, 12292.2267477317, 7254.98919713131, 7488.9138055415, 864.182200710528, 941.602680778032, 549.510775817472, 584.104674537216, 5577.10195081334, 5688.28474549675, 1244.96456185886, 1306.41862713264), Agri = c(113.178596532624, 1376.68748390712, 85.2373706436, 1048.71071335262, 0, 46.236076173504, 0, 46.236076173504, 85.2373706436, 1002.47463717912, 1.413692976528, 228.851945376768 ), Past = c(9190.16856517738, 7855.55923692456, 5029.33750161394, 3776.9718412309, 983.015569149264, 800.981808818688, 710.255983089744, 572.213021852304, 3726.66100294858, 2700.40306039963, 879.982298683488, 597.410020198656), Urb = c(146.026168634304, 200.910719487744, 146.026168634304, 200.910719487744, 141.119822421648, 194.840155529712, 141.119822421648, 194.840155529712, 4.906346212656, 6.070563958032, NA, NA), SoloExp = c(61.12143163224, 67.940421283728, 61.12143163224, 62.451966198384, 50.144521461552, 54.801392443056, 49.146620536944, 52.639273773072, 9.895850835696, 7.650573755328, 6.320039189184, 1.164217745376), Hidro = c(9.230583552624, 7.983207396864, 9.230583552624, 7.983207396864, NA, NA, NA, NA, 7.401098524176, 6.320039189184, 5.654771906112, 4.490554160736), total = c(691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971)), row.names = c(NA, -12L), class = "data.frame")
我尝试使用 `lead` 函数计算 2005 年和 2018 年之间的差异,但没有成功:
# Attempt from the question (assumes dplyr and tidyr are attached): reshape to
# long form, compute the change with lead(), then reshape back to wide form.
df2<-df%>%
select(-c(total))%>%
# Missing values become 0 before any arithmetic is done.
replace(is.na(.), 0)%>%
pivot_longer(cols = c(3:8),
names_to = 'classe',
values_to = 'area')%>%
group_by(place, classe)%>%
# lead(area) is the next row within the group, so the first row of each group
# gets the change towards the following row, while the last row of each group
# has no successor and gets NA.
mutate(percent=(((((area)-lead(area))/area)*100)*-1))%>%
# NOTE(review): `area` and `year` are still columns at this point, so
# pivot_wider() presumably treats them as identifier columns and cannot
# collapse the data to one row per place -- likely why this attempt did not
# work; confirm by running it.
pivot_wider(names_from = 'classe',
values_from = 'percent')%>%
select(-c(area, year))
例如,对于 `Veg` 类别,我希望得到:
place Veg
F01 0.09
F02 3.22
F03 8.96
F04 6.30
F05 1.99
F06 4.94
如果您需要所有参数的百分比,这是一种解决方案
library(dplyr)
library(tidyr)

# Percentage change from 2005 to 2018 for every land-use class, with one row
# per place and one column per class.
df_new <- df %>%
  select(-total) %>%
  # Treat missing values as 0, as in the question's own attempt. A class that
  # is 0 in both years therefore yields NaN, and a class that is 0 only in
  # 2005 yields Inf.
  replace(is.na(.), 0) %>%
  # Select the class columns by exclusion rather than by position (3:8), so
  # the reshape does not depend on the column order.
  pivot_longer(
    cols = -c(place, year),
    names_to = "classe",
    values_to = "area"
  ) %>%
  # One column per year, so both values of a place/class pair share a row.
  pivot_wider(names_from = year, values_from = area) %>%
  # Multiply by 100: the question asks for percentages (e.g. 3.22 for
  # F02/Veg), not proportions (0.0322).
  mutate(percent = 100 * (`2018` - `2005`) / `2005`) %>%
  select(-`2018`, -`2005`) %>%
  pivot_wider(names_from = "classe", values_from = "percent")
df_new
#> # A tibble: 6 × 7
#> place Veg Agri Past Urb SoloExp Hidro
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 F01 0.0867 1116. -14.5 37.6 11.2 -13.5
#> 2 F02 3.22 1130. -24.9 37.6 2.18 -13.5
#> 3 F03 8.96 Inf -18.5 38.1 9.29 NaN
#> 4 F04 6.30 Inf -19.4 38.1 7.11 NaN
#> 5 F05 1.99 1076. -27.5 23.7 -22.7 -14.6
#> 6 F06 4.94 16088. -32.1 NaN -81.6 -20.6
由 reprex 包 (v2.0.1) 创建于 2022-01-09
另一个可能的解决方案:
library(tidyverse)

# Percentage change between the earliest and the latest year of each place.
df %>%
  # first()/last() depend on row order, so sort by year explicitly instead of
  # relying on the input already being ordered 2005 -> 2018.
  arrange(place, year) %>%
  group_by(place) %>%
  # Applied to every column except `year` (and the grouping column), so the
  # constant `total` column is included and comes out as 0.
  summarise(across(-year, ~ 100 * (last(.x) / first(.x) - 1)))
#> # A tibble: 6 × 8
#> place Veg Agri Past Urb SoloExp Hidro total
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 F01 0.0867 1116. -14.5 37.6 11.2 -13.5 0
#> 2 F02 3.22 1130. -24.9 37.6 2.18 -13.5 0
#> 3 F03 8.96 Inf -18.5 38.1 9.29 NA 0
#> 4 F04 6.30 Inf -19.4 38.1 7.11 NA 0
#> 5 F05 1.99 1076. -27.5 23.7 -22.7 -14.6 0
#> 6 F06 4.94 16088. -32.1 NA -81.6 -20.6 0