R按标签差异聚合
R aggregate by label difference
我有一个大型数据框,其中包含 28 个欧盟国家/地区的进出口数据以及其他详细信息。我想计算每个国家的贸易差额(出口减去进口)。
我使用以下代码成功汇总了每个国家/地区的进出口:
x = aggregate( value_in_euros ~ as_factor(flow) + as_factor(declarant), data, sum)
生成的数据集如下所示:
as_factor(flow) as_factor(declarant) value_in_euros
1 Imports FR 1.019259e+12
2 Exports FR 8.539884e+11
3 Imports BE 6.936985e+11
4 Exports BE 7.350930e+11
5 Imports NL 1.043084e+12
6 Exports NL 1.180904e+12
7 Imports DE 2.048670e+12
8 Exports DE 2.415090e+12
9 Imports IT 7.399388e+11
10 Exports IT 8.671186e+11
11 Imports GB 9.705866e+10
12 Exports GB 6.669265e+10
13 Imports IE 1.725745e+11
14 Exports IE 3.150999e+11
15 Imports DK 1.707441e+11
16 Exports DK 1.897421e+11
现在我需要一种聚合方法来计算每个国家/地区的贸易差额(出口减去进口)。任何人都可以向我提供有关如何操作的提示吗?谢谢
我们可以使用split
将数据帧分割成declarant
,然后计算流之间的差异。
set.seed(1)
df <- data.frame(flow=rep(c('Imports','Exports'), 8),
declarant=rep(c('FR','BE','NL','DE','IT','GB','IE','DK'), each=2),
value_in_euros=rnorm(n=16L,mean=1e12,sd=1e4))
out <- sapply(split(df, df$declarant), function(x) diff(x$value_in_euros))
# ------------------------------------------
> out[1:4]
BE DE DK FR
24309.094 2508.957 -11698.645 8100.971
考虑聚合后重塑。那么,运行的区别。
agg_df <- aggregate(
value_in_euros ~ flow + declarant, data=data, FUN=sum
) |> reshape(
idvar = "declarant",
v.names = "value_in_euros",
timevar = "flow",
direction = "wide",
sep = "_"
) |> transform(
balance = value_in_euros_Exports - value_in_euros_Imports
)
agg_df
declarant value_in_euros_Imports value_in_euros_Exports balance
1 FR 1.019259e+12 8.539884e+11 -165270600000
3 BE 6.936985e+11 7.350930e+11 41394500000
5 NL 1.043084e+12 1.180904e+12 137820000000
7 DE 2.048670e+12 2.415090e+12 366420000000
9 IT 7.399388e+11 8.671186e+11 127179800000
11 GB 9.705866e+10 6.669265e+10 -30366010000
13 IE 1.725745e+11 3.150999e+11 142525400000
15 DK 1.707441e+11 1.897421e+11 18998000000
数据
txt = ' flow declarant value_in_euros
1 Imports FR 1.019259e+12
2 Exports FR 8.539884e+11
3 Imports BE 6.936985e+11
4 Exports BE 7.350930e+11
5 Imports NL 1.043084e+12
6 Exports NL 1.180904e+12
7 Imports DE 2.048670e+12
8 Exports DE 2.415090e+12
9 Imports IT 7.399388e+11
10 Exports IT 8.671186e+11
11 Imports GB 9.705866e+10
12 Exports GB 6.669265e+10
13 Imports IE 1.725745e+11
14 Exports IE 3.150999e+11
15 Imports DK 1.707441e+11
16 Exports DK 1.897421e+11'
agg_df <- read.table(text=txt)
您可以直接将此应用到您的 data
而不是进行汇总。
library(dplyr)
library(tidyr)
data %>%
select(flow, declarant, value_in_euros) %>%
pivot_wider(names_from = flow,
values_from = value_in_euros, values_fn = sum) %>%
mutate(difference = Exports - Imports)
我有一个大型数据框,其中包含 28 个欧盟国家/地区的进出口数据以及其他详细信息。我想计算每个国家的贸易差额(出口减去进口)。
我使用以下代码成功汇总了每个国家/地区的进出口:
x = aggregate( value_in_euros ~ as_factor(flow) + as_factor(declarant), data, sum)
生成的数据集如下所示:
as_factor(flow) as_factor(declarant) value_in_euros
1 Imports FR 1.019259e+12
2 Exports FR 8.539884e+11
3 Imports BE 6.936985e+11
4 Exports BE 7.350930e+11
5 Imports NL 1.043084e+12
6 Exports NL 1.180904e+12
7 Imports DE 2.048670e+12
8 Exports DE 2.415090e+12
9 Imports IT 7.399388e+11
10 Exports IT 8.671186e+11
11 Imports GB 9.705866e+10
12 Exports GB 6.669265e+10
13 Imports IE 1.725745e+11
14 Exports IE 3.150999e+11
15 Imports DK 1.707441e+11
16 Exports DK 1.897421e+11
现在我需要一种聚合方法来计算每个国家/地区的贸易差额(出口减去进口)。任何人都可以向我提供有关如何操作的提示吗?谢谢
我们可以使用split
将数据帧分割成declarant
,然后计算流之间的差异。
set.seed(1)
df <- data.frame(flow=rep(c('Imports','Exports'), 8),
declarant=rep(c('FR','BE','NL','DE','IT','GB','IE','DK'), each=2),
value_in_euros=rnorm(n=16L,mean=1e12,sd=1e4))
out <- sapply(split(df, df$declarant), function(x) diff(x$value_in_euros))
# ------------------------------------------
> out[1:4]
BE DE DK FR
24309.094 2508.957 -11698.645 8100.971
考虑聚合后重塑。那么,运行的区别。
agg_df <- aggregate(
value_in_euros ~ flow + declarant, data=data, FUN=sum
) |> reshape(
idvar = "declarant",
v.names = "value_in_euros",
timevar = "flow",
direction = "wide",
sep = "_"
) |> transform(
balance = value_in_euros_Exports - value_in_euros_Imports
)
agg_df
declarant value_in_euros_Imports value_in_euros_Exports balance
1 FR 1.019259e+12 8.539884e+11 -165270600000
3 BE 6.936985e+11 7.350930e+11 41394500000
5 NL 1.043084e+12 1.180904e+12 137820000000
7 DE 2.048670e+12 2.415090e+12 366420000000
9 IT 7.399388e+11 8.671186e+11 127179800000
11 GB 9.705866e+10 6.669265e+10 -30366010000
13 IE 1.725745e+11 3.150999e+11 142525400000
15 DK 1.707441e+11 1.897421e+11 18998000000
数据
txt = ' flow declarant value_in_euros
1 Imports FR 1.019259e+12
2 Exports FR 8.539884e+11
3 Imports BE 6.936985e+11
4 Exports BE 7.350930e+11
5 Imports NL 1.043084e+12
6 Exports NL 1.180904e+12
7 Imports DE 2.048670e+12
8 Exports DE 2.415090e+12
9 Imports IT 7.399388e+11
10 Exports IT 8.671186e+11
11 Imports GB 9.705866e+10
12 Exports GB 6.669265e+10
13 Imports IE 1.725745e+11
14 Exports IE 3.150999e+11
15 Imports DK 1.707441e+11
16 Exports DK 1.897421e+11'
agg_df <- read.table(text=txt)
您可以直接将此应用到您的 data
而不是进行汇总。
library(dplyr)
library(tidyr)
data %>%
select(flow, declarant, value_in_euros) %>%
pivot_wider(names_from = flow,
values_from = value_in_euros, values_fn = sum) %>%
mutate(difference = Exports - Imports)