将行最大值从长到宽投射
Casting the row maxima from long to wide
我有一个如下所示的 DT(谁能解释一下为什么这里的 fread 不能正常工作?):
library(data.table)
# Build the example table DT from inline, whitespace-separated text.
# header=TRUE tells fread that the first data line holds the column names.
# Further down, pv1 / vot1 is used as a party's share of the votes.
DT <- fread("
pty_n pty pev1 vot1 vv1 ivv1 to1 pv1 ctr_n yr
A 236 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1983
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1983
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1983
D 207 6567389 5759215 5554662 204553 876941383 587 Argentina 1983
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1983",
header=TRUE)
我想按如下方式更改 DT 的格式:
1) 对于 pv1 最高的两个 pty_n,我想要它们的党派名称。
ctr_n yr First_Party Second_Party
Argentina 1983 A C
2)我想按比例加上他们的得票份额。
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
Argentina 1983 A C pv1/vot1 = 2743064/5759215 = 0.476 0.011 (rounded from 0.0108)
我正在考虑做如下事情:
# Selecting the row with the maximum amount of votes (But how do I do the second max?)
# NOTE(review): prop_votes is first created by the `:=` statement below, so it
# is not a column of DT yet at this point; max(pv1) is also a raw vote count,
# not a proportion. `on=` is data.table's join argument -- grouping uses `by=`.
DT[, .SD[max(pv1)==prop_votes ], on=.(ctr_n, yr)]
# Calculating the proportion for the remaining rows
# Adds the column prop_votes = pv1 / vot1 to DT in place.
DT[, prop_votes := pv1/vot1]
# Recasting, here I get stuck as well
# NOTE(review): the author left this call unfinished -- the formula ends in `?`
# and the parenthesis is never closed, so it does not parse as written.
DT <- reshape2::dcast(DT, ctr_n + yr ~ pty_n + pty + prop_votes + ?
有人可以帮我继续吗?
像这样吗?
# Top two parties by votes, reshaped into a single wide row.
# Rows must be ranked by pv1 (the vote count), not by pty: ranking by pty
# picks the two largest pty codes (C and A) and reports C's share instead
# of the share of the party with the most votes.
DT[order(-pv1)[1:2]][
  ,
  .(
    ctr_n = ctr_n[1],
    First_Party = pty_n[1],
    Second_Party = pty_n[2],
    yr = yr[1],
    # Share of the leading party: its pv1 divided by vot1.
    First_Party_Votes = pv1[1] / vot1[1]
  )
]
# Result for the five-row example table:
#        ctr_n First_Party Second_Party   yr First_Party_Votes
# 1: Argentina           A            C 1983         0.4762913
我的尝试基于@sindri_baldur的回答
library(data.table)
# Build a two-year version of the example table (1983 and 1984) from inline,
# whitespace-separated text, so that grouping by (ctr_n, yr) can be exercised.
# header=TRUE tells fread that the first data line holds the column names.
DT <- fread("
pty_n pty pev1 vot1 vv1 ivv1 to1 pv1 ctr_n yr
A 236 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1983
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1983
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1983
D 207 6567389 5759215 5554662 204553 876941383 587 Argentina 1983
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1983
A 236 6567389 5759215 5554662 204553 876941383 56987 Argentina 1984
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1984
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1984
D 207 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1984
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1984",
header=TRUE)
# For every (ctr_n, yr) group, report the two parties with the most votes
# and their vote shares. Sorting first by group and then by descending pv1
# puts each group's leader in position 1 and the runner-up in position 2.
DT[
  order(ctr_n, yr, -pv1)
][
  ,
  {
    # Both shares use the group's first vot1 value as the denominator.
    denom <- vot1[1L]
    list(
      First_Party = pty_n[1L],
      Second_Party = pty_n[2L],
      First_Party_Votes = pv1[1L] / denom,
      Second_Party_Votes = pv1[2L] / denom
    )
  },
  by = list(ctr_n, yr)
]
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
1: Argentina 1983 A C 0.4762913 0.01087978
2: Argentina 1984 D C 0.4762913 0.01087978
不太确定这是不是你想要的,但你可以试试
# Keep the two rows of DT with the largest pv1.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
h <- head(DT[order(DT$pv1, decreasing = TRUE), ], 2)
# Gather country, year, the two party names and the two rounded vote shares,
# convert every piece to character, and lay the values out as a single row.
# Assumes both rows share one ctr_n and one yr so that exactly six values
# result; with several countries or years unique() returns more values and
# the six column names below no longer fit.
df <- data.frame(t(unlist(Map(
  as.character,
  with(h, list(unique(ctr_n), unique(yr), pty_n, round(pv1 / vot1, 3)))
))))
colnames(df) <- c(
  "ctr_n", "yr",
  "First_Party", "Second_Party",
  "First_Party_Votes", "Second_Party_Votes"
)
结果为:
> df
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
1 Argentina 1983 A C 0.476 0.011
我有一个如下所示的 DT(谁能解释一下为什么这里的 fread 不能正常工作?):
library(data.table)
# Build the example table DT from inline, whitespace-separated text.
# header=TRUE tells fread that the first data line holds the column names.
# Further down, pv1 / vot1 is used as a party's share of the votes.
DT <- fread("
pty_n pty pev1 vot1 vv1 ivv1 to1 pv1 ctr_n yr
A 236 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1983
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1983
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1983
D 207 6567389 5759215 5554662 204553 876941383 587 Argentina 1983
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1983",
header=TRUE)
我想按如下方式更改 DT 的格式:
1) 对于 pv1 最高的两个 pty_n,我想要它们的党派名称。
ctr_n yr First_Party Second_Party
Argentina 1983 A C
2)我想按比例加上他们的得票份额。
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
Argentina 1983 A C pv1/vot1 = 2743064/5759215 = 0.476 0.011 (rounded from 0.0108)
我正在考虑做如下事情:
# Selecting the row with the maximum amount of votes (But how do I do the second max?)
# NOTE(review): prop_votes is first created by the `:=` statement below, so it
# is not a column of DT yet at this point; max(pv1) is also a raw vote count,
# not a proportion. `on=` is data.table's join argument -- grouping uses `by=`.
DT[, .SD[max(pv1)==prop_votes ], on=.(ctr_n, yr)]
# Calculating the proportion for the remaining rows
# Adds the column prop_votes = pv1 / vot1 to DT in place.
DT[, prop_votes := pv1/vot1]
# Recasting, here I get stuck as well
# NOTE(review): the author left this call unfinished -- the formula ends in `?`
# and the parenthesis is never closed, so it does not parse as written.
DT <- reshape2::dcast(DT, ctr_n + yr ~ pty_n + pty + prop_votes + ?
有人可以帮我继续吗?
像这样吗?
# Top two parties by votes, reshaped into a single wide row.
# Rows must be ranked by pv1 (the vote count), not by pty: ranking by pty
# picks the two largest pty codes (C and A) and reports C's share instead
# of the share of the party with the most votes.
DT[order(-pv1)[1:2]][
  ,
  .(
    ctr_n = ctr_n[1],
    First_Party = pty_n[1],
    Second_Party = pty_n[2],
    yr = yr[1],
    # Share of the leading party: its pv1 divided by vot1.
    First_Party_Votes = pv1[1] / vot1[1]
  )
]
# Result for the five-row example table:
#        ctr_n First_Party Second_Party   yr First_Party_Votes
# 1: Argentina           A            C 1983         0.4762913
我的尝试基于@sindri_baldur的回答
library(data.table)
# Build a two-year version of the example table (1983 and 1984) from inline,
# whitespace-separated text, so that grouping by (ctr_n, yr) can be exercised.
# header=TRUE tells fread that the first data line holds the column names.
DT <- fread("
pty_n pty pev1 vot1 vv1 ivv1 to1 pv1 ctr_n yr
A 236 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1983
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1983
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1983
D 207 6567389 5759215 5554662 204553 876941383 587 Argentina 1983
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1983
A 236 6567389 5759215 5554662 204553 876941383 56987 Argentina 1984
B 41 6567389 5759215 5554662 204553 876941383 56987 Argentina 1984
C 237 6567389 5759215 5554662 204553 876941383 62659 Argentina 1984
D 207 6567389 5759215 5554662 204553 876941383 2743064 Argentina 1984
E 125 6567389 5759215 5554662 204553 876941383 3728 Argentina 1984",
header=TRUE)
# For every (ctr_n, yr) group, report the two parties with the most votes
# and their vote shares. Sorting first by group and then by descending pv1
# puts each group's leader in position 1 and the runner-up in position 2.
DT[
  order(ctr_n, yr, -pv1)
][
  ,
  {
    # Both shares use the group's first vot1 value as the denominator.
    denom <- vot1[1L]
    list(
      First_Party = pty_n[1L],
      Second_Party = pty_n[2L],
      First_Party_Votes = pv1[1L] / denom,
      Second_Party_Votes = pv1[2L] / denom
    )
  },
  by = list(ctr_n, yr)
]
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
1: Argentina 1983 A C 0.4762913 0.01087978
2: Argentina 1984 D C 0.4762913 0.01087978
不太确定这是不是你想要的,但你可以试试
# Keep the two rows of DT with the largest pv1.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
h <- head(DT[order(DT$pv1, decreasing = TRUE), ], 2)
# Gather country, year, the two party names and the two rounded vote shares,
# convert every piece to character, and lay the values out as a single row.
# Assumes both rows share one ctr_n and one yr so that exactly six values
# result; with several countries or years unique() returns more values and
# the six column names below no longer fit.
df <- data.frame(t(unlist(Map(
  as.character,
  with(h, list(unique(ctr_n), unique(yr), pty_n, round(pv1 / vot1, 3)))
))))
colnames(df) <- c(
  "ctr_n", "yr",
  "First_Party", "Second_Party",
  "First_Party_Votes", "Second_Party_Votes"
)
结果为:
> df
ctr_n yr First_Party Second_Party First_Party_Votes Second_Party_Votes
1 Argentina 1983 A C 0.476 0.011