Combining Rows in R with Pivot or Spread?


# Representative Example
# Example input in wide format: one row per county, with a votes column, a
# candidate column and a percentage column for each party.
# NOTE(review): the original call was truncated; the columns and values below
# are reconstructed from the code and printed results later in this example.
test.df <- tibble(
  yr = rep(1956, 3),
  mn = rep(11, 3),
  sub = rep("Alabama", 3),
  unit_type = rep("County", 3),
  unit_name = c("Autauga", "Baldwin", "Barbour"),
  TotalVotes = c(1000, 2000, 3000),
  RepVotes = c(500, 1000, 1500),
  RepCandidate = rep("Eisenhower", 3),
  RepVotesTotalPerc = rep(50, 3),
  DemVotes = c(500, 1000, 1500),
  DemCandidate = rep("Stevenson", 3),
  DemVotesTotalPerc = rep(50, 3),
  ThirdVotes = rep(0, 3),
  ThirdCandidate = rep("Uncommitted", 3),
  ThirdVotesTotalPerc = rep(0, 3)
)
yr   | mn  | sub  | unit_type | unit_name | TotalVotes | RepVotes |  RepCan  | DemVotes | DemCan 
1956   11   Alabama  County    Autauga        1000        500      Eisenhower   500     Stevenson
1956   11   Alabama  County    Baldwin        2000       1000      Eisenhower   1000    Stevenson
1956   11   Alabama  County    Barbour        3000       1500      Eisenhower   1500    Stevenson

yr   | mn  | sub  | unit_type | unit_name |   pty_n   |   can    |   TotalVotes   | CanVotes
1956   11   Alabama  County     Autauga    Republican   Eisenhower     1000          500 
1956   11   Alabama  County     Autauga    Democrat     Stevenson      1000          500 
1956   11   Alabama  County     Autauga    Independent  Uncommitted    1000            0 
# and so on for the other counties in the example (Baldwin, Barbour, etc.)


这是一个解决方案:首先使用 pivot_longer 将各党派的得票数转换为长格式。然后使用 mutate 和 case_when 将原来的列名替换为实际的候选人姓名和党派名称,最后删除各个单独的候选人列:

# Reshape the wide vote table into one row per unit and party.
# pivot_longer() stacks the three *Votes columns: the former column name goes
# into `pty_n` and the vote count into `CanVotes`.
long_table <- test.df %>%
  pivot_longer(
    cols = c(RepVotes, DemVotes, ThirdVotes),
    names_to = "pty_n",
    values_to = "CanVotes"
  ) %>%
  mutate(
    # Pick the candidate while `pty_n` still holds the original column name.
    can = case_when(
      pty_n == "RepVotes" ~ RepCandidate,
      pty_n == "DemVotes" ~ DemCandidate,
      pty_n == "ThirdVotes" ~ ThirdCandidate
    ),
    # Only afterwards replace the column name with a readable party label;
    # doing this first would break the lookup above.
    pty_n = case_when(
      pty_n == "RepVotes" ~ "Republican",
      pty_n == "DemVotes" ~ "Democrat",
      pty_n == "ThirdVotes" ~ "Independent"
    )
  ) %>%
  # The per-party candidate columns are now redundant with `can`.
  select(-c(RepCandidate, DemCandidate, ThirdCandidate))
# A tibble: 9 x 12
     yr    mn sub     unit_type unit_name TotalVotes RepVotesTotalPerc DemVotesTotalPerc ThirdVotesTotalPe~ pty_n      CanVotes can       
  <dbl> <dbl> <chr>   <chr>     <chr>          <dbl>             <dbl>             <dbl>              <dbl> <chr>         <dbl> <chr>     
1  1956    11 Alabama County    Autauga         1000                50                50                  0 Republican      500 Eisenhower
2  1956    11 Alabama County    Autauga         1000                50                50                  0 Democrat        500 Stevenson 
3  1956    11 Alabama County    Autauga         1000                50                50                  0 Independe~        0 Uncommitt~
4  1956    11 Alabama County    Baldwin         2000                50                50                  0 Republican     1000 Eisenhower
5  1956    11 Alabama County    Baldwin         2000                50                50                  0 Democrat       1000 Stevenson 
6  1956    11 Alabama County    Baldwin         2000                50                50                  0 Independe~        0 Uncommitt~
7  1956    11 Alabama County    Barbour         3000                50                50                  0 Republican     1500 Eisenhower
8  1956    11 Alabama County    Barbour         3000                50                50                  0 Democrat       1500 Stevenson 
9  1956    11 Alabama County    Barbour         3000                50                50                  0 Independe~        0 Uncommitt~

library(data.table)
# Convert the data to the data.table format (setDT() modifies test.df in place).
setDT(test.df)
# Get the party prefixes ("Rep", "Dem", ...) from the *Candidate column names;
# they are used further down to label the pty_n column.
parties <- gsub(
  "Candidate", "",
  grep("^.*Candidate$", names(test.df), value = TRUE)
)
# Melt to one row per unit and party, pairing each candidate column with the
# matching votes column.
DT.melt <- melt(
  test.df,
  id.vars = c("yr", "mn", "sub", "unit_type", "unit_name"),
  measure.vars = patterns(
    can = "^.*Candidate$",
    canVotes = "^(Rep|Dem|Third)Votes$"
  ),
  variable.name = "pty_n"
)
# Get the totals from the original data (by unit_name) through an update join.
DT.melt[test.df, TotalVotes := i.TotalVotes, on = .(unit_name)]
# Pass the correct party name to the pty_n column: after the melt, pty_n is
# used as an index into `parties`. The trailing [] prints the result.
DT.melt[, pty_n := parties[pty_n]][]

#      yr mn     sub unit_type unit_name pty_n         can canVotes TotalVotes
# 1: 1956 11 Alabama    County   Autauga   Rep  Eisenhower      500       1000
# 2: 1956 11 Alabama    County   Baldwin   Rep  Eisenhower     1000       2000
# 3: 1956 11 Alabama    County   Barbour   Rep  Eisenhower     1500       3000
# 4: 1956 11 Alabama    County   Autauga   Dem   Stevenson      500       1000
# 5: 1956 11 Alabama    County   Baldwin   Dem   Stevenson     1000       2000
# 6: 1956 11 Alabama    County   Barbour   Dem   Stevenson     1500       3000
# 7: 1956 11 Alabama    County   Autauga Third Uncommitted        0       1000
# 8: 1956 11 Alabama    County   Baldwin Third Uncommitted        0       2000
# 9: 1956 11 Alabama    County   Barbour Third Uncommitted        0       3000