转置数据帧

Transposing data frames

周末快乐。

我一直在尝试在 R 中复现此 blog post 的结果。我正在寻找一种不使用 t() 转置数据的方法,最好使用 tidyr 或 reshape。在下面的示例中,metadata 是通过转置 data 获得的:
# Build the metadata table by transposing the four rows of `data`, so that
# each column of `data` becomes one row of `metadata`.
# stringsAsFactors = FALSE keeps the transposed values as character on
# R < 4.0, where the old default would turn them into factors and
# as.numeric() below would return factor level codes, not the multipliers.
metadata <- data.frame(
  colnames(data),
  t(data[1:4, ]),
  stringsAsFactors = FALSE
)
# The first row now holds the field labels ("Series.Description", "Unit:",
# "Multiplier:", ...); promote it to the column names, then drop it.
colnames(metadata) <- t(metadata[1, ])
metadata <- metadata[-1, ]
# The label column is literally named "Multiplier:" (with the colon), so read
# it by its exact name instead of relying on `$` partial matching. As before,
# the numeric result is stored in a column named `Multiplier`; the character
# "Multiplier:" column is left in place.
metadata$Multiplier <- as.numeric(metadata[["Multiplier:"]])

虽然这达到了我的目的,但我觉得这种做法有点笨拙。有没有更高效的工作流程来转置数据框?

数据输入

# Example input in dput() form: a 4-row data frame of character columns.
# The first column, Series.Description, holds the field labels ("Unit:",
# "Multiplier:", "Currency:", "Unique Identifier: "); each of the remaining
# 26 columns holds one series' values for those four fields.
data <- structure(list(Series.Description = c("Unit:", "Multiplier:", 
"Currency:", "Unique Identifier: "), Nominal.Broad.Dollar.Index. = c("Index:_1997_Jan_100", 
"1", NA, "H10/H10/JRXWTFB_N.M"), Nominal.Major.Currencies.Dollar.Index. = c("Index:_1973_Mar_100", 
"1", NA, "H10/H10/JRXWTFN_N.M"), Nominal.Other.Important.Trading.Partners.Dollar.Index. = c("Index:_1997_Jan_100", 
"1", NA, "H10/H10/JRXWTFO_N.M"), AUSTRALIA....SPOT.EXCHANGE.RATE..US..AUSTRALIAN...RECIPROCAL.OF.RXI_N.M.AL. = c("Currency:_Per_AUD", 
"1", "USD", "H10/H10/RXI$US_N.M.AL"), SPOT.EXCHANGE.RATE...EURO.AREA. = c("Currency:_Per_EUR", 
"1", "USD", "H10/H10/RXI$US_N.M.EU"), NEW.ZEALAND....SPOT.EXCHANGE.RATE..US..NZ...RECIPROCAL.OF.RXI_N.M.NZ.. = c("Currency:_Per_NZD", 
"1", "USD", "H10/H10/RXI$US_N.M.NZ"), United.Kingdom....Spot.Exchange.Rate..US..Pound.Sterling.Reciprocal.of.rxi_n.m.uk = c("Currency:_Per_GBP", 
"0.01", "USD", "H10/H10/RXI$US_N.M.UK"), BRAZIL....SPOT.EXCHANGE.RATE..REAIS.US.. = c("Currency:_Per_USD", 
"1", "BRL", "H10/H10/RXI_N.M.BZ"), CANADA....SPOT.EXCHANGE.RATE..CANADIAN...US.. = c("Currency:_Per_USD", 
"1", "CAD", "H10/H10/RXI_N.M.CA"), CHINA....SPOT.EXCHANGE.RATE..YUAN.US.. = c("Currency:_Per_USD", 
"1", "CNY", "H10/H10/RXI_N.M.CH"), DENMARK....SPOT.EXCHANGE.RATE..KRONER.US.. = c("Currency:_Per_USD", 
"1", "DKK", "H10/H10/RXI_N.M.DN"), HONG.KONG....SPOT.EXCHANGE.RATE..HK..US.. = c("Currency:_Per_USD", 
"1", "HKD", "H10/H10/RXI_N.M.HK"), INDIA....SPOT.EXCHANGE.RATE..RUPEES.US. = c("Currency:_Per_USD", 
"1", "INR", "H10/H10/RXI_N.M.IN"), JAPAN....SPOT.EXCHANGE.RATE..YEA.US.. = c("Currency:_Per_USD", 
"1", "JPY", "H10/H10/RXI_N.M.JA"), KOREA....SPOT.EXCHANGE.RATE..WON.US.. = c("Currency:_Per_USD", 
"1", "KRW", "H10/H10/RXI_N.M.KO"), Malaysia...Spot.Exchange.Rate..Ringgit.US.. = c("Currency:_Per_USD", 
"1", "MYR", "H10/H10/RXI_N.M.MA"), MEXICO....SPOT.EXCHANGE.RATE..PESOS.US.. = c("Currency:_Per_USD", 
"1", "MXN", "H10/H10/RXI_N.M.MX"), NORWAY....SPOT.EXCHANGE.RATE..KRONER.US.. = c("Currency:_Per_USD", 
"1", "NOK", "H10/H10/RXI_N.M.NO"), SWEDEN....SPOT.EXCHANGE.RATE..KRONOR.US.. = c("Currency:_Per_USD", 
"1", "SEK", "H10/H10/RXI_N.M.SD"), SOUTH.AFRICA....SPOT.EXCHANGE.RATE..RAND.US.. = c("Currency:_Per_USD", 
"1", "ZAR", "H10/H10/RXI_N.M.SF"), Singapore...SPOT.EXCHANGE.RATE..SINGAPORE...US.. = c("Currency:_Per_USD", 
"1", "SGD", "H10/H10/RXI_N.M.SI"), SRI.LANKA....SPOT.EXCHANGE.RATE..RUPEES.US.. = c("Currency:_Per_USD", 
"1", "LKR", "H10/H10/RXI_N.M.SL"), SWITZERLAND....SPOT.EXCHANGE.RATE..FRANCS.US.. = c("Currency:_Per_USD", 
"1", "CHF", "H10/H10/RXI_N.M.SZ"), TAIWAN....SPOT.EXCHANGE.RATE..NT..US.. = c("Currency:_Per_USD", 
"1", "TWD", "H10/H10/RXI_N.M.TA"), THAILAND....SPOT.EXCHANGE.RATE....THAILAND. = c("Currency:_Per_USD", 
"1", "THB", "H10/H10/RXI_N.M.TH"), VENEZUELA....SPOT.EXCHANGE.RATE..BOLIVARES.US.. = c("Currency:_Per_USD", 
"1", "VEB", "H10/H10/RXI_N.M.VE")), .Names = c("Series.Description", 
"Nominal.Broad.Dollar.Index.", "Nominal.Major.Currencies.Dollar.Index.", 
"Nominal.Other.Important.Trading.Partners.Dollar.Index.", "AUSTRALIA....SPOT.EXCHANGE.RATE..US..AUSTRALIAN...RECIPROCAL.OF.RXI_N.M.AL.", 
"SPOT.EXCHANGE.RATE...EURO.AREA.", "NEW.ZEALAND....SPOT.EXCHANGE.RATE..US..NZ...RECIPROCAL.OF.RXI_N.M.NZ..", 
"United.Kingdom....Spot.Exchange.Rate..US..Pound.Sterling.Reciprocal.of.rxi_n.m.uk", 
"BRAZIL....SPOT.EXCHANGE.RATE..REAIS.US..", "CANADA....SPOT.EXCHANGE.RATE..CANADIAN...US..", 
"CHINA....SPOT.EXCHANGE.RATE..YUAN.US..", "DENMARK....SPOT.EXCHANGE.RATE..KRONER.US..", 
"HONG.KONG....SPOT.EXCHANGE.RATE..HK..US..", "INDIA....SPOT.EXCHANGE.RATE..RUPEES.US.", 
"JAPAN....SPOT.EXCHANGE.RATE..YEA.US..", "KOREA....SPOT.EXCHANGE.RATE..WON.US..", 
"Malaysia...Spot.Exchange.Rate..Ringgit.US..", "MEXICO....SPOT.EXCHANGE.RATE..PESOS.US..", 
"NORWAY....SPOT.EXCHANGE.RATE..KRONER.US..", "SWEDEN....SPOT.EXCHANGE.RATE..KRONOR.US..", 
"SOUTH.AFRICA....SPOT.EXCHANGE.RATE..RAND.US..", "Singapore...SPOT.EXCHANGE.RATE..SINGAPORE...US..", 
"SRI.LANKA....SPOT.EXCHANGE.RATE..RUPEES.US..", "SWITZERLAND....SPOT.EXCHANGE.RATE..FRANCS.US..", 
"TAIWAN....SPOT.EXCHANGE.RATE..NT..US..", "THAILAND....SPOT.EXCHANGE.RATE....THAILAND.", 
"VENEZUELA....SPOT.EXCHANGE.RATE..BOLIVARES.US.."), row.names = c(NA, 
4L), class = "data.frame")

使用 tidyr 时,先用 gather 收集除第一列之外的所有列,然后用 spread 把收集到的列重新展开。

尝试:

library(dplyr)
library(tidyr)
# Stack every column except the label column into (var, val) pairs, then
# spread the labels held in Series.Description back out as columns.
data %>%
  gather(key = var, value = val, -Series.Description) %>%
  spread(key = Series.Description, value = val)
library(dplyr)
# Omitted data <- structure part ...

下面的代码复现了主要答案的做法,但更通用(例如,在 Series.Description 不是结果的第一列的情况下也能工作),并且使用了较新的 pivot_longer/pivot_wider 动词。

#' Transpose a data frame using tidyr pivots
#'
#' Stacks every column except the first into name/value pairs, then spreads
#' the values of the original first column out as the new column names.
#' The first column of the result is called `name`; the original first
#' column's name is not kept.
#'
#' @param df A data frame whose first column holds the future column names.
#' @return The value returned by `tidyr::pivot_wider()`, with one row per
#'   original non-first column.
df_transpose <- function(df) {
  stacked <- tidyr::pivot_longer(df, -1)
  tidyr::pivot_wider(stacked, names_from = 1, values_from = value)
}

df_transpose(data)
#> # A tibble: 26 x 5
#>    name                   `Unit:`    `Multiplier:` `Currency:` `Unique Identifi…
#>    <chr>                  <chr>      <chr>         <chr>       <chr>            
#>  1 Nominal.Broad.Dollar.… Index:_19… 1             <NA>        H10/H10/JRXWTFB_…
#>  2 Nominal.Major.Currenc… Index:_19… 1             <NA>        H10/H10/JRXWTFN_…
#>  3 Nominal.Other.Importa… Index:_19… 1             <NA>        H10/H10/JRXWTFO_…
#>  4 AUSTRALIA....SPOT.EXC… Currency:… 1             USD         H10/H10/RXI$US_N…
#>  5 SPOT.EXCHANGE.RATE...… Currency:… 1             USD         H10/H10/RXI$US_N…
#>  6 NEW.ZEALAND....SPOT.E… Currency:… 1             USD         H10/H10/RXI$US_N…
#>  7 United.Kingdom....Spo… Currency:… 0.01          USD         H10/H10/RXI$US_N…
#>  8 BRAZIL....SPOT.EXCHAN… Currency:… 1             BRL         H10/H10/RXI_N.M.…
#>  9 CANADA....SPOT.EXCHAN… Currency:… 1             CAD         H10/H10/RXI_N.M.…
#> 10 CHINA....SPOT.EXCHANG… Currency:… 1             CNY         H10/H10/RXI_N.M.…
#> # … with 16 more rows

但请注意,与上面的答案一样,第一列的名称丢失了。下面的版本保留了该名称(我想上面 @jbkunst 提出的 spread_(names(data)[1], "val") 方法也是如此)。

#' Transpose a data frame using tidyr pivots, keeping the first column's name
#'
#' Stacks every column except the first into name/value pairs, spreads the
#' values of the original first column out as the new column names, and then
#' renames the first column of the result to the original first column's name.
#'
#' @param df A data frame whose first column holds the future column names.
#' @return The value returned by `tidyr::pivot_wider()`, with its first
#'   column renamed to the name of `df`'s first column.
df_transpose <- function(df) {
  # Remember the label column's name before the pivots replace it with `name`.
  id_col <- names(df)[1]

  stacked <- tidyr::pivot_longer(df, -1)
  transposed <- tidyr::pivot_wider(stacked, names_from = 1, values_from = value)

  names(transposed)[1] <- id_col
  transposed
}

df_transpose(data)
#> # A tibble: 26 x 5
#>    Series.Description       `Unit:`   `Multiplier:` `Currency:` `Unique Identif…
#>    <chr>                    <chr>     <chr>         <chr>       <chr>           
#>  1 Nominal.Broad.Dollar.In… Index:_1… 1             <NA>        H10/H10/JRXWTFB…
#>  2 Nominal.Major.Currencie… Index:_1… 1             <NA>        H10/H10/JRXWTFN…
#>  3 Nominal.Other.Important… Index:_1… 1             <NA>        H10/H10/JRXWTFO…
#>  4 AUSTRALIA....SPOT.EXCHA… Currency… 1             USD         H10/H10/RXI$US_…
#>  5 SPOT.EXCHANGE.RATE...EU… Currency… 1             USD         H10/H10/RXI$US_…
#>  6 NEW.ZEALAND....SPOT.EXC… Currency… 1             USD         H10/H10/RXI$US_…
#>  7 United.Kingdom....Spot.… Currency… 0.01          USD         H10/H10/RXI$US_…
#>  8 BRAZIL....SPOT.EXCHANGE… Currency… 1             BRL         H10/H10/RXI_N.M…
#>  9 CANADA....SPOT.EXCHANGE… Currency… 1             CAD         H10/H10/RXI_N.M…
#> 10 CHINA....SPOT.EXCHANGE.… Currency… 1             CNY         H10/H10/RXI_N.M…
#> # … with 16 more rows

由 reprex package (v2.0.0) 于 2021-05-30 创建