在 dplyr 中转置
Transposing in dplyr
我有以下data.frame
df = structure(list(HEADER = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM","AWAY_TEAM"),
price = c("0.863104076023855", "-0.845186446996287","CHA", "NOP")),
.Names = c("HEADER", "price"), row.names = c(NA, 4L), class = "data.frame")
df
#> HEADER price
#> 1 HOME_TRPM 0.863104076023855
#> 2 AWAY_TRPM -0.845186446996287
#> 3 HOME_TEAM CHA
#> 4 AWAY_TEAM NOP
我想转置。如何在不使用 t() 的情况下在 dplyr 中执行此操作?我试过了
df %>% tidyr::spread(HEADER , price)
但它没有提供平面结构,而是这样做的:
structure(list(AWAY_TEAM = c(NA, NA, NA, "NOP"),
AWAY_TRPM = c(NA, "-0.845186446996287", NA, NA),
HOME_TEAM = c(NA, NA, "CHA", NA),
HOME_TRPM = c("0.863104076023855", NA, NA, NA)),
.Names = c("AWAY_TEAM", "AWAY_TRPM", "HOME_TEAM", "HOME_TRPM"),
class = "data.frame", row.names = c(NA, 4L))
结果data.frame应该是这样的:
structure(list(HOME_TRPM = "0.863104076023855",
AWAY_TRPM = "-0.845186446996287",
HOME_TEAM = "CHA",
AWAY_TEAM = "NOP"),
.Names = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM", "AWAY_TEAM"),
row.names = c(NA, -1L), class = "data.frame"))
我想你想要 tidyr
而不是 dplyr
:
library(tidyr)
library(dplyr)
df %>% mutate(group = 1) %>%
spread(HEADER, price)
group AWAY_TEAM AWAY_TRPM HOME_TEAM HOME_TRPM
1 1 NOP -0.845186446996287 CHA 0.863104076023855
使用它,您可以指定您的分组 - 您可以添加 select(-group)
以便稍后删除它们。
他们一定已经更新了 tidyr,因为它最初是发布的,因为我认为它可以满足您现在最初的要求:
> library(dplyr)
> library(tidyr)
Warning message:
package ‘tidyr’ was built under R version 3.4.4
> df
HEADER price
1 HOME_TRPM 0.863104076023855
2 AWAY_TRPM -0.845186446996287
3 HOME_TEAM CHA
4 AWAY_TEAM NOP
> tidyr::spread(df, HEADER, price)
AWAY_TEAM AWAY_TRPM HOME_TEAM HOME_TRPM
1 NOP -0.845186446996287 CHA 0.863104076023855
如果你有一个更大的数据框,你总是可以收集然后传播:
> mdf <- data.frame(Things = c("Cookies","Cake","Knives","Kittens", "Politics"), Darkness = sample(1:5), Despair = sample(1:5), Defeat = sample(1:5))> mdf
Things Darkness Despair Defeat
1 Cookies 3 4 1
2 Cake 2 2 5
3 Knives 1 3 2
4 Kittens 5 5 3
5 Politics 4 1 4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat)
Things Idea Warning_Level
1 Cookies Darkness 3
2 Cake Darkness 2
3 Knives Darkness 1
4 Kittens Darkness 5
5 Politics Darkness 4
6 Cookies Despair 4
7 Cake Despair 2
8 Knives Despair 3
9 Kittens Despair 5
10 Politics Despair 1
11 Cookies Defeat 1
12 Cake Defeat 5
13 Knives Defeat 2
14 Kittens Defeat 3
15 Politics Defeat 4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat) %>% tidyr::spread(Things, Warning_Level)
Idea Cake Cookies Kittens Knives Politics
1 Darkness 2 3 5 1 4
2 Defeat 5 1 3 2 4
3 Despair 2 4 5 3 1
使用 tibble
包中的 as_tibble()
函数,可以消除 t()
的不良影响。
df_t = as_tibble(t(df[, -1]))
names(df_t) = df[, 1]
spread
已停用,tidyr
现在建议使用 pivot_wider()
:
library(tidyverse)
df %>%
pivot_wider(names_from = HEADER, values_from = price)
我有以下data.frame
df = structure(list(HEADER = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM","AWAY_TEAM"),
price = c("0.863104076023855", "-0.845186446996287","CHA", "NOP")),
.Names = c("HEADER", "price"), row.names = c(NA, 4L), class = "data.frame")
df
#> HEADER price
#> 1 HOME_TRPM 0.863104076023855
#> 2 AWAY_TRPM -0.845186446996287
#> 3 HOME_TEAM CHA
#> 4 AWAY_TEAM NOP
我想转置。如何在不使用 t() 的情况下在 dplyr 中执行此操作?我试过了
df %>% tidyr::spread(HEADER , price)
但它没有提供平面结构,而是这样做的:
structure(list(AWAY_TEAM = c(NA, NA, NA, "NOP"),
AWAY_TRPM = c(NA, "-0.845186446996287", NA, NA),
HOME_TEAM = c(NA, NA, "CHA", NA),
HOME_TRPM = c("0.863104076023855", NA, NA, NA)),
.Names = c("AWAY_TEAM", "AWAY_TRPM", "HOME_TEAM", "HOME_TRPM"),
class = "data.frame", row.names = c(NA, 4L))
结果data.frame应该是这样的:
structure(list(HOME_TRPM = "0.863104076023855",
AWAY_TRPM = "-0.845186446996287",
HOME_TEAM = "CHA",
AWAY_TEAM = "NOP"),
.Names = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM", "AWAY_TEAM"),
row.names = c(NA, -1L), class = "data.frame"))
我想你想要 tidyr
而不是 dplyr
:
library(tidyr)
library(dplyr)
df %>% mutate(group = 1) %>%
spread(HEADER, price)
group AWAY_TEAM AWAY_TRPM HOME_TEAM HOME_TRPM
1 1 NOP -0.845186446996287 CHA 0.863104076023855
使用它,您可以指定您的分组 - 您可以添加 select(-group)
以便稍后删除它们。
他们一定已经更新了 tidyr,因为它最初是发布的,因为我认为它可以满足您现在最初的要求:
> library(dplyr)
> library(tidyr)
Warning message:
package ‘tidyr’ was built under R version 3.4.4
> df
HEADER price
1 HOME_TRPM 0.863104076023855
2 AWAY_TRPM -0.845186446996287
3 HOME_TEAM CHA
4 AWAY_TEAM NOP
> tidyr::spread(df, HEADER, price)
AWAY_TEAM AWAY_TRPM HOME_TEAM HOME_TRPM
1 NOP -0.845186446996287 CHA 0.863104076023855
如果你有一个更大的数据框,你总是可以收集然后传播:
> mdf <- data.frame(Things = c("Cookies","Cake","Knives","Kittens", "Politics"), Darkness = sample(1:5), Despair = sample(1:5), Defeat = sample(1:5))> mdf
Things Darkness Despair Defeat
1 Cookies 3 4 1
2 Cake 2 2 5
3 Knives 1 3 2
4 Kittens 5 5 3
5 Politics 4 1 4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat)
Things Idea Warning_Level
1 Cookies Darkness 3
2 Cake Darkness 2
3 Knives Darkness 1
4 Kittens Darkness 5
5 Politics Darkness 4
6 Cookies Despair 4
7 Cake Despair 2
8 Knives Despair 3
9 Kittens Despair 5
10 Politics Despair 1
11 Cookies Defeat 1
12 Cake Defeat 5
13 Knives Defeat 2
14 Kittens Defeat 3
15 Politics Defeat 4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat) %>% tidyr::spread(Things, Warning_Level)
Idea Cake Cookies Kittens Knives Politics
1 Darkness 2 3 5 1 4
2 Defeat 5 1 3 2 4
3 Despair 2 4 5 3 1
使用 tibble
包中的 as_tibble()
函数,可以消除 t()
的不良影响。
df_t = as_tibble(t(df[, -1]))
names(df_t) = df[, 1]
spread
已停用,tidyr
现在建议使用 pivot_wider()
:
library(tidyverse)
df %>%
pivot_wider(names_from = HEADER, values_from = price)