将 `pivot_wider` 限制为与模式匹配的行
Restrict `pivot_wider` to rows matching a pattern
我不想对列中的所有值做宽化(`pivot_wider`),而只想对其中与某个模式匹配的值做宽化。
一些玩具数据:
# Toy data: seven utterances, each paired with a "start - end" timestamp range.
# Rows 2-4 share the same timestamp range; every other range occurs once.
df <- data.frame(
  utterance = c(
    "A and stuff",
    "X and something",
    "A and some more",
    "B etc.",
    "B",
    "x yz and so on",
    "BBB"
  ),
  timestamp = c(
    "00:05:31.736 - 00:05:35.263",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.842 - 00:05:35.302",
    "00:05:35.088 - 00:05:36.134",
    "00:05:35.263 - 00:05:53.052"
  )
)
我只想把 `utterance` 中以 `A` 或 `B` 开头的那些行转换为宽格式。但目前我只能对 `utterance` 中的所有行进行宽化:
library(tidyr)
# Baseline: pivot on the raw `utterance` values, so each distinct utterance
# becomes its own column (see the 8-column result printed below).
# NOTE(review): group_by() is a dplyr function, but this snippet only attaches
# tidyr -- presumably dplyr was loaded elsewhere; confirm.
# NOTE(review): `-utterance` is passed positionally; which parameter it binds
# to depends on the installed tidyr version -- verify.
df %>%
group_by(timestamp) %>%
pivot_wider(-utterance,
names_from = utterance,
values_from = utterance)
# A tibble: 5 x 8
# Groups: timestamp [5]
timestamp `A and stuff` `X and something` `A and some more` `B etc.` B `x yz and so on` BBB
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 00:05:31.736 - 00:05:35.263 A and stuff NA NA NA NA NA NA
2 00:05:31.829 - 00:05:36.449 NA X and something A and some more B etc. NA NA NA
3 00:05:31.842 - 00:05:35.302 NA NA NA NA B NA NA
4 00:05:35.088 - 00:05:36.134 NA NA NA NA NA x yz and so on NA
5 00:05:35.263 - 00:05:53.052 NA NA NA NA NA NA BBB
我尝试按该模式对 `utterance` 取子集,但出现了错误:
# Attempted fix: subset `utterance` by the pattern directly inside
# names_from / values_from. This does not work; it raises the error printed
# right after this snippet.
# NOTE(review): presumably these arguments expect column selections, so a
# row-filtered vector is not a valid value here -- confirm against the tidyr docs.
df %>%
group_by(timestamp) %>%
pivot_wider(names_from = utterance[grepl("^(A|B)", utterance)],
values_from = utterance[grepl("^(A|B)", utterance)])
Error: object 'utterance' not found
如何只对匹配的行进行宽化?
预期:
# timestamp `A` utterance `B`
# <chr> <chr> <chr> <chr>
# 00:05:31.736 - 00:05:35.263 A and stuff NA NA
# 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
# 00:05:31.842 - 00:05:35.302 NA NA B
# 00:05:35.088 - 00:05:36.134 NA x yz and so on NA
# 00:05:35.263 - 00:05:53.052 NA NA BBB
您可以先创建一个新的列,用来提供列名(`names_from`):
library(stringr)
library(dplyr)
library(tidyr)
# Label every row with the name of the column it should end up in ("A", "B",
# or the catch-all "utterance"), then pivot on that label rather than on the
# raw utterance text.
df %>%
  mutate(
    pvt = case_when(
      str_detect(utterance, "^A") ~ "A",
      str_detect(utterance, "^B") ~ "B",
      TRUE ~ "utterance"
    )
  ) %>%
  pivot_wider(names_from = pvt, values_from = utterance)
返回结果如下:
# A tibble: 5 x 4
timestamp A utterance B
<chr> <chr> <chr> <chr>
1 00:05:31.736 - 00:05:35.263 A and stuff NA NA
2 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
3 00:05:31.842 - 00:05:35.302 NA NA B
4 00:05:35.088 - 00:05:36.134 NA x yz and so on NA
5 00:05:35.263 - 00:05:53.052 NA NA BBB
不使用 `pivot_wider` 的解决方案:
library(tidyverse)
# Toy data: seven utterances, each paired with a "start - end" timestamp range.
# Rows 2-4 share the same timestamp range; every other range occurs once.
df <- data.frame(
  utterance = c(
    "A and stuff",
    "X and something",
    "A and some more",
    "B etc.",
    "B",
    "x yz and so on",
    "BBB"
  ),
  timestamp = c(
    "00:05:31.736 - 00:05:35.263",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.842 - 00:05:35.302",
    "00:05:35.088 - 00:05:36.134",
    "00:05:35.263 - 00:05:53.052"
  )
)
# Widen by hand instead of pivoting:
#   1. copy each utterance into column A or B when it starts with that letter,
#      and blank it out of `utterance` (this assignment comes last, so A and B
#      are still computed from the original text);
#   2. move `utterance` between A and B;
#   3. within each timestamp, fill the NAs in both directions so every row of
#      the group carries the same values, then drop the duplicate rows.
df %>%
  mutate(
    A = ifelse(str_detect(utterance, "^A"), utterance, NA),
    B = ifelse(str_detect(utterance, "^B"), utterance, NA),
    utterance = ifelse(str_detect(utterance, "^A|^B"), NA, utterance)
  ) %>%
  relocate(utterance, .before = "B") %>%
  group_by(timestamp) %>%
  fill(everything(), .direction = "downup") %>%
  ungroup() %>%
  distinct()
#> # A tibble: 5 × 4
#> timestamp A utterance B
#> <chr> <chr> <chr> <chr>
#> 1 00:05:31.736 - 00:05:35.263 A and stuff <NA> <NA>
#> 2 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
#> 3 00:05:31.842 - 00:05:35.302 <NA> <NA> B
#> 4 00:05:35.088 - 00:05:36.134 <NA> x yz and so on <NA>
#> 5 00:05:35.263 - 00:05:53.052 <NA> <NA> BBB
我不想对列中的所有值做宽化(`pivot_wider`),而只想对其中与某个模式匹配的值做宽化。
一些玩具数据:
# Toy data: seven utterances, each paired with a "start - end" timestamp range.
# Rows 2-4 share the same timestamp range; every other range occurs once.
df <- data.frame(
  utterance = c(
    "A and stuff",
    "X and something",
    "A and some more",
    "B etc.",
    "B",
    "x yz and so on",
    "BBB"
  ),
  timestamp = c(
    "00:05:31.736 - 00:05:35.263",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.842 - 00:05:35.302",
    "00:05:35.088 - 00:05:36.134",
    "00:05:35.263 - 00:05:53.052"
  )
)
我只想把 `utterance` 中以 `A` 或 `B` 开头的那些行转换为宽格式。但目前我只能对 `utterance` 中的所有行进行宽化:
library(tidyr)
# Baseline: pivot on the raw `utterance` values, so each distinct utterance
# becomes its own column (see the 8-column result printed below).
# NOTE(review): group_by() is a dplyr function, but this snippet only attaches
# tidyr -- presumably dplyr was loaded elsewhere; confirm.
# NOTE(review): `-utterance` is passed positionally; which parameter it binds
# to depends on the installed tidyr version -- verify.
df %>%
group_by(timestamp) %>%
pivot_wider(-utterance,
names_from = utterance,
values_from = utterance)
# A tibble: 5 x 8
# Groups: timestamp [5]
timestamp `A and stuff` `X and something` `A and some more` `B etc.` B `x yz and so on` BBB
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 00:05:31.736 - 00:05:35.263 A and stuff NA NA NA NA NA NA
2 00:05:31.829 - 00:05:36.449 NA X and something A and some more B etc. NA NA NA
3 00:05:31.842 - 00:05:35.302 NA NA NA NA B NA NA
4 00:05:35.088 - 00:05:36.134 NA NA NA NA NA x yz and so on NA
5 00:05:35.263 - 00:05:53.052 NA NA NA NA NA NA BBB
我尝试按该模式对 `utterance` 取子集,但出现了错误:
# Attempted fix: subset `utterance` by the pattern directly inside
# names_from / values_from. This does not work; it raises the error printed
# right after this snippet.
# NOTE(review): presumably these arguments expect column selections, so a
# row-filtered vector is not a valid value here -- confirm against the tidyr docs.
df %>%
group_by(timestamp) %>%
pivot_wider(names_from = utterance[grepl("^(A|B)", utterance)],
values_from = utterance[grepl("^(A|B)", utterance)])
Error: object 'utterance' not found
如何只对匹配的行进行宽化?
预期:
# timestamp `A` utterance `B`
# <chr> <chr> <chr> <chr>
# 00:05:31.736 - 00:05:35.263 A and stuff NA NA
# 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
# 00:05:31.842 - 00:05:35.302 NA NA B
# 00:05:35.088 - 00:05:36.134 NA x yz and so on NA
# 00:05:35.263 - 00:05:53.052 NA NA BBB
您可以先创建一个新的列,用来提供列名(`names_from`):
library(stringr)
library(dplyr)
library(tidyr)
# Label every row with the name of the column it should end up in ("A", "B",
# or the catch-all "utterance"), then pivot on that label rather than on the
# raw utterance text.
df %>%
  mutate(
    pvt = case_when(
      str_detect(utterance, "^A") ~ "A",
      str_detect(utterance, "^B") ~ "B",
      TRUE ~ "utterance"
    )
  ) %>%
  pivot_wider(names_from = pvt, values_from = utterance)
返回结果如下:
# A tibble: 5 x 4
timestamp A utterance B
<chr> <chr> <chr> <chr>
1 00:05:31.736 - 00:05:35.263 A and stuff NA NA
2 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
3 00:05:31.842 - 00:05:35.302 NA NA B
4 00:05:35.088 - 00:05:36.134 NA x yz and so on NA
5 00:05:35.263 - 00:05:53.052 NA NA BBB
不使用 `pivot_wider` 的解决方案:
library(tidyverse)
# Toy data: seven utterances, each paired with a "start - end" timestamp range.
# Rows 2-4 share the same timestamp range; every other range occurs once.
df <- data.frame(
  utterance = c(
    "A and stuff",
    "X and something",
    "A and some more",
    "B etc.",
    "B",
    "x yz and so on",
    "BBB"
  ),
  timestamp = c(
    "00:05:31.736 - 00:05:35.263",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.829 - 00:05:36.449",
    "00:05:31.842 - 00:05:35.302",
    "00:05:35.088 - 00:05:36.134",
    "00:05:35.263 - 00:05:53.052"
  )
)
# Widen by hand instead of pivoting:
#   1. copy each utterance into column A or B when it starts with that letter,
#      and blank it out of `utterance` (this assignment comes last, so A and B
#      are still computed from the original text);
#   2. move `utterance` between A and B;
#   3. within each timestamp, fill the NAs in both directions so every row of
#      the group carries the same values, then drop the duplicate rows.
df %>%
  mutate(
    A = ifelse(str_detect(utterance, "^A"), utterance, NA),
    B = ifelse(str_detect(utterance, "^B"), utterance, NA),
    utterance = ifelse(str_detect(utterance, "^A|^B"), NA, utterance)
  ) %>%
  relocate(utterance, .before = "B") %>%
  group_by(timestamp) %>%
  fill(everything(), .direction = "downup") %>%
  ungroup() %>%
  distinct()
#> # A tibble: 5 × 4
#> timestamp A utterance B
#> <chr> <chr> <chr> <chr>
#> 1 00:05:31.736 - 00:05:35.263 A and stuff <NA> <NA>
#> 2 00:05:31.829 - 00:05:36.449 A and some more X and something B etc.
#> 3 00:05:31.842 - 00:05:35.302 <NA> <NA> B
#> 4 00:05:35.088 - 00:05:36.134 <NA> x yz and so on <NA>
#> 5 00:05:35.263 - 00:05:53.052 <NA> <NA> BBB