有条件地连接多行字符串
Conditionally concatenate string over multiple rows
我从一份 PDF 中提取了多个表格,其中有些单元格的字符串跨越多行。我使用的是 tabulizer 包中的 extract_tables() 函数,唯一的问题是:这些多行字符串的每一行都被导入成了单独的一行。
例如
# Example input: `action` holds an id only on the first row of each record;
# the continuation rows carry NA instead.
action <- c(
  1, NA, NA,
  2, NA,
  3, NA, NA, NA,
  4, NA
)
description <- c(
  "a", "b", "c",
  "a", "b",
  "a", "b", "c", "d",
  "a", "b"
)
data.frame(action = action, description = description)
action description
1 1 a
2 NA b
3 NA c
4 2 a
5 NA b
6 3 a
7 NA b
8 NA c
9 NA d
10 4 a
11 NA b
我想连接字符串,使它们显示为相同的元素,例如:
action description
1 1 a b c
2 2 a b
3 3 a b c d
4 4 a b
希望有道理,感谢任何帮助!
您可以像这样使用 zoo 和 dplyr 包:
library(zoo)
library(dplyr)

action <- c(1, NA, NA, 2, NA, 3, NA, NA, NA, 4, NA)
description <- c("a", "b", "c", "a", "b", "a", "b", "c", "d", "a", "b")
df <- data.frame(action, description)

# Carry the last observed action id forward over the NA rows below it.
# na.rm = FALSE keeps a leading NA in place instead of dropping it, so the
# result always has one value per row of df and can be assigned back.
df$action <- na.locf(df$action, na.rm = FALSE)

# One row per action, with its description fragments joined by a space.
df <- df %>%
  group_by(action) %>%
  summarise(description = paste(description, collapse = " "))
tidyverse 的方法是:先用 fill 将 action 列中的 NA 填充为前一个非 NA 值,然后按 action 进行 group_by,最后用 paste 把 description 连接在一起。
library(tidyverse)

# Fill each NA in `action` with the closest non-NA value above it, then
# collapse every action's description fragments into one space-separated
# string.
df %>%
  fill(action, .direction = "down") %>%
  group_by(action) %>%
  summarise(description = paste(description, collapse = " "))
#   action description
#    <dbl> <chr>
# 1     1. a b c
# 2     2. a b
# 3     3. a b c d
# 4     4. a b
一个 base R 方案:
dat <- data.frame(action, description)

# Replace `action` with a running group index that increases by one at every
# non-NA action, then paste each group's descriptions into a single string.
# `collapse = " "` is passed on to paste() through aggregate()'s `...`.
aggregate(
  description ~ action,
  transform(dat, action = cumsum(!is.na(action))),
  FUN = paste,
  collapse = " "
)
#   action description
# 1      1 a b c
# 2      2 a b
# 3      3 a b c d
# 4      4 a b
要使 aggregate 正常工作,我们需要将 action 替换为 cumsum(!is.na(dat$action)) 返回的分组编号,即:
# Running count of the non-NA actions: each row gets the index of the record
# it belongs to.
cumsum(!is.na(dat$action))
#[1] 1 1 1 2 2 3 3 3 3 4 4
这里是一个使用 data.table 的方案:
library(data.table)

# Convert df1 to a data.table by reference, start a new group at every non-NA
# action, and join each group's descriptions with a space.
setDT(df1)[
  ,
  .(description = paste(description, collapse = " ")),
  by = .(action = cumsum(!is.na(action)))
]
#    action description
# 1:      1 a b c
# 2:      2 a b
# 3:      3 a b c d
# 4:      4 a b
或者使用 zoo 中的 na.locf:
library(zoo)

# Group by the action id carried forward over the NA rows. na.rm = FALSE keeps
# a leading NA rather than dropping it, so the grouping vector always has one
# value per row, as `by` requires.
setDT(df1)[
  ,
  .(description = paste(description, collapse = " ")),
  by = .(action = na.locf(action, na.rm = FALSE))
]
数据
# Sample data for the data.table examples; expects the `action` and
# `description` vectors to be defined already.
df1 <- data.frame(action = action, description = description)
我从一份 PDF 中提取了多个表格,其中有些单元格的字符串跨越多行。我使用的是 tabulizer 包中的 extract_tables() 函数,唯一的问题是:这些多行字符串的每一行都被导入成了单独的一行。
例如
# Example input: `action` holds an id only on the first row of each record;
# the continuation rows carry NA instead.
action <- c(
  1, NA, NA,
  2, NA,
  3, NA, NA, NA,
  4, NA
)
description <- c(
  "a", "b", "c",
  "a", "b",
  "a", "b", "c", "d",
  "a", "b"
)
data.frame(action = action, description = description)
action description
1 1 a
2 NA b
3 NA c
4 2 a
5 NA b
6 3 a
7 NA b
8 NA c
9 NA d
10 4 a
11 NA b
我想连接字符串,使它们显示为相同的元素,例如:
action description
1 1 a b c
2 2 a b
3 3 a b c d
4 4 a b
希望有道理,感谢任何帮助!
您可以像这样使用 zoo 和 dplyr 包:
library(zoo)
library(dplyr)

action <- c(1, NA, NA, 2, NA, 3, NA, NA, NA, 4, NA)
description <- c("a", "b", "c", "a", "b", "a", "b", "c", "d", "a", "b")
df <- data.frame(action, description)

# Carry the last observed action id forward over the NA rows below it.
# na.rm = FALSE keeps a leading NA in place instead of dropping it, so the
# result always has one value per row of df and can be assigned back.
df$action <- na.locf(df$action, na.rm = FALSE)

# One row per action, with its description fragments joined by a space.
df <- df %>%
  group_by(action) %>%
  summarise(description = paste(description, collapse = " "))
tidyverse 的方法是:先用 fill 将 action 列中的 NA 填充为前一个非 NA 值,然后按 action 进行 group_by,最后用 paste 把 description 连接在一起。
library(tidyverse)

# Fill each NA in `action` with the closest non-NA value above it, then
# collapse every action's description fragments into one space-separated
# string.
df %>%
  fill(action, .direction = "down") %>%
  group_by(action) %>%
  summarise(description = paste(description, collapse = " "))
#   action description
#    <dbl> <chr>
# 1     1. a b c
# 2     2. a b
# 3     3. a b c d
# 4     4. a b
一个 base R 方案:
dat <- data.frame(action, description)

# Replace `action` with a running group index that increases by one at every
# non-NA action, then paste each group's descriptions into a single string.
# `collapse = " "` is passed on to paste() through aggregate()'s `...`.
aggregate(
  description ~ action,
  transform(dat, action = cumsum(!is.na(action))),
  FUN = paste,
  collapse = " "
)
#   action description
# 1      1 a b c
# 2      2 a b
# 3      3 a b c d
# 4      4 a b
要使 aggregate 正常工作,我们需要将 action 替换为 cumsum(!is.na(dat$action)) 返回的分组编号,即:
# Running count of the non-NA actions: each row gets the index of the record
# it belongs to.
cumsum(!is.na(dat$action))
#[1] 1 1 1 2 2 3 3 3 3 4 4
这里是一个使用 data.table 的方案:
library(data.table)

# Convert df1 to a data.table by reference, start a new group at every non-NA
# action, and join each group's descriptions with a space.
setDT(df1)[
  ,
  .(description = paste(description, collapse = " ")),
  by = .(action = cumsum(!is.na(action)))
]
#    action description
# 1:      1 a b c
# 2:      2 a b
# 3:      3 a b c d
# 4:      4 a b
或者使用 zoo 中的 na.locf:
library(zoo)

# Group by the action id carried forward over the NA rows. na.rm = FALSE keeps
# a leading NA rather than dropping it, so the grouping vector always has one
# value per row, as `by` requires.
setDT(df1)[
  ,
  .(description = paste(description, collapse = " ")),
  by = .(action = na.locf(action, na.rm = FALSE))
]
数据
# Sample data for the data.table examples; expects the `action` and
# `description` vectors to be defined already.
df1 <- data.frame(action = action, description = description)