将 str_split 应用于数据框中的列
applying str_split to a column in dataframe
我有以下名为 i 的 df:
structure(list(price = c(11772, 14790, 2990, 1499, 21980, 27999
), fuel = c("diesel", "petrol", "petrol", "diesel", "diesel",
"petrol"), gearbox = c("manual", "manual", "manual", "manual",
"automatic", "manual"), colour = c("white", "purple", "yellow",
"silver", "red", "rising blue metalli"), engine_size = c(1685,
1199, 998, 1753, 2179, 1984), mileage = c(18839, 7649, 45058,
126000, 31891, 100), year = c("2013 hyundai ix35", "2016 citroen citroen ds3 cabrio",
"2007 peugeot 107 hatchback", "2007 ford ford focus hatchback", "2012 jaguar xf saloon",
"2016 volkswagen scirocco coupe"), doors = c(5, 2, 3, 5, 4, 3
)), .Names = c("price", "fuel", "gearbox", "colour", "engine_size",
"mileage", "year", "doors"), row.names = c(NA, 6L), class = "data.frame")
'year' 列中的某些单词重复。我想删除它们。作为第一步,我想用单独的单词分隔此列中的字符串。
我能够为单独的字符串执行此操作,但是当我尝试将其应用于整个数据框时,它会出现错误
unlist(str_split( "2013 hyunday ix35", "[[:blank:]]"))
[1] "2013"    "hyunday" "ix35"
for( k in 1:nrow(i))
+ i[k,7]<-unlist(str_split( i[k, 7], "[[:blank:]]"))
错误于 `[<-.data.frame`(`*tmp*`, k, 7, value = c("2013", "hyundai", :
  替换有 3 行,数据有 1 行
我们可以遍历 strsplit 返回的 list 输出(使用 sapply(..)),对每个元素用 unique 去掉重复的单词,再用 paste 拼接回一个字符串:
i$year <- sapply(strsplit(i$year, "\\s+"), function(x) paste(unique(x), collapse=' '))
使用 dplyr 和 stringr(并借助 purrr 处理列表),您可以这样做:
library(dplyr)
df %>%
mutate(newyear = purrr::map_chr(
stringr::str_split(year, pattern = "[[:blank:]]"),
~ paste(unique(.x), collapse = " ")
))
#> price fuel gearbox colour engine_size mileage
#> 1 11772 diesel manual white 1685 18839
#> 2 14790 petrol manual purple 1199 7649
#> 3 2990 petrol manual yellow 998 45058
#> 4 1499 diesel manual silver 1753 126000
#> 5 21980 diesel automatic red 2179 31891
#> 6 27999 petrol manual rising blue metalli 1984 100
#> year doors newyear
#> 1 2013 hyundai ix35 5 2013 hyundai ix35
#> 2 2016 citroen citroen ds3 cabrio 2 2016 citroen ds3 cabrio
#> 3 2007 peugeot 107 hatchback 3 2007 peugeot 107 hatchback
#> 4 2007 ford ford focus hatchback 5 2007 ford focus hatchback
#> 5 2012 jaguar xf saloon 4 2012 jaguar xf saloon
#> 6 2016 volkswagen scirocco coupe 3 2016 volkswagen scirocco coupe
我有以下名为 i 的 df:
structure(list(price = c(11772, 14790, 2990, 1499, 21980, 27999
), fuel = c("diesel", "petrol", "petrol", "diesel", "diesel",
"petrol"), gearbox = c("manual", "manual", "manual", "manual",
"automatic", "manual"), colour = c("white", "purple", "yellow",
"silver", "red", "rising blue metalli"), engine_size = c(1685,
1199, 998, 1753, 2179, 1984), mileage = c(18839, 7649, 45058,
126000, 31891, 100), year = c("2013 hyundai ix35", "2016 citroen citroen ds3 cabrio",
"2007 peugeot 107 hatchback", "2007 ford ford focus hatchback", "2012 jaguar xf saloon",
"2016 volkswagen scirocco coupe"), doors = c(5, 2, 3, 5, 4, 3
)), .Names = c("price", "fuel", "gearbox", "colour", "engine_size",
"mileage", "year", "doors"), row.names = c(NA, 6L), class = "data.frame")
'year' 列中的某些单词重复。我想删除它们。作为第一步,我想用单独的单词分隔此列中的字符串。 我能够为单独的字符串执行此操作,但是当我尝试将其应用于整个数据框时,它会出现错误
unlist(str_split( "2013 hyunday ix35", "[[:blank:]]"))
[1] "2013"    "hyunday" "ix35"
for( k in 1:nrow(i))
+ i[k,7]<-unlist(str_split( i[k, 7], "[[:blank:]]"))
错误于 `[<-.data.frame`(`*tmp*`, k, 7, value = c("2013", "hyundai", :
  替换有 3 行,数据有 1 行
我们可以遍历 strsplit 返回的 list 输出(使用 sapply(..)),对每个元素用 unique 去掉重复的单词,再用 paste 拼接回一个字符串:
i$year <- sapply(strsplit(i$year, "\\s+"), function(x) paste(unique(x), collapse=' '))
使用 dplyr 和 stringr(并借助 purrr 处理列表),您可以这样做:
library(dplyr)
df %>%
mutate(newyear = purrr::map_chr(
stringr::str_split(year, pattern = "[[:blank:]]"),
~ paste(unique(.x), collapse = " ")
))
#> price fuel gearbox colour engine_size mileage
#> 1 11772 diesel manual white 1685 18839
#> 2 14790 petrol manual purple 1199 7649
#> 3 2990 petrol manual yellow 998 45058
#> 4 1499 diesel manual silver 1753 126000
#> 5 21980 diesel automatic red 2179 31891
#> 6 27999 petrol manual rising blue metalli 1984 100
#> year doors newyear
#> 1 2013 hyundai ix35 5 2013 hyundai ix35
#> 2 2016 citroen citroen ds3 cabrio 2 2016 citroen ds3 cabrio
#> 3 2007 peugeot 107 hatchback 3 2007 peugeot 107 hatchback
#> 4 2007 ford ford focus hatchback 5 2007 ford focus hatchback
#> 5 2012 jaguar xf saloon 4 2012 jaguar xf saloon
#> 6 2016 volkswagen scirocco coupe 3 2016 volkswagen scirocco coupe