删除不需要的逗号并更新列
Removing unwanted comma and updating a column
我有这样的数据集
Id Comments
1 ,,,,A,,,,,B,,,,,,C
2 ,,,,,,A,,,,,,,C,,,
3
4 ,,,,,,,,,,Z,,,,,,,,
我想要生成的是这样的输出
Id Feedback Comments
1 Yes A,B,C
2 Yes A,C
3 NA
4 Yes Z
这里需要帮助。
------------------------ 实际数据的子集 ------------------------
# Subset of the real data: the first comment is an empty string and the others
# hold letters separated by runs of ", " (a comma followed by a space).
# The second column is named "Comments" directly rather than being declared as
# "Description" and then silently renamed through `.Names`.
t9 <- structure(
  list(
    ID = c(242938L, 309790L, 339402L),
    Comments = c(
      "",
      " , , , , , , , , , , , A, , , , , , , , , , , B, , , , , , C, , , , D, , , , , , , , , , E, , , , , , , , , , , F",
      " , , A, , , D, , , , , , , , , S, , , , , , , D"
    )
  ),
  row.names = c(NA, 3L),
  class = "data.frame"
)
您可以使用 gsub 删除不需要的“,”,再用 nzchar 判断 "Comments" 中的元素是否为空字符串,据此创建一个新列 ("Feedback")。
# Drop leading/trailing commas first (inner gsub), then squeeze every remaining
# run of commas down to a single separator (outer gsub).
df1$Comments <- gsub(",+", ",", gsub("^,+|,+$", "", df1$Comments))
# Flag rows that still have content. nzchar() returns TRUE for NA, so missing
# comments are excluded explicitly; NA_character_ keeps the column of type
# character even when no row has feedback.
df1$Feedback <- ifelse(
  !is.na(df1$Comments) & nzchar(df1$Comments), "Yes", NA_character_
)
df1
# Id Comments Feedback
#1 1 A,B,C Yes
#2 2 A,C Yes
#3 3 <NA>
#4 4 Z Yes
或者您可以更改 "Comments" 列
library(stringr)
# Alternative: pull out each run of ASCII letters and join them per row with
# toString(), which separates the pieces with ", ". Rows with no letters
# become "". vapply() guarantees one string per row, whereas sapply() would
# return a list for zero-length input.
df1$Comments <- vapply(
  str_extract_all(df1$Comments, "[A-Za-z]+"), toString, character(1)
)
更新
对于 t9 数据,先删除空格 (spaces),然后使用上面的代码:
# Strip every space so that only letters and commas remain.
t9[["Comments"]] <- gsub(" ", "", t9[["Comments"]], fixed = TRUE)
# Trim commas at both ends, collapse inner runs of commas to a single comma,
# then print the cleaned column.
t9[["Comments"]] <- gsub(
  ",+", ",",
  gsub("^,+|,+$", "", t9[["Comments"]])
)
print(t9[["Comments"]])
#[1] "" "A,B,C,D,E,F" "A,D,S,D"
数据
# Example data from the question: four ids whose comments are letters buried
# in runs of commas; the third comment is an empty string.
df1 <- data.frame(
  Id = 1:4,
  Comments = c(
    ",,,,A,,,,,B,,,,,,C",
    ",,,,,,A,,,,,,,C,,,",
    "",
    ",,,,,,,,,,Z,,,,,,,,"
  ),
  stringsAsFactors = FALSE
)
我有这样的数据集
Id Comments
1 ,,,,A,,,,,B,,,,,,C
2 ,,,,,,A,,,,,,,C,,,
3
4 ,,,,,,,,,,Z,,,,,,,,
我想要生成的是这样的输出
Id Feedback Comments
1 Yes A,B,C
2 Yes A,C
3 NA
4 Yes Z
这里需要帮助。
------------------------ 实际数据的子集 ------------------------
# Subset of the real data: the first comment is an empty string and the others
# hold letters separated by runs of ", " (a comma followed by a space).
# The second column is named "Comments" directly rather than being declared as
# "Description" and then silently renamed through `.Names`.
t9 <- structure(
  list(
    ID = c(242938L, 309790L, 339402L),
    Comments = c(
      "",
      " , , , , , , , , , , , A, , , , , , , , , , , B, , , , , , C, , , , D, , , , , , , , , , E, , , , , , , , , , , F",
      " , , A, , , D, , , , , , , , , S, , , , , , , D"
    )
  ),
  row.names = c(NA, 3L),
  class = "data.frame"
)
您可以使用 gsub 删除不需要的“,”,再用 nzchar 判断 "Comments" 中的元素是否为空字符串,据此创建一个新列 ("Feedback")。
# Drop leading/trailing commas first (inner gsub), then squeeze every remaining
# run of commas down to a single separator (outer gsub).
df1$Comments <- gsub(",+", ",", gsub("^,+|,+$", "", df1$Comments))
# Flag rows that still have content. nzchar() returns TRUE for NA, so missing
# comments are excluded explicitly; NA_character_ keeps the column of type
# character even when no row has feedback.
df1$Feedback <- ifelse(
  !is.na(df1$Comments) & nzchar(df1$Comments), "Yes", NA_character_
)
df1
# Id Comments Feedback
#1 1 A,B,C Yes
#2 2 A,C Yes
#3 3 <NA>
#4 4 Z Yes
或者您可以更改 "Comments" 列
library(stringr)
# Alternative: pull out each run of ASCII letters and join them per row with
# toString(), which separates the pieces with ", ". Rows with no letters
# become "". vapply() guarantees one string per row, whereas sapply() would
# return a list for zero-length input.
df1$Comments <- vapply(
  str_extract_all(df1$Comments, "[A-Za-z]+"), toString, character(1)
)
更新
对于 t9 数据,先删除空格 (spaces),然后使用上面的代码:
# Strip every space so that only letters and commas remain.
t9[["Comments"]] <- gsub(" ", "", t9[["Comments"]], fixed = TRUE)
# Trim commas at both ends, collapse inner runs of commas to a single comma,
# then print the cleaned column.
t9[["Comments"]] <- gsub(
  ",+", ",",
  gsub("^,+|,+$", "", t9[["Comments"]])
)
print(t9[["Comments"]])
#[1] "" "A,B,C,D,E,F" "A,D,S,D"
数据
# Example data from the question: four ids whose comments are letters buried
# in runs of commas; the third comment is an empty string.
df1 <- data.frame(
  Id = 1:4,
  Comments = c(
    ",,,,A,,,,,B,,,,,,C",
    ",,,,,,A,,,,,,,C,,,",
    "",
    ",,,,,,,,,,Z,,,,,,,,"
  ),
  stringsAsFactors = FALSE
)