在 R 中使用 mutate 删除字符串中的所有括号(及括号内的内容)
Mutate to remove all parenthesis (and contents) from string in R
我正在尝试使用 mutate/str_replace 删除括号(包括括号内的内容),从 “Class” 列生成 “Phenotype”(表型)列,但在正则表达式方面需要一些帮助。
然后我还想对“表型”字符串中的文本重新排序,以便按 PanCK>PD-L1>CD8>FoxP3>PD-1>CD68 的顺序显示文本。
抱歉,这个数据集不太规范!
非常感谢!
# Example input: one column, `Class`, where each entry lists markers in the
# form "Marker (Opal NNN)" separated by ": ".
test <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  )
)
我遇到的问题
# Asker's attempt. Backslashes must be doubled inside an R string literal:
# a lone "\(" is an unrecognized escape and the line fails to parse.
# NOTE: str_replace() only replaces the FIRST match in each string, so the
# later "(Opal NNN)" groups and the surrounding spaces are left in place --
# that is the problem being asked about.
test.output <- test %>%
  mutate(Phenotype = str_replace(Class, "\\([^()]{0,}\\)", ""))
期望的输出:
# Desired result of stripping every "(Opal NNN)" group and all whitespace from
# `Class`. Row 4 is "FoxP3:PanCK" so that it matches its own Class entry (it
# was previously a copy of row 2).
test.output <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  ),
  Phenotype = c(
    "FoxP3:PanCK:PD-1:CD68",
    "CD8:PanCK:CD68:PD-L1",
    "PanCK:CD68",
    "FoxP3:PanCK"
  )
)
然后重新排序为 PanCK>PD-L1>CD8>FoxP3>PD-1>CD68
# Desired final result: `Phenotype` is `Class` with the "(Opal NNN)" groups and
# whitespace removed; `Phenotype_Ordered` lists the same markers sorted by the
# priority PanCK > PD-L1 > CD8 > FoxP3 > PD-1 > CD68.
# Row 4 is derived from its own Class entry ("FoxP3 ...: PanCK ..."); it was
# previously a copy of row 2 with a mis-cased "PanCk".
ordered.output <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  ),
  Phenotype = c(
    "FoxP3:PanCK:PD-1:CD68",
    "CD8:PanCK:CD68:PD-L1",
    "PanCK:CD68",
    "FoxP3:PanCK"
  ),
  Phenotype_Ordered = c(
    "PanCK:FoxP3:PD-1:CD68",
    "PanCK:PD-L1:CD8:CD68",
    "PanCK:CD68",
    "PanCK:FoxP3"
  )
)
试试这样是否可行:
# Marker priority, highest first; used as the factor levels for sorting.
st <- c("PanCK", "PD-L1", "CD8", "FoxP3", "PD-1", "CD68")
test %>%
  # Remove every " (Opal NNN)" group. Backslashes are doubled because a lone
  # "\s" or "\(" is an unrecognized escape in an R string literal.
  mutate(Phenotype = str_remove_all(Class, "\\s\\(Opal [0-9]{3}\\)")) %>%
  # Remove the remaining whitespace (the space after each ":").
  mutate(Phenotype = str_remove_all(Phenotype, "(\\s)")) %>%
  # One row per marker, so the markers can be sorted.
  mutate(Phenotype_Ordered = str_split(Phenotype, ":")) %>%
  unnest(Phenotype_Ordered) %>%
  group_by(Class) %>%
  arrange(factor(Phenotype_Ordered, levels = st)) %>%
  # Collapse the sorted markers back into one string per Class; distinct()
  # then drops the duplicated rows created by unnest().
  # NOTE: the result is still grouped by Class.
  mutate(Phenotype_Ordered = paste(Phenotype_Ordered, collapse = ":")) %>%
  distinct()
# A tibble: 4 x 3
# Groups: Class [4]
Class Phenotype Phenotype_Ordered
<chr> <chr> <chr>
1 FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780) FoxP3:PanCK:PD-1:CD68 PanCK:FoxP3:PD-1:CD68
2 CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520) CD8:PanCK:CD68:PD-L1 PanCK:PD-L1:CD8:CD68
3 PanCK (Opal 690): CD68 (Opal 780) PanCK:CD68 PanCK:CD68
4 FoxP3 (Opal 570): PanCK (Opal 690) FoxP3:PanCK PanCK:FoxP3
另一个技巧是:
# Marker priority from lowest to highest; sorting in decreasing order below
# therefore puts PanCK first and CD68 last.
my_order <- c("CD68", "PD-1", "FoxP3", "CD8", "PD-L1", "PanCK")
test %>%
  mutate(
    # Strip each "(...)" group plus the whitespace before it. Backslashes are
    # doubled because a lone "\s" is an unrecognized escape in an R string.
    # The space after each ":" is kept, so `prototype` reads "A: B: C".
    prototype = gsub("\\s*[(][^)]+[)]", "", Class),
    # Split on ":" (ignoring surrounding whitespace), sort each piece vector
    # as an ordered factor with levels `my_order`, and re-join with ":".
    ordered = map_chr(
      strsplit(prototype, "\\s*:\\s*"),
      ~ str_c(sort(ordered(.x, my_order), decreasing = TRUE), collapse = ":")
    )
  )
Class prototype ordered
1 FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780) FoxP3: PanCK: PD-1: CD68 PanCK:FoxP3:PD-1:CD68
2 CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520) CD8: PanCK: CD68: PD-L1 PanCK:PD-L1:CD8:CD68
3 PanCK (Opal 690): CD68 (Opal 780) PanCK: CD68 PanCK:CD68
4 FoxP3 (Opal 570): PanCK (Opal 690) FoxP3: PanCK PanCK:FoxP3
我正在尝试使用 mutate/str_replace 删除括号(包括括号内的内容),从 “Class” 列生成 “Phenotype”(表型)列,但在正则表达式方面需要一些帮助。然后我还想对 “Phenotype” 字符串中的标记重新排序,使其按 PanCK>PD-L1>CD8>FoxP3>PD-1>CD68 的顺序显示。抱歉,这个数据集不太规范!非常感谢!
# Example input: one column, `Class`, where each entry lists markers in the
# form "Marker (Opal NNN)" separated by ": ".
test <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  )
)
我遇到的问题
# Asker's attempt. Backslashes must be doubled inside an R string literal:
# a lone "\(" is an unrecognized escape and the line fails to parse.
# NOTE: str_replace() only replaces the FIRST match in each string, so the
# later "(Opal NNN)" groups and the surrounding spaces are left in place --
# that is the problem being asked about.
test.output <- test %>%
  mutate(Phenotype = str_replace(Class, "\\([^()]{0,}\\)", ""))
期望的输出:
# Desired result of stripping every "(Opal NNN)" group and all whitespace from
# `Class`. Row 4 is "FoxP3:PanCK" so that it matches its own Class entry (it
# was previously a copy of row 2).
test.output <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  ),
  Phenotype = c(
    "FoxP3:PanCK:PD-1:CD68",
    "CD8:PanCK:CD68:PD-L1",
    "PanCK:CD68",
    "FoxP3:PanCK"
  )
)
然后重新排序为 PanCK>PD-L1>CD8>FoxP3>PD-1>CD68
# Desired final result: `Phenotype` is `Class` with the "(Opal NNN)" groups and
# whitespace removed; `Phenotype_Ordered` lists the same markers sorted by the
# priority PanCK > PD-L1 > CD8 > FoxP3 > PD-1 > CD68.
# Row 4 is derived from its own Class entry ("FoxP3 ...: PanCK ..."); it was
# previously a copy of row 2 with a mis-cased "PanCk".
ordered.output <- data.frame(
  Class = c(
    "FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780)",
    "CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520)",
    "PanCK (Opal 690): CD68 (Opal 780)",
    "FoxP3 (Opal 570): PanCK (Opal 690)"
  ),
  Phenotype = c(
    "FoxP3:PanCK:PD-1:CD68",
    "CD8:PanCK:CD68:PD-L1",
    "PanCK:CD68",
    "FoxP3:PanCK"
  ),
  Phenotype_Ordered = c(
    "PanCK:FoxP3:PD-1:CD68",
    "PanCK:PD-L1:CD8:CD68",
    "PanCK:CD68",
    "PanCK:FoxP3"
  )
)
试试这样是否可行:
# Marker priority, highest first; used as the factor levels for sorting.
st <- c("PanCK", "PD-L1", "CD8", "FoxP3", "PD-1", "CD68")
test %>%
  # Remove every " (Opal NNN)" group. Backslashes are doubled because a lone
  # "\s" or "\(" is an unrecognized escape in an R string literal.
  mutate(Phenotype = str_remove_all(Class, "\\s\\(Opal [0-9]{3}\\)")) %>%
  # Remove the remaining whitespace (the space after each ":").
  mutate(Phenotype = str_remove_all(Phenotype, "(\\s)")) %>%
  # One row per marker, so the markers can be sorted.
  mutate(Phenotype_Ordered = str_split(Phenotype, ":")) %>%
  unnest(Phenotype_Ordered) %>%
  group_by(Class) %>%
  arrange(factor(Phenotype_Ordered, levels = st)) %>%
  # Collapse the sorted markers back into one string per Class; distinct()
  # then drops the duplicated rows created by unnest().
  # NOTE: the result is still grouped by Class.
  mutate(Phenotype_Ordered = paste(Phenotype_Ordered, collapse = ":")) %>%
  distinct()
# A tibble: 4 x 3
# Groups: Class [4]
Class Phenotype Phenotype_Ordered
<chr> <chr> <chr>
1 FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780) FoxP3:PanCK:PD-1:CD68 PanCK:FoxP3:PD-1:CD68
2 CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520) CD8:PanCK:CD68:PD-L1 PanCK:PD-L1:CD8:CD68
3 PanCK (Opal 690): CD68 (Opal 780) PanCK:CD68 PanCK:CD68
4 FoxP3 (Opal 570): PanCK (Opal 690) FoxP3:PanCK PanCK:FoxP3
另一个技巧是:
# Marker priority from lowest to highest; sorting in decreasing order below
# therefore puts PanCK first and CD68 last.
my_order <- c("CD68", "PD-1", "FoxP3", "CD8", "PD-L1", "PanCK")
test %>%
  mutate(
    # Strip each "(...)" group plus the whitespace before it. Backslashes are
    # doubled because a lone "\s" is an unrecognized escape in an R string.
    # The space after each ":" is kept, so `prototype` reads "A: B: C".
    prototype = gsub("\\s*[(][^)]+[)]", "", Class),
    # Split on ":" (ignoring surrounding whitespace), sort each piece vector
    # as an ordered factor with levels `my_order`, and re-join with ":".
    ordered = map_chr(
      strsplit(prototype, "\\s*:\\s*"),
      ~ str_c(sort(ordered(.x, my_order), decreasing = TRUE), collapse = ":")
    )
  )
Class prototype ordered
1 FoxP3 (Opal 570): PanCK (Opal 690): PD-1 (Opal 620): CD68 (Opal 780) FoxP3: PanCK: PD-1: CD68 PanCK:FoxP3:PD-1:CD68
2 CD8 (Opal 480): PanCK (Opal 690): CD68 (Opal 780): PD-L1 (Opal 520) CD8: PanCK: CD68: PD-L1 PanCK:PD-L1:CD8:CD68
3 PanCK (Opal 690): CD68 (Opal 780) PanCK: CD68 PanCK:CD68
4 FoxP3 (Opal 570): PanCK (Opal 690) FoxP3: PanCK PanCK:FoxP3