在字符串中添加前导零
Add leading zero within a character string
我的 data.frame 的一栏如下所示:
c("BP_1_CSPP", "BP_2_GEGS", "BP_3_AEAG", "BP_4_KPAP", "BP_5_TAKP",
"BP_6_GGDR", "BP_7_MQQP", "BP_8_EEEE", "BP_9_RSDP", "BP_10_APAS",
"BP_11_KRGG", "BP_12_RSQQ", "BP_13_QQLS", "BP_14_EPEV", "BP_15_AAPS",
"BP_16_SDVT", "BP_17_GQQQ", "BP_18_AETP", "BP_19_PPSA", "BP_20_DATP",
"EpQ_1_AYAT", "EpQ_2_HEKL", "EpQ_3_SCSV", "EpQ_4_MAYV", "EpQ_5_LKDP",
"EpQ_6_ERCE", "EpQ_7_DNPA", "EpQ_8_YGIS", "EpQ_9_GMSS", "EpQ_10_AAKK",
"EpQ_11_NIRI", "EpQ_12_ERRR", "EpQ_13_MDRE", "EpQ_14_SRQM", "EpQ_15_DWSI",
"EpQ_16_VLVQ", "EpQ_17_GRTI", "EpQ_18_EKVR", "EpQ_19_PDVA", "EpQ_20_ADVT",
"LbT_1_RPGG", "LbT_2_TQGD", "LbT_3_EVKS", "LbT_4_VIEM", "LbT_5_GSAD",
"LbT_6_VRPI", "LbT_7_CELG", "LbT_8_APQQ", "LbT_9_SAEE", "LbT_10_GEAE",
"LbT_11_EELR", "LbT_12_EWAN", "LbT_13_IKEE", "LbT_14_VSDF", "LbT_15_WEDV",
"LbT_16_SGGA", "LbT_17_KATN", "LbT_18_EREG", "LbT_19_AWAS", "LbT_20_VDRD",
"abc_1_CVTQ", "abc_2_KEAP", "abc_3_TAYI", "abc_4_MITN", "abc_5_MPTV",
"abc_6_TRTG", "abc_7_KSTI", "abc_8_KEAI", "abc_9_HVYS", "abc_10_LGMG",
"abc_11_VAYQ", "abc_12_AGTG", "abc_13_TDSW", "abc_14_HKKS", "abc_15_YGLA",
"abc_16_WEEW", "abc_17_HSTI", "abc_18_EKCI", "abc_19_PAGI", "abc_20_TGTI",
"TcII")
对于字符串中所有小于 10 的数字(例如 "BP_1_CSPP"、"BP_2_GEGS"),我想为它们添加一个前导零,从而得到:
"BP_01_CSPP", "BP_02_GEGS", "BP_03_AEAG", "BP_04_KPAP", "BP_05_TAKP",
"BP_06_GGDR"
等等。
这个问题(This question)的解答几乎能解决我的问题,但并不适用于我的数据,原因是:
“0”并不总是插入在同一个位置:有些字符串在要插入 0 的位置之前有 3 个字符(例如 BP_1_CSPP),而另一些有 4 个(例如 EpQ_3_SCSV)。
要插入的零之后还有其他字符,也就是说,零需要插入到字符串的中间。
我们可以使用 sub 来匹配这样的模式:_ 后跟单个数字(用括号括起来形成捕获组,即 ([0-9])),再后跟 _;然后将其替换为 _ 后跟 0、捕获组的反向引用(\1,在 R 字符串字面量中须写成 "\\1")以及 _。
# Pad single-digit numbers with a leading zero. The pattern captures exactly
# one digit enclosed by underscores, so two-digit numbers ("_10_") and strings
# without underscores ("TcII") are left untouched. The backslash of the
# backreference must be doubled inside an R string literal: a lone "\1" is
# parsed as the octal escape for the control character U+0001, not as a
# reference to the captured digit.
v1 <- sub("_([0-9])_", "_0\\1_", v1)
v1
#[1] "BP_01_CSPP" "BP_02_GEGS" "BP_03_AEAG" "BP_04_KPAP" "BP_05_TAKP" "BP_06_GGDR" "BP_07_MQQP" "BP_08_EEEE" "BP_09_RSDP" "BP_10_APAS" "BP_11_KRGG"
#[12] "BP_12_RSQQ" "BP_13_QQLS" "BP_14_EPEV" "BP_15_AAPS" "BP_16_SDVT" "BP_17_GQQQ" "BP_18_AETP" "BP_19_PPSA" "BP_20_DATP" "EpQ_01_AYAT" "EpQ_02_HEKL"
#[23] "EpQ_03_SCSV" "EpQ_04_MAYV" "EpQ_05_LKDP" "EpQ_06_ERCE" "EpQ_07_DNPA" "EpQ_08_YGIS" "EpQ_09_GMSS" "EpQ_10_AAKK" "EpQ_11_NIRI" "EpQ_12_ERRR" "EpQ_13_MDRE"
#[34] "EpQ_14_SRQM" "EpQ_15_DWSI" "EpQ_16_VLVQ" "EpQ_17_GRTI" "EpQ_18_EKVR" "EpQ_19_PDVA" "EpQ_20_ADVT" "LbT_01_RPGG" "LbT_02_TQGD" "LbT_03_EVKS" "LbT_04_VIEM"
#[45] "LbT_05_GSAD" "LbT_06_VRPI" "LbT_07_CELG" "LbT_08_APQQ" "LbT_09_SAEE" "LbT_10_GEAE" "LbT_11_EELR" "LbT_12_EWAN" "LbT_13_IKEE" "LbT_14_VSDF" "LbT_15_WEDV"
#[56] "LbT_16_SGGA" "LbT_17_KATN" "LbT_18_EREG" "LbT_19_AWAS" "LbT_20_VDRD" "abc_01_CVTQ" "abc_02_KEAP" "abc_03_TAYI" "abc_04_MITN" "abc_05_MPTV" "abc_06_TRTG"
#[67] "abc_07_KSTI" "abc_08_KEAI" "abc_09_HVYS" "abc_10_LGMG" "abc_11_VAYQ" "abc_12_AGTG" "abc_13_TDSW" "abc_14_HKKS" "abc_15_YGLA" "abc_16_WEEW" "abc_17_HSTI"
#[78] "abc_18_EKCI" "abc_19_PAGI" "abc_20_TGTI" "TcII"
另一种选择是使用 strsplit:按 _ 拆分(split)字符串,用 sprintf 将数字部分格式化为两位数,然后再用 paste 拼接在一起:
# Alternative: split each string on "_", zero-pad the second field, and glue
# the pieces back together.
# vapply() (rather than sapply()) guarantees a character vector result, even
# when v1 is empty.
vapply(strsplit(v1, "_"), function(parts) {
  # Only pad when a second field exists and consists solely of digits, so
  # strings such as "TcII" (no underscore) or a non-numeric second field pass
  # through unchanged instead of being turned into "NA".
  if (length(parts) > 1 && grepl("^[0-9]+$", parts[2])) {
    parts[2] <- sprintf("%02d", as.numeric(parts[2]))
  }
  paste(parts, collapse = "_")
}, character(1))
我的 data.frame 的一栏如下所示:
c("BP_1_CSPP", "BP_2_GEGS", "BP_3_AEAG", "BP_4_KPAP", "BP_5_TAKP",
"BP_6_GGDR", "BP_7_MQQP", "BP_8_EEEE", "BP_9_RSDP", "BP_10_APAS",
"BP_11_KRGG", "BP_12_RSQQ", "BP_13_QQLS", "BP_14_EPEV", "BP_15_AAPS",
"BP_16_SDVT", "BP_17_GQQQ", "BP_18_AETP", "BP_19_PPSA", "BP_20_DATP",
"EpQ_1_AYAT", "EpQ_2_HEKL", "EpQ_3_SCSV", "EpQ_4_MAYV", "EpQ_5_LKDP",
"EpQ_6_ERCE", "EpQ_7_DNPA", "EpQ_8_YGIS", "EpQ_9_GMSS", "EpQ_10_AAKK",
"EpQ_11_NIRI", "EpQ_12_ERRR", "EpQ_13_MDRE", "EpQ_14_SRQM", "EpQ_15_DWSI",
"EpQ_16_VLVQ", "EpQ_17_GRTI", "EpQ_18_EKVR", "EpQ_19_PDVA", "EpQ_20_ADVT",
"LbT_1_RPGG", "LbT_2_TQGD", "LbT_3_EVKS", "LbT_4_VIEM", "LbT_5_GSAD",
"LbT_6_VRPI", "LbT_7_CELG", "LbT_8_APQQ", "LbT_9_SAEE", "LbT_10_GEAE",
"LbT_11_EELR", "LbT_12_EWAN", "LbT_13_IKEE", "LbT_14_VSDF", "LbT_15_WEDV",
"LbT_16_SGGA", "LbT_17_KATN", "LbT_18_EREG", "LbT_19_AWAS", "LbT_20_VDRD",
"abc_1_CVTQ", "abc_2_KEAP", "abc_3_TAYI", "abc_4_MITN", "abc_5_MPTV",
"abc_6_TRTG", "abc_7_KSTI", "abc_8_KEAI", "abc_9_HVYS", "abc_10_LGMG",
"abc_11_VAYQ", "abc_12_AGTG", "abc_13_TDSW", "abc_14_HKKS", "abc_15_YGLA",
"abc_16_WEEW", "abc_17_HSTI", "abc_18_EKCI", "abc_19_PAGI", "abc_20_TGTI",
"TcII")
对于字符串中所有小于 10 的数字(例如 "BP_1_CSPP"、"BP_2_GEGS"),我想为它们添加一个前导零,从而得到:
"BP_01_CSPP", "BP_02_GEGS", "BP_03_AEAG", "BP_04_KPAP", "BP_05_TAKP",
"BP_06_GGDR"
等等。
这个问题(This question)的解答几乎能解决我的问题,但并不适用于我的数据,原因是:
“0”并不总是插入在同一个位置:有些字符串在要插入 0 的位置之前有 3 个字符(例如 BP_1_CSPP),而另一些有 4 个(例如 EpQ_3_SCSV)。
要插入的零之后还有其他字符,也就是说,零需要插入到字符串的中间。
我们可以使用 sub 来匹配这样的模式:_ 后跟单个数字(用括号括起来形成捕获组,即 ([0-9])),再后跟 _;然后将其替换为 _ 后跟 0、捕获组的反向引用(\1,在 R 字符串字面量中须写成 "\\1")以及 _。
# Pad single-digit numbers with a leading zero. The pattern captures exactly
# one digit enclosed by underscores, so two-digit numbers ("_10_") and strings
# without underscores ("TcII") are left untouched. The backslash of the
# backreference must be doubled inside an R string literal: a lone "\1" is
# parsed as the octal escape for the control character U+0001, not as a
# reference to the captured digit.
v1 <- sub("_([0-9])_", "_0\\1_", v1)
v1
#[1] "BP_01_CSPP" "BP_02_GEGS" "BP_03_AEAG" "BP_04_KPAP" "BP_05_TAKP" "BP_06_GGDR" "BP_07_MQQP" "BP_08_EEEE" "BP_09_RSDP" "BP_10_APAS" "BP_11_KRGG"
#[12] "BP_12_RSQQ" "BP_13_QQLS" "BP_14_EPEV" "BP_15_AAPS" "BP_16_SDVT" "BP_17_GQQQ" "BP_18_AETP" "BP_19_PPSA" "BP_20_DATP" "EpQ_01_AYAT" "EpQ_02_HEKL"
#[23] "EpQ_03_SCSV" "EpQ_04_MAYV" "EpQ_05_LKDP" "EpQ_06_ERCE" "EpQ_07_DNPA" "EpQ_08_YGIS" "EpQ_09_GMSS" "EpQ_10_AAKK" "EpQ_11_NIRI" "EpQ_12_ERRR" "EpQ_13_MDRE"
#[34] "EpQ_14_SRQM" "EpQ_15_DWSI" "EpQ_16_VLVQ" "EpQ_17_GRTI" "EpQ_18_EKVR" "EpQ_19_PDVA" "EpQ_20_ADVT" "LbT_01_RPGG" "LbT_02_TQGD" "LbT_03_EVKS" "LbT_04_VIEM"
#[45] "LbT_05_GSAD" "LbT_06_VRPI" "LbT_07_CELG" "LbT_08_APQQ" "LbT_09_SAEE" "LbT_10_GEAE" "LbT_11_EELR" "LbT_12_EWAN" "LbT_13_IKEE" "LbT_14_VSDF" "LbT_15_WEDV"
#[56] "LbT_16_SGGA" "LbT_17_KATN" "LbT_18_EREG" "LbT_19_AWAS" "LbT_20_VDRD" "abc_01_CVTQ" "abc_02_KEAP" "abc_03_TAYI" "abc_04_MITN" "abc_05_MPTV" "abc_06_TRTG"
#[67] "abc_07_KSTI" "abc_08_KEAI" "abc_09_HVYS" "abc_10_LGMG" "abc_11_VAYQ" "abc_12_AGTG" "abc_13_TDSW" "abc_14_HKKS" "abc_15_YGLA" "abc_16_WEEW" "abc_17_HSTI"
#[78] "abc_18_EKCI" "abc_19_PAGI" "abc_20_TGTI" "TcII"
另一种选择是使用 strsplit:按 _ 拆分(split)字符串,用 sprintf 将数字部分格式化为两位数,然后再用 paste 拼接在一起:
# Alternative: split each string on "_", zero-pad the second field, and glue
# the pieces back together.
# vapply() (rather than sapply()) guarantees a character vector result, even
# when v1 is empty.
vapply(strsplit(v1, "_"), function(parts) {
  # Only pad when a second field exists and consists solely of digits, so
  # strings such as "TcII" (no underscore) or a non-numeric second field pass
  # through unchanged instead of being turned into "NA".
  if (length(parts) > 1 && grepl("^[0-9]+$", parts[2])) {
    parts[2] <- sprintf("%02d", as.numeric(parts[2]))
  }
  paste(parts, collapse = "_")
}, character(1))