使用 `&` 和 ifelse 进行模式匹配和变异
Pattern matching and mutate with `&` and ifelse
我想将 mutate()
与 ifelse()
和 &
结合使用。然而,R 并没有应用这一更改,而我也没有收到任何报错。
所以,代码中肯定有笔误。这是我使用的代码:
library(dplyr)
# Set City to "Berlin" for rows whose `Business Phone` matches one of the
# Berlin area-code patterns AND whose Country is "Germany"; every other row
# keeps its current City value.
# Regex metacharacters need a doubled backslash inside an R string literal
# ("\\(" and "\\+"); a single backslash is an unrecognized escape and the
# code does not even parse.
# NOTE: this pipeline only returns the modified tibble. `dat` itself stays
# unchanged unless the result is assigned (e.g. `dat <- dat %>% mutate(...)`).
dat %>%
  mutate(City=ifelse(grepl("\\(030)|30|^\\+4930|(30)|^\\+49 30|^0049030|^\\+49030|0049030|^4930|^4930|^030",
                           `Business Phone`) & Country == "Germany", "Berlin", City))
如果 `Business Phone`
具有 grepl()
中的模式并且 Country
是 "Germany"
,则目标是将 City 填充为 "Berlin"
。
下面是一小段 dput() 输出
:
# Sample data (dput output) bound to `dat`, the name the snippets around it
# use: ten German rows with a missing City and a Berlin-style phone number.
dat <- structure(
  list(
    Country = c("Germany", "Germany", "Germany", "Germany", "Germany",
                "Germany", "Germany", "Germany", "Germany", "Germany"),
    City = c(NA_character_, NA_character_, NA_character_, NA_character_,
             NA_character_, NA_character_, NA_character_, NA_character_,
             NA_character_, NA_character_),
    `Business Phone` = c("+49 3020618791360", "+49 (30) 24729320",
                         "+49 (30) 29034056", "+49 (30) 31422940",
                         "+49 (30) 78893131", "+49 30 2060708870",
                         "+49 (30) 84452575", "+49 (30) 38629224",
                         "+49 (30) 93923158", "+49 (30) 36288666")
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
您可以使用正则表达式和条件赋值。
# Overwrite City with "Berlin" for German rows whose phone number carries the
# Berlin area code. The pattern accepts two shapes:
#   1. optional "+", optional "0"/"00", country code 49, optional "0",
#      optional whitespace, optional "(", then 30 followed by more characters;
#   2. a number starting with optional "(", optional "0", then 30 followed by
#      more characters.
# Backslashes are doubled because the regex is written as an R string literal;
# a single "\+" / "\s" / "\(" is an unrecognized escape and fails to parse.
dat$City[grepl("^\\+?(0?0)?(49)0?\\s?\\(?(30).+|^\\(?(0?(30)).+",
               dat$`Business Phone`) & dat$Country == "Germany"] <- "Berlin"
> dat
# A tibble: 10 x 3
Country City `Business Phone`
<chr> <chr> <chr>
1 Germany Berlin +49 3020618791360
2 Germany Berlin +49 (30) 24729320
3 Germany Berlin +49 (30) 29034056
4 Germany Berlin +49 (30) 31422940
5 Germany Berlin +49 (30) 78893131
6 Germany Berlin +49 30 2060708870
7 Germany Berlin +49 (30) 84452575
8 Germany Berlin +49 (30) 38629224
9 Germany Berlin +49 (30) 93923158
10 Germany Berlin +49 (30) 36288666
使用以下数据进行测试:
# Phone numbers used to exercise the Berlin pattern: a mix of numbers that
# should match and numbers that should not. Two entries intentionally keep a
# trailing space.
nums <- c(
  "+49 (30) 78893131", "+49 30 2060708870",
  "+42 (30) 36288666 ", "+19 (30) 36288666 ",
  "+49 (20) 36288666", "30456745674",
  "+493045674567", "(30)45674567",
  "+49 3045674567", "004903045674567",
  "+4903045674567", "004903045674567",
  "493045674567", "493045674567",
  "03045674567", "+49 (20) 36288666",
  "004920000", "204054756",
  "3145675678", "49403235678",
  "49030345435", "20456745674",
  "+193045674567", "(20)45674567",
  "+41 3045674567", "004103045674567",
  "+4103045674567", "004104945674567",
  "413045674567", "413045674567",
  "01045674567"
)
> nums[grepl("^\\+?(0?0)?(49)0?\\s?\\(?(30).+|^\\(?(0?(30)).+", nums)]
[1] "+49 (30) 78893131" "+49 30 2060708870" "30456745674" "+493045674567"
[5] "(30)45674567" "+49 3045674567" "004903045674567" "+4903045674567"
[9] "004903045674567" "493045674567" "493045674567" "03045674567"
[13] "49030345435"
我想将 mutate()
与 ifelse()
和 &
结合使用。然而,R 并没有应用这一更改,而我也没有收到任何报错。
所以,代码中肯定有笔误。这是我使用的代码:
library(dplyr)
# Set City to "Berlin" for rows whose `Business Phone` matches one of the
# Berlin area-code patterns AND whose Country is "Germany"; every other row
# keeps its current City value.
# Regex metacharacters need a doubled backslash inside an R string literal
# ("\\(" and "\\+"); a single backslash is an unrecognized escape and the
# code does not even parse.
# NOTE: this pipeline only returns the modified tibble. `dat` itself stays
# unchanged unless the result is assigned (e.g. `dat <- dat %>% mutate(...)`).
dat %>%
  mutate(City=ifelse(grepl("\\(030)|30|^\\+4930|(30)|^\\+49 30|^0049030|^\\+49030|0049030|^4930|^4930|^030",
                           `Business Phone`) & Country == "Germany", "Berlin", City))
如果 `Business Phone`
具有 grepl()
中的模式并且 Country
是 "Germany"
,则目标是将 City 填充为 "Berlin"
。
下面是一小段 dput() 输出
:
# Sample data (dput output) bound to `dat`, the name the snippets around it
# use: ten German rows with a missing City and a Berlin-style phone number.
dat <- structure(
  list(
    Country = c("Germany", "Germany", "Germany", "Germany", "Germany",
                "Germany", "Germany", "Germany", "Germany", "Germany"),
    City = c(NA_character_, NA_character_, NA_character_, NA_character_,
             NA_character_, NA_character_, NA_character_, NA_character_,
             NA_character_, NA_character_),
    `Business Phone` = c("+49 3020618791360", "+49 (30) 24729320",
                         "+49 (30) 29034056", "+49 (30) 31422940",
                         "+49 (30) 78893131", "+49 30 2060708870",
                         "+49 (30) 84452575", "+49 (30) 38629224",
                         "+49 (30) 93923158", "+49 (30) 36288666")
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
您可以使用正则表达式和条件赋值。
# Overwrite City with "Berlin" for German rows whose phone number carries the
# Berlin area code. The pattern accepts two shapes:
#   1. optional "+", optional "0"/"00", country code 49, optional "0",
#      optional whitespace, optional "(", then 30 followed by more characters;
#   2. a number starting with optional "(", optional "0", then 30 followed by
#      more characters.
# Backslashes are doubled because the regex is written as an R string literal;
# a single "\+" / "\s" / "\(" is an unrecognized escape and fails to parse.
dat$City[grepl("^\\+?(0?0)?(49)0?\\s?\\(?(30).+|^\\(?(0?(30)).+",
               dat$`Business Phone`) & dat$Country == "Germany"] <- "Berlin"
> dat
# A tibble: 10 x 3
Country City `Business Phone`
<chr> <chr> <chr>
1 Germany Berlin +49 3020618791360
2 Germany Berlin +49 (30) 24729320
3 Germany Berlin +49 (30) 29034056
4 Germany Berlin +49 (30) 31422940
5 Germany Berlin +49 (30) 78893131
6 Germany Berlin +49 30 2060708870
7 Germany Berlin +49 (30) 84452575
8 Germany Berlin +49 (30) 38629224
9 Germany Berlin +49 (30) 93923158
10 Germany Berlin +49 (30) 36288666
使用以下数据进行测试:
# Phone numbers used to exercise the Berlin pattern: a mix of numbers that
# should match and numbers that should not. Two entries intentionally keep a
# trailing space.
nums <- c(
  "+49 (30) 78893131", "+49 30 2060708870",
  "+42 (30) 36288666 ", "+19 (30) 36288666 ",
  "+49 (20) 36288666", "30456745674",
  "+493045674567", "(30)45674567",
  "+49 3045674567", "004903045674567",
  "+4903045674567", "004903045674567",
  "493045674567", "493045674567",
  "03045674567", "+49 (20) 36288666",
  "004920000", "204054756",
  "3145675678", "49403235678",
  "49030345435", "20456745674",
  "+193045674567", "(20)45674567",
  "+41 3045674567", "004103045674567",
  "+4103045674567", "004104945674567",
  "413045674567", "413045674567",
  "01045674567"
)
> nums[grepl("^\\+?(0?0)?(49)0?\\s?\\(?(30).+|^\\(?(0?(30)).+", nums)]
[1] "+49 (30) 78893131" "+49 30 2060708870" "30456745674" "+493045674567"
[5] "(30)45674567" "+49 3045674567" "004903045674567" "+4903045674567"
[9] "004903045674567" "493045674567" "493045674567" "03045674567"
[13] "49030345435"