如何按条件粘贴（拼接）两列字符串

Question

我有一个如下所示的数据框：

structure(list(date = c("01dec2013", "01jul2003", "01nov2008", 
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung", 
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung", 
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com", 
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com", 
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45, 
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L, 
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA), 
    est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar", 
    "China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)", 
    "Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid", 
    "Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified", 
    "Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
    ), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid", 
    "Paris", "Not Specified"), country = c("Algeria", "Algeria", 
    "Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar", 
    "Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified", 
    "Not Specified", "Not Specified", "Not Specified", "Not Specified", 
    "Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane", 
    "Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "", 
    "", "", "", ""), relocation = c("", "", "", "", "", ""), 
    sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas", 
    "Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products", 
    "Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction", 
    "Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction", 
    "Energy", "Energy", "Energy", "Life sciences", "Life sciences"
    ), activity = c("Manufacturing", "Extraction", "Extraction", 
    "Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L, 
    145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes", 
    "Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7, 
    29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No", 
    "No"), fdi_type = c("New", "New", "New", "Expansion", "New", 
    "New"), fdi_status = c("Announced", "Announced", "Announced", 
    "Opened", "Announced", "Opened"), year = c(2013L, 2003L, 
    2008L, 2017L, 2017L, 2003L), code_d = c("012", "012", "012", 
    "012", "012", "012"), income_d = c("MIDLW", "MIDLW", "MIDLW", 
    "MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", "Africa", 
    "Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic", 
    "Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L, 
    0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA", 
    "DZA", "DZA"), isic = c("26", "11", "11", "11", "24", "85"
    ), isic4 = c(2695, 1110, 1110, 1110, 2411, 8519), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying", 
    "Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying")), datalabel = "", time.stamp = "24 May 2021 12:23", formats = c("%111s", 
"%125s", "%125s", "%105s", "%10.0g", "%10.0g", "%10.0g", "%10.0g", 
"%28s", "%52s", "%54s", "%31s", "%44s", "%51s", "%49s", "%53s", 
"%70s", "%28s", "%29s", "%92s", "%32s", "%40s", "%10.0g", "%9s", 
"%10.0g", "%9s", "%12s", "%14s", "%19s", "%10.0g", "%10.0g", 
"%10.0g", "%3s", "%9s", "%7s", "%14s", "%8.0g", "%3s"), types = c(111L, 
125L, 125L, 105L, 65526L, 65528L, 65526L, 65529L, 28L, 52L, 54L, 
31L, 44L, 51L, 49L, 53L, 70L, 28L, 29L, 92L, 32L, 40L, 65528L, 
9L, 65526L, 9L, 12L, 14L, 19L, 65526L, 65526L, 65529L, 3L, 5L, 
7L, 14L, 65530L, 3L), val.labels = structure(c("", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "")), var.labels = c("", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "ISO country numeric code", 
"Classif World Bank by income level (See note)", "Continent", 
"Official language", "1 if landlocked", "ISO3 alpha code"), version = 118L, label.table = list(), expansion.fields = list(
    c("revenues_usd_ml", "destring", "Characters removed were:"
    ), c("revenues_usd_ml", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("Headcount", "destring", "Characters removed were:"), 
    c("Headcount", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("r_d_exp", "destring", "Characters removed were:"), 
    c("r_d_exp", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("est_year", "destring", "Characters removed were:"), 
    c("est_year", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("fdi_jobs", "destring", "Characters removed were:"), 
    c("fdi_jobs", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("capital", "destring", "Characters removed were:"), 
    c("capital", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("g_lon", "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
    ), c("g_lon", "destring", "Characters removed were:"), c("g_lat", 
    "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
    ), c("g_lat", "destring", "Characters removed were:"), c("year", 
    "destring", "Characters removed were:"), c("year", "destring_cmd", 
    "destring year, replace"), c("_dta", "ReS_i", "iso3"), c("_dta", 
    "ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta", 
    "ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("income_d", 
    "note1", "World Bank Classification:"), c("income_d", "note2", 
    "LOW: Low Income"), c("income_d", "note3", "MIDLW: Lower middle income"
    ), c("income_d", "note4", "MIDUP: Upper middle income"), 
    c("income_d", "note5", "HOECD: High income (OECD)"), c("income_d", 
    "note6", "HOTHR: High income (non OECD)"), c("income_d", 
    "note0", "6")), byteorder = "LSF", orig.dim = c(11478L, 38L
), data.label = character(0), row.names = c(NA, 6L), class = "data.frame")

我的任务是将“country”列的字符串粘贴到“state”列中，但仅针对“state”值不等于 "Not Specified" 的那些行。

借助下面这行代码，我已经接近目标了：

# NOTE(review): the left-hand side selects only the rows where state is not
# "Not Specified", but the right-hand side has no row subsetting, so it appears
# to be built from every row of FDI. The lengths then differ, which matches the
# "not a multiple of replacement length" warning, and replacement values are
# taken in order from the full-length vector rather than from the selected rows.
# `c` is presumably a vector of column names defined elsewhere — confirm.
FDI$state[!(FDI$state=="Not Specified")] <- do.call(paste, c(FDI[c], sep = ", "))

我基本得到了想要的结果，但对于某些 "Not Specified" 值，它返回了 "Not Specified, nameofthecountry"，即使 !(FDI$state=="Not Specified") 在该单元格中为 FALSE。

它还返回了这个警告：

Warning message: In FDI$state[!(FDI$state == "Not Specified")] <- do.call(paste, : number of items to replace is not a multiple of replacement length

我该怎么办？

Answer 1

如果我没理解错的话，你可以使用包 dplyr 调用 mutate() 和 if_else().

library(dplyr)

# Append the country to state as "state, country", but only for rows whose
# state holds a real value. Rows where state is the placeholder
# "Not Specified" (the spelling used in the data) are left unchanged.
# The result is printed, not stored; assign it (FDI <- FDI %>% ...) to keep it.
FDI %>%
  mutate(
    state = if_else(
      state != "Not Specified",
      paste(state, country, sep = ", "),
      state
    )
  )

这将检查 state 的值是否与 "Not Specified" 不同；如果不同，则用 `country` 的值更新 state 的值，否则保持 state 不变。

Answer 2

我们可以使用case_when

library(dplyr)

# Append the country to state as "state, country" for rows whose state is a
# real value; the fallback branch keeps state as-is, which covers the
# placeholder "Not Specified" (the spelling used in the data).
# The result is printed, not stored; assign it (FDI <- FDI %>% ...) to keep it.
FDI %>%
    mutate(
        state = case_when(
            state != "Not Specified" ~ paste(state, country, sep = ", "),
            TRUE ~ state
        )
    )

如何按条件粘贴（拼接）两列字符串

How to paste two column strings with condition

string

r

paste

conditional-statements