如何按条件拼接(paste)两列字符串
How to paste two column strings with condition
我有一个如下所示的数据框:
structure(list(date = c("01dec2013", "01jul2003", "01nov2008",
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung",
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung",
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com",
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com",
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45,
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L,
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA),
est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar",
"China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)",
"Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid",
"Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified",
"Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid",
"Paris", "Not Specified"), country = c("Algeria", "Algeria",
"Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar",
"Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified",
"Not Specified", "Not Specified", "Not Specified", "Not Specified",
"Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane",
"Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "",
"", "", "", ""), relocation = c("", "", "", "", "", ""),
sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas",
"Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products",
"Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction",
"Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction",
"Energy", "Energy", "Energy", "Life sciences", "Life sciences"
), activity = c("Manufacturing", "Extraction", "Extraction",
"Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L,
145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes",
"Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7,
29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No",
"No"), fdi_type = c("New", "New", "New", "Expansion", "New",
"New"), fdi_status = c("Announced", "Announced", "Announced",
"Opened", "Announced", "Opened"), year = c(2013L, 2003L,
2008L, 2017L, 2017L, 2003L), code_d = c("012", "012", "012",
"012", "012", "012"), income_d = c("MIDLW", "MIDLW", "MIDLW",
"MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", "Africa",
"Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic",
"Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L,
0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA",
"DZA", "DZA"), isic = c("26", "11", "11", "11", "24", "85"
), isic4 = c(2695, 1110, 1110, 1110, 2411, 8519), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying",
"Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying")), datalabel = "", time.stamp = "24 May 2021 12:23", formats = c("%111s",
"%125s", "%125s", "%105s", "%10.0g", "%10.0g", "%10.0g", "%10.0g",
"%28s", "%52s", "%54s", "%31s", "%44s", "%51s", "%49s", "%53s",
"%70s", "%28s", "%29s", "%92s", "%32s", "%40s", "%10.0g", "%9s",
"%10.0g", "%9s", "%12s", "%14s", "%19s", "%10.0g", "%10.0g",
"%10.0g", "%3s", "%9s", "%7s", "%14s", "%8.0g", "%3s"), types = c(111L,
125L, 125L, 105L, 65526L, 65528L, 65526L, 65529L, 28L, 52L, 54L,
31L, 44L, 51L, 49L, 53L, 70L, 28L, 29L, 92L, 32L, 40L, 65528L,
9L, 65526L, 9L, 12L, 14L, 19L, 65526L, 65526L, 65529L, 3L, 5L,
7L, 14L, 65530L, 3L), val.labels = structure(c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "")), var.labels = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "ISO country numeric code",
"Classif World Bank by income level (See note)", "Continent",
"Official language", "1 if landlocked", "ISO3 alpha code"), version = 118L, label.table = list(), expansion.fields = list(
c("revenues_usd_ml", "destring", "Characters removed were:"
), c("revenues_usd_ml", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("Headcount", "destring", "Characters removed were:"),
c("Headcount", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("r_d_exp", "destring", "Characters removed were:"),
c("r_d_exp", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("est_year", "destring", "Characters removed were:"),
c("est_year", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("fdi_jobs", "destring", "Characters removed were:"),
c("fdi_jobs", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("capital", "destring", "Characters removed were:"),
c("capital", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("g_lon", "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
), c("g_lon", "destring", "Characters removed were:"), c("g_lat",
"destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
), c("g_lat", "destring", "Characters removed were:"), c("year",
"destring", "Characters removed were:"), c("year", "destring_cmd",
"destring year, replace"), c("_dta", "ReS_i", "iso3"), c("_dta",
"ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta",
"ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("income_d",
"note1", "World Bank Classification:"), c("income_d", "note2",
"LOW: Low Income"), c("income_d", "note3", "MIDLW: Lower middle income"
), c("income_d", "note4", "MIDUP: Upper middle income"),
c("income_d", "note5", "HOECD: High income (OECD)"), c("income_d",
"note6", "HOTHR: High income (non OECD)"), c("income_d",
"note0", "6")), byteorder = "LSF", orig.dim = c(11478L, 38L
), data.label = character(0), row.names = c(NA, 6L), class = "data.frame")
我的任务是把“country”列的字符串拼接到“state”列中,但仅针对取值不是 "Not Specified" 的“state”。
借助下面这行代码,我已经接近目标了:
# Asker's attempt, kept exactly as posted (it produces the warning quoted below).
# The left-hand side selects only the rows whose state is not "Not Specified",
# but the right-hand side pastes the columns of FDI[c] without that row filter,
# so the replacement is not subset to the same rows as the target.
# NOTE(review): `c` is not defined in this snippet - presumably a vector of
# column names or indices (e.g. state and country); confirm with the asker.
FDI$state[!(FDI$state=="Not Specified")] <- do.call(paste, c(FDI[c], sep = ", "))
结果基本符合我的预期,但对于某些 "Not Specified" 值,它返回了 "Not Specified, nameofthecountry",即使 !(FDI$state=="Not Specified") 在该单元格中为 FALSE。
它还返回了这个警告:
Warning message: In FDI$state[!(FDI$state == "Not Specified")] <- do.call(paste, : number of items to replace is not a multiple of replacement length
我该怎么办?
如果我没理解错的话,你可以使用 dplyr 包,调用 mutate() 和 if_else()。
library(dplyr)
# Replace `state` with `country`, but only in rows where `state` is specified.
# The placeholder used in the data is "Not Specified" (not "Non Specified");
# with the wrong literal the condition is TRUE for every non-NA row, so the
# unspecified rows would be overwritten as well.
# To append the country instead of replacing the state, pass
# paste(state, country, sep = ", ") as the second argument of if_else().
# The pipeline only returns the modified data frame; assign it to keep it.
FDI %>%
  mutate(state = if_else(state != "Not Specified", country, state))
这将检查 state 的值是否不同于表示“未指定”的取值(数据中为 "Not Specified");如果不同,则将 state 的值替换为 country 的值。
我们可以使用case_when
library(dplyr)
# Same conditional replacement expressed with case_when().
# The placeholder used in the data is "Not Specified" (not "Non Specified");
# with the wrong literal the first branch matches every non-NA row.
# `TRUE ~ state` is the fallback that leaves all remaining rows unchanged.
# The pipeline only returns the modified data frame; assign it to keep it.
FDI %>%
  mutate(state = case_when(state != "Not Specified" ~ country, TRUE ~ state))
我有一个如下所示的数据框:
structure(list(date = c("01dec2013", "01jul2003", "01nov2008",
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung",
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung",
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com",
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com",
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45,
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L,
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA),
est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar",
"China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)",
"Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid",
"Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified",
"Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid",
"Paris", "Not Specified"), country = c("Algeria", "Algeria",
"Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar",
"Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified",
"Not Specified", "Not Specified", "Not Specified", "Not Specified",
"Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane",
"Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "",
"", "", "", ""), relocation = c("", "", "", "", "", ""),
sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas",
"Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products",
"Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction",
"Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction",
"Energy", "Energy", "Energy", "Life sciences", "Life sciences"
), activity = c("Manufacturing", "Extraction", "Extraction",
"Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L,
145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes",
"Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7,
29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No",
"No"), fdi_type = c("New", "New", "New", "Expansion", "New",
"New"), fdi_status = c("Announced", "Announced", "Announced",
"Opened", "Announced", "Opened"), year = c(2013L, 2003L,
2008L, 2017L, 2017L, 2003L), code_d = c("012", "012", "012",
"012", "012", "012"), income_d = c("MIDLW", "MIDLW", "MIDLW",
"MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", "Africa",
"Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic",
"Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L,
0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA",
"DZA", "DZA"), isic = c("26", "11", "11", "11", "24", "85"
), isic4 = c(2695, 1110, 1110, 1110, 2411, 8519), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying",
"Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying")), datalabel = "", time.stamp = "24 May 2021 12:23", formats = c("%111s",
"%125s", "%125s", "%105s", "%10.0g", "%10.0g", "%10.0g", "%10.0g",
"%28s", "%52s", "%54s", "%31s", "%44s", "%51s", "%49s", "%53s",
"%70s", "%28s", "%29s", "%92s", "%32s", "%40s", "%10.0g", "%9s",
"%10.0g", "%9s", "%12s", "%14s", "%19s", "%10.0g", "%10.0g",
"%10.0g", "%3s", "%9s", "%7s", "%14s", "%8.0g", "%3s"), types = c(111L,
125L, 125L, 105L, 65526L, 65528L, 65526L, 65529L, 28L, 52L, 54L,
31L, 44L, 51L, 49L, 53L, 70L, 28L, 29L, 92L, 32L, 40L, 65528L,
9L, 65526L, 9L, 12L, 14L, 19L, 65526L, 65526L, 65529L, 3L, 5L,
7L, 14L, 65530L, 3L), val.labels = structure(c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "")), var.labels = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "ISO country numeric code",
"Classif World Bank by income level (See note)", "Continent",
"Official language", "1 if landlocked", "ISO3 alpha code"), version = 118L, label.table = list(), expansion.fields = list(
c("revenues_usd_ml", "destring", "Characters removed were:"
), c("revenues_usd_ml", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("Headcount", "destring", "Characters removed were:"),
c("Headcount", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("r_d_exp", "destring", "Characters removed were:"),
c("r_d_exp", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("est_year", "destring", "Characters removed were:"),
c("est_year", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("fdi_jobs", "destring", "Characters removed were:"),
c("fdi_jobs", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("capital", "destring", "Characters removed were:"),
c("capital", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
), c("g_lon", "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
), c("g_lon", "destring", "Characters removed were:"), c("g_lat",
"destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
), c("g_lat", "destring", "Characters removed were:"), c("year",
"destring", "Characters removed were:"), c("year", "destring_cmd",
"destring year, replace"), c("_dta", "ReS_i", "iso3"), c("_dta",
"ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta",
"ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("income_d",
"note1", "World Bank Classification:"), c("income_d", "note2",
"LOW: Low Income"), c("income_d", "note3", "MIDLW: Lower middle income"
), c("income_d", "note4", "MIDUP: Upper middle income"),
c("income_d", "note5", "HOECD: High income (OECD)"), c("income_d",
"note6", "HOTHR: High income (non OECD)"), c("income_d",
"note0", "6")), byteorder = "LSF", orig.dim = c(11478L, 38L
), data.label = character(0), row.names = c(NA, 6L), class = "data.frame")
我的任务是把“country”列的字符串拼接到“state”列中,但仅针对取值不是 "Not Specified" 的“state”。
借助下面这行代码,我已经接近目标了:
# Asker's attempt, kept exactly as posted (it produces the warning quoted below).
# The left-hand side selects only the rows whose state is not "Not Specified",
# but the right-hand side pastes the columns of FDI[c] without that row filter,
# so the replacement is not subset to the same rows as the target.
# NOTE(review): `c` is not defined in this snippet - presumably a vector of
# column names or indices (e.g. state and country); confirm with the asker.
FDI$state[!(FDI$state=="Not Specified")] <- do.call(paste, c(FDI[c], sep = ", "))
结果基本符合我的预期,但对于某些 "Not Specified" 值,它返回了 "Not Specified, nameofthecountry",即使 !(FDI$state=="Not Specified") 在该单元格中为 FALSE。
它还返回了这个警告:
Warning message: In FDI$state[!(FDI$state == "Not Specified")] <- do.call(paste, : number of items to replace is not a multiple of replacement length
我该怎么办?
如果我没理解错的话,你可以使用 dplyr 包,调用 mutate() 和 if_else()。
library(dplyr)
# Replace `state` with `country`, but only in rows where `state` is specified.
# The placeholder used in the data is "Not Specified" (not "Non Specified");
# with the wrong literal the condition is TRUE for every non-NA row, so the
# unspecified rows would be overwritten as well.
# To append the country instead of replacing the state, pass
# paste(state, country, sep = ", ") as the second argument of if_else().
# The pipeline only returns the modified data frame; assign it to keep it.
FDI %>%
  mutate(state = if_else(state != "Not Specified", country, state))
这将检查 state 的值是否不同于表示“未指定”的取值(数据中为 "Not Specified");如果不同,则将 state 的值替换为 country 的值。
我们可以使用case_when
library(dplyr)
# Same conditional replacement expressed with case_when().
# The placeholder used in the data is "Not Specified" (not "Non Specified");
# with the wrong literal the first branch matches every non-NA row.
# `TRUE ~ state` is the fallback that leaves all remaining rows unchanged.
# The pipeline only returns the modified data frame; assign it to keep it.
FDI %>%
  mutate(state = case_when(state != "Not Specified" ~ country, TRUE ~ state))