在 R 中对数据框的多个列应用多个 gsub 函数

apply multiple gsub functions columns of dataframe, R

我想将多个 gsub 函数应用到数据框的所有 4 列,并且已经按照以下方式尝试过,但没有用,关于如何解决这个问题有什么想法吗?谢谢!

# Attempt from the question (reported as not working); kept unchanged.
# fn lowercases its input and pipes it through a chain of gsub() calls.
# NOTE(review): every gsub() call is handed x, but the only parameter of this
# function is jobs, so x is not defined inside fn.
# NOTE(review): single backslashes such as \w, \d and \s are not valid escapes
# in an R string literal; they need to be doubled (\\w, \\d, \\s).
# NOTE(review): [[:punctx:]] looks like a typo for [[:punct:]] - confirm.
fn <- function(jobs) {jobs %>% tolower() %>% gsub("@\w+https?://.+\d+\w*\d*#\w+[^\x01-\x7F]^\s+\s+$", "", x) %>% gsub("[[:punctx:]]", " ", x) %>% gsub("[ |\t]+", " ", x) %>% gsub("senior", "", x) %>% gsub("junior", "", x) %>% gsub("trainee", "", x) %>% gsub("head", "", x)}
# Wraps fn with colwise() for four columns.
# NOTE(review): .cols is given strings of the form jobs$job1; presumably the
# plain column names job1..job4 were intended - confirm against colwise docs.
fncol <- colwise(fn, .cols=c("jobs$job1", "jobs$job2", "jobs$job3", "jobs$job4"))
# Example data: 20 rows, four character columns (job1..job4) holding job
# titles, with NA where a person has no entry for that position.
jobs <- structure(
  list(
    job1 = c(
      "PhD fellow",
      "Java developer Intern",
      "Optical Engineer",
      "Senior DWH&BI Engineer",
      "Senior Software Engineer",
      "Software Developer",
      "Data Engineer",
      "Application Software Engineer",
      "Software Developer",
      "Senior Web Developer",
      "Web Developer",
      "Web Developer",
      "Software Engineer",
      "Software Engineer",
      "Trainee ES Computer",
      "Associate Software Engineer",
      "Fullstack IOS Developer",
      "Technical Delivery Manager/ Project Manager",
      "Software Architect",
      "Software Developer"
    ),
    job2 = c(
      "Research Scientist",
      "Analytics Analyst",
      "Senior Developer",
      "Senior Data & ML  Engineer",
      "Graduate Teaching Assistant",
      "Software Developer",
      "Machine Learning Engineer",
      "Akademischer  Mitarbeiter, Machine  Learning  and Analytics",
      "Backend Develope",
      "Lead PHP Developer",
      "PHP System Analytic / Software specialist",
      "Webcreater",
      "Data Engineer",
      "Software Engineer",
      "Assistant Network Administrator",
      "Frontend Engineer",
      "Application Infrastructor Lead",
      "Software Engineer",
      "Application Developer",
      "Software Developer"
    ),
    job3 = c(
      "Data Scientist",
      "Machine Learning Engineer",
      "Application Developer Associate Manager",
      NA,
      "Co-Founder & CTO",
      rep(NA, 5),
      "Lead PHP/SugarCRM developer",
      "Senior PHP Developer",
      "Data Analysing (Researcher)",
      NA,
      "Application Developer Consultance",
      "Manager L1 (UI/ Frontend)",
      "Senior Software Architect",
      "Software Engineering Manager/ Solution Architect",
      "Software Developer & Consultance",
      "AI Developer"
    ),
    job4 = c(
      rep(NA, 10),
      "Software Architect / Development lead",
      "Team Leader",
      NA,
      NA,
      "Senior Application Development Specialist",
      "Senior Associate Experience Technology",
      NA,
      "Senior Software Developer",
      "Fullstack Developer/ ProductOwner",
      NA
    )
  ),
  # Compact row names for 20 rows, and the class vector of a tibble.
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)

先让您的函数能够处理单独的一列 -

# Clean one character vector of job titles.
#
# x: character vector (one column of the data frame).
# Returns a character vector of the same length as x: lowercased, punctuation
# replaced by spaces, runs of spaces/tabs collapsed to one space, and the words
# senior, junior, trainee and head removed.
# The `.` placeholder puts the piped value into gsub()'s third argument (x)
# rather than its first (pattern).
fn <- function(x) {
  x %>%
    tolower() %>%
    # Backslashes are doubled: "\w", "\d" and "\s" are not valid escapes in an
    # R string literal, so the regex classes have to be written "\\w" etc.
    # NOTE(review): the pieces of this pattern are concatenated, not joined by
    # `|`, so it only matches text containing all of them in this order;
    # alternation may have been intended - confirm.
    gsub("@\\w+https?://.+\\d+\\w*\\d*#\\w+[^\x01-\x7F]^\\s+\\s+$", "", .) %>%
    gsub("[[:punct:]]", " ", .) %>%
    gsub("[ |\t]+", " ", .) %>%
    # Seniority words are removed after whitespace is collapsed, so a leading
    # or trailing space can remain in the result.
    gsub("senior", "", .) %>%
    gsub("junior", "", .) %>%
    gsub("trainee", "", .) %>%
    gsub("head", "", .)
}

然后您可以使用 lapply 将它应用于每一列 -
# Run fn over each column of jobs; assigning into jobs[] replaces the column
# contents in place instead of overwriting jobs with a plain list.
jobs[] <- lapply(X = jobs, FUN = fn)

或者使用 dplyr 中的 across -

library(dplyr)
# Apply fn to every column and print the result (jobs itself is not modified).
# `.cols` is supplied explicitly because calling across() without it is
# deprecated as of dplyr 1.1.0.
jobs %>% mutate(across(.cols = everything(), .fns = fn))

我们也可以在 base R(R 4.1.0 及以上)中使用原生管道 |> -

# Clean one character vector of job titles using the native pipe (R >= 4.1.0).
#
# x: character vector (one column of the data frame).
# Returns a character vector of the same length as x: lowercased, punctuation
# replaced by spaces, runs of spaces/tabs collapsed to one space, and the words
# senior, junior, trainee and head removed.
# `pattern` and `replacement` are passed by name so that the piped value, which
# |> always places first, is matched to gsub()'s remaining argument x.
fn <- function(x) {
  x |>
    tolower() |>
    # Backslashes are doubled: "\w", "\d" and "\s" are not valid escapes in an
    # R string literal, so the regex classes have to be written "\\w" etc.
    # NOTE(review): the pieces of this pattern are concatenated, not joined by
    # `|`, so it only matches text containing all of them in this order;
    # alternation may have been intended - confirm.
    gsub(
      pattern = "@\\w+https?://.+\\d+\\w*\\d*#\\w+[^\x01-\x7F]^\\s+\\s+$",
      replacement = ""
    ) |>
    gsub(pattern = "[[:punct:]]", replacement = " ") |>
    gsub(pattern = "[ |\t]+", replacement = " ") |>
    # Seniority words are removed after whitespace is collapsed, so a leading
    # or trailing space can remain in the result.
    gsub(pattern = "senior", replacement = "") |>
    gsub(pattern = "junior", replacement = "") |>
    gsub(pattern = "trainee", replacement = "") |>
    gsub(pattern = "head", replacement = "")
}

# Run fn over each column of jobs; assigning into jobs[] replaces the column
# contents in place instead of overwriting jobs with a plain list.
jobs[] <- lapply(X = jobs, FUN = fn)

-输出

jobs
# A tibble: 20 x 4
   job1                             job2                                      job3                                job4                      
   <chr>                            <chr>                                     <chr>                               <chr>                     
 1 "phd fellow"                     "research scientist"                      "data scientist"                     <NA>                     
 2 "java developer intern"          "analytics analyst"                       "machine learning engineer"          <NA>                     
 3 "optical engineer"               " developer"                              "application developer associate m…  <NA>                     
 4 " dwh bi engineer"               " data ml engineer"                        <NA>                                <NA>                     
 5 " software engineer"             "graduate teaching assistant"             "co founder cto"                     <NA>                     
 6 "software developer"             "software developer"                       <NA>                                <NA>                     
 7 "data engineer"                  "machine learning engineer"                <NA>                                <NA>                     
 8 "application software engineer"  "akademischer mitarbeiter machine learni…  <NA>                                <NA>                     
 9 "software developer"             "backend develope"                         <NA>                                <NA>                     
10 " web developer"                 "lead php developer"                       <NA>                                <NA>                     
11 "web developer"                  "php system analytic software specialist" "lead php sugarcrm developer"       "software architect devel…
12 "web developer"                  "webcreater"                              " php developer"                    "team leader"             
13 "software engineer"              "data engineer"                           "data analysing researcher "         <NA>                     
14 "software engineer"              "software engineer"                        <NA>                                <NA>                     
15 " es computer"                   "assistant network administrator"         "application developer consultance" " application development…
16 "associate software engineer"    "frontend engineer"                       "manager l1 ui frontend "           " associate experience te…
17 "fullstack ios developer"        "application infrastructor lead"          " software architect"                <NA>                     
18 "technical delivery manager pro… "software engineer"                       "software engineering manager solu… " software developer"     
19 "software architect"             "application developer"                   "software developer consultance"    "fullstack developer prod…
20 "software developer"             "software developer"                      "ai developer"                       <NA>