gsub 返回的列中,每个单元格都是本应作为整列内容的列表

Gsub returns a column with lists of what should be the column

我有一个数据框(tibble),如下所示:

# Example data (appears to be dput() output): a 103-row tibble with a numeric
# `year` column (1913-2015) and seven character columns. Many values carry
# bracketed footnote markers such as "[30,32] 3,900", and some of the
# backtick-quoted column names contain embedded newlines.
DT <- structure(list(year = c(1913, 1914, 1915, 1916, 1917, 1918, 1919, 
1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 
1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 
1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 
1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 
1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 
1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 
1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015), `Personal exemptions [1]Single
persons` = c("3000", 
"3000", "3000", "3000", "1000", "1000", "1000", "1000", "1000", 
"1000", "1000", "1000", "1500", "1500", "1500", "1500", "1500", 
"1500", "1500", "1000", "1000", "1000", "1000", "1000", "1000", 
"1000", "1000", "800", "750", "500", "500", "500", "500", "500", 
"500", "600", "600", "600", "600", "600", "600", "600", "600", 
"600", "600", "600", "600", "600", "600", "600", "600", "600", 
"600", "600", "600", "600", "600", "625", "675", "750", "750", 
"750", "[14,26] 750", "750", "750", "750", "1000", "1000", "1000", 
"1000", "1000", "1000", "1040", "1080", "[30] 1,900", "[30,32,33] 1,950", 
"[30,32,33] 2,000", "[30,32,33] 2,050", "[30,32] 2,150", "[30,32] 2,300", 
"[30,32] 2,350", "[30,32] 2,450", "[30,32] 2,500", "[30,32] 2,550", 
"[30,32] 2,650", "[30,32] 2,700", "[30,32] 2,750", "[30,32] 2,800", 
"[30,32] 2,900", "[30,32] 3,000", "[30,32] 3,050", "[30,32] 3,100", 
"[30,32] 3,200", "[30,39] 3,300", "[30,39] 3,400", "[30,40] 3,500", 
"[30,40] 3,650", "[30,41] 3,650", "[30,41] 3,700", "[30,41] 3,800", 
"[30,32] 3,900", "[30,32] 3,950", "[30,32] 4,000"), `Personal exemptions [1]Married
couples` = c("4000", 
"4000", "4000", "4000", "2000", "2000", "2000", "2000", "[4] 2,500", 
"[4] 2,500", "[4] 2,500", "2500", "3500", "3500", "3500", "3500", 
"3500", "3500", "3500", "2500", "2500", "2500", "2500", "2500", 
"2500", "2500", "2500", "2000", "1500", "1200", "1200", "[11] 1,000", 
"[11] 1,000", "1000", "1000", "[14] 1,200", "[14] 1,200", "[14] 1,200", 
"[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", 
"[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", 
"[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", 
"[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,200", "[14] 1,250", 
"[14] 1,350", "[14] 1,500", "[14] 1,500", "[14] 1,500", "[14,26] 1,500", 
"[14,27] 1,500", "[14,27] 1,500", "[14,27] 1,500", "[14] 2,000", 
"[14] 2,000", "[14] 2,000", "[14] 2,000", "[14] 2,000", "[14] 2,000", 
"[14,30] 2,080", "[14,30] 2,160", "[30] 3,800", "[30,32,33] 3,900", 
"[30,32,33] 4,000", "[30,32,33] 4,100", "[30,32] 4,300", "[30,32] 4,600", 
"[30,32] 4,700", "[30,32] 4,900", "[30,32] 5,000", "[30,32] 5,100", 
"[30,32] 5,300", "[30,32] 5,400", "[30,32] 5,500", "[30,32] 5,600", 
"[30,32] 5,800", "[30,32] 6,000", "[30,32] 6,100", "[30,32] 6,200", 
"[30,32] 6,400", "[30,39] 6,600", "[30,39] 6,800", "[30,40] 7,000", 
"[30,40] 7,300", "[30,41] 7,300", "[30,41] 7,400", "[30,41] 7,600", 
"[30,32] 7,800", "[30,32] 7,900", "[30,32] 8,000"), `Personal exemptions [1]Dependents` = c("N/A", 
"N/A", "N/A", "N/A", "200", "200", "200", "200", "400", "400", 
"400", "400", "400", "400", "400", "400", "400", "400", "400", 
"400", "400", "400", "400", "400", "400", "400", "400", "400", 
"400", "350", "350", "[11]   500", "[11]   500", "500", "500", 
"600", "600", "600", "600", "600", "600", "600", "600", "600", 
"600", "600", "600", "600", "600", "600", "600", "600", "600", 
"600", "600", "600", "600", "625", "675", "750", "750", "750", 
"[26] 750", "[27] 750", "[27] 750", "[27] 750", "1000", "1000", 
"1000", "1000", "1000", "1000", "[30] 1,040", "[30] 1,080", "[30] 1,900", 
"[30,32,33] 1,950", "[30,32,33] 2,000", "[30,32,33] 2,050", "[30,32] 2,150", 
"[30,32] 2,300", "[30,32] 2,350", "[30,32] 2,450", "[30,32] 2,500", 
"[30,32] 2,550", "[30,32] 2,650", "[30,32] 2,700", "[30,32] 2,750", 
"[30,32] 2,800", "[30,32] 2,900", "[30,32] 3,000", "[30,32] 3,050", 
"[30,32] 3,100", "[30,32] 3,200", "[30,39] 3,300", "[30,39] 3,400", 
"[30,40] 3,500", "[30,40] 3,650", "[30,41] 3,650", "[30,41] 3,700", 
"[30,41] 3,800", "[30,32] 3,900", "[30,32] 3,950", "[30,32] 4,000"
), `Tax rates for regular taxLowest bracketTax rate [2]
(percent)` = c("1", 
"1", "1", "2", "2", "6", "4", "4", "4", "4", "[5] 3.0", "[6] 1.5", 
"[7] 1.125", "[7] 1.125", "[7] 1.125", "[8] 1.125", "[8] 0.375", 
"[8] 1.125", "[8] 1.125", "4", "4", "[9] 4.0", "[9] 4.0", "[9] 4.0", 
"[9] 4.0", "[9] 4.0", "[9] 4.0", "[9,10] 4.4", "[9] 10.0", "[9] 19.0", 
"[9] 19.0", "23", "23", "[13] 19.0", "[13] 19.0", "[15] 16.6", 
"[15] 16.6", "[16] 17.4", "20.399999999999999", "22.199999999999999", 
"22.199999999999999", "20", "20", "20", "20", "20", "20", "20", 
"20", "20", "20", "16", "14", "14", "14", "14", "14", "14", "14", 
"14", "14", "[25] 14.0", "14", "14", "[28] 14.0", "[28] 14.0", 
"[28] 14.0", "[28] 14.0", "[28,29] 14.0", "[28] 12.0", "[28] 11.0", 
"[28] 11.0", "[28] 11.0", "[28] 11.0", "11", "[34] 15.0", "[34,35] 15.0", 
"[34,35] 15.0", "15", "15", "15", "15", "15", "15", "15", "15", 
"15", "[36] 15.0", "[37] 10.0", "10", "10", "10", "10", "10", 
"10", "10", "10", "10", "10", "10", "10", "10", "10"), `Tax rates for regular taxLowest bracketTaxable
income
under [3]` = c("20000", 
"20000", "20000", "20000", "2000", "4000", "4000", "4000", "4000", 
"4000", "4000", "4000", "4000", "4000", "4000", "4000", "4000", 
"4000", "4000", "4000", "4000", "4000", "4000", "4000", "4000", 
"4000", "4000", "4000", "2000", "2000", "2000", "2000", "2000", 
"2000", "2000", "4000", "4000", "4000", "4000", "4000", "4000", 
"4000", "4000", "4000", "4000", "4000", "4000", "4000", "4000", 
"4000", "4000", "1000", "1000", "1000", "1000", "1000", "1000", 
"1000", "1000", "1000", "1000", "1000", "1000", "1000", "[28] 3,200", 
"[28] 3,200", "[28] 3,400", "[28] 3,400", "[28] 3,400", "[28] 3,400", 
"[28] 3,400", "[28] 3,400", "[28, 31] 3,540", "[28,31] 3,670", 
"[31] 3,000", "[31,35] 29,750", "[31,35] 30,950", "[31,35] 32,450", 
"[31] 34,000", "[31] 35,800", "[31] 36,900", "[31] 38,000", "[31] 39,000", 
"[31] 40,100", "[31] 41,200", "[31] 42,350", "[31] 43,050", "[31] 43,850", 
"[31,38] 6,000", "[31] 12,000", "[31] 14,000", "[31] 14,300", 
"[31] 14,600", "[31] 15,100", "[31] 15,650", "[31] 16,050", "[31] 16,700", 
"[31] 16,750", "[31] 17,000", "[31] 17,400", "[31] 17,850", "[31] 18,150", 
"[31] 18,550"), `Tax rates for regular taxHighest bracketTax rate [2]
(percent)` = c("7", 
"7", "7", "15", "67", "77", "73", "73", "73", "58", "[5] 43.5", 
"46", "25", "25", "25", "25", "24", "25", "25", "63", "63", "63", 
"63", "79", "79", "79", "79", "[10] 81.1", "81", "88", "88", 
"[12] 94.0", "[12] 94.0", "[13] 86.45", "[13] 86.45", "[15] 82.13", 
"[15] 82.13", "[16] 84.36", "[17] 91.0", "[18] 92.0", "[18] 92.0", 
"[19] 91.0", "[19] 91.0", "[19] 91.0", "[19] 91.0", "[19] 91.0", 
"[19] 91.0", "[19] 91.0", "[19] 91.0", "[19] 91.0", "[19] 91.0", 
"77", "70", "70", "70", "[20] 75.25", "[21] 77.0", "[22] 71.75", 
"[23] 70.0", "[24] 70.0", "[24] 70.0", "[24, 25] 70.0", "[24] 70.0", 
"[24] 70.0", "[24] 70.0", "[24] 70.0", "[24] 70.0", "[24] 70.0", 
"[24,29] 69.125", "50", "50", "50", "50", "50", "38.5", "[34,35] 28.0", 
"[34,35] 28.0", "[34,35] 28.0", "31", "31", "39.600000000000001", 
"39.600000000000001", "39.600000000000001", "39.600000000000001", 
"39.600000000000001", "39.600000000000001", "39.600000000000001", 
"[36] 39.6", "[37] 39.1", "38.600000000000001", "35", "35", "35", 
"35", "35", "35", "35", "35", "35", "35", "[42] 39.6", "[42] 39.6", 
"[42] 39.6"), `Tax rates for regular taxHighest bracketTaxable
income
over [3]` = c("500000", 
"500000", "500000", "2000000", "2000000", "1000000", "1000000", 
"1000000", "1000000", "200000", "200000", "500000", "100000", 
"100000", "100000", "100000", "100000", "100000", "100000", "1000000", 
"1000000", "1000000", "1000000", "5000000", "5000000", "5000000", 
"5000000", "5000000", "5000000", "200000", "200000", "200000", 
"200000", "200000", "200000", "400000", "400000", "400000", "400000", 
"400000", "400000", "400000", "400000", "400000", "400000", "400000", 
"400000", "400000", "400000", "400000", "400000", "400000", "200000", 
"200000", "200000", "200000", "200000", "200000", "200000", "200000", 
"200000", "200000", "200000", "200000", "203200", "203200", "215400", 
"215400", "215400", "85600", "109400", "162400", "[31] 169,020", 
"[31] 175,250", "[31] 90,000", "[31,35] 29,750", "[31,35] 30,950", 
"[31,35] 32,450", "[31] 82,150", "[31] 86,500", "[31] 250,000", 
"[31] 250,000", "[31] 256,500", "[31] 263,750", "[31] 271,050", 
"[31] 278,450", "[31] 283,150", "[31] 288,350", "[31,38] 297,350", 
"[31] 307,050", "[31 ] 311,950", "[31] 319,100", "[31] 326,450", 
"[31] 336,550", "[31] 349,700", "[31] 357,700", "[31] 372,950", 
"[31] 373,650", "[31] 379,150", "[31] 388,350", "[31] 450,000", 
"[31] 457,600", "[31] 466,950")), row.names = c(NA, -103L), class = c("tbl_df", 
"tbl", "data.frame"))

我想删除第 2 列到第 8 列中方括号之间的所有内容(连同方括号本身),如下所示:

# Attempt to strip every "[...]" footnote marker from column 2.
# Backslashes must be doubled inside an R string literal ("\[" is not a valid
# string escape), so the regex \[[^\]]*\] is written "\\[[^\\]]*\\]".
# NOTE: on a tibble, `TaxRates[, 2]` is still a one-column tibble rather than
# a character vector, so gsub() has to coerce it with as.character() first.
# That coercion is what produces the unexpected result asked about here.
TaxRates[, 2] <- gsub("\\[[^\\]]*\\]", "", TaxRates[, 2], perl = TRUE)

然而,这不知何故使整列的每个单元格都被填成了一个列表,而这个列表的内容本应是整列的值。

我做错了什么?

您把 gsub 应用到了一个单列的 tibble 上(对 tibble 使用 TaxRates[,2] 返回的仍然是 tibble,而不是向量);您应该将其应用于向量,这可以通过使用 $ 或双方括号 (TaxRates[[2]]) 选择列来完成。

要将函数应用于多列,请使用 lapply :

# Remove every "[...]" footnote marker from columns 2 to 8.
# lapply() passes each column to gsub() as a plain vector, one at a time.
# Backslashes are doubled because "\[" is not a valid R string escape.
DT[2:8] <- lapply(
  DT[2:8],
  function(x) gsub("\\[[^\\]]*\\]", "", x, perl = TRUE)
)

一个 dplyr 解决方案,我还添加了去除首尾空白字符(whitespace)的步骤

# dplyr version: drop the "[...]" footnote markers from columns 2 to 8 and
# trim the leading/trailing whitespace left behind.
# Backslashes are doubled because "\[" is not a valid R string escape.
DT %>%
  mutate(across(
    .cols = 2:8,
    .fns = ~ trimws(gsub("\\[[^\\]]*\\]", "", .x, perl = TRUE))
  ))


# A tibble: 103 x 8
    year `Personal exemp… `Personal exemp… `Personal exemp… `Tax rates for …
   <dbl> <chr>            <chr>            <chr>            <chr>           
 1  1913 3000             4000             N/A              1               
 2  1914 3000             4000             N/A              1               
 3  1915 3000             4000             N/A              1               
 4  1916 3000             4000             N/A              2               
 5  1917 1000             2000             200              2               
 6  1918 1000             2000             200              6               
 7  1919 1000             2000             200              4               
 8  1920 1000             2000             200              4               
 9  1921 1000             2,500            400              4               
10  1922 1000             2,500            400              4               

我们可以使用 stringr 的 str_remove_all,并配合 dplyr

library(dplyr)
library(stringr)
# stringr version: str_remove_all() deletes every "[...]" footnote marker in
# columns 2 to 8, then trimws() strips the surrounding whitespace.
# Backslashes are doubled because "\[" is not a valid R string escape.
DT %>%
  mutate(across(2:8, ~ trimws(str_remove_all(.x, "\\[[^\\]]*\\]"))))