如何在 R 中正确删除特定列表元素的异常值?

How to properly remove outliers for specific list elements in R?

我正在尝试删除嵌套在分组数据帧列表中的两个特定列(var1 和 var2)的异常值。

对于此任务,我使用 dplyr 中的 filter() 函数和 grDevices 中的 boxplot.stats 函数。

这是我目前尝试过的方法:

library(dplyr)
library(grDevices)

########### converting initial df to a list of groups ######################
# Split a data frame / tibble into a list of sub-frames, one element per
# group defined by the column(s) named in `col`.
#
# tibble: the data frame or tibble to split.
# col:    name of the column whose values define the groups.
# Returns the list produced by split(), named after the grouping values.
split_tibble <- function(tibble, col = 'col') {
  grouping <- tibble[, col]
  split(tibble, grouping)
}
# Split `df` into a list of data frames keyed by the values of `decil`.
dflist <- split_tibble(df, 'decil')

####### applying function to remove outliers for all list elements ######

# For each list element, try to drop the rows whose var1 / var2 value is
# listed in the `$out` component returned by boxplot.stats().
# NOTE(review): the first filter() is never given `df`, so the condition
# `!var1 %in% ...` is passed as filter()'s first argument and evaluated
# outside any data frame -- this appears to be the source of the
# "object 'var1' not found" error reported below.
dflist <- lapply(dflist , function(df) filter(!var1 %in% 
                                       boxplot.stats(var1)$out) %>%
                                       filter(!var2 %in%
                                       boxplot.stats(var2)$out))

控制台输出:

Error in var1 %in% boxplot.stats(var1)$out : object 'var1' not found

数据

df <- structure(list(año = structure(c(6940, 8035, 8766, 9496, 10227, 
10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340, 16071, 
16801, 17532, 6940, 8035, 8766, 9496, 10227, 10957, 11688, 12418, 
12784, 13149, 13879, 14610, 15340, 16071, 16801, 17532, 6940, 
8035, 8766, 9496, 10227, 10957, 11688, 12418, 12784, 13149, 13879, 
14610, 15340, 16071, 16801, 17532, 6940, 8035, 8766, 9496, 10227, 
10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340, 16071, 
16801, 17532, 6940, 8035, 8766, 9496, 10227, 10957, 11688, 12418, 
12784, 13149, 13879, 14610, 15340, 16071, 16801, 17532, 6940, 
8035, 8766, 9496, 10227, 10957, 11688, 12418, 12784, 13149, 13879, 
14610, 15340, 16071, 16801, 17532, 6940, 8035, 8766, 9496, 10227, 
10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340, 16071, 
16801, 17532, 6940, 8035, 8766, 9496, 10227, 10957, 11688, 12418, 
12784, 13149, 13879, 14610, 15340, 16071, 16801, 17532, 6940, 
8035, 8766, 9496, 10227, 10957, 11688, 12418, 12784, 13149, 13879, 
14610, 15340, 16071, 16801, 17532, 6940, 8035, 8766, 9496, 10227, 
10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340, 16071, 
16801, 17532), class = "Date"), decil = structure(c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L), .Label = c("Decil 1", "Decil 10", "Decil 2", "Decil 3", 
"Decil 4", "Decil 5", "Decil 6", "Decil 7", "Decil 8", "Decil 9"
), class = "factor"), var1 = c(-0.518146762073594, 0.308138265449932, 
-46.9279778645287, -2.73923609850588, -0.116791699821611, 1.276823919767, 
-0.591033604229833, 2.217410209762, -1.22771361710334, 2.91323734975109, 
1.9403678544531, -1.710307316049, 0.114882675671299, 1.31658931355581, 
1.46477971543751, 0.271712366085317, 1.34328539491741, 0.0420320020065887, 
-40.1490999871048, -2.65571871864779, 1.08040626539541, 0.501353172078248, 
0.264579769939744, 3.45907008361297, -1.47337410644853, 0.753187592344928, 
-1.67914957857585, -0.386430954714176, 1.18684061581655, -0.0837921856848724, 
0.271367995890336, -0.244522199262534, 0.244746086109351, 0.274660079065288, 
-46.5074910900121, -1.20391288467938, 0.221116802499613, 1.68878772891571, 
-0.0553564396020631, 2.47304289689454, -1.49473828679342, 2.06995781551323, 
0.995111688295987, -1.23590101185821, -0.0513493069577327, 1.57518955283768, 
1.49952051319558, 0.569875774759632, -0.0267057797736821, 0.0595105282871455, 
-45.4844375744304, -1.98229069194358, 0.692781223252039, 1.45357299983422, 
0.274519154273341, 2.50570229399105, -1.58798738651613, 1.8318771262172, 
0.541185894582713, -1.09932567808292, 0.159443784781767, 1.38386952581564, 
1.42469406477504, 0.410413545862179, -0.0805341335460025, -0.144568025477976, 
-44.9911674142431, -1.88765679042184, 1.01321912136977, 1.16094403156238, 
0.427582908849506, 2.59548917592467, -1.91921130444488, 1.86512952445194, 
0.0990294699338842, -0.837785709745078, 0.543096597486442, 0.741481129770513, 
1.29946597715581, 0.622336720429403, 0.206256733184722, 0.139031953354982, 
-44.6027046214633, -1.90241726271236, 0.690016525955506, 1.54716844723562, 
0.256469067826587, 2.46741215896569, -1.52082399890795, 1.46298279854535, 
-0.178133254716425, -0.570996073876508, 0.530083911456838, 0.771074074546392, 
1.22148158785322, 0.480369193797477, -0.179320234778936, -0.0401792821387455, 
-43.8150991019374, -2.1998371424533, 1.06632797701267, 0.742663665613463, 
0.534777779842798, 2.24439518748242, -1.19081274931006, 1.26230193789639, 
-0.256049568632538, -0.54511282604089, 0.828289107854747, 0.198585514462626, 
1.12490515302622, 0.454776270025511, 0.377935359008342, 0.308138951062849, 
-43.6715092679294, -1.68702475271693, 0.963026950989102, 0.924694309980609, 
0.0909783913993086, 2.19423265975474, -1.04193226868249, 1.26951414528773, 
-0.599866837527287, -0.504144411887512, 1.2633719573124, -0.208565860801489, 
1.28053568465138, 0.223463615789722, 0.572034514146965, 0.0762802869455859, 
-42.9497886598882, -2.21763771759914, 1.08353667375486, 1.27220562258605, 
-0.0199740141807546, 2.34686607111593, -1.01924956162359, 0.996632035015618, 
-0.943588762041933, -0.311576607205403, 1.18147547163377, -0.283811580551448, 
1.1151512141408, 0.256848322836834, 0.698508059308152, -0.0557319323771243, 
-42.0314342083723, -2.56639439214237, 1.09841238497413, 0.970528742740132, 
-0.041290083196072, 2.89203820342315, -1.13995943017165, 1.0666144712339, 
-1.48427029516519, -0.237956102817481, 1.18845793975459, -0.0723117556104279, 
0.578897782929168, 0.310574312466805), var2 = c(-22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629, -22.1000495330589, 
0.285801450939386, 0.264240512093975, -0.299493284604054, 0.423776690674324, 
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906, 
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523, 
0.0317493839324935, 0.399645473642857, 0.149656209777629)), row.names = c(NA, 
-160L), groups = structure(list(decil = structure(1:10, .Label = c("Decil 1", 
"Decil 10", "Decil 2", "Decil 3", "Decil 4", "Decil 5", "Decil 6", 
"Decil 7", "Decil 8", "Decil 9"), class = "factor"), .rows = structure(list(
    1:16, 17:32, 33:48, 49:64, 65:80, 81:96, 97:112, 113:128, 
    129:144, 145:160), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 10L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

我可以使用任何其他库来完成此任务吗?

您可以使用 dplyr 中新的按行(rowwise)操作逐步完成此任务:
library(tidyverse)

# Remove boxplot outliers of var1 and var2 separately within each `decil`.
df %>%
  ungroup() %>%
  # One row per decil; that decil's rows are nested in the list-column `data`.
  nest_by(decil) %>%
  mutate(
    # Outlier values of each variable, computed on the full nested data.
    out_var1 = list(boxplot.stats(data$var1)$out),
    out_var2 = list(boxplot.stats(data$var2)$out),
    # Keep only the rows whose var1 and var2 are both absent from the
    # corresponding outlier sets.
    filtered_df = list(
      filter(data, !(var1 %in% out_var1), !(var2 %in% out_var2))
    )
  ) %>%
  # Discard the helper columns and expand the filtered rows again.
  select(decil, filtered_df) %>%
  unnest(filtered_df) %>%
  ungroup()