如何通过迭代方法或管道运算符从嵌套数据集中删除异常值行

How to delete rows of outliers from a nested dataset via an iterative method or pipe operator

我正在尝试从此嵌套数据集中删除异常值

df_join
# A tibble: 12 x 2
# Groups:   signals [12]
   signals     data             
   <chr>       <list>           
 1 P3FCz       <tibble [75 x 5]>
 2 P3Cz        <tibble [75 x 5]>
 3 P3Pz        <tibble [75 x 5]>
 4 LPPearlyFCz <tibble [75 x 5]>
 5 LPPearlyCz  <tibble [75 x 5]>
 6 LPPearlyPz  <tibble [75 x 5]>
 7 LPP1FCz     <tibble [75 x 5]>
 8 LPP1Cz      <tibble [75 x 5]>
 9 LPP1Pz      <tibble [75 x 5]>
10 LPP2FCz     <tibble [75 x 5]>
11 LPP2Cz      <tibble [75 x 5]>
12 LPP2Pz      <tibble [75 x 5]>

比如它的第一个元素包含这一系列变量:

df_join[[2]][[1]]
# A tibble: 75 x 5
   ID    GR    SES   COND      value
   <fct> <fct> <fct> <fct>     <dbl>
 1 01    RP    V     NEG-CTR -11.6  
 2 01    RP    V     NEG-NOC -11.1  
 3 01    RP    V     NEU-NOC  -4.00 
 4 04    RP    V     NEG-CTR  -0.314
 5 04    RP    V     NEG-NOC   0.239
 6 04    RP    V     NEU-NOC   5.04 
 7 06    RP    V     NEG-CTR  -0.214
 8 06    RP    V     NEG-NOC  -2.96 
 9 06    RP    V     NEU-NOC  -1.97 
10 07    RP    V     NEG-CTR  -2.83 

全部内容如下:

> dput(head(df_join))
structure(list(signals = c("P3FCz", "P3Cz", "P3Pz", "LPPearlyFCz", 
"LPPearlyCz", "LPPearlyPz"), data = list(structure(list(ID = structure(c(1L, 
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 
6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 
11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 
16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 
20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 
24L, 25L, 25L, 25L), .Label = c("01", "04", "06", "07", "08", 
"09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "21", 
"22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"), 
    GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), 
    COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
    ), class = "factor"), value = c(-11.6312151716924, -11.1438413285935, 
    -3.99591470944713, -0.314155675382471, 0.238885648959708, 
    5.03749946898385, -0.213621915029167, -2.96032491743069, 
    -1.97168681693488, -2.83109425298642, 1.09291198163802, -6.692991645215, 
    4.23849942428043, 2.9898889629932, 3.5510699900835, 9.57481668808606, 
    5.4167795618285, 1.7067607715475, -6.13036076093477, -2.82955734597919, 
    -2.50672211111696, 0.528517585832501, 8.16418133488309, 1.88777321897925, 
    -7.73588468896919, -9.83058052401056, -6.97442700196932, 
    1.27327945355082, 2.11962397764132, 0.524299677616254, -1.83310726842883, 
    0.658810483381172, -0.261373488428192, 4.37524298634374, 
    0.625555654900511, 3.19617639836154, 0.0405517582137798, 
    -3.29357103412113, -0.381435057304614, -5.73445509910268, 
    -6.1129152355645, -2.45744234877604, 2.95352732001065, 0.527721249096473, 
    1.91803490989119, -3.46703346467546, -2.40438419043702, -5.35374408162217, 
    -7.27028665849262, -7.1532211375959, -5.39955520296854, 2.65765002364624, 
    0.372495441513391, 6.24433066412776, 1.85698518142405, -0.564454675803529, 
    -0.068523080368053, -7.04782633579147, -4.52263283590558, 
    -6.62134671432544, 4.56661945182626, 3.05859761335498, 2.02997952225347, 
    -6.10523962206958, -0.521871236969702, -3.97851995684846, 
    -2.61258020387919, -4.13974828699279, -3.9210032516844, -4.63162466544638, 
    -4.36762718685405, -6.71005969834916, -4.22719611676328, 
    -0.229916506217565, -5.69725200870146)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
    ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 
    9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 
    13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 
    17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 
    21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 
    25L, 25L), .Label = c("01", "04", "06", "07", "08", "09", 
    "10", "11", "12", "13", "15", "16", "17", "18", "19", "21", 
    "22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"), 
    GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), 
    COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
    ), class = "factor"), value = c(-5.16524399006139, -5.53112490175437, 
    0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441, 
    -1.30019374375434, -3.59899040898949, -1.92340529575071, 
    2.19344184533265, 5.87900720863083, -5.92378937757888, 2.44958531767688, 
    3.10043497883256, 1.65779442628225, 13.7118233181713, 6.86178446511352, 
    5.31481098188172, -4.13240668697805, 0.162182285588285, 0.142083484505352, 
    5.42592103255673, 14.5496375672716, 4.52018125654081, -2.40677805475299, 
    -5.3832670295207, -1.55736964635117, 3.48359241788107, 4.23167123533126, 
    2.00051785325202, 1.48755216347718, 2.37269462739372, 1.30346907198835, 
    3.89476490634811, 1.87516303240986, 4.36353100770575, 1.9413417416824, 
    -2.22114447555529, -0.015852062711641, -2.76146409940467, 
    -3.51627712447581, 1.01799377568815, 1.74783962328435, 1.1303870721987, 
    2.16398550183836, -3.31557794753334, -1.83920975041768, -6.06703163736936, 
    -8.1566939611461, -9.23030396302541, -4.35545141573936, 0.906302081219897, 
    0.45401759063429, 3.80236232314171, 4.0336657306528, 2.0185967445137, 
    0.835589319243251, -4.6805488231028, -1.20746167339041, -5.50475999427345, 
    4.96594373869991, 4.1349308440931, 3.00187233307059, -5.61465293602653, 
    0.544596077279702, -5.20450410570445, -0.0325220589039272, 
    -2.28038421035601, -2.01375702882255, -1.6547144697087, -0.619979893871085, 
    -4.48258340054462, -1.42281778522059, 2.62315679073783, -4.13736508533355
    )), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L, 
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11", 
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25", 
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), 
    SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", 
    "NEG-NOC", "NEU-NOC"), class = "factor"), value = c(11.8802266972569, 
    12.1053426662461, 12.955441582096, 15.0981004360619, 15.4046229884164, 
    16.671036999147, 3.13771453335467, -0.0892565159000666, 2.15365554736525, 
    13.6778924406572, 14.3862738306396, 6.86762877785576, 7.47946451329025, 
    8.93405130318593, 8.45962311067909, 23.4166601996042, 15.1868092142896, 
    9.97183712753913, 6.267521071803, 10.142198458411, 10.6320358418368, 
    12.9998037913548, 20.7052065690674, 11.8852179570666, 15.7899796085713, 
    7.50729833890206, 14.3076172484818, 9.93797956768228, 10.7693238464384, 
    5.04681800218272, 5.16656503460515, 7.87875085817396, 2.29899409536951, 
    10.0135486953849, 5.48278706243332, 7.81908431468528, 8.64382513728869, 
    3.35777109534179, 3.47474629234488, 4.35678644331281, 3.47085321062162, 
    6.56231512354717, 4.93825547529124, 7.33985613752315, 6.81966900599588, 
    6.54487921689425, 7.25872117706077, 1.10301223694429, -0.856423579793706, 
    -0.887835692028378, -0.931653372049331, 5.6617683754256, 
    2.29939831067085, 5.1554825066748, 6.59026080217083, 3.0741733363644, 
    1.80359068950898, 1.63892755704177, 3.857933716935, 0.769316188513939, 
    10.7031907391191, 9.53278894637555, 8.01071628743378, 6.04891324234645, 
    11.1964453850602, 3.46633322373091, 14.4393884282958, 11.2339563353478, 
    7.74933708914689, 7.1182095475238, 7.39260082121406, 0.627435381320771, 
    9.15473202689768, 13.6559037433263, 7.14786907480758)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
    ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 
    9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 
    13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 
    17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 
    21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 
    25L, 25L), .Label = c("01", "04", "06", "07", "08", "09", 
    "10", "11", "12", "13", "15", "16", "17", "18", "19", "21", 
    "22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"), 
    GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), 
    COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
    ), class = "factor"), value = c(-11.7785042972793, -9.14927207125904, 
    -7.58190508537766, -4.01515836011381, -6.60165385653499, 
    -2.02861964460179, 4.46729570509601, 2.54036572774646, 2.22923889930115, 
    -0.883620011106743, -2.63569087592267, -2.0629672230873, 
    1.14544537612393, 2.08056674659401, 0.0422658298956365, 13.2986259796748, 
    5.06669915366333, 3.93467692474742, 0.0229069420708053, 4.31923128857779, 
    0.237726051904304, 1.89972383690448, 3.2371880079134, 0.318100791495115, 
    -8.08292381883298, -5.73174008540523, -15.7998485301436, 
    1.75469999857951, 0.677370118816266, -1.8397955509895, 2.55445787016256, 
    -0.380810453692585, 0.62462329496673, 2.61316333850434, 2.68202480583985, 
    1.76690658846479, 0.148635887703097, -0.958853757041888, 
    -3.17305964093897, -7.82526758429289, -6.58557573679886, 
    -4.39207076049089, 2.36752476749952, 0.594715760553033, -0.29794568443312, 
    -4.5365387390683, 0.196832250811775, -2.70852853745588, 0.498995124872827, 
    0.165171574219401, 0.269498974991661, 0.901948386281446, 
    -2.45955661653299, 1.63525170542944, 0.155897732673534, 1.8491735212703, 
    -0.856727109535223, -1.16182571974245, 1.07658425742917, 
    -2.21433585407388, 4.3385479368043, 4.40588599635354, 0.127710423625772, 
    -6.26956613362656, -1.17658595005389, -7.25886366924741, 
    -0.888293709383838, -2.14177059335841, -2.42141595261389, 
    -2.958120275175, -5.1274001953303, -5.32347488769128, -4.41290818553442, 
    -1.21404719262173, -4.23649270310915)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -75L)), structure(list(
    ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
    4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 
    9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 
    13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 
    17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 
    21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 
    25L, 25L), .Label = c("01", "04", "06", "07", "08", "09", 
    "10", "11", "12", "13", "15", "16", "17", "18", "19", "21", 
    "22", "23", "25", "27", "28", "30", "44", "46", "49"), class = "factor"), 
    GR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), SES = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), 
    COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", "NEG-NOC", "NEU-NOC"
    ), class = "factor"), value = c(-5.96429031525769, -5.10918437158799, 
    -2.81732229625975, -1.43557366487622, -3.14872157912645, 
    0.160393685024631, 3.52155765271648, 2.10437989449921, 2.70693992810407, 
    5.49897156207812, 5.81171180245335, -1.37301251388987, -0.434363848460157, 
    2.87987510596148, -1.27152670283348, 17.2093269365993, 7.79412746755931, 
    8.11964589961276, 4.95253363860044, 9.50695673265293, 4.15235381401148, 
    6.1294488368639, 8.01447499455337, 0.783414018677801, -1.24197194087055, 
    -0.487178595894761, -9.79031812534203, 4.22150266269492, 
    4.20139847550095, 0.208005397351335, 4.19096721581768, 0.815283302847055, 
    1.48137456347872, 2.0809543999959, 4.35199943309111, 2.84860039832237, 
    3.05879540677983, 2.11976068962167, -0.269002712326028, -2.77155065610474, 
    -2.59002218694999, 0.17928456999128, 2.24515223348079, 1.88805943988563, 
    -0.0920286086411814, -2.00968595029144, 2.59427260100332, 
    -1.27622011197768, 0.588399071755827, -1.43982473126936, 
    1.96978732491278, -0.338674980283045, -1.86484698930706, 
    -0.0154791822607025, 2.55036185373462, 4.42520405730058, 
    -0.599156247027551, 1.60091251589958, 4.7367320574401, -0.192490723623988, 
    4.8452288234686, 5.71745745981867, 1.02554478706585, -4.5951256708181, 
    1.1704842909792, -7.42770276334892, 3.15655538248828, -0.639830772856786, 
    -0.345116641695513, -0.0391030568720636, -2.61585906518491, 
    -2.71685194532693, -1.7348388034111, 1.00287124847525, -2.4844653851482
    )), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-75L)), structure(list(ID = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 
8L, 8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 
13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 
17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L, 
21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L
), .Label = c("01", "04", "06", "07", "08", "09", "10", "11", 
"12", "13", "15", "16", "17", "18", "19", "21", "22", "23", "25", 
"27", "28", "30", "44", "46", "49"), class = "factor"), GR = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "RP", class = "factor"), 
    SES = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), COND = structure(c(1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR", 
    "NEG-NOC", "NEU-NOC"), class = "factor"), value = c(8.23981597718437, 
    9.51261484648731, 9.42367409925817, 5.06332653216481, 5.02619159395405, 
    9.07903916629231, 7.56089165217984, 5.49719893790597, 4.91476855238182, 
    13.0320953572069, 10.8414516494484, 5.86927622259489, 3.25309970442897, 
    4.6847880297099, 2.71096740085175, 25.567439566524, 16.3241813617706, 
    13.0990192799703, 11.9200281736866, 14.6901305277101, 9.67397418905514, 
    10.2974302220899, 12.0768070828642, 5.9401530589224, 12.4817579327688, 
    12.419526465857, 1.00612108990875, 9.63063375751153, 10.5631237176538, 
    3.08031473770521, 3.35694102903017, 4.28046277054405, -0.133592200169464, 
    6.9103658689166, 7.64737651416791, 6.75669517393108, 8.5369185279747, 
    7.08645126073423, 4.47409706618326, 4.39617687043259, 3.27924738047746, 
    6.06169418872804, 5.34939694712468, 5.58288092654703, 4.85729686493463, 
    7.38032829587839, 11.7259526759912, 4.95764559864061, 6.24066579989613, 
    3.49843659402445, 4.07498375647916, 3.55732294589389, 1.33918111568512, 
    0.956782967443242, 2.32002496709926, 3.15289777246607, -0.832211906889126, 
    6.39254974438057, 7.0533787627062, 2.97245026797807, 6.23573445580928, 
    7.6052386193207, 2.98791225155534, 3.10850022259445, 8.12060882554471, 
    -0.00459651443883508, 13.5899217198075, 9.93070913311253, 
    8.10285456644801, 5.04464304009428, 2.02262615478956, 1.0510618938653, 
    5.62233873107127, 10.1193593084848, 5.87476640145049)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -75L)))), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
    signals = c("LPPearlyCz", "LPPearlyFCz", "LPPearlyPz", "P3Cz", 
    "P3FCz", "P3Pz"), .rows = structure(list(5L, 4L, 6L, 2L, 
        1L, 3L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L), .drop = TRUE))
> 

我尝试检查是否存在异常值,如下所示:

# Flag outliers of `value` within each condition, pooling all signals.
outliers_table <- df_join %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0.0 and emits a warning.
  unnest(cols = data) %>%
  dplyr::select(COND, signals, value) %>%
  group_by(COND) %>%  # grouping by COND is equivalent to grouping by time
  identify_outliers(value)

它返回:

A tibble: 30 x 5
   COND    signals     value is.outlier is.extreme
   <fct>   <chr>       <dbl> <lgl>      <lgl>     
 1 NEG-CTR P3FCz       -11.6 TRUE       FALSE     
 2 NEG-CTR P3Cz         13.7 TRUE       FALSE     
 3 NEG-CTR P3Pz         15.1 TRUE       FALSE     
 4 NEG-CTR P3Pz         13.7 TRUE       FALSE     
 5 NEG-CTR P3Pz         23.4 TRUE       TRUE      
 6 NEG-CTR P3Pz         15.8 TRUE       FALSE     
 7 NEG-CTR P3Pz         14.4 TRUE       FALSE     
 8 NEG-CTR LPPearlyFCz -11.8 TRUE       FALSE     
 9 NEG-CTR LPPearlyCz   17.2 TRUE       FALSE     
10 NEG-CTR LPPearlyPz   25.6 TRUE       TRUE  

如果我有兴趣删除所有那些真正极端的值,我该怎么做才能使用一些迭代函数或一些 if 语句? 请也考虑其他替代方案,以防它更容易(也可以通过添加另一个 %>% 命令行来保留我编写的命令)减少 for 循环或其他一些功能。

下面是我一开始编写的、未能成功的代码:

# Asker's attempt (reported as not working): keep only the rows flagged as
# both outlier and extreme, then try to drop them from the nested data.
# NOTE(review): the result is stored in `outliers_bale`, but the statement
# after the pipeline reads `outliers_table`, so this filtered table is never
# used below.
outliers_bale <- df_join %>%
  unnest() %>% 
  dplyr::select(COND, signals, value) %>% 
  group_by(COND) %>%  # grouping by COND is equivalent to grouping by time
  identify_outliers(value) %>% 
  filter(is.outlier & is.extreme)

# Taken from `outliers_table`, not from `outliers_bale` defined just above.
values <- outliers_table$value

# NOTE(review): `df_join$data` is a list-column of tibbles (see the printout
# of df_join), so `%in%` compares whole list elements with the numeric
# `values` instead of individual rows -- presumably why nothing is removed;
# confirm.
df_join[!(df_join$data %in% values), ]

我不知道它是否有效。

提前致谢

如果你的函数 identify_outliers 根据给定的 value 是否为异常值返回 TRUE/FALSE,那么你可以直接使用 filter(identify_outliers(value))。

好的。让我们一步一步地一起做。据我了解,您非常担心您的数据(我将其保存在变量 df 中)存在异常值甚至极端值。首先,我们只从您的数据中提取一个 tibble,并按 COND == "NEG-NOC" 进行过滤。

library(tidyverse)
library(rstatix)
library(outliers)

data = df$data[[1]] %>% filter(COND=="NEG-NOC") 

现在让我们考虑一下我们将使用哪种异常值识别方法。 为此,我们可以使用 boxplot 函数。

boxplot.stats(data$value)$out
#[1] 8.164181

这很好,但它只给我们提供了向量形式的离群值。第二种方式是使用identify_outliers。这给了我们一个 tibble 但仍然只有那些具有这些异常值的行。

data %>% identify_outliers(variable = "value")
# # A tibble: 1 x 7
# ID    GR    SES   COND    value is.outlier is.extreme
# <fct> <fct> <fct> <fct>   <dbl> <lgl>      <lgl>     
#   1 11    RP    V     NEG-NOC  8.16 TRUE       FALSE

好吧,让我们使用 outliers 包中的 outlier 函数。这可以给我们一个逻辑向量。

outlier(data$value, opposite = T)
#[1] 8.164181
outlier(data$value, opposite = T, logical = T)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[22] FALSE FALSE FALSE FALSE

但是,这些方法都不能帮助您决定如何处理这些异常值。请仔细阅读 this。如您所见,您可以从三个选项中进行选择:插补(imputation)、封顶(capping)、预测(prediction)。你会选择哪一个?我选择了封顶。所以我写了一个小函数,它可以识别异常值和极端值,并额外返回封顶后的值。

# Flag and cap outliers in the `value` column of one set of observations.
#
# Fences follow Tukey's rule: a value is an outlier when it lies more than
# 1.5 * IQR outside the quartiles, and extreme when it lies more than
# 3 * IQR outside them. Outliers are replaced by the 5th / 95th percentile
# (capping).
#
# data: a data frame (or tibble) with a numeric column `value`.
# Returns `data` with three added columns: `is.outlier` and `is.extreme`
# (logical) and `cap.value` (`value` after capping).
#
# The flags and the capped values have one entry per row of `data`, so no
# grouping is applied here; per-COND processing is left to the caller.
fOutCapp <- function(data) {
  stopifnot("value" %in% names(data))
  x <- data$value

  qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  caps <- quantile(x, probs = c(0.05, 0.95), na.rm = TRUE)
  iqr <- IQR(x, na.rm = TRUE)
  H <- 1.5 * iqr
  He <- 3 * iqr

  # `[[` drops the quantile names so they cannot leak into the results.
  q1 <- qnt[[1]]
  q3 <- qnt[[2]]

  # Both flags must be derived from the ORIGINAL values: once a value has
  # been capped it lies inside the fences and could never be flagged extreme.
  is_outlier <- (x < q1 - H) | (x > q3 + H)
  is_extreme <- (x < q1 - He) | (x > q3 + He)

  # which() skips NA comparisons, leaving missing values untouched.
  capped <- x
  capped[which(x < q1 - H)] <- caps[[1]]
  capped[which(x > q3 + H)] <- caps[[2]]

  data$is.outlier <- is_outlier
  data$is.extreme <- is_extreme
  data$cap.value <- capped
  data
}

让我们看看它是否有效

data %>% fOutCapp() %>% filter(is.outlier)
# A tibble: 1 x 8
# ID    GR    SES   COND    value is.outlier is.extreme cap.value
# <fct> <fct> <fct> <fct>   <dbl> <lgl>      <lgl>          <dbl>
#   1 11    RP    V     NEG-NOC  8.16 TRUE       FALSE           4.95
data %>% fOutCapp()
# A tibble: 25 x 8
# ID    GR    SES   COND      value is.outlier is.extreme cap.value
# <fct> <fct> <fct> <fct>     <dbl> <lgl>      <lgl>          <dbl>
#   1 01    RP    V     NEG-NOC -11.1   FALSE      FALSE        -11.1  
# 2 04    RP    V     NEG-NOC   0.239 FALSE      FALSE          0.239
# 3 06    RP    V     NEG-NOC  -2.96  FALSE      FALSE         -2.96 
# 4 07    RP    V     NEG-NOC   1.09  FALSE      FALSE          1.09 
# 5 08    RP    V     NEG-NOC   2.99  FALSE      FALSE          2.99 
# 6 09    RP    V     NEG-NOC   5.42  FALSE      FALSE          5.42 
# 7 10    RP    V     NEG-NOC  -2.83  FALSE      FALSE         -2.83 
# 8 11    RP    V     NEG-NOC   8.16  TRUE       FALSE          4.95 
# 9 12    RP    V     NEG-NOC  -9.83  FALSE      FALSE         -9.83 
# 10 13    RP    V     NEG-NOC   2.12  FALSE      FALSE          2.12 
# ... with 15 more rows

但是请注意,变量 data 中的数据是按变量 COND 分组的。因此,让我们再写一个小函数,它将对每个组执行 fOutCapp

# Apply fOutCapp() separately to each COND group of `data`.
#
# data: a data frame with a `COND` column plus the `value` column that
#       fOutCapp() works on.
# Returns the per-group results recombined by group_modify().
fOutCappGroup <- function(data) {
  by_cond <- group_by(data, COND)
  group_modify(by_cond, function(.x, .y) fOutCapp(.x))
}

df$data[[1]] %>% fOutCappGroup()
# # A tibble: 75 x 8
# # Groups:   COND [3]
# COND    ID    GR    SES     value is.outlier is.extreme cap.value
# <fct>   <fct> <fct> <fct>   <dbl> <lgl>      <lgl>          <dbl>
#   1 NEG-CTR 01    RP    V     -11.6   FALSE      FALSE        -11.6  
# 2 NEG-CTR 04    RP    V      -0.314 FALSE      FALSE         -0.314
# 3 NEG-CTR 06    RP    V      -0.214 FALSE      FALSE         -0.214
# 4 NEG-CTR 07    RP    V      -2.83  FALSE      FALSE         -2.83 
# 5 NEG-CTR 08    RP    V       4.24  FALSE      FALSE          4.24 
# 6 NEG-CTR 09    RP    V       9.57  FALSE      FALSE          9.57 
# 7 NEG-CTR 10    RP    V      -6.13  FALSE      FALSE         -6.13 
# 8 NEG-CTR 11    RP    V       0.529 FALSE      FALSE          0.529
# 9 NEG-CTR 12    RP    V      -7.74  FALSE      FALSE         -7.74 
# 10 NEG-CTR 13    RP    V       1.27  FALSE      FALSE          1.27 
# # ... with 65 more rows

Bingo!一切正常。现在我们只需要做一次简单的 mutate。

df %>% group_by(signals) %>% 
  mutate(data = map(data, ~fOutCappGroup(.x))) %>% 
  unnest(data)

输出

# A tibble: 450 x 9
# Groups:   signals [6]
   signals COND    ID    GR    SES     value is.outlier is.extreme cap.value
   <chr>   <fct>   <fct> <fct> <fct>   <dbl> <lgl>      <lgl>          <dbl>
 1 P3FCz   NEG-CTR 01    RP    V     -11.6   FALSE      FALSE        -11.6  
 2 P3FCz   NEG-CTR 04    RP    V      -0.314 FALSE      FALSE         -0.314
 3 P3FCz   NEG-CTR 06    RP    V      -0.214 FALSE      FALSE         -0.214
 4 P3FCz   NEG-CTR 07    RP    V      -2.83  FALSE      FALSE         -2.83 
 5 P3FCz   NEG-CTR 08    RP    V       4.24  FALSE      FALSE          4.24 
 6 P3FCz   NEG-CTR 09    RP    V       9.57  FALSE      FALSE          9.57 
 7 P3FCz   NEG-CTR 10    RP    V      -6.13  FALSE      FALSE         -6.13 
 8 P3FCz   NEG-CTR 11    RP    V       0.529 FALSE      FALSE          0.529
 9 P3FCz   NEG-CTR 12    RP    V      -7.74  FALSE      FALSE         -7.74 
10 P3FCz   NEG-CTR 13    RP    V       1.27  FALSE      FALSE          1.27 
# ... with 440 more rows

这样问题就解决了。我们不仅识别了异常值,还对它们进行了封顶(capping)。现在由您决定在后续分析中使用 value 变量还是 cap.value 变量。

给 @little_statistician 的小更新

首先,我们将加载您的所有数据。

#Loading libraries
library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)

#Upload data
df_join <- read_excel("df_join.xlsx")

# Reshape the wide table into one nested tibble per signal.
df <- df_join %>%
  # Convert every column from ID through COND to a factor.
  mutate(across(ID:COND, factor)) %>%
  # One row per observation and signal; signal names go into `signals`.
  pivot_longer(cols = P3FCz:LPP2Pz, names_to = "signals") %>%
  group_by(signals) %>%
  nest()

现在让我们再次定义 fOutCapp 和 fOutCappGroup 函数。请注意,fOutCapp 原始版本中的 group_by 调用其实是不需要的。

# Flag and cap outliers in the `value` column of one set of observations.
#
# A value is an outlier when it lies more than 1.5 * IQR outside the
# quartiles, and extreme when it lies more than 3 * IQR outside them.
# Outliers are replaced by the 5th / 95th percentile (capping).
#
# data: a data frame (or tibble) with a numeric column `value`.
# Returns `data` with three added columns: `is.outlier` and `is.extreme`
# (logical) and `cap.value` (`value` after capping).
fOutCapp <- function(data) {
  stopifnot("value" %in% names(data))
  x <- data$value

  qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  caps <- quantile(x, probs = c(0.05, 0.95), na.rm = TRUE)
  iqr <- IQR(x, na.rm = TRUE)
  H <- 1.5 * iqr
  He <- 3 * iqr

  # `[[` drops the quantile names so they cannot leak into the results.
  lower_q <- qnt[[1]]
  upper_q <- qnt[[2]]

  # Compute both flags on the ORIGINAL values. Evaluating is.extreme after
  # capping would compare already-capped values with the fences and miss
  # every extreme observation.
  is_outlier <- (x < lower_q - H) | (x > upper_q + H)
  is_extreme <- (x < lower_q - He) | (x > upper_q + He)

  # which() skips NA comparisons, leaving missing values untouched.
  capped <- x
  capped[which(x < lower_q - H)] <- caps[[1]]
  capped[which(x > upper_q + H)] <- caps[[2]]

  data$is.outlier <- is_outlier
  data$is.extreme <- is_extreme
  data$cap.value <- capped
  data
}

# Run fOutCapp() on every COND group of `data` and recombine the results.
#
# data: a data frame with a `COND` column plus the `value` column that
#       fOutCapp() works on.
# Returns whatever group_modify() assembles from the per-group results.
fOutCappGroup <- function(data) {
  data %>%
    group_by(COND) %>%
    group_modify(function(grp, key) fOutCapp(grp))
}

现在该执行 mutate 了。

# Cap outliers per signal and per COND, then nest the result again.
# The step numbers match the five steps of the walkthrough text that follows.
df = df %>% group_by(signals) %>% # Step 1: group by signal
  mutate(data = map(data, ~fOutCappGroup(.x))) %>% # Step 2: run fOutCappGroup() on each nested data set
  unnest(data) %>% # Step 3: flatten the nested data
  mutate(old.value = value,
         value = cap.value) %>% # Step 4: keep the raw value as old.value, use the capped one as value
  nest(data=COND:old.value)  # Step 5: nest columns COND..old.value back into `data`

了解这里实际发生了什么非常重要。在第 1 步中,我们按 signals 变量对 tibble 进行分组。这很简单,你当然明白。在第 2 步中,我们对 data 变量执行 mutate,它是一个由各个信号的数据组成的列表。

第 2 步后的输出

# A tibble: 12 x 2
# Groups:   signals [12]
   signals     data                 
   <chr>       <list>               
 1 P3FCz       <grouped_df [75 x 8]>
 2 P3Cz        <grouped_df [75 x 8]>
 3 P3Pz        <grouped_df [75 x 8]>
 4 LPPearlyFCz <grouped_df [75 x 8]>
 5 LPPearlyCz  <grouped_df [75 x 8]>
 6 LPPearlyPz  <grouped_df [75 x 8]>
 7 LPP1FCz     <grouped_df [75 x 8]>
 8 LPP1Cz      <grouped_df [75 x 8]>
 9 LPP1Pz      <grouped_df [75 x 8]>
10 LPP2FCz     <grouped_df [75 x 8]>
11 LPP2Cz      <grouped_df [75 x 8]>
12 LPP2Pz      <grouped_df [75 x 8]>

这样,内层的 tibble 就有了新的变量。您将在第 3 步的 unnest 之后看到它们。

步骤 3 后的输出

# A tibble: 900 x 9
# Groups:   signals [12]
   signals COND    ID    GR    SES     value is.outlier is.extreme cap.value
   <chr>   <fct>   <fct> <fct> <fct>   <dbl> <lgl>      <lgl>          <dbl>
 1 P3FCz   NEG-CTR 01    RP    V     -11.6   FALSE      FALSE        -11.6  
 2 P3FCz   NEG-CTR 04    RP    V      -0.314 FALSE      FALSE         -0.314
 3 P3FCz   NEG-CTR 06    RP    V      -0.214 FALSE      FALSE         -0.214
 4 P3FCz   NEG-CTR 07    RP    V      -2.83  FALSE      FALSE         -2.83 
 5 P3FCz   NEG-CTR 08    RP    V       4.24  FALSE      FALSE          4.24 
 6 P3FCz   NEG-CTR 09    RP    V       9.57  FALSE      FALSE          9.57 
 7 P3FCz   NEG-CTR 10    RP    V      -6.13  FALSE      FALSE         -6.13 
 8 P3FCz   NEG-CTR 11    RP    V       0.529 FALSE      FALSE          0.529
 9 P3FCz   NEG-CTR 12    RP    V      -7.74  FALSE      FALSE         -7.74 
10 P3FCz   NEG-CTR 13    RP    V       1.27  FALSE      FALSE          1.27 
# ... with 890 more rows

并且由于您已经有了一个非常好的函数,可以生成带有各种统计信息的漂亮 boxplot-violin 图,让我们做一次小的 mutate(第 4 步),用 cap.value 替换 value

第 4 步后的输出

# A tibble: 900 x 10
# Groups:   signals [12]
   signals COND    ID    GR    SES     value is.outlier is.extreme cap.value old.value
   <chr>   <fct>   <fct> <fct> <fct>   <dbl> <lgl>      <lgl>          <dbl>     <dbl>
 1 P3FCz   NEG-CTR 01    RP    V     -11.6   FALSE      FALSE        -11.6     -11.6  
 2 P3FCz   NEG-CTR 04    RP    V      -0.314 FALSE      FALSE         -0.314    -0.314
 3 P3FCz   NEG-CTR 06    RP    V      -0.214 FALSE      FALSE         -0.214    -0.214
 4 P3FCz   NEG-CTR 07    RP    V      -2.83  FALSE      FALSE         -2.83     -2.83 
 5 P3FCz   NEG-CTR 08    RP    V       4.24  FALSE      FALSE          4.24      4.24 
 6 P3FCz   NEG-CTR 09    RP    V       9.57  FALSE      FALSE          9.57      9.57 
 7 P3FCz   NEG-CTR 10    RP    V      -6.13  FALSE      FALSE         -6.13     -6.13 
 8 P3FCz   NEG-CTR 11    RP    V       0.529 FALSE      FALSE          0.529     0.529
 9 P3FCz   NEG-CTR 12    RP    V      -7.74  FALSE      FALSE         -7.74     -7.74 
10 P3FCz   NEG-CTR 13    RP    V       1.27  FALSE      FALSE          1.27      1.27 
# ... with 890 more rows

最后,让我们用第 5 步中的变量 data 将其全部恢复到原来的形式。

第 5 步后的输出

# A tibble: 12 x 2
# Groups:   signals [12]
   signals     data             
   <chr>       <list>           
 1 P3FCz       <tibble [75 x 9]>
 2 P3Cz        <tibble [75 x 9]>
 3 P3Pz        <tibble [75 x 9]>
 4 LPPearlyFCz <tibble [75 x 9]>
 5 LPPearlyCz  <tibble [75 x 9]>
 6 LPPearlyPz  <tibble [75 x 9]>
 7 LPP1FCz     <tibble [75 x 9]>
 8 LPP1Cz      <tibble [75 x 9]>
 9 LPP1Pz      <tibble [75 x 9]>
10 LPP2FCz     <tibble [75 x 9]>
11 LPP2Cz      <tibble [75 x 9]>
12 LPP2Pz      <tibble [75 x 9]>

那么现在让我们做一个图表!

#Function to special boxplot3
# Draw a violin + box + jitter plot of `value` by COND, annotated with
# pairwise-comparison p-values and an overall test.
#
# data:       data frame with the columns COND and value.
# signal:     label used for the y axis.
# parametric: TRUE  -> paired pairwise t-tests (Bonferroni) and anova_test();
#             FALSE -> pairwise Wilcoxon tests and kruskal_test().
# autor:      text shown as the plot caption.
# Returns the ggplot object.
SpecBoxplot3 <- function(data, signal, parametric = FALSE, autor = "G. Anonim") {
  # Choose the test family; only the raw tests differ between the branches.
  if (parametric) {
    raw_pairs <- data %>%
      pairwise_t_test(value ~ COND, paired = TRUE,
                      p.adjust.method = "bonferroni")
    overall <- data %>% anova_test(value ~ COND)
  } else {
    raw_pairs <- data %>% pairwise_wilcox_test(value ~ COND)
    overall <- data %>% kruskal_test(value ~ COND)
  }

  # Shared post-processing: bracket positions plus the text shown on them.
  # COND is overwritten with a constant -- presumably so the p-value layer
  # can resolve the inherited `fill = COND` aesthetic; confirm.
  pairs <- raw_pairs %>%
    add_xy_position(x = "COND") %>%
    mutate(COND = "NEG-CTR",
           lab = paste(p, " - ", p.adj.signif))

  ggplot(data, aes(COND, value, fill = COND)) +
    geom_violin(alpha = 0.2) +
    geom_boxplot(outlier.shape = 23, outlier.size = 3, alpha = 0.6) +
    geom_jitter(shape = 21, width = 0.1) +
    stat_pvalue_manual(pairs, step.increase = 0.05, label = "lab") +
    ylab(signal) +
    labs(title = get_test_label(overall, detailed = TRUE),
         subtitle = get_pwc_label(pairs),
         caption = autor)
}


#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot3("P3FCz", TRUE)
df$data[[1]] %>% SpecBoxplot3("P3FCz", FALSE)

正如您在图表上看到的,不再有异常值!

现在我们准备绘制每个信号!

#A function that creates a special boxplot3 and adds it to a data frame
# Build the parametric and the non-parametric SpecBoxplot3() for the first
# nested data set in `df` and attach both plots as list-columns.
#
# df:        data frame with a list-column `data`; only df$data[[1]] is plotted.
# signal:    passed through to SpecBoxplot3() as the y-axis label.
# printPlot: when TRUE, both plots are printed as a side effect.
# Returns `df` with the added list-columns boxplot1 (parametric) and
# boxplot2 (non-parametric).
AddSignalBoxplot3 <- function(df, signal, printPlot = TRUE) {
  signal_data <- df$data[[1]]
  parametric_plot <- SpecBoxplot3(signal_data, signal, TRUE)
  rank_plot <- SpecBoxplot3(signal_data, signal, FALSE)

  if (printPlot) {
    print(parametric_plot)
    print(rank_plot)
  }

  mutate(df,
         boxplot1 = list(parametric_plot),
         boxplot2 = list(rank_plot))
}

# Add the two special boxplots for every signal.
df %>%
  group_by(signals) %>%
  # .y is the one-row key tibble of the current group; hand over the signal
  # name itself so the axis label is a plain string, not a tibble.
  group_modify(~ AddSignalBoxplot3(.x, .y$signals))

祝你进一步分析顺利!!

最后更新

# Plot, for each COND level, a density-scaled histogram of `value` together
# with a kernel density estimate (red) and a fitted normal curve (blue).
#
# df:    data frame with a list-column `data`; only df$data[[1]] is used.
# group: used as the x-axis label (passed to xlab()).
# Returns the ggplot object.
create.plot2 = function(df, group){
  data = df$data[[1]]
  # Overall range of `value` across all COND levels; every group's normal
  # curve is evaluated over this same range.
  minv = min(data$value)
  maxv = max(data$value)
  # Per-COND normal curve. Each summarise() expression can use the columns
  # created by the expressions before it (n, mean, sd, min, max, x), so the
  # order of the arguments below matters.
  # NOTE(review): summarise() returning several rows per group is deprecated
  # in newer dplyr in favour of reframe() -- confirm against the version in use.
  df.stat = data %>% group_by(COND) %>% 
    summarise(
      n = n(),
      mean = mean(value),
      sd = sd(value),
      min = minv,
      max = maxv,
      # grid of n * 100 points between the overall minimum and maximum
      x = seq(min, max, length.out = n*100),
      # normal density at x using the group's mean and sd; reuses the name `value`
      value = dnorm(x, mean, sd) 
    ) 
  data %>% ggplot(aes(value, fill=COND))+
    # histogram on the density scale so it is comparable with the curves
    # NOTE(review): the `..density..` notation is deprecated in newer ggplot2
    # in favour of after_stat(density) -- confirm against the version in use.
    geom_histogram(aes(y=..density..), colour="black", fill="white", bins = 30)+
    # kernel density estimate of the observed values
    geom_density(alpha=.2, fill="red", col="red")+
    # normal curve computed in df.stat
    geom_line(aes(x, value), data=df.stat, col="blue")+
    xlab(group)+
    # one panel per COND level
    facet_grid(cols = vars(COND))
}

# One distribution plot per signal.
df %>%
  group_by(signals) %>%
  # group_map() calls its function with (.x, .y), where .y is the one-row key
  # tibble; pass the signal name itself so xlab() receives a plain string.
  group_map(~ create.plot2(.x, .y$signals))