R按行替换最小值的第一个实例

R rowwise replace the first instance of the minimum

我怎样才能做到以下几点:

  1. 用 NA 替换所有小于 6 的值,
  2. 如果行中只有一个 NA,将最小值的第一个实例替换为 -99?

一些包含 ID 变量和总计列的数据:

library(tidyverse)
df <- data.frame(id = c(1,2,3,4,5), a = c(10,12,4,17,3), b = c(9,12,3,20,6), c = c(2,2,10,10,10), d = c(12,16,12,10,12))
df$total <- apply(df[,c(2:5)], 1, sum)

给予

  id  a   b   c   d   total
  1   10  9   2   12   33
  2   12  12  2   16   42
  3   4   3   10  12   29
  4   17  20  10  10   57
  5   3   6   10  12   31

我想要的输出是

 id    a    b     c     d   total
 1    10   -99    NA    12    33
 2   -99    12    NA    16    42
 3    NA    NA    10    12    29
 4    17    20    10    10    57
 5    NA   -99    10    12    31

我的尝试

df_mod <- df %>%

  # Make <6 NA
  mutate(
    across(
      .cols = 'a':'total',
      ~case_when(
        .x < 6 ~ as.numeric(NA),
        TRUE ~ .x
      )
    )
  ) %>%
  # Add a count of NAs
  rowwise() %>%
  mutate(Count_NA = sum(is.na(cur_data()))) %>%
  ungroup() 

# Transpose and get row minimum
df_mod2 <- t(df_mod[,-c(1,ncol(df_mod))]) %>%
  apply(., 2, function(a){
      min <- min(a, na.rm = TRUE)
    }
  ) %>% 
  cbind(df_mod, .) %>%
  rename(., min = .) %>%
  tibble(.)
  
# If count_NA  = 1 replace the first instance of min
df_mod2 %>%
  rowwise() %>%
  mutate(
    across(
      .cols = 'a':'total',
      ~case_when(
        Count_NA == 1 & .x == min ~ replace(.x, first(match(min, .x)), -99),
        TRUE ~ .x)
    )
  ) %>%
  select(-'Count_NA', -'min')

给出以下内容

     id    a    b     c     d   total
     1    10   -99    NA    12    33
     2   -99   -99    NA    16    42
     3    NA    NA    10    12    29
     4    17    20    10    10    57
     5    NA   -99    10    12    31

谢谢

如果您愿意旋转而不是按行工作,那么这个解决方案就可以了。

library(dplyr)
df %>% 
  pivot_longer(names_to = 'col',
               values_to = 'val',
               -c(id, total)) %>%
  group_by(id) %>%
  mutate(val2 = rank(val, ties.method = 'first'),
         val = ifelse(val < 6, NA , val),
         val = ifelse(sum(is.na(val)) == 1 & val2 == 2, -99, val)) %>% 
  select(-val2) %>%
  pivot_wider(names_from = col, 
              values_from = val) %>% 
  relocate(total, .after = "d")

结果如下:

# A tibble: 5 × 6
# Groups:   id [5]
     id     a     b     c     d total
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1    10   -99    NA    12    33
2     2   -99    12    NA    16    42
3     3    NA    NA    10    12    29
4     4    17    20    10    10    57
5     5    NA   -99    10    12    31

不清楚“第二个”最小值是什么意思,因为您替换了最小值。您可以使用 data.table:

library(data.table)

setDT(df)[
  , 
  (cols) := transpose(
    lapply(
      transpose(lapply(.SD, function(x) fifelse(x < 6, NA_real_, x))), 
      function(x) if(sum(is.na(x)) == 1) replace(x, which.min(x), -99) else x
    )
  ),
  .SDcols = setdiff(names(df), "id")
]