R - 每行的非 NA 摘要

R - Summary of non NAs for each row

我有一个与此非常相似的问题:

我只想在多列中显示结果,我还需要有实际值。 重要的是,当值不是数字,而是字符时,它也可以工作!我有另一个 df,其中有刺而不是数字!

     AFA  AFI  AII  AMA  AMI  AMU  BFA  BFI  BFU  BII
 1: 0.79   NA   NA 0.58   NA   NA   NA   NA 0.75   NA
 2:   NA   NA   NA   NA   NA 0.78   NA -0.5   NA   NA
 3:   NA   NA   NA   NA   NA   NA 0.79 -0.5   NA   NA
 4:   NA   NA   NA   NA   NA   NA   NA -0.5   NA   NA
 5:   NA   NA   NA   NA 0.63   NA   NA   NA   NA   NA
 6:   NA   NA   NA   NA   NA   NA 0.83   NA   NA   NA
 7: 0.63   NA   NA   NA   NA   NA   NA   NA   NA 0.82
 8:   NA   NA   NA   NA 0.63   NA   NA   NA   NA   NA
 9:   NA   NA 0.54 0.59   NA   NA   NA   NA   NA   NA
10:   NA 0.51   NA   NA   NA   NA   NA   NA   NA   NA

输出:

   V1_Code V1_Val V2_Code V2_Val V3_Code V3_Val
1:     AFA   0.79     AMA   0.58     BFU   0.75
2:     AMU   0.78     BFI   -0.5      NA     NA
3:     BFA   0.79     BFI   -0.5      NA     NA
       and so on....

这是我的 df:

    structure(list(AFA = c(0.79, NA, NA, NA, NA, NA, 0.63, NA, NA, 
    NA), AFI = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.51), AII = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, 0.54, NA), AMA = c(0.58, NA, NA, 
    NA, NA, NA, NA, NA, 0.59, NA), AMI = c(NA, NA, NA, NA, 0.63, 
    NA, NA, 0.63, NA, NA), AMU = c(NA, 0.78, NA, NA, NA, NA, NA, 
    NA, NA, NA), BFA = c(NA, NA, 0.79, NA, NA, 0.83, NA, NA, NA, 
    NA), BFI = c(NA, -0.5, -0.5, -0.5, NA, NA, NA, NA, NA, NA), BFU = c(0.75, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), BII = c(NA, NA, NA, NA, 
NA, NA, 0.82, NA, NA, NA)), row.names = c(NA, -10L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x000001fc6b791ef0>)

这样做:

  1. 把它变成 tibble
  2. 转换为长格式
  3. 使用is.na函数过滤
  4. 随心所欲继续

下面的代码

library(tidyverse)
df = structure(list(AFA = c(0.79, NA, NA, NA, NA, NA, 0.63, NA, NA, NA), 
                    AFI = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.51), 
                    AII = c(NA,NA, NA, NA, NA, NA, NA, NA, 0.54, NA), 
                    AMA = c(0.58, NA, NA, NA, NA, NA, NA, NA, 0.59, NA), 
                    AMI = c(NA, NA, NA, NA, 0.63, NA, NA, 0.63, NA, NA), 
                    AMU = c(NA, 0.78, NA, NA, NA, NA, NA, NA, NA, NA), 
                    BFA = c(NA, NA, 0.79, NA, NA, 0.83, NA, NA, NA, NA), 
                    BFI = c(NA, -0.5, -0.5, -0.5, NA, NA, NA, NA, NA, NA), 
                    BFU = c(0.75, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
                    BII = c(NA, NA, NA, NA, NA, NA, 0.82, NA, NA, NA)), 
               row.names = c(NA, -10L), class = c("data.table","data.frame"))

df %>% tibble() %>% 
  pivot_longer(everything(), names_to = "key", values_to = "val") %>% 
  filter(!is.na(val))

输出

# A tibble: 16 x 2
   key     val
   <chr> <dbl>
 1 AFA    0.79
 2 AMA    0.58
 3 BFU    0.75
 4 AMU    0.78
 5 BFI   -0.5 
 6 BFA    0.79
 7 BFI   -0.5 
 8 BFI   -0.5 
 9 AMI    0.63
10 BFA    0.83
11 AFA    0.63
12 BII    0.82
13 AMI    0.63
14 AII    0.54
15 AMA    0.59
16 AFI    0.51
library(dplyr)
library(tidyr)

mydf |>
  mutate(id = row_number()) |> 
  pivot_longer(-id, names_to = "Code", values_to = "Val") |> 
  drop_na() |> 
  group_by(id) |> 
  mutate(col_num = row_number()) |> 
  ungroup() |> 
  pivot_wider(id_cols = id, values_from = c(Code, Val), names_from = col_num,
              names_glue = "V{col_num}_{.value}") |> 
  select(-id) |> 
  relocate(sort(tidyselect::peek_vars()))
    
# A tibble: 10 x 6
   V1_Code V1_Val V2_Code V2_Val V3_Code V3_Val
   <chr>    <dbl> <chr>    <dbl> <chr>    <dbl>
 1 AFA       0.79 AMA       0.58 BFU       0.75
 2 AMU       0.78 BFI      -0.5  NA       NA   
 3 BFA       0.79 BFI      -0.5  NA       NA   
 4 BFI      -0.5  NA       NA    NA       NA   
 5 AMI       0.63 NA       NA    NA       NA   
 6 BFA       0.83 NA       NA    NA       NA   
 7 AFA       0.63 BII       0.82 NA       NA   
 8 AMI       0.63 NA       NA    NA       NA   
 9 AII       0.54 AMA       0.59 NA       NA   
10 AFI       0.51 NA       NA    NA       NA   

这是另一种方法:

  1. 基本概念是将列名连接到一个新列 Code 中,对于 Val 的值也是如此。
  2. 然后用 strsplitunnest 拆分连接的值,将它们放入自己的行中。
  3. 到目前为止,Phil 和 Marek Fiotka 已经实施了 pivot_longer,这可以做得更短。
  4. id
  5. 分组后,用 mutate(group_id = row_number()) 分配新的 id_group
  6. 然后 pivot_wider 粘贴名称。
library(tidyverse)

    df %>% 
        mutate(id = row_number(),
               across(-id, ~case_when(!is.na(.) ~ cur_column()), .names = 'name_{col}'),
               across(1:10, as.character)) %>%
        unite(Code, starts_with('name'), na.rm = TRUE, sep = ' ') %>% 
        unite(Val, AFA:BII, na.rm = TRUE, sep = " ") %>% 
        mutate(across(-id, ~strsplit(as.character(.), " "))) %>% 
        unnest(cols = c(Code, Val)) %>% 
        group_by(id) %>% 
        mutate(group_id = row_number()) %>% 
        pivot_wider(id_cols = id, values_from = c(Code, Val), names_from = group_id,
                    names_glue = "V{group_id}_{.value}") %>% 
        ungroup() %>% 
        select(V1_Code, V1_Val, V2_Code, V2_Val, V3_Code, V3_Val, -id)

输出:

   V1_Code V1_Val V2_Code V2_Val V3_Code V3_Val
   <chr>   <chr>  <chr>   <chr>  <chr>   <chr> 
 1 AFA     0.79   AMA     0.58   BFU     0.75  
 2 AMU     0.78   BFI     -0.5   NA      NA    
 3 BFA     0.79   BFI     -0.5   NA      NA    
 4 BFI     -0.5   NA      NA     NA      NA    
 5 AMI     0.63   NA      NA     NA      NA    
 6 BFA     0.83   NA      NA     NA      NA    
 7 AFA     0.63   BII     0.82   NA      NA    
 8 AMI     0.63   NA      NA     NA      NA    
 9 AII     0.54   AMA     0.59   NA      NA    
10 AFI     0.51   NA      NA     NA      NA