如何在R中将数据从一列移动到另一列

How to move data from one column to another in R

由于基本表格填写不正确,我正在尝试将数据从一列移动到另一列。

该表格要求提供家庭信息,并询问每位成员的年龄 (AGE) 和性别 (SEX),每个家庭最多可填 5 人。然而,有些用户填写了第 1,3 和 4 人的信息,却没有第 2 人的任何信息:他们把第 2 人填错后划掉了相关内容,再把第 2 人的信息填进了第 3 人的格子里,依此类推。

数据看起来像这样(此数据中 ref 为 1 和 5 的行是正确的,其余各行均有误)

# Example household data: one row per form (`ref`), with up to five
# AGE/SEX slots per household. NA marks a slot that was left empty.
df <- data.frame(
  ref  = as.numeric(1:6),
  # Ages, slot by slot.
  AGE1 = c(45, 36, 26, 47, 24, NA_real_),
  AGE2 = c(NA_real_, 24, NA_real_, 13, 57, 28),
  AGE3 = c(NA_real_, NA_real_, 35, NA_real_, NA_real_, 26),
  AGE4 = c(NA_real_, NA_real_, 15, 11, NA_real_, NA_real_),
  AGE5 = c(NA_real_, 15, NA_real_, NA_real_, NA_real_, NA_real_),
  # Sexes, slot by slot (same slot layout as the ages above).
  SEX1 = c("M", "F", "M", "M", "M", NA_character_),
  SEX2 = c(NA_character_, "M", NA_character_, "F", "F", "F"),
  SEX3 = c(NA_character_, NA_character_, "M", NA_character_, NA_character_, "M"),
  SEX4 = c(NA_character_, NA_character_, "F", "F", NA_character_, NA_character_),
  SEX5 = c(NA_character_, "F", NA_character_, NA_character_, NA_character_, NA_character_)
)

这是 table 目前的样子 (我用 - 替换了 NA 以便于阅读)

ref AGE1 AGE2 AGE3 AGE4 AGE5 SEX1 SEX2 SEX3 SEX4 SEX5
1 45 - - - - M - - - -
2 36 24 - - 15 F M - - F
3 26 - 35 15 - M - M F -
4 47 13 - 11 - M F - F -
5 24 57 - - - M F - - -
6 - 28 26 - - - F M - -

但我希望它看起来像这样

ref AGE1 AGE2 AGE3 AGE4 AGE5 SEX1 SEX2 SEX3 SEX4 SEX5
1 45 - - - - M - - - -
2 36 24 15 - - F M F - -
3 26 35 15 - - M M F - -
4 47 13 11 - - M F F - -
5 24 57 - - - M F - - -
6 28 26 - - - F M - - -

有没有办法使用 dplyr 更正此问题?如果不是,R 中是否还有另一种方法可以更正数据

这是使用 dplyr 和 tidyr 的方法。该方法先将数据转换为长格式,把 NA 值排到末尾,对列名重新编号,然后再转换回宽格式。

library(dplyr)
library(tidyr)

# `df` is the data frame built in the question and is used as-is here.
# It is not rebuilt from bare vectors (`ref`, `AGE1`, ...): those objects
# are never defined on their own, so doing so fails with "object not found".
#
# Strategy: go long, push missing values to the end, renumber the slots
# within each household and category, then go wide again.
df %>%
  # The long `value` column needs one type, so hold ages as text for now.
  mutate(across(starts_with("AGE"), as.character)) %>%
  pivot_longer(-ref) %>%
  # Split after the 3rd character: "AGE1" -> cat = "AGE", num = "1".
  separate(name, into = c("cat", "num"), sep = 3) %>%
  # Non-missing values first; relies on arrange() keeping tied rows in
  # their existing order so the people stay in the order they were entered.
  arrange(is.na(value)) %>%
  group_by(ref, cat) %>%
  # New slot number 1..5 within each household/category.
  mutate(num = seq_along(value)) %>%
  ungroup() %>%
  # All AGE rows before all SEX rows, so the wide columns come out grouped.
  arrange(cat) %>%
  unite(name, cat, num, sep = "") %>%
  pivot_wider(id_cols = ref) %>%
  # Restore the numeric type of the age columns.
  mutate(across(starts_with("AGE"), as.numeric))


# A tibble: 6 x 11
    ref  AGE1  AGE2  AGE3  AGE4  AGE5 SEX1  SEX2  SEX3  SEX4  SEX5 
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
1     1    45    NA    NA    NA    NA M     NA    NA    NA    NA   
2     2    36    24    15    NA    NA F     M     F     NA    NA   
3     3    26    35    15    NA    NA M     M     F     NA    NA   
4     4    47    13    11    NA    NA M     F     F     NA    NA   
5     5    24    57    NA    NA    NA M     F     NA    NA    NA   
6     6    28    26    NA    NA    NA F     M     NA    NA    NA  

试试下面的 base R 代码

# Step 1: wide -> long.
# Insert a "." before the digit in every column name ("AGE1" -> "AGE.1") so
# that reshape() can split each name into a variable part and a time part.
# Backslashes are doubled because "\d" / "\1" are not valid escapes in an
# R string literal.
u1 <- reshape(
  setNames(df, sub("(\\d)", ".\\1", names(df))),
  direction = "long",
  idvar = "ref",
  varying = -1
)

# Step 2: long -> wide.
# Order rows so that missing AGE/SEX entries come last, renumber `time`
# within each `ref`, and spread back out to one row per `ref`.
u2 <- reshape(
  transform(
    u1[with(u1, order(is.na(AGE), is.na(SEX))), ],
    time = ave(time, ref, FUN = seq_along)
  ),
  direction = "wide",
  idvar = "ref"
)

# Step 3: put the columns back in the order of the original `df`
# (matching on the names with the "." removed).
out <- u2[match(names(df), sub("\\.", "", names(u2)))]

你会得到

> out
    ref AGE.1 AGE.2 AGE.3 AGE.4 AGE.5 SEX.1 SEX.2 SEX.3 SEX.4 SEX.5
1.1   1    45    NA    NA    NA    NA     M  <NA>  <NA>  <NA>  <NA>
2.1   2    36    24    15    NA    NA     F     M     F  <NA>  <NA>
3.1   3    26    35    15    NA    NA     M     M     F  <NA>  <NA>
4.1   4    47    13    11    NA    NA     M     F     F  <NA>  <NA>
5.1   5    24    57    NA    NA    NA     M     F  <NA>  <NA>  <NA>
6.2   6    28    26    NA    NA    NA     F     M  <NA>  <NA>  <NA>

数据

# Input data used by the answer above: six households (`ref`), each with
# five AGE slots and five SEX slots; NA means the slot was left blank.
df <- data.frame(
  ref  = as.numeric(1:6),
  AGE1 = c(45, 36, 26, 47, 24, NA_real_),
  AGE2 = c(NA_real_, 24, NA_real_, 13, 57, 28),
  AGE3 = c(NA_real_, NA_real_, 35, NA_real_, NA_real_, 26),
  AGE4 = c(NA_real_, NA_real_, 15, 11, NA_real_, NA_real_),
  AGE5 = c(NA_real_, 15, NA_real_, NA_real_, NA_real_, NA_real_),
  SEX1 = c("M", "F", "M", "M", "M", NA_character_),
  SEX2 = c(NA_character_, "M", NA_character_, "F", "F", "F"),
  SEX3 = c(NA_character_, NA_character_, "M", NA_character_, NA_character_, "M"),
  SEX4 = c(NA_character_, NA_character_, "F", "F", NA_character_, NA_character_),
  SEX5 = c(NA_character_, "F", NA_character_, NA_character_, NA_character_, NA_character_)
)

这是使用 dplyr 和 tidyr 库的方法。

library(dplyr)
library(tidyr)

# Reshape so each person is one row (paired AGE and SEX), move the empty
# person slots to the end within each household, renumber, and widen again.
df %>%
  pivot_longer(
    cols = -ref,
    # ".value" keeps AGE and SEX as separate columns; the digits go to `num`.
    names_to = c(".value", "num"),
    # Backslash doubled: "\d" is not a valid escape in an R string literal.
    names_pattern = "([A-Z]+)(\\d+)"
  ) %>%
  # Sort on missingness only, not on the values themselves, so the people
  # who were filled in keep their original relative order and only the
  # empty slots move to the end (relies on arrange() preserving tie order).
  arrange(ref, is.na(AGE), is.na(SEX)) %>%
  group_by(ref) %>%
  mutate(num = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = num, values_from = c(AGE, SEX))

#     ref AGE_1 AGE_2 AGE_3 AGE_4 AGE_5 SEX_1 SEX_2 SEX_3 SEX_4 SEX_5
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
# 1     1    45    NA    NA    NA    NA M     NA    NA    NA    NA
# 2     2    36    24    15    NA    NA F     M     F     NA    NA
# 3     3    26    35    15    NA    NA M     M     F     NA    NA
# 4     4    47    13    11    NA    NA M     F     F     NA    NA
# 5     5    24    57    NA    NA    NA M     F     NA    NA    NA
# 6     6    28    26    NA    NA    NA F     M     NA    NA    NA

这是一个使用 dedupewider 包的解决方案:

library(dedupewider)

# Sample data for the dedupewider example: `ref` identifies the household,
# AGE1..AGE5 / SEX1..SEX5 are the per-person slots, NA is an empty slot.
df <- data.frame(
  ref  = as.numeric(1:6),
  AGE1 = c(45, 36, 26, 47, 24, NA_real_),
  AGE2 = c(NA_real_, 24, NA_real_, 13, 57, 28),
  AGE3 = c(NA_real_, NA_real_, 35, NA_real_, NA_real_, 26),
  AGE4 = c(NA_real_, NA_real_, 15, 11, NA_real_, NA_real_),
  AGE5 = c(NA_real_, 15, NA_real_, NA_real_, NA_real_, NA_real_),
  SEX1 = c("M", "F", "M", "M", "M", NA_character_),
  SEX2 = c(NA_character_, "M", NA_character_, "F", "F", "F"),
  SEX3 = c(NA_character_, NA_character_, "M", NA_character_, NA_character_, "M"),
  SEX4 = c(NA_character_, NA_character_, "F", "F", NA_character_, NA_character_),
  SEX5 = c(NA_character_, "F", NA_character_, NA_character_, NA_character_, NA_character_)
)

# Select the AGE and SEX slot columns by name. The backslash is doubled
# because "\d" is not a valid escape in an R string literal.
age_cols <- grep("^AGE\\d$", names(df), value = TRUE)
sex_cols <- grep("^SEX\\d$", names(df), value = TRUE)

# Move the NAs within each column group; the 'right' direction is the default.
age_moved <- na_move(df, cols = age_cols)

# Same again for the SEX columns, starting from the AGE-corrected data.
sex_moved <- na_move(age_moved, cols = sex_cols)

sex_moved

#>   ref AGE1 AGE2 AGE3 AGE4 AGE5 SEX1 SEX2 SEX3 SEX4 SEX5
#> 1   1   45   NA   NA   NA   NA    M <NA> <NA>   NA   NA
#> 2   2   36   24   15   NA   NA    F    M    F   NA   NA
#> 3   3   26   35   15   NA   NA    M    M    F   NA   NA
#> 4   4   47   13   11   NA   NA    M    F    F   NA   NA
#> 5   5   24   57   NA   NA   NA    M    F <NA>   NA   NA
#> 6   6   28   26   NA   NA   NA    F    M <NA>   NA   NA