重塑数据框 - 根据 R 中的数据可用性创建行
Reshape dataframe - create rows as per data availability in R
我想将 original 数据框重塑为 target 数据框,如下所示。
首先,用下面的代码重新创建这两个数据框:
# Example input: one row per mother (caseid) and one age column per child.
# Ages are stored as character strings; NA marks a child that does not exist.
original <- data.frame(
  caseid     = c("id101", "id201", "id202", "id301", "id302"),
  age_child1 = c("3", "5", "8", NA, NA),
  age_child2 = c("1", "7", NA, NA, NA),
  age_child3 = c("2", "6", "8", "3", NA)
)

# Desired result: one row per existing child, with the child number appended
# to the mother's caseid and the age stored as a number.
target <- data.frame(
  caseid = c(
    "id101_1", "id101_2", "id101_3",
    "id201_1", "id201_2", "id201_3",
    "id202_1", "id202_3",
    "id301_3"
  ),
  age = c(3, 1, 2, 5, 7, 6, 8, 8, 3)
)
caseid 列代表母亲。我想为每个孩子创建一行新的 caseid 记录,并把对应的年龄值填入 age 列。如果某个孩子没有可用的年龄值,则说明不存在第 n 个孩子,不应为其创建新行。
感谢您的帮助!
您可以使用 pivot_longer()
及其各种有用的选项:
# Stack every age_child* column into long form. Stripping the "age_child"
# prefix leaves just the child number in the default `name` column, and the
# character ages are converted to integers on the way.
original %>%
  pivot_longer(
    cols = starts_with("age"),
    names_prefix = "age_child",
    values_to = "age",
    values_transform = as.integer
  ) %>%
  # Children without a recorded age do not exist, so drop their rows.
  filter(!is.na(age)) %>%
  # Append the child number to the mother's id, then discard the helper column.
  mutate(
    caseid = paste(caseid, name, sep = "_"),
    name = NULL
  )
输出:
# A tibble: 9 × 2
caseid age
<chr> <int>
1 id101_1 3
2 id101_2 1
3 id101_3 2
4 id201_1 5
5 id201_2 7
6 id201_3 6
7 id202_1 8
8 id202_3 8
9 id301_3 3
# Reshape all columns except caseid into long form. The regex keeps only the
# trailing digits of each column name as the child number, and rows whose age
# is NA are dropped during the pivot. Ages are left as character strings.
pivot_longer(
  original,
  cols = -caseid,
  names_to = "child",
  names_pattern = "([0-9]+$)",
  values_to = "age",
  values_drop_na = TRUE
) %>%
  # Join mother id and child number into a single caseid such as "id101_1".
  unite(col = "caseid", caseid, child, sep = "_")
# A tibble: 9 x 2
caseid age
<chr> <chr>
1 id101_1 3
2 id101_2 1
3 id101_3 2
4 id201_1 5
5 id201_2 7
6 id201_3 6
7 id202_1 8
8 id202_3 8
9 id301_3 3
使用 base R 自带的 reshape() 函数:
# Example input: one row per mother, one character age column per child.
original <- data.frame(
  caseid     = c("id101", "id201", "id202", "id301", "id302"),
  age_child1 = c("3", "5", "8", NA, NA),
  age_child2 = c("1", "7", NA, NA, NA),
  age_child3 = c("2", "6", "8", "3", NA)
)

# Wide -> long with base R: the three age columns are stacked into a single
# `age` column, and `time` holds the suffix ("_1", "_2", "_3") of the child.
a <- reshape(
  original,
  direction = "long",
  varying = paste0("age_child", 1:3),
  v.names = "age",
  times = paste0("_", 1:3)
)

# Build the per-child id by appending the suffix to the mother's id.
a[["caseid"]] <- paste0(a[["caseid"]], a[["time"]])

# Sort by the new id, keep only the two columns of interest, and drop the
# rows of children that have no age.
a <- a[order(a[["caseid"]]), ]
a <- na.omit(a[c("caseid", "age")])

# Reset the row names so the result is numbered 1..n.
rownames(a) <- NULL
a
#> caseid age
#> 1 id101_1 3
#> 2 id101_2 1
#> 3 id101_3 2
#> 4 id201_1 5
#> 5 id201_2 7
#> 6 id201_3 6
#> 7 id202_1 8
#> 8 id202_3 8
#> 9 id301_3 3
创建于 2022-06-01,由 reprex 包 (v2.0.1) 生成
我想将 original 数据框重塑为 target 数据框,如下所示。
首先,用下面的代码重新创建这两个数据框:
# Example input: one row per mother (caseid) and one age column per child.
# Ages are stored as character strings; NA marks a child that does not exist.
original <- data.frame(
  caseid     = c("id101", "id201", "id202", "id301", "id302"),
  age_child1 = c("3", "5", "8", NA, NA),
  age_child2 = c("1", "7", NA, NA, NA),
  age_child3 = c("2", "6", "8", "3", NA)
)

# Desired result: one row per existing child, with the child number appended
# to the mother's caseid and the age stored as a number.
target <- data.frame(
  caseid = c(
    "id101_1", "id101_2", "id101_3",
    "id201_1", "id201_2", "id201_3",
    "id202_1", "id202_3",
    "id301_3"
  ),
  age = c(3, 1, 2, 5, 7, 6, 8, 8, 3)
)
caseid 列代表母亲。我想为每个孩子创建一行新的 caseid 记录,并把对应的年龄值填入 age 列。如果某个孩子没有可用的年龄值,则说明不存在第 n 个孩子,不应为其创建新行。
感谢您的帮助!
您可以使用 pivot_longer()
及其各种有用的选项:
# Stack every age_child* column into long form. Stripping the "age_child"
# prefix leaves just the child number in the default `name` column, and the
# character ages are converted to integers on the way.
original %>%
  pivot_longer(
    cols = starts_with("age"),
    names_prefix = "age_child",
    values_to = "age",
    values_transform = as.integer
  ) %>%
  # Children without a recorded age do not exist, so drop their rows.
  filter(!is.na(age)) %>%
  # Append the child number to the mother's id, then discard the helper column.
  mutate(
    caseid = paste(caseid, name, sep = "_"),
    name = NULL
  )
输出:
# A tibble: 9 × 2
caseid age
<chr> <int>
1 id101_1 3
2 id101_2 1
3 id101_3 2
4 id201_1 5
5 id201_2 7
6 id201_3 6
7 id202_1 8
8 id202_3 8
9 id301_3 3
# Reshape all columns except caseid into long form. The regex keeps only the
# trailing digits of each column name as the child number, and rows whose age
# is NA are dropped during the pivot. Ages are left as character strings.
pivot_longer(
  original,
  cols = -caseid,
  names_to = "child",
  names_pattern = "([0-9]+$)",
  values_to = "age",
  values_drop_na = TRUE
) %>%
  # Join mother id and child number into a single caseid such as "id101_1".
  unite(col = "caseid", caseid, child, sep = "_")
# A tibble: 9 x 2
caseid age
<chr> <chr>
1 id101_1 3
2 id101_2 1
3 id101_3 2
4 id201_1 5
5 id201_2 7
6 id201_3 6
7 id202_1 8
8 id202_3 8
9 id301_3 3
使用 base R 自带的 reshape() 函数:
# Example input: one row per mother, one character age column per child.
original <- data.frame(
  caseid     = c("id101", "id201", "id202", "id301", "id302"),
  age_child1 = c("3", "5", "8", NA, NA),
  age_child2 = c("1", "7", NA, NA, NA),
  age_child3 = c("2", "6", "8", "3", NA)
)

# Wide -> long with base R: the three age columns are stacked into a single
# `age` column, and `time` holds the suffix ("_1", "_2", "_3") of the child.
a <- reshape(
  original,
  direction = "long",
  varying = paste0("age_child", 1:3),
  v.names = "age",
  times = paste0("_", 1:3)
)

# Build the per-child id by appending the suffix to the mother's id.
a[["caseid"]] <- paste0(a[["caseid"]], a[["time"]])

# Sort by the new id, keep only the two columns of interest, and drop the
# rows of children that have no age.
a <- a[order(a[["caseid"]]), ]
a <- na.omit(a[c("caseid", "age")])

# Reset the row names so the result is numbered 1..n.
rownames(a) <- NULL
a
#> caseid age
#> 1 id101_1 3
#> 2 id101_2 1
#> 3 id101_3 2
#> 4 id201_1 5
#> 5 id201_2 7
#> 6 id201_3 6
#> 7 id202_1 8
#> 8 id202_3 8
#> 9 id301_3 3
创建于 2022-06-01,由 reprex 包 (v2.0.1) 生成