在 R 中根据某一列的数据获取其前一列的数据
Use a column's data to get the data from the column before it in R
我到处都找不到这个。我希望能够创建一个列,该列使用包含“end”的列之前的列中的数据。
我什至可能解释得不够好。
例如:
df =
V1 V2 V3 V4 V5 V6
0 start 1 end ended 0
3 end 0 start 5 0
2 start 3 next 6 end
我希望新列是每一行中“end”所在列的前一列中的数字。
V1 V2 V3 V4 V5 V6 end_num
0 start 1 end ended 0 1
3 end 0 start 5 0 3
2 start 3 next 6 end 6
一个 base R 解决方案,使用 max.col() 找到每一行中 "end" 所在的位置:
# For every row, find the leftmost column whose value is exactly "end" and
# store the value of the column immediately before it in `end_num`.

# Logical matrix of "end" cells; NA cells count as non-matches.
is_end <- df == "end"
is_end[is.na(is_end)] <- FALSE

# ties.method = "first" makes the pick deterministic. The default ("random")
# chooses an arbitrary column when a row has several "end" cells or none.
end_col <- max.col(is_end, ties.method = "first")

# No "end" in the row, or "end" in the first column: there is no preceding
# value, so the result for that row is NA.
end_col[rowSums(is_end) == 0L | end_col == 1L] <- NA

# Build the character matrix column by column. Indexing the data frame with a
# matrix goes through as.matrix(), which format()s numeric columns and can pad
# values with spaces.
cells <- do.call(cbind, lapply(df, as.character))
df$end_num <- cells[cbind(seq_len(nrow(df)), end_col - 1L)]
df
# V1 V2 V3 V4 V5 V6 end_num
# 1 0 start 1 end ended 0 1
# 2 3 end 0 start 5 0 3
# 3 2 start 3 next 6 end 6
数据
# Example data from the question: integer columns V1 and V3, character
# columns V2, V4, V5 and V6, three rows.
df <- data.frame(
  V1 = c(0L, 3L, 2L),
  V2 = c("start", "end", "start"),
  V3 = c(1L, 0L, 3L),
  V4 = c("end", "start", "next"),
  V5 = c("ended", "5", "6"),
  V6 = c("0", "0", "end"),
  stringsAsFactors = FALSE
)
base R 中的一种可能解决方案:
# For every row, take the value in the column just before the first "end".
#
# The columns are converted with as.character() instead of calling
# apply(df, 1, ...): apply() coerces the data frame through as.matrix(), which
# format()s numeric columns and can pad values with spaces.
cells <- do.call(cbind, lapply(df, as.character))

df$end_num <- vapply(
  seq_len(nrow(df)),
  \(i) {
    # Position of the first "end" in this row (NA when there is none).
    pos <- match("end", cells[i, ])
    # No "end", or "end" in the first column: nothing precedes it.
    if (is.na(pos) || pos == 1L) NA_character_ else cells[i, pos - 1L]
  },
  character(1)
)
df
#> V1 V2 V3 V4 V5 V6 end_num
#> 1 0 start 1 end ended 0 1
#> 2 3 end 0 start 5 0 3
#> 3 2 start 3 next 6 end 6
这是一种比较繁琐的 tidyverse 解决方案 :-)
library(dplyr)
library(tidyr)
library(readr)

df %>%
  # For each column, write the column's own name into the rows where it holds
  # "end" (NA elsewhere). Results go to helper columns new_V1, new_V2, ...
  mutate(across(everything(), ~ case_when(. == "end" ~ cur_column()),
    .names = "new_{col}"
  )) %>%
  # Collapse the helper columns into a single string of matching column names.
  unite(New_Col, starts_with("new"), na.rm = TRUE, sep = " ") %>%
  # Turn the number parsed from that string into the name of the column just
  # before it, e.g. "V4" -> "V3".
  mutate(New_Col = paste0("V", parse_number(New_Col) - 1)) %>%
  # Look the value up in the original `df`. as.character() is explicit because
  # purrr >= 1.0.0 deprecates map2_chr()'s implicit coercion of integers.
  mutate(
    end_num = purrr::map2_chr(
      row_number(), New_Col, ~ as.character(df[.x, .y])
    ),
    .keep = "unused"
  )
V1 V2 V3 V4 V5 V6 end_num
1 0 start 1 end ended 0 1
2 3 end 0 start 5 0 3
3 2 start 3 next 6 end 6
Tidyverse 解决方案:将您的数据框转换为长格式,创建一个新的分组列来标识第一列和第二列,然后按行汇总。
此方法的一个优点是您可以识别是否有任何行具有多个“end”实例。
# Example data; the values appear to abbreviate the question's data
# ("s" = start, "e" = end, "en" = ended, "n" = next).
df <- data.frame(
  V1 = c(0, 3, 2),
  V2 = c("s", "e", "s"),
  V3 = c(1, 0, 3),
  V4 = c("e", "s", "n"),
  V5 = c("en", 5, 6),
  V6 = c(0, 0, "e")
)

library(dplyr)
library(tidyr)

# Reshape to long form, pair the columns up as (V1, V2), (V3, V4), (V5, V6),
# then for each row keep the "first" value of every pair whose "second" value
# is "e". Only even-numbered columns are checked for "e".
df_pivot <- df |>
  mutate(id = row_number()) |> # need a row number to unpivot
  pivot_longer(
    cols = c(everything(), -id), # don't pivot id
    # Keep only the digits of each column name ("V3" -> 3). The backslash has
    # to be doubled: "\d" is not a valid escape in an R string literal.
    names_pattern = "(\\d+)",
    names_transform = list(name = as.integer),
    # The columns have mixed types, so every value is wrapped in a list.
    values_transform = as.list
  ) |>
  mutate(
    col_rem = name %% 2,
    col_group = (name + col_rem) / 2, # pair index: (1, 2) -> 1, (3, 4) -> 2
    col_type = ifelse(col_rem == 0, "second", "first")
  ) |>
  select(-col_rem, -name) |>
  pivot_wider(names_from = col_type, values_from = value) |>
  group_by(id) |>
  summarise(
    # One result per matching pair, so a row with several "e" cells yields
    # several output rows.
    new = first[second == "e"]
  )
我到处都找不到这个。我希望能够创建一个列,该列使用包含“end”的列之前的列中的数据。
我什至可能解释得不够好。
例如:
df =
V1 V2 V3 V4 V5 V6
0 start 1 end ended 0
3 end 0 start 5 0
2 start 3 next 6 end
我希望新列是每一行中“end”所在列的前一列中的数字。
V1 V2 V3 V4 V5 V6 end_num
0 start 1 end ended 0 1
3 end 0 start 5 0 3
2 start 3 next 6 end 6
一个 base R 解决方案,使用 max.col() 找到每一行中 "end" 所在的位置:
# For every row, find the leftmost column whose value is exactly "end" and
# store the value of the column immediately before it in `end_num`.

# Logical matrix of "end" cells; NA cells count as non-matches.
is_end <- df == "end"
is_end[is.na(is_end)] <- FALSE

# ties.method = "first" makes the pick deterministic. The default ("random")
# chooses an arbitrary column when a row has several "end" cells or none.
end_col <- max.col(is_end, ties.method = "first")

# No "end" in the row, or "end" in the first column: there is no preceding
# value, so the result for that row is NA.
end_col[rowSums(is_end) == 0L | end_col == 1L] <- NA

# Build the character matrix column by column. Indexing the data frame with a
# matrix goes through as.matrix(), which format()s numeric columns and can pad
# values with spaces.
cells <- do.call(cbind, lapply(df, as.character))
df$end_num <- cells[cbind(seq_len(nrow(df)), end_col - 1L)]
df
# V1 V2 V3 V4 V5 V6 end_num
# 1 0 start 1 end ended 0 1
# 2 3 end 0 start 5 0 3
# 3 2 start 3 next 6 end 6
数据
# Example data from the question: integer columns V1 and V3, character
# columns V2, V4, V5 and V6, three rows.
df <- data.frame(
  V1 = c(0L, 3L, 2L),
  V2 = c("start", "end", "start"),
  V3 = c(1L, 0L, 3L),
  V4 = c("end", "start", "next"),
  V5 = c("ended", "5", "6"),
  V6 = c("0", "0", "end"),
  stringsAsFactors = FALSE
)
base R 中的一种可能解决方案:
# For every row, take the value in the column just before the first "end".
#
# The columns are converted with as.character() instead of calling
# apply(df, 1, ...): apply() coerces the data frame through as.matrix(), which
# format()s numeric columns and can pad values with spaces.
cells <- do.call(cbind, lapply(df, as.character))

df$end_num <- vapply(
  seq_len(nrow(df)),
  \(i) {
    # Position of the first "end" in this row (NA when there is none).
    pos <- match("end", cells[i, ])
    # No "end", or "end" in the first column: nothing precedes it.
    if (is.na(pos) || pos == 1L) NA_character_ else cells[i, pos - 1L]
  },
  character(1)
)
df
#> V1 V2 V3 V4 V5 V6 end_num
#> 1 0 start 1 end ended 0 1
#> 2 3 end 0 start 5 0 3
#> 3 2 start 3 next 6 end 6
这是一种比较繁琐的 tidyverse 解决方案 :-)
library(dplyr)
library(tidyr)
library(readr)

df %>%
  # For each column, write the column's own name into the rows where it holds
  # "end" (NA elsewhere). Results go to helper columns new_V1, new_V2, ...
  mutate(across(everything(), ~ case_when(. == "end" ~ cur_column()),
    .names = "new_{col}"
  )) %>%
  # Collapse the helper columns into a single string of matching column names.
  unite(New_Col, starts_with("new"), na.rm = TRUE, sep = " ") %>%
  # Turn the number parsed from that string into the name of the column just
  # before it, e.g. "V4" -> "V3".
  mutate(New_Col = paste0("V", parse_number(New_Col) - 1)) %>%
  # Look the value up in the original `df`. as.character() is explicit because
  # purrr >= 1.0.0 deprecates map2_chr()'s implicit coercion of integers.
  mutate(
    end_num = purrr::map2_chr(
      row_number(), New_Col, ~ as.character(df[.x, .y])
    ),
    .keep = "unused"
  )
V1 V2 V3 V4 V5 V6 end_num
1 0 start 1 end ended 0 1
2 3 end 0 start 5 0 3
3 2 start 3 next 6 end 6
Tidyverse 解决方案:将您的数据框转换为长格式,创建一个新的分组列来标识第一列和第二列,然后按行汇总。
此方法的一个优点是您可以识别是否有任何行具有多个“end”实例。
# Example data; the values appear to abbreviate the question's data
# ("s" = start, "e" = end, "en" = ended, "n" = next).
df <- data.frame(
  V1 = c(0, 3, 2),
  V2 = c("s", "e", "s"),
  V3 = c(1, 0, 3),
  V4 = c("e", "s", "n"),
  V5 = c("en", 5, 6),
  V6 = c(0, 0, "e")
)

library(dplyr)
library(tidyr)

# Reshape to long form, pair the columns up as (V1, V2), (V3, V4), (V5, V6),
# then for each row keep the "first" value of every pair whose "second" value
# is "e". Only even-numbered columns are checked for "e".
df_pivot <- df |>
  mutate(id = row_number()) |> # need a row number to unpivot
  pivot_longer(
    cols = c(everything(), -id), # don't pivot id
    # Keep only the digits of each column name ("V3" -> 3). The backslash has
    # to be doubled: "\d" is not a valid escape in an R string literal.
    names_pattern = "(\\d+)",
    names_transform = list(name = as.integer),
    # The columns have mixed types, so every value is wrapped in a list.
    values_transform = as.list
  ) |>
  mutate(
    col_rem = name %% 2,
    col_group = (name + col_rem) / 2, # pair index: (1, 2) -> 1, (3, 4) -> 2
    col_type = ifelse(col_rem == 0, "second", "first")
  ) |>
  select(-col_rem, -name) |>
  pivot_wider(names_from = col_type, values_from = value) |>
  group_by(id) |>
  summarise(
    # One result per matching pair, so a row with several "e" cells yields
    # several output rows.
    new = first[second == "e"]
  )