在 R 中根据某一列的值获取其前一列的数据

Use a column's data to get the data of the column before it in R

我到处都找不到相关的解答。我希望创建一个新列,其值取自每一行中包含“end”的那一列的前一列。

我什至可能解释得不够好。

例如:

df =

V1     V2  V3     V4     V5   V6
 0  start   1    end  ended    0
 3    end   0  start      5    0
 2  start   3   next      6  end

我希望新列的值是每一行中“end”所在列的前一列中的数字。

V1     V2  V3     V4     V5   V6  end_num
 0  start   1    end  ended    0        1
 3    end   0  start      5    0        3
 2  start   3   next      6  end        6

一个 base 解决方案,使用 max.col() 找到每一行的 "end" 的位置:

# For every row, find the first column whose value is "end" and copy the value
# stored one column to its left into `end_num`.
# Rows with no "end", or with "end" in the first column, get NA.
is_end <- df == "end"
is_end[is.na(is_end)] <- FALSE # treat missing cells as "not end"

# max.col() breaks ties at random by default; "first" makes the choice
# deterministic when a row holds several "end" values (or none at all).
row_idx <- seq_len(nrow(df))
end_pos <- max.col(is_end, ties.method = "first")
end_pos[!is_end[cbind(row_idx, end_pos)] | end_pos == 1L] <- NA

# Convert column by column: as.matrix() on a mixed-type data frame runs
# format() on numeric columns, which can pad values to a common width.
cells <- do.call(cbind, lapply(df, as.character))
df$end_num <- cells[cbind(row_idx, end_pos - 1L)]

df
#   V1    V2 V3    V4    V5  V6 end_num
# 1  0 start  1   end ended   0       1
# 2  3   end  0 start     5   0       3
# 3  2 start  3  next     6 end       6

数据
# Example data from the question: V1 and V3 are integer, the remaining
# columns are character (V5 and V6 hold digits stored as text).
df <- data.frame(
  V1 = c(0L, 3L, 2L),
  V2 = c("start", "end", "start"),
  V3 = c(1L, 0L, 3L),
  V4 = c("end", "start", "next"),
  V5 = c("ended", "5", "6"),
  V6 = c("0", "0", "end"),
  stringsAsFactors = FALSE
)

base R中的可能解决方案:

# For each row, look up the first cell equal to "end" and return the value in
# the column immediately before it (NA when there is no such cell, or when
# "end" sits in the first column).
# Rows are walked by index rather than with apply(): apply() first converts
# the data frame with as.matrix(), which format()s numeric columns and can pad
# their values with spaces. vapply() also guarantees one string per row, so
# `end_num` is always a plain character vector.
df$end_num <- vapply(
  seq_len(nrow(df)),
  \(i) {
    cells <- vapply(df[i, , drop = FALSE], as.character, character(1))
    pos <- match("end", cells)
    if (is.na(pos) || pos == 1L) NA_character_ else cells[[pos - 1L]]
  },
  character(1)
)
df

#>   V1    V2 V3    V4    V5  V6 end_num
#> 1  0 start  1   end ended   0       1
#> 2  3   end  0 start     5   0       3
#> 3  2 start  3  next     6 end       6

这是一种费力的 tidyverse 解决方案::-)

library(dplyr)
library(tidyr)
library(readr)

# Step 1: flag each "end" cell with the name of its column (NA elsewhere).
# Step 2: collapse the flags into one string per row.
# Step 3: turn that into the name of the column one position to the left.
# Step 4: read that cell from the original `df`.
df %>%
  mutate(
    across(
      everything(),
      ~ case_when(. == "end" ~ cur_column()),
      .names = "new_{col}"
    )
  ) %>%
  unite(New_Col, starts_with("new"), na.rm = TRUE, sep = " ") %>%
  mutate(New_Col = paste0("V", parse_number(New_Col) - 1)) %>%
  mutate(
    # map2_chr() must be handed strings: purrr >= 1.0.0 deprecates silently
    # turning the integer cells of V1/V3 into character.
    end_num = purrr::map2_chr(
      row_number(), New_Col,
      \(i, col) as.character(df[[col]][[i]])
    ),
    .keep = "unused"
  )
  V1    V2 V3    V4    V5  V6 end_num
1  0 start  1   end ended   0       1
2  3   end  0 start     5   0       3
3  2 start  3  next     6 end       6

Tidyverse 解决方案:将您的数据框转换为长格式,把相邻的两列归为一组并标记组内的第一列和第二列,再转换回宽格式,最后按行汇总。

此方法的一个优点是您可以识别是否有任何行具有多个“end”实例。

# Abbreviated sample data. Mixing numbers and strings inside c() coerces the
# whole vector to character, so V5 and V6 are spelled out as strings here.
df <- data.frame(
  V1 = c(0, 3, 2),
  V2 = c("s", "e", "s"),
  V3 = c(1, 0, 3),
  V4 = c("e", "s", "n"),
  V5 = c("en", "5", "6"),
  V6 = c("0", "0", "e"),
  stringsAsFactors = FALSE
)

library(dplyr)
library(tidyr)

# Pair the columns up as (V1, V2), (V3, V4), ... and, for each original row,
# return the value of the first column of the pair whose second column is "e".
df_pivot <- df |>
  mutate(id = row_number()) |> # row id, so rows can be told apart once long
  pivot_longer(
    cols = -id, # pivot every column except id
    # The backslash has to be doubled inside an R string literal; a bare "\d"
    # is rejected by the parser as an unrecognized escape.
    names_pattern = "(\\d+)",
    names_transform = list(name = as.integer),
    values_transform = as.list # columns mix numbers and text; keep as list
  ) |>
  mutate(
    col_rem = name %% 2,
    col_group = (name + col_rem) / 2, # (V1, V2) -> 1, (V3, V4) -> 2, ...
    col_type = ifelse(col_rem == 0, "second", "first")
  ) |>
  select(-col_rem, -name) |>
  pivot_wider(names_from = col_type, values_from = value) |>
  group_by(id) |>
  summarise(
    # One result per "e" found in a "second" column, so an id with several
    # matches yields several values.
    new = first[second == "e"]
  )