如何将选定的行转换为 R 中的单列

How to transform selected rows into a single column in R

我有一个需要转换的数据框。我需要根据列的值将唯一行更改为单列。

我的数据如下:

# Example input: one row per (V1 group, V2 item). V3 holds the amounts as
# character strings, so it has to be converted before any arithmetic.
df1 <- data.frame(
  V1 = c("a", "a", "b", "b", "b"),
  V2 = c("product1", "transport", "product1", "product2", "transport"),
  V3 = c("100", "10", "100", "100", "10")
)

> df1
  V1        V2  V3
1  a  product1 100
2  a transport  10
3  b  product1 100
4  b  product2 100
5  b transport  10

我需要进行以下转换:把 V2 为 transport 的行变成单独的一列,并将其 V3 的值除以对应 V1 组中包含的产品数量。

> df2
  V1       V2 transport  V3
1  a product1        10 100
2  b product1         5 100
3  b product2         5 100

这是一种使用 data.table 的方法:转换为 data.table(setDT),并确保 'V3' 为 numeric(除法需要数值类型,而它最初是以字符创建的);按 'V1' 分组,提取 'V2' 为 'transport' 的那一行的 'V3' 值,再除以该组中 'V2' 不是 'transport' 的元素个数,从而创建 'transport' 列;最后去掉 'V2' 为 'transport' 的行,对数据取子集。

library(data.table)
# data.table approach: add the per-product transport share by reference,
# then keep only the product rows.
setDT(df1)
# V3 was created as character; the division below needs a numeric column.
df1[, V3 := as.numeric(V3)]
# Within each V1 group: the transport amount divided by the number of
# non-transport rows.
df1[
  ,
  transport := V3[V2 == "transport"] / sum(V2 != "transport"),
  by = V1
]
df1[V2 != "transport"]
       V1       V2    V3 transport
   <char>   <char> <num>     <num>
1:      a product1   100        10
2:      b product1   100         5
3:      b product2   100         5

另一个选项是使用 dplyr/tidyr:
library(dplyr)
library(tidyr)
# dplyr/tidyr approach: copy each group's transport amount onto its product
# rows, divide it by the number of products, then drop the transport rows.
df1 %>%
  # Turn the character V3 column into a numeric type.
  type.convert(as.is = TRUE) %>%
  group_by(V1) %>%
  # Keep V3 only on transport rows; every other row gets NA.
  mutate(transport = case_when(V2 == "transport" ~ V3)) %>%
  # Spread the transport value over the whole group, whatever the row order.
  fill(transport, .direction = "downup") %>%
  mutate(transport = transport / sum(V2 != "transport")) %>%
  ungroup() %>%
  filter(V2 != "transport")
# A tibble: 3 × 4
  V1    V2          V3 transport
  <chr> <chr>    <int>     <dbl>
1 a     product1   100        10
2 b     product1   100         5
3 b     product2   100         5

这是一个基于 tidyverse 的解决方案,使用 tidyr 和 dplyr:

  1. 将 V3 转换为数值
  2. 先转换为宽格式(pivot_wider),为 transport 创建一个单独的列;再转换为长格式(pivot_longer),将 "product1" 和 "product2" 折叠回一列
  3. 将 transport 除以每个 V1 组内的行数。
library(dplyr)
library(tidyr)

# Reshape so that each group's transport cost becomes its own column, then
# share it equally among the group's products.
df1 %>%
  # V3 is stored as character; arithmetic needs a numeric column.
  mutate(V3 = as.numeric(V3)) %>%
  # One row per V1, one column per distinct V2 value (transport, products).
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Fold every product column back into V2/V3. Selecting by exclusion keeps
  # this working for any product names, not only product1 and product2.
  pivot_longer(
    cols = -c(V1, transport),
    names_to = "V2",
    values_to = "V3",
    # Drop group/product combinations that were absent from the input.
    values_drop_na = TRUE
  ) %>%
  group_by(V1) %>%
  # After the reshape, n() is the number of products left in the group.
  mutate(transport = transport / n()) %>%
  ungroup()

#> # A tibble: 3 x 4
#>   V1    transport V2          V3
#>   <chr>     <dbl> <chr>    <dbl>
#> 1 a            10 product1   100
#> 2 b             5 product1   100
#> 3 b             5 product2   100

由 reprex package (v2.0.1) 创建于 2022-03-17

如果我理解得没错,这里有另一种方法可以得到你想要的输出:

library(dplyr)
library(tidyr)

# Divide each group's transport cost by its number of products first, then
# reshape so that the share sits in its own `transport` column.
df1 %>%
  # V3 is stored as character; convert once before doing arithmetic.
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # n() - 1 counts the product rows; this assumes exactly one transport row
  # per group.
  mutate(V3 = ifelse(V2 == "transport", V3 / (n() - 1), V3)) %>%
  ungroup() %>%
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Select the product columns by exclusion so any product names are handled,
  # not only product1 and product2.
  pivot_longer(-c(V1, transport), names_to = "V2", values_to = "V3") %>%
  # Remove the group/product combinations that were absent from the input.
  drop_na()

# A tibble: 3 x 4
  V1    transport V2          V3
  <chr>     <dbl> <chr>    <dbl>
1 a            10 product1   100
2 b             5 product1   100
3 b             5 product2   100

另一个可能的解决方案:

library(tidyverse)

# Compute the per-product transport share on the transport row, copy it onto
# the group's product rows, then drop the transport rows.
df1 %>%
  # V3 is stored as character; arithmetic needs a numeric column.
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # n() - 1 counts the product rows; this assumes exactly one transport row
  # per group.
  mutate(transport = if_else(V2 == "transport", V3 / (n() - 1), NA_real_)) %>%
  # "downup" fills in both directions, so the result does not depend on
  # whether the transport row comes before or after the product rows.
  fill(transport, .direction = "downup") %>%
  ungroup() %>%
  filter(V2 != "transport")

#> # A tibble: 3 × 4
#>   V1    V2          V3 transport
#>   <chr> <chr>    <dbl>     <dbl>
#> 1 a     product1   100        10
#> 2 b     product1   100         5
#> 3 b     product2   100         5

这里还有一个使用 pivoting(先转为宽格式,再转回长格式)的方法:

library(dplyr)
library(tidyr)

# Pivot transport into its own column, fold the product columns back into
# V2/V3, then share the transport cost among each group's products.
df1 %>%
  pivot_wider(
    names_from = V2,
    values_from = V3
  ) %>%
  # Every column except the group key and transport is a product column.
  pivot_longer(
    -c(V1, transport),
    names_to = "V2",
    values_to = "V3"
  ) %>%
  # V3 and transport are still character here; convert them to numbers.
  type.convert(as.is = TRUE) %>%
  # Drop the group/product combinations that were absent from the input.
  na.omit() %>%
  group_by(V1) %>%
  # n() is the number of product rows remaining in the group.
  mutate(transport = transport / n()) %>%
  # Return an ungrouped tibble so later steps are not silently group-wise.
  ungroup()
  V1    transport V2          V3
  <chr>     <dbl> <chr>    <int>
1 a            10 product1   100
2 b             5 product1   100
3 b             5 product2   100

这是一个 data.table 方法:

#' Share a group's transport cost equally among its products
#'
#' @param p Character vector of row labels for one group. The label
#'   "transport" marks the transport-cost row; every other label is treated
#'   as a product.
#' @param v Numeric vector of amounts, parallel to `p`.
#' @return A list with three elements of equal length (one entry per
#'   product): `V2`, the product labels; `transport`, the transport cost
#'   divided by the number of products (`NA` when the group has no transport
#'   row); and `V3`, the product amounts.
f <- function(p, v) {
  # Match the literal label instead of a "^p" prefix, so products whose names
  # do not start with "p" are still recognised as products. %in% never
  # returns NA, which keeps the mask safe for subsetting.
  is_transport <- p %in% "transport"
  n_products <- sum(!is_transport)
  # A group without a transport row gets NA rather than a zero-length vector,
  # so all three list elements keep the same length. Several transport rows
  # are added up into one cost.
  transport_total <- if (any(is_transport)) sum(v[is_transport]) else NA_real_
  list(
    V2 = p[!is_transport],
    transport = rep(transport_total / n_products, n_products),
    V3 = v[!is_transport]
  )
}
# Run f() once per V1 group; as.numeric() covers V3 being stored as character.
setDT(df1)[, f(V2, as.numeric(V3)), by = V1]

       V1       V2 transport    V3
   <char>   <char>     <num> <num>
1:      a product1        10   100
2:      b product1         5   100
3:      b product2         5   100