如何将选定的行转换为 R 中的单列
How to transform selected rows into a single column in R
我有一个需要转换的数据框。我需要根据列的值将唯一行更改为单列。
我的数据如下:
# Example input in long format: V1 is the group, V2 the item label, and V3
# the amount, stored here as character strings.
df1 <- data.frame(
  V1 = c("a", "a", "b", "b", "b"),
  V2 = c("product1", "transport", "product1", "product2", "transport"),
  V3 = c("100", "10", "100", "100", "10")
)
> df1
V1 V2 V3
1 a product1 100
2 a transport 10
3 b product1 100
4 b product2 100
5 b transport 10
我需要进行以下转换:把 transport 行变成单独的一列,并将其 V3 值除以每个 V1 组中包含的产品数量。
> df2
V1 V2 transport V3
1 a product1 10 100
2 b product1 5 100
3 b product2 5 100
这是一种使用 `data.table` 的方法:转换为 data.table(`setDT`),确保 'V3' 为 numeric(除法需要数值,而它在创建时是字符型);按 'V1' 分组,提取 'V2' 为 'transport' 的那一行的 'V3' 值,并除以该组中 'V2' 不是 'transport' 的元素个数,以此创建 'transport' 列;最后去掉 'V2' 为 'transport' 的行,对数据取子集。
library(data.table)

# V3 was created as character; division needs numbers.
df1$V3 <- as.numeric(df1$V3)

# Convert to a data.table by reference.
setDT(df1)

# Per V1 group: take the transport row's V3 and share it evenly across the
# group's non-transport rows. `:=` adds the column in place.
df1[, transport := V3[V2 == "transport"] / sum(V2 != "transport"), by = V1]

# Show only the product rows.
df1[V2 != "transport"]
V1 V2 V3 transport
<char> <char> <num> <num>
1: a product1 100 10
2: b product1 100 5
3: b product2 100 5
或者使用 dplyr/tidyr 的另一个选项:
library(dplyr)
library(tidyr)

df1 %>%
  # Parse the character columns so V3 can be used in arithmetic.
  type.convert(as.is = TRUE) %>%
  # Keep V3 only on the transport rows; case_when() leaves the rest NA.
  mutate(transport = case_when(V2 == "transport" ~ V3)) %>%
  group_by(V1) %>%
  # Copy each group's transport value onto every row of that group.
  fill(transport, .direction = "downup") %>%
  # Share it evenly across the group's non-transport rows.
  mutate(transport = transport / sum(V2 != "transport")) %>%
  ungroup() %>%
  # The transport rows themselves are no longer needed.
  filter(V2 != "transport")
# A tibble: 3 × 4
V1 V2 V3 transport
<chr> <chr> <int> <dbl>
1 a product1 100 10
2 b product1 100 5
3 b product2 100 5
这是一个基于 tidyverse 的解决方案,使用 tidyr 和 dplyr:
- 将 V3 转换为数值
- 先转换为宽格式(pivot wider),为 transport 创建一个单独的列,然后再转换为长格式(pivot longer),将 "product1" 和 "product2" 折叠回一列
- 将 transport 除以每个 V1 组中的行数。
library(dplyr)
library(tidyr)

df1 %>%
  # V3 is stored as text; make it numeric so it can be divided.
  mutate(V3 = as.numeric(V3)) %>%
  # One column per V2 value, so transport sits beside the product columns.
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Stack the product columns back into V2/V3, dropping the empty cells.
  pivot_longer(
    c(product1, product2),
    values_to = "V3",
    names_to = "V2",
    values_drop_na = TRUE
  ) %>%
  group_by(V1) %>%
  # The rows left in each group are its products, so n() is the divisor.
  mutate(transport = transport / n()) %>%
  ungroup()
#> # A tibble: 3 x 4
#> V1 transport V2 V3
#> <chr> <dbl> <chr> <dbl>
#> 1 a 10 product1 100
#> 2 b 5 product1 100
#> 3 b 5 product2 100
由 reprex package (v2.0.1)
创建于 2022-03-17
如果我理解正确,这里有另一种方法可以得到你想要的输出:
library(dplyr)
library(tidyr)

df1 %>%
  # Convert once up front instead of inside both branches of ifelse().
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # On the transport row, divide by the number of other rows in the group.
  mutate(V3 = ifelse(V2 == "transport", V3 / (n() - 1), V3)) %>%
  ungroup() %>%
  # Move transport into its own column, then stack the products back up.
  pivot_wider(names_from = V2, values_from = V3) %>%
  pivot_longer(
    c(product1, product2),
    names_to = "V2",
    values_to = "V3"
  ) %>%
  # Remove the product slots that a group does not have.
  drop_na()
# A tibble: 3 x 4
V1 transport V2 V3
<chr> <dbl> <chr> <dbl>
1 a 10 product1 100
2 b 5 product1 100
3 b 5 product2 100
另一个可能的解决方案:
library(tidyverse)

df1 %>%
  # V3 is stored as text; make it numeric so it can be divided.
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # Compute the per-product share on the transport row only. Counting the
  # non-transport rows explicitly (instead of n() - 1) stays correct even if
  # a group does not have exactly one transport row.
  mutate(
    transport = if_else(
      V2 == "transport",
      V3 / sum(V2 != "transport"),
      NA_real_
    )
  ) %>%
  # Fill in both directions so the result does not depend on the transport
  # row being the last row of its group.
  fill(transport, .direction = "updown") %>%
  ungroup() %>%
  filter(V2 != "transport")
#> # A tibble: 3 × 4
#> V1 V2 V3 transport
#> <chr> <chr> <dbl> <dbl>
#> 1 a product1 100 10
#> 2 b product1 100 5
#> 3 b product2 100 5
这里还有一个使用 pivoting(数据透视)的方法:
library(dplyr)
library(tidyr)

df1 %>%
  # One column per V2 value, so transport sits beside the product columns.
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Stack every column except the group key and transport back into V2/V3.
  pivot_longer(-c(V1, transport), names_to = "V2", values_to = "V3") %>%
  # The values are still character at this point; parse them as numbers.
  type.convert(as.is = TRUE) %>%
  # Drop the product slots that a group does not have.
  na.omit() %>%
  group_by(V1) %>%
  # n() is the group size, i.e. the number of products left in the group.
  mutate(transport = transport / n()) %>%
  # Return an ungrouped tibble so later steps are not silently grouped.
  ungroup()
V1 transport V2 V3
<chr> <dbl> <chr> <int>
1 a 10 product1 100
2 b 5 product1 100
3 b 5 product2 100
这是一个 data.table 方法:
# Reshape the rows of one V1 group; meant for a grouped data.table `j` call.
#
# p: character vector of V2 labels. Rows labelled "transport" carry the
#    shared cost; every other row is treated as a product.
# v: numeric vector of V3 values, parallel to `p`.
#
# Returns a list with three elements: V2 (product labels), transport (the
# transport value divided by the number of products, repeated once per
# product) and V3 (product values).
f <- function(p, v) {
  # Match "transport" exactly instead of relying on product names starting
  # with "p", so products with any other name are still counted as products.
  is_product <- !(p %in% "transport")
  n_products <- sum(is_product)
  list(
    V2 = p[is_product],
    transport = rep(v[!is_product] / n_products, n_products),
    V3 = v[is_product]
  )
}
# Apply f() to each V1 group, passing V3 as numeric.
setDT(df1)[, f(V2, as.numeric(V3)), by = V1]
V1 V2 transport V3
<char> <char> <num> <num>
1: a product1 10 100
2: b product1 5 100
3: b product2 5 100
我有一个需要转换的数据框。我需要根据列的值将唯一行更改为单列。
我的数据如下:
# Example input in long format: V1 is the group, V2 the item label, and V3
# the amount, stored here as character strings.
df1 <- data.frame(
  V1 = c("a", "a", "b", "b", "b"),
  V2 = c("product1", "transport", "product1", "product2", "transport"),
  V3 = c("100", "10", "100", "100", "10")
)
> df1
V1 V2 V3
1 a product1 100
2 a transport 10
3 b product1 100
4 b product2 100
5 b transport 10
我需要进行以下转换:把 transport 行变成单独的一列,并将其 V3 值除以每个 V1 组中包含的产品数量。
> df2
V1 V2 transport V3
1 a product1 10 100
2 b product1 5 100
3 b product2 5 100
这是一种使用 `data.table` 的方法:转换为 data.table(`setDT`),确保 'V3' 为 numeric(除法需要数值,而它在创建时是字符型);按 'V1' 分组,提取 'V2' 为 'transport' 的那一行的 'V3' 值,并除以该组中 'V2' 不是 'transport' 的元素个数,以此创建 'transport' 列;最后去掉 'V2' 为 'transport' 的行,对数据取子集。
library(data.table)

# V3 was created as character; division needs numbers.
df1$V3 <- as.numeric(df1$V3)

# Convert to a data.table by reference.
setDT(df1)

# Per V1 group: take the transport row's V3 and share it evenly across the
# group's non-transport rows. `:=` adds the column in place.
df1[, transport := V3[V2 == "transport"] / sum(V2 != "transport"), by = V1]

# Show only the product rows.
df1[V2 != "transport"]
V1 V2 V3 transport
<char> <char> <num> <num>
1: a product1 100 10
2: b product1 100 5
3: b product2 100 5
或者使用 dplyr/tidyr 的另一个选项:
library(dplyr)
library(tidyr)

df1 %>%
  # Parse the character columns so V3 can be used in arithmetic.
  type.convert(as.is = TRUE) %>%
  # Keep V3 only on the transport rows; case_when() leaves the rest NA.
  mutate(transport = case_when(V2 == "transport" ~ V3)) %>%
  group_by(V1) %>%
  # Copy each group's transport value onto every row of that group.
  fill(transport, .direction = "downup") %>%
  # Share it evenly across the group's non-transport rows.
  mutate(transport = transport / sum(V2 != "transport")) %>%
  ungroup() %>%
  # The transport rows themselves are no longer needed.
  filter(V2 != "transport")
# A tibble: 3 × 4
V1 V2 V3 transport
<chr> <chr> <int> <dbl>
1 a product1 100 10
2 b product1 100 5
3 b product2 100 5
这是一个基于 tidyverse 的解决方案,使用 tidyr 和 dplyr:
- 将 V3 转换为数值
- 先转换为宽格式(pivot wider),为 transport 创建一个单独的列,然后再转换为长格式(pivot longer),将 "product1" 和 "product2" 折叠回一列
- 将 transport 除以每个 V1 组中的行数。
library(dplyr)
library(tidyr)

df1 %>%
  # V3 is stored as text; make it numeric so it can be divided.
  mutate(V3 = as.numeric(V3)) %>%
  # One column per V2 value, so transport sits beside the product columns.
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Stack the product columns back into V2/V3, dropping the empty cells.
  pivot_longer(
    c(product1, product2),
    values_to = "V3",
    names_to = "V2",
    values_drop_na = TRUE
  ) %>%
  group_by(V1) %>%
  # The rows left in each group are its products, so n() is the divisor.
  mutate(transport = transport / n()) %>%
  ungroup()
#> # A tibble: 3 x 4
#> V1 transport V2 V3
#> <chr> <dbl> <chr> <dbl>
#> 1 a 10 product1 100
#> 2 b 5 product1 100
#> 3 b 5 product2 100
由 reprex package (v2.0.1) 创建于 2022-03-17
如果我理解正确,这里有另一种方法可以得到你想要的输出:
library(dplyr)
library(tidyr)

df1 %>%
  # Convert once up front instead of inside both branches of ifelse().
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # On the transport row, divide by the number of other rows in the group.
  mutate(V3 = ifelse(V2 == "transport", V3 / (n() - 1), V3)) %>%
  ungroup() %>%
  # Move transport into its own column, then stack the products back up.
  pivot_wider(names_from = V2, values_from = V3) %>%
  pivot_longer(
    c(product1, product2),
    names_to = "V2",
    values_to = "V3"
  ) %>%
  # Remove the product slots that a group does not have.
  drop_na()
# A tibble: 3 x 4
V1 transport V2 V3
<chr> <dbl> <chr> <dbl>
1 a 10 product1 100
2 b 5 product1 100
3 b 5 product2 100
另一个可能的解决方案:
library(tidyverse)

df1 %>%
  # V3 is stored as text; make it numeric so it can be divided.
  mutate(V3 = as.numeric(V3)) %>%
  group_by(V1) %>%
  # Compute the per-product share on the transport row only. Counting the
  # non-transport rows explicitly (instead of n() - 1) stays correct even if
  # a group does not have exactly one transport row.
  mutate(
    transport = if_else(
      V2 == "transport",
      V3 / sum(V2 != "transport"),
      NA_real_
    )
  ) %>%
  # Fill in both directions so the result does not depend on the transport
  # row being the last row of its group.
  fill(transport, .direction = "updown") %>%
  ungroup() %>%
  filter(V2 != "transport")
#> # A tibble: 3 × 4
#> V1 V2 V3 transport
#> <chr> <chr> <dbl> <dbl>
#> 1 a product1 100 10
#> 2 b product1 100 5
#> 3 b product2 100 5
这里还有一个使用 pivoting(数据透视)的方法:
library(dplyr)
library(tidyr)

df1 %>%
  # One column per V2 value, so transport sits beside the product columns.
  pivot_wider(names_from = V2, values_from = V3) %>%
  # Stack every column except the group key and transport back into V2/V3.
  pivot_longer(-c(V1, transport), names_to = "V2", values_to = "V3") %>%
  # The values are still character at this point; parse them as numbers.
  type.convert(as.is = TRUE) %>%
  # Drop the product slots that a group does not have.
  na.omit() %>%
  group_by(V1) %>%
  # n() is the group size, i.e. the number of products left in the group.
  mutate(transport = transport / n()) %>%
  # Return an ungrouped tibble so later steps are not silently grouped.
  ungroup()
V1 transport V2 V3
<chr> <dbl> <chr> <int>
1 a 10 product1 100
2 b 5 product1 100
3 b 5 product2 100
这是一个 data.table 方法:
# Reshape the rows of one V1 group; meant for a grouped data.table `j` call.
#
# p: character vector of V2 labels. Rows labelled "transport" carry the
#    shared cost; every other row is treated as a product.
# v: numeric vector of V3 values, parallel to `p`.
#
# Returns a list with three elements: V2 (product labels), transport (the
# transport value divided by the number of products, repeated once per
# product) and V3 (product values).
f <- function(p, v) {
  # Match "transport" exactly instead of relying on product names starting
  # with "p", so products with any other name are still counted as products.
  is_product <- !(p %in% "transport")
  n_products <- sum(is_product)
  list(
    V2 = p[is_product],
    transport = rep(v[!is_product] / n_products, n_products),
    V3 = v[is_product]
  )
}
# Apply f() to each V1 group, passing V3 as numeric.
setDT(df1)[, f(V2, as.numeric(V3)), by = V1]
V1 V2 transport V3
<char> <char> <num> <num>
1: a product1 10 100
2: b product1 5 100
3: b product2 5 100