根据两个不同的列添加一个列,每个列有多个不同的名称
Add a column based on two different columns, each with multiple different names
我有一个包含阶段编号和机器编号的数据集 - 下面复制了一小部分。然而,实际上,完整的数据集包括 38 个阶段,超过 100 万行。
# Sample data: 25 rows of (stage, machine), written as run lengths.
stage <- rep(c("Stg1", "Stg2", "Stg10"), times = c(12, 10, 3))
machine <- rep(
  c("132H", "212H", "217H", "132H", "212H", "217H", "132H"),
  times = c(5, 5, 2, 2, 5, 3, 3)
)
df <- data.frame(stage = stage, machine = machine)
# Show the first six rows.
head(df)
stage machine
1 Stg1 132H
2 Stg1 132H
3 Stg1 132H
4 Stg1 132H
5 Stg1 132H
6 Stg1 212H
我的目标是创建一个新列,按顺序为分组的阶段和机器分配数字。最终,将产生如下输出的代码:
Stage Machine JobStage
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 217H 3
Stg1 217H 3
Stg2 132H 4
Stg2 132H 4
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 217H 6
Stg2 217H 6
Stg2 217H 6
Stg10 132H 7
Stg10 132H 7
Stg10 132H 7
我知道你可以为每个阶段和每台机器做这样的事情,但它很耗时,尤其是对于大型数据集:
# Manual approach: one logical-index assignment per (stage, machine) pair.
df$JobStage[df$stage == "Stg1" & df$machine == "132H"] <- 1
df$JobStage[df$stage == "Stg1" & df$machine == "212H"] <- 2
# Placeholder for the remaining pairs (not runnable as written).
...
我试图将 dplyr 与 group_by() 和 mutate() 一起使用,但我不确定如何正确捕获不同的阶段和机器并为其分配编号。我知道 unique() 不适用于字符值,但代码可能是这样的:
# Non-working sketch from the question: `&` is a logical operator and cannot
# combine the character vectors returned by unique(), so this call errors.
df %>% group_by(stage, machine) %>% mutate(JobStage = unique(stage) & unique(machine))
如有任何帮助,我们将不胜感激。谢谢。
一个选项:
library(tidyverse)
# Number each distinct (stage, machine) pair with an integer id.
# factor() assigns codes by the sorted order of its levels, so the ids follow
# sorted key order, not the order in which pairs first appear in df.
df %>%
  mutate(
    # sep = "_" keeps the pasted keys unambiguous: without a separator, pairs
    # such as ("Stg1", "132H") and ("Stg11", "32H") paste to the same string
    # and would silently share one id.
    JobStage = as.integer(factor(str_c(stage, machine, sep = "_")))
  )
# stage machine JobStage
# 1 Stg1 132H 1
# 2 Stg1 132H 1
# 3 Stg1 132H 1
# 4 Stg1 132H 1
# 5 Stg1 132H 1
# 6 Stg1 212H 2
# 7 Stg1 212H 2
# 8 Stg1 212H 2
# 9 Stg1 212H 2
# 10 Stg1 212H 2
# 11 Stg1 217H 3
# 12 Stg1 217H 3
# 13 Stg2 132H 4
# 14 Stg2 132H 4
# 15 Stg2 212H 5
# 16 Stg2 212H 5
# 17 Stg2 212H 5
# 18 Stg2 212H 5
# 19 Stg2 212H 5
# 20 Stg2 217H 6
# 21 Stg2 217H 6
# 22 Stg2 217H 6
编辑:如果您想保留各组合首次出现的顺序:
# Number each distinct (stage, machine) pair in order of first appearance:
# fct_inorder() orders the levels by when each key first occurs in the data.
df %>%
  mutate(
    # sep = "_" keeps the pasted keys unambiguous: without a separator, pairs
    # such as ("Stg1", "132H") and ("Stg11", "32H") paste to the same string
    # and would silently share one id.
    JobStage = as.integer(fct_inorder(str_c(stage, machine, sep = "_")))
  )
# stage machine JobStage
# 1 Stg1 132H 1
# 2 Stg1 132H 1
# 3 Stg1 132H 1
# 4 Stg1 132H 1
# 5 Stg1 132H 1
# 6 Stg1 212H 2
# 7 Stg1 212H 2
# 8 Stg1 212H 2
# 9 Stg1 212H 2
# 10 Stg1 212H 2
# 11 Stg1 217H 3
# 12 Stg1 217H 3
# 13 Stg2 132H 4
# 14 Stg2 132H 4
# 15 Stg2 212H 5
# 16 Stg2 212H 5
# 17 Stg2 212H 5
# 18 Stg2 212H 5
# 19 Stg2 212H 5
# 20 Stg2 217H 6
# 21 Stg2 217H 6
# 22 Stg2 217H 6
# 23 Stg10 132H 7
# 24 Stg10 132H 7
# 25 Stg10 132H 7
这是一个使用 `unite` 和 `match` 的选项:先用 `unite` 将 'stage' 和 'machine' 两列粘贴成一个新列,再把该列的每个元素与其 `unique` 值进行 `match`,从而得到索引
library(dplyr)
library(tidyr)
# Paste stage and machine into a key column (keeping both inputs), then
# replace each key with the position of its first occurrence among the
# distinct keys, giving ids in order of first appearance.
df %>%
  unite("JobStage", stage, machine, sep = "_", remove = FALSE) %>%
  mutate(JobStage = match(JobStage, JobStage[!duplicated(JobStage)]))
-输出
JobStage stage machine
1 1 Stg1 132H
2 1 Stg1 132H
3 1 Stg1 132H
4 1 Stg1 132H
5 1 Stg1 132H
6 2 Stg1 212H
7 2 Stg1 212H
8 2 Stg1 212H
9 2 Stg1 212H
10 2 Stg1 212H
11 3 Stg1 217H
12 3 Stg1 217H
13 4 Stg2 132H
14 4 Stg2 132H
15 5 Stg2 212H
16 5 Stg2 212H
17 5 Stg2 212H
18 5 Stg2 212H
19 5 Stg2 212H
20 6 Stg2 217H
21 6 Stg2 217H
22 6 Stg2 217H
我有一个包含阶段编号和机器编号的数据集 - 下面复制了一小部分。然而,实际上,完整的数据集包括 38 个阶段,超过 100 万行。
# Sample data: 25 rows of (stage, machine), written as run lengths.
stage <- rep(c("Stg1", "Stg2", "Stg10"), times = c(12, 10, 3))
machine <- rep(
  c("132H", "212H", "217H", "132H", "212H", "217H", "132H"),
  times = c(5, 5, 2, 2, 5, 3, 3)
)
df <- data.frame(stage = stage, machine = machine)
# Show the first six rows.
head(df)
stage machine
1 Stg1 132H
2 Stg1 132H
3 Stg1 132H
4 Stg1 132H
5 Stg1 132H
6 Stg1 212H
我的目标是创建一个新列,按顺序为分组的阶段和机器分配数字。最终,将产生如下输出的代码:
Stage Machine JobStage
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 132H 1
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 212H 2
Stg1 217H 3
Stg1 217H 3
Stg2 132H 4
Stg2 132H 4
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 212H 5
Stg2 217H 6
Stg2 217H 6
Stg2 217H 6
Stg10 132H 7
Stg10 132H 7
Stg10 132H 7
我知道你可以为每个阶段和每台机器做这样的事情,但它很耗时,尤其是对于大型数据集:
# Manual approach: one logical-index assignment per (stage, machine) pair.
df$JobStage[df$stage == "Stg1" & df$machine == "132H"] <- 1
df$JobStage[df$stage == "Stg1" & df$machine == "212H"] <- 2
# Placeholder for the remaining pairs (not runnable as written).
...
我试图将 dplyr 与 group_by() 和 mutate() 一起使用,但我不确定如何正确捕获不同的阶段和机器并为其分配编号。我知道 unique() 不适用于字符值,但代码可能是这样的:
# Non-working sketch from the question: `&` is a logical operator and cannot
# combine the character vectors returned by unique(), so this call errors.
df %>% group_by(stage, machine) %>% mutate(JobStage = unique(stage) & unique(machine))
如有任何帮助,我们将不胜感激。谢谢。
一个选项:
library(tidyverse)
# Number each distinct (stage, machine) pair with an integer id.
# factor() assigns codes by the sorted order of its levels, so the ids follow
# sorted key order, not the order in which pairs first appear in df.
df %>%
  mutate(
    # sep = "_" keeps the pasted keys unambiguous: without a separator, pairs
    # such as ("Stg1", "132H") and ("Stg11", "32H") paste to the same string
    # and would silently share one id.
    JobStage = as.integer(factor(str_c(stage, machine, sep = "_")))
  )
# stage machine JobStage
# 1 Stg1 132H 1
# 2 Stg1 132H 1
# 3 Stg1 132H 1
# 4 Stg1 132H 1
# 5 Stg1 132H 1
# 6 Stg1 212H 2
# 7 Stg1 212H 2
# 8 Stg1 212H 2
# 9 Stg1 212H 2
# 10 Stg1 212H 2
# 11 Stg1 217H 3
# 12 Stg1 217H 3
# 13 Stg2 132H 4
# 14 Stg2 132H 4
# 15 Stg2 212H 5
# 16 Stg2 212H 5
# 17 Stg2 212H 5
# 18 Stg2 212H 5
# 19 Stg2 212H 5
# 20 Stg2 217H 6
# 21 Stg2 217H 6
# 22 Stg2 217H 6
编辑:如果您想保留各组合首次出现的顺序:
# Number each distinct (stage, machine) pair in order of first appearance:
# fct_inorder() orders the levels by when each key first occurs in the data.
df %>%
  mutate(
    # sep = "_" keeps the pasted keys unambiguous: without a separator, pairs
    # such as ("Stg1", "132H") and ("Stg11", "32H") paste to the same string
    # and would silently share one id.
    JobStage = as.integer(fct_inorder(str_c(stage, machine, sep = "_")))
  )
# stage machine JobStage
# 1 Stg1 132H 1
# 2 Stg1 132H 1
# 3 Stg1 132H 1
# 4 Stg1 132H 1
# 5 Stg1 132H 1
# 6 Stg1 212H 2
# 7 Stg1 212H 2
# 8 Stg1 212H 2
# 9 Stg1 212H 2
# 10 Stg1 212H 2
# 11 Stg1 217H 3
# 12 Stg1 217H 3
# 13 Stg2 132H 4
# 14 Stg2 132H 4
# 15 Stg2 212H 5
# 16 Stg2 212H 5
# 17 Stg2 212H 5
# 18 Stg2 212H 5
# 19 Stg2 212H 5
# 20 Stg2 217H 6
# 21 Stg2 217H 6
# 22 Stg2 217H 6
# 23 Stg10 132H 7
# 24 Stg10 132H 7
# 25 Stg10 132H 7
这是一个使用 `unite` 和 `match` 的选项:先用 `unite` 将 'stage' 和 'machine' 两列粘贴成一个新列,再把该列的每个元素与其 `unique` 值进行 `match`,从而得到索引
library(dplyr)
library(tidyr)
# Paste stage and machine into a key column (keeping both inputs), then
# replace each key with the position of its first occurrence among the
# distinct keys, giving ids in order of first appearance.
df %>%
  unite("JobStage", stage, machine, sep = "_", remove = FALSE) %>%
  mutate(JobStage = match(JobStage, JobStage[!duplicated(JobStage)]))
-输出
JobStage stage machine
1 1 Stg1 132H
2 1 Stg1 132H
3 1 Stg1 132H
4 1 Stg1 132H
5 1 Stg1 132H
6 2 Stg1 212H
7 2 Stg1 212H
8 2 Stg1 212H
9 2 Stg1 212H
10 2 Stg1 212H
11 3 Stg1 217H
12 3 Stg1 217H
13 4 Stg2 132H
14 4 Stg2 132H
15 5 Stg2 212H
16 5 Stg2 212H
17 5 Stg2 212H
18 5 Stg2 212H
19 5 Stg2 212H
20 6 Stg2 217H
21 6 Stg2 217H
22 6 Stg2 217H