根据两个不同的列添加一个列,每个列有多个不同的名称

Add a column based on two different columns, each with multiple different names

我有一个包含阶段编号和机器编号的数据集 - 下面复制了一小部分。然而,实际上,完整的数据集包括 38 个阶段,超过 100 万行。

# Example data: 25 rows, built from run lengths instead of spelling out
# every element. Stages occur as three contiguous runs.
stage <- rep(c("Stg1", "Stg2", "Stg10"), times = c(12, 10, 3))
# Machines occur as contiguous runs inside each stage.
machine <- rep(
  c("132H", "212H", "217H", "132H", "212H", "217H", "132H"),
  times = c(5, 5, 2, 2, 5, 3, 3)
)

df <- data.frame(stage, machine)

head(df)
  stage machine
1  Stg1    132H
2  Stg1    132H
3  Stg1    132H
4  Stg1    132H
5  Stg1    132H
6  Stg1    212H

我的目标是创建一个新列,按顺序为分组的阶段和机器分配数字。最终,将产生如下输出的代码:

Stage   Machine JobStage
Stg1    132H    1
Stg1    132H    1
Stg1    132H    1
Stg1    132H    1
Stg1    132H    1
Stg1    212H    2
Stg1    212H    2
Stg1    212H    2
Stg1    212H    2
Stg1    212H    2
Stg1    217H    3
Stg1    217H    3
Stg2    132H    4
Stg2    132H    4
Stg2    212H    5
Stg2    212H    5
Stg2    212H    5
Stg2    212H    5
Stg2    212H    5
Stg2    217H    6
Stg2    217H    6
Stg2    217H    6
Stg10   132H    7
Stg10   132H    7
Stg10   132H    7

我知道你可以为每个阶段和每台机器做这样的事情,但它很耗时,尤其是对于大型数据集:

# Manual approach: one assignment per stage/machine combination.
df$JobStage[df$stage == "Stg1" & df$machine == "132H"] <- 1
df$JobStage[df$stage == "Stg1" & df$machine == "212H"] <- 2
...

我试图将 dplyr 与 group_by() 和 mutate() 一起使用,但我不确定如何正确捕获不同的阶段和机器并为其分配编号。我知道 unique() 不适用于字符值,但代码可能是这样的:

# Attempted approach. NOTE(review): `&` is a logical operator and is not
# defined for character vectors, so this does not produce group numbers.
df %>% group_by(stage, machine) %>% mutate(JobStage = unique(stage) & unique(machine))

如有任何帮助,我们将不胜感激。谢谢。

一个选项:

library(tidyverse)
# Number each distinct stage/machine pair.
# The two columns are pasted with an explicit separator so that different
# pairs cannot collapse into the same key (without one, "Stg1" + "0132H" and
# "Stg10" + "132H" would both become "Stg10132H"). Use a separator that does
# not occur in either column.
# factor() sorts its levels, so the ids follow the sorted order of the pasted
# keys, not the order in which the pairs first appear in the data.
df %>%
  mutate(stag_mach = str_c(stage, machine, sep = "_"),
         JobStage = as.integer(factor(stag_mach))) %>%
  select(-stag_mach)
#    stage machine JobStage
# 1   Stg1    132H        1
# 2   Stg1    132H        1
# 3   Stg1    132H        1
# 4   Stg1    132H        1
# 5   Stg1    132H        1
# 6   Stg1    212H        2
# 7   Stg1    212H        2
# 8   Stg1    212H        2
# 9   Stg1    212H        2
# 10  Stg1    212H        2
# 11  Stg1    217H        3
# 12  Stg1    217H        3
# 13  Stg2    132H        4
# 14  Stg2    132H        4
# 15  Stg2    212H        5
# 16  Stg2    212H        5
# 17  Stg2    212H        5
# 18  Stg2    212H        5
# 19  Stg2    212H        5
# 20  Stg2    217H        6
# 21  Stg2    217H        6
# 22  Stg2    217H        6

编辑:如果您想保留原有的顺序(按各组合首次出现的先后编号):

# Number each distinct stage/machine pair in order of first appearance.
# The two columns are pasted with an explicit separator so that different
# pairs cannot collapse into the same key (without one, "Stg1" + "0132H" and
# "Stg10" + "132H" would both become "Stg10132H"). Use a separator that does
# not occur in either column.
# fct_inorder() orders the levels by first appearance, so the integer codes
# count the pairs in the order they occur in the rows.
df %>%
  mutate(stag_mach = str_c(stage, machine, sep = "_"),
         JobStage = as.integer(fct_inorder(stag_mach))) %>%
  select(-stag_mach)
#    stage machine JobStage
# 1   Stg1    132H        1
# 2   Stg1    132H        1
# 3   Stg1    132H        1
# 4   Stg1    132H        1
# 5   Stg1    132H        1
# 6   Stg1    212H        2
# 7   Stg1    212H        2
# 8   Stg1    212H        2
# 9   Stg1    212H        2
# 10  Stg1    212H        2
# 11  Stg1    217H        3
# 12  Stg1    217H        3
# 13  Stg2    132H        4
# 14  Stg2    132H        4
# 15  Stg2    212H        5
# 16  Stg2    212H        5
# 17  Stg2    212H        5
# 18  Stg2    212H        5
# 19  Stg2    212H        5
# 20  Stg2    217H        6
# 21  Stg2    217H        6
# 22  Stg2    217H        6
# 23 Stg10    132H        7
# 24 Stg10    132H        7
# 25 Stg10    132H        7

这是一个使用 unite 和 match 的选项:先用 unite 将 'stage'、'machine' 两列粘贴成一个新列,再用 match 将该列与其 unique 值进行匹配,从而得到索引

library(dplyr)
library(tidyr)
# Paste stage and machine into a key column named JobStage while keeping the
# original columns (remove = FALSE), then replace every key by the position
# of its first occurrence among the distinct keys.
df %>%
  unite(col = "JobStage", stage, machine, sep = "_", remove = FALSE) %>%
  mutate(JobStage = match(JobStage, JobStage[!duplicated(JobStage)]))

输出:

    JobStage stage machine
1         1  Stg1    132H
2         1  Stg1    132H
3         1  Stg1    132H
4         1  Stg1    132H
5         1  Stg1    132H
6         2  Stg1    212H
7         2  Stg1    212H
8         2  Stg1    212H
9         2  Stg1    212H
10        2  Stg1    212H
11        3  Stg1    217H
12        3  Stg1    217H
13        4  Stg2    132H
14        4  Stg2    132H
15        5  Stg2    212H
16        5  Stg2    212H
17        5  Stg2    212H
18        5  Stg2    212H
19        5  Stg2    212H
20        6  Stg2    217H
21        6  Stg2    217H
22        6  Stg2    217H