如何按条件将代码应用于数据框?
How to apply code to dataframe by condition?
我有以下数据框:
library(dplyr)
library(tidyverse)
library(concordance)
# Example data: ten rows; Year is numeric, the other columns are character.
Year <- c(2016, 2016, 2017, 2019, 2020, 2020, 2020, 2013, 2010, 2010)
Pf <- c("HS4", "HS4", "HS4", "HS5", "HS5", "HS5", "HS5", "HS4", "HS3", "HS3")
Code <- c(
  "391890", "440929", "851660", "732399", "720839",
  "050510", "830241", "321590", "010210", "010210"
)
# Every row uses the same value here, stored as a string.
Slen <- rep("6", times = 10)
df <- data.frame(Year = Year, Pf = Pf, Code = Code, Slen = Slen)
'Pf' 列包含 3 种不同的取值:“HS3”、“HS4”和“HS5”。我想以矢量化的方式将 concord() 函数应用于 'Code' 列,但要做到这一点,'Pf' 的取值必须唯一,所以我先把数据框拆分成 'Pf' 列取值唯一的若干子集:
# Split df into one data frame per Pf value, so Pf is constant within each.
# which() drops NA comparisons, as subset() does; drop = FALSE keeps a
# data frame.
df.H5 <- df[which(df$Pf == "HS5"), , drop = FALSE]
df.H4 <- df[which(df$Pf == "HS4"), , drop = FALSE]
df.H3 <- df[which(df$Pf == "HS3"), , drop = FALSE]
现在我对每个子数据框应用一个函数:concord()
作用于 'Code' 列,把其中的编码转换为另一种分类下的编码。但是,如果 destination 参数与 'Pf' 列中的值相同,它就无法工作;例如 Pf = "HS3"(在 df 中)且 destination = "HS3" 时,代码无法运行,这就是我没有对 df.H3 应用这段代码的原因。
# Convert the HS5 codes to HS3. Pf and Slen are constant inside each
# (Pf, Slen) group, so unique() returns a single value per group.
df.H5 <- df.H5 %>%
  group_by(Pf, Slen) %>%
  mutate(
    Code2 = concord(
      Code,
      origin = unique(Pf),
      destination = "HS3",
      dest.digit = unique(Slen),
      all = FALSE
    )
  ) %>%
  ungroup()

# Convert the HS4 codes to HS3 in the same way.
df.H4 <- df.H4 %>%
  group_by(Pf, Slen) %>%
  mutate(
    Code2 = concord(
      Code,
      origin = unique(Pf),
      destination = "HS3",
      dest.digit = unique(Slen),
      all = FALSE
    )
  ) %>%
  ungroup()

# df.H3 is not converted: Code2 is a plain copy of Code, added so that the
# three data frames have the same columns and can be stacked.
df.H3$Code2 <- df.H3$Code

# Stack the three pieces into one data frame.
df2 <- rbind(df.H4, df.H5, df.H3)
我的目标是以某种方式把这个流程自动化。例如,当 destination = "HS3" 时,代码直接作用于整个数据框而无需预先拆分;如果某些行的 Pf 值与 destination 参数相同,则不对这些行做转换,而是直接复制 'Code' 列的值来生成 'Code2' 列。
您可以把这段逻辑放进一个函数,然后用 by() 按 Pf 拆分数据并对每个子集调用该函数。
在函数内部分情况处理:Pf == 'HS3' 的子集不做转换,直接复制 Code。
最后用 unsplit() 按原来的行顺序合并结果。
# Add a `Code2` column holding the codes of `x` converted to `destination`.
#
# x:           data frame with columns `Code`, `Pf` and `Slen`. Intended to be
#              a single `Pf` group: only the first `Pf` and `Slen` values are
#              passed to concord() as origin and dest.digit.
# destination: target classification handed to concord(); defaults to "HS3".
#
# Returns `x` with `Code2` appended. If any row of `x` already has
# `Pf == destination`, concord() is not called and `Code` is copied as is.
cf <- \(x, destination = "HS3") {
  Code2 <- if (any(x$Pf == destination)) {
    # Origin equals destination: nothing to convert.
    x$Code
  } else {
    concordance::concord(
      x$Code,
      origin = x$Pf[1],
      destination = destination,
      dest.digit = x$Slen[1],
      all = FALSE
    )
  }
  cbind(x, Code2)
}
# Apply cf to each Pf group of df, then reassemble the pieces with unsplit().
unsplit(
  by(df, df$Pf, cf),
  df$Pf
)
# Year Pf Code Slen Code2
# 1 2016 HS4 391890 6 391890
# 2 2016 HS4 440929 6 440929
# 3 2017 HS4 851660 6 851660
# 4 2019 HS5 732399 6 732399
# 5 2020 HS5 720839 6 720839
# 6 2020 HS5 050510 6 050510
# 7 2020 HS5 830241 6 830241
# 8 2013 HS4 321590 6 321590
# 9 2010 HS3 010210 6 010210
# 10 2010 HS3 010210 6 010210
数据:
# Example data (10 rows): Year is numeric, the other columns are character.
df <- data.frame(
  Year = c(2016, 2016, 2017, 2019, 2020, 2020, 2020, 2013, 2010, 2010),
  Pf = c("HS4", "HS4", "HS4", "HS5", "HS5", "HS5", "HS5", "HS4", "HS3", "HS3"),
  Code = c(
    "391890", "440929", "851660", "732399", "720839",
    "050510", "830241", "321590", "010210", "010210"
  ),
  Slen = rep("6", times = 10),
  stringsAsFactors = FALSE
)
我有以下数据框:
library(dplyr)
library(tidyverse)
library(concordance)
# Example data: ten rows; Year is numeric, the other columns are character.
Year <- c(2016, 2016, 2017, 2019, 2020, 2020, 2020, 2013, 2010, 2010)
Pf <- c("HS4", "HS4", "HS4", "HS5", "HS5", "HS5", "HS5", "HS4", "HS3", "HS3")
Code <- c(
  "391890", "440929", "851660", "732399", "720839",
  "050510", "830241", "321590", "010210", "010210"
)
# Every row uses the same value here, stored as a string.
Slen <- rep("6", times = 10)
df <- data.frame(Year = Year, Pf = Pf, Code = Code, Slen = Slen)
'Pf' 列包含 3 种不同的取值:“HS3”、“HS4”和“HS5”。我想以矢量化的方式将 concord() 函数应用于 'Code' 列,但要做到这一点,'Pf' 的取值必须唯一,所以我先把数据框拆分成 'Pf' 列取值唯一的若干子集:
# Split df into one data frame per Pf value, so Pf is constant within each.
# which() drops NA comparisons, as subset() does; drop = FALSE keeps a
# data frame.
df.H5 <- df[which(df$Pf == "HS5"), , drop = FALSE]
df.H4 <- df[which(df$Pf == "HS4"), , drop = FALSE]
df.H3 <- df[which(df$Pf == "HS3"), , drop = FALSE]
现在我对每个子数据框应用一个函数:concord()
作用于 'Code' 列,把其中的编码转换为另一种分类下的编码。但是,如果 destination 参数与 'Pf' 列中的值相同,它就无法工作;例如 Pf = "HS3"(在 df 中)且 destination = "HS3" 时,代码无法运行,这就是我没有对 df.H3 应用这段代码的原因。
# Convert the HS5 codes to HS3. Pf and Slen are constant inside each
# (Pf, Slen) group, so unique() returns a single value per group.
df.H5 <- df.H5 %>%
  group_by(Pf, Slen) %>%
  mutate(
    Code2 = concord(
      Code,
      origin = unique(Pf),
      destination = "HS3",
      dest.digit = unique(Slen),
      all = FALSE
    )
  ) %>%
  ungroup()

# Convert the HS4 codes to HS3 in the same way.
df.H4 <- df.H4 %>%
  group_by(Pf, Slen) %>%
  mutate(
    Code2 = concord(
      Code,
      origin = unique(Pf),
      destination = "HS3",
      dest.digit = unique(Slen),
      all = FALSE
    )
  ) %>%
  ungroup()

# df.H3 is not converted: Code2 is a plain copy of Code, added so that the
# three data frames have the same columns and can be stacked.
df.H3$Code2 <- df.H3$Code

# Stack the three pieces into one data frame.
df2 <- rbind(df.H4, df.H5, df.H3)
我的目标是以某种方式把这个流程自动化。例如,当 destination = "HS3" 时,代码直接作用于整个数据框而无需预先拆分;如果某些行的 Pf 值与 destination 参数相同,则不对这些行做转换,而是直接复制 'Code' 列的值来生成 'Code2' 列。
您可以把这段逻辑放进一个函数,然后用 by() 按 Pf 拆分数据并对每个子集调用该函数。
在函数内部分情况处理:Pf == 'HS3' 的子集不做转换,直接复制 Code。
最后用 unsplit() 按原来的行顺序合并结果。
# Add a `Code2` column holding the codes of `x` converted to `destination`.
#
# x:           data frame with columns `Code`, `Pf` and `Slen`. Intended to be
#              a single `Pf` group: only the first `Pf` and `Slen` values are
#              passed to concord() as origin and dest.digit.
# destination: target classification handed to concord(); defaults to "HS3".
#
# Returns `x` with `Code2` appended. If any row of `x` already has
# `Pf == destination`, concord() is not called and `Code` is copied as is.
cf <- \(x, destination = "HS3") {
  Code2 <- if (any(x$Pf == destination)) {
    # Origin equals destination: nothing to convert.
    x$Code
  } else {
    concordance::concord(
      x$Code,
      origin = x$Pf[1],
      destination = destination,
      dest.digit = x$Slen[1],
      all = FALSE
    )
  }
  cbind(x, Code2)
}
# Apply cf to each Pf group of df, then reassemble the pieces with unsplit().
unsplit(
  by(df, df$Pf, cf),
  df$Pf
)
# Year Pf Code Slen Code2
# 1 2016 HS4 391890 6 391890
# 2 2016 HS4 440929 6 440929
# 3 2017 HS4 851660 6 851660
# 4 2019 HS5 732399 6 732399
# 5 2020 HS5 720839 6 720839
# 6 2020 HS5 050510 6 050510
# 7 2020 HS5 830241 6 830241
# 8 2013 HS4 321590 6 321590
# 9 2010 HS3 010210 6 010210
# 10 2010 HS3 010210 6 010210
数据:
# Example data (10 rows): Year is numeric, the other columns are character.
df <- data.frame(
  Year = c(2016, 2016, 2017, 2019, 2020, 2020, 2020, 2013, 2010, 2010),
  Pf = c("HS4", "HS4", "HS4", "HS5", "HS5", "HS5", "HS5", "HS4", "HS3", "HS3"),
  Code = c(
    "391890", "440929", "851660", "732399", "720839",
    "050510", "830241", "321590", "010210", "010210"
  ),
  Slen = rep("6", times = 10),
  stringsAsFactors = FALSE
)