减去R中同一数据框中的多列
Subtract multiple columns in the same data frame in R
对于以下数据集
# Example data: four variables (x1..x4), each present with the suffixes
# _c1, _c2 and _c3. The _c2 columns are a constant 0 recycled to five rows.
mydata <- data.frame(
  x1_c1 = 1:5,
  x2_c1 = 2:6,
  x3_c1 = 3:7,
  x4_c1 = 4:8,
  x1_c2 = 0,
  x2_c2 = 0,
  x3_c2 = 0,
  x4_c2 = 0,
  x1_c3 = 1:5,
  x2_c3 = 2:6,
  x3_c3 = 3:7,
  x4_c3 = 4:8
)
> mydata
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 1 2 3 4 0 0 0 0 1 2 3 4
2 2 3 4 5 0 0 0 0 2 3 4 5
3 3 4 5 6 0 0 0 0 3 4 5 6
4 4 5 6 7 0 0 0 0 4 5 6 7
5 5 6 7 8 0 0 0 0 5 6 7 8
我想从以 _c1、_c2 和 _c3 结尾的变量中,分别减去对应的以 _c3 结尾的变量,然后把所有列合并起来。下面是一种做法:
# For each suffix, take the columns whose names contain it, subtract the
# "_c3" columns from them, then bind the three results side by side.
mydata_update <- do.call(
  cbind,
  lapply(c("_c1", "_c2", "_c3"), function(suffix) {
    mydata[, grep(suffix, colnames(mydata))] -
      mydata[, grep("_c3", colnames(mydata))]
  })
)
预期结果是
> mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
欢迎任何其他方法。
我们可以使用 split.default 根据列名的子字符串把数据拆分成若干组,然后在每个 list 元素中用 grep 找到 'c3' 列并相减,最后通过 do.call 调用 cbind 把这些 list 元素重新合并起来
# Split the columns by the part of the name before the first "_", subtract
# each group's "_c3" column from every column in that group, bind the groups
# back together and restore the original column order.
out <- local({
  by_prefix <- split.default(mydata, sub("_.*", "", names(mydata)))
  shifted <- lapply(by_prefix, function(grp) {
    grp - grp[, grep("_c3", names(grp))]
  })
  # unname() stops cbind from prefixing column names with the group name.
  do.call(cbind, unname(shifted))[names(mydata)]
})
-输出
out
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者我们可以使用tidyverse
library(dplyr)
library(tidyr)
# Tidyverse alternative: reshape to long form (one row per original row and
# suffix, with x1..x4 as columns), subtract the "c3" row within each original
# row, then reshape back to wide and restore the original column order.
mydata %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    names_sep = "_"
  ) %>%
  group_by(rn) %>%
  mutate(across(where(is.numeric), ~ .x - .x[grp == "c3"])) %>%
  ungroup() %>%
  pivot_wider(names_from = grp, values_from = x1:x4) %>%
  select(-rn) %>%
  select(names(mydata))
-输出
# A tibble: 5 x 12
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
这是使用循环的另一种方式:
# Loop alternative: step through the columns in blocks as wide as the "_c3"
# selection and subtract that selection from each block.
# NOTE(review): this relies on the columns being laid out in consecutive
# blocks in the same order as the "_c3" columns.
sm <- mydata[, grep("_c3", colnames(mydata))]
mydata_update <- mydata
for (i in seq(from = 1, to = ncol(mydata), by = ncol(sm))) {
  mydata_update[, seq(i, i + ncol(sm) - 1)] <-
    mydata_update[, seq(i, i + ncol(sm) - 1)] - sm
}
mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
将每一列的前缀与要减去的 _c3 列的前缀进行匹配,然后相减:
# Logical mask of the columns to subtract (names ending in "_c3").
subsel <- endsWith(colnames(mydata), "_c3")
# Prefix of every column: the name with everything from the first "_" removed.
prefix <- sub(pattern = "_.+", replacement = "", x = colnames(mydata))
# For each column, pick the "_c3" column with the same prefix and subtract it.
mydata - mydata[subsel][match(prefix, prefix[subsel])]
# x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
#2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
#3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
#4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
#5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者,如果您愿意冒一点风险,并且确定数据是完整的、各列也按预期的顺序排列:
# Shortcut: subtract the "_c3" columns as a matrix.
# NOTE(review): appears to rely on the matrix values being recycled across
# the data frame block by block, so the columns must be complete and in
# matching order - confirm before using on other data.
mydata - as.matrix(mydata[, endsWith(colnames(mydata), "_c3")])
对于以下数据集
# Example data: four variables (x1..x4), each present with the suffixes
# _c1, _c2 and _c3. The _c2 columns are a constant 0 recycled to five rows.
mydata <- data.frame(
  x1_c1 = 1:5,
  x2_c1 = 2:6,
  x3_c1 = 3:7,
  x4_c1 = 4:8,
  x1_c2 = 0,
  x2_c2 = 0,
  x3_c2 = 0,
  x4_c2 = 0,
  x1_c3 = 1:5,
  x2_c3 = 2:6,
  x3_c3 = 3:7,
  x4_c3 = 4:8
)
> mydata
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 1 2 3 4 0 0 0 0 1 2 3 4
2 2 3 4 5 0 0 0 0 2 3 4 5
3 3 4 5 6 0 0 0 0 3 4 5 6
4 4 5 6 7 0 0 0 0 4 5 6 7
5 5 6 7 8 0 0 0 0 5 6 7 8
我想从以 _c1、_c2 和 _c3 结尾的变量中,分别减去对应的以 _c3 结尾的变量,然后把所有列合并起来。下面是一种做法:
# For each suffix, take the columns whose names contain it, subtract the
# "_c3" columns from them, then bind the three results side by side.
mydata_update <- do.call(
  cbind,
  lapply(c("_c1", "_c2", "_c3"), function(suffix) {
    mydata[, grep(suffix, colnames(mydata))] -
      mydata[, grep("_c3", colnames(mydata))]
  })
)
预期结果是
> mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
欢迎任何其他方法。
我们可以使用 split.default 根据列名的子字符串把数据拆分成若干组,然后在每个 list 元素中用 grep 找到 'c3' 列并相减,最后通过 do.call 调用 cbind 把这些 list 元素重新合并起来
# Split the columns by the part of the name before the first "_", subtract
# each group's "_c3" column from every column in that group, bind the groups
# back together and restore the original column order.
out <- local({
  by_prefix <- split.default(mydata, sub("_.*", "", names(mydata)))
  shifted <- lapply(by_prefix, function(grp) {
    grp - grp[, grep("_c3", names(grp))]
  })
  # unname() stops cbind from prefixing column names with the group name.
  do.call(cbind, unname(shifted))[names(mydata)]
})
-输出
out
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者我们可以使用tidyverse
library(dplyr)
library(tidyr)
# Tidyverse alternative: reshape to long form (one row per original row and
# suffix, with x1..x4 as columns), subtract the "c3" row within each original
# row, then reshape back to wide and restore the original column order.
mydata %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    names_sep = "_"
  ) %>%
  group_by(rn) %>%
  mutate(across(where(is.numeric), ~ .x - .x[grp == "c3"])) %>%
  ungroup() %>%
  pivot_wider(names_from = grp, values_from = x1:x4) %>%
  select(-rn) %>%
  select(names(mydata))
-输出
# A tibble: 5 x 12
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
这是使用循环的另一种方式:
# Loop alternative: step through the columns in blocks as wide as the "_c3"
# selection and subtract that selection from each block.
# NOTE(review): this relies on the columns being laid out in consecutive
# blocks in the same order as the "_c3" columns.
sm <- mydata[, grep("_c3", colnames(mydata))]
mydata_update <- mydata
for (i in seq(from = 1, to = ncol(mydata), by = ncol(sm))) {
  mydata_update[, seq(i, i + ncol(sm) - 1)] <-
    mydata_update[, seq(i, i + ncol(sm) - 1)] - sm
}
mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
将每一列的前缀与要减去的 _c3 列的前缀进行匹配,然后相减:
# Logical mask of the columns to subtract (names ending in "_c3").
subsel <- endsWith(colnames(mydata), "_c3")
# Prefix of every column: the name with everything from the first "_" removed.
prefix <- sub(pattern = "_.+", replacement = "", x = colnames(mydata))
# For each column, pick the "_c3" column with the same prefix and subtract it.
mydata - mydata[subsel][match(prefix, prefix[subsel])]
# x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
#2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
#3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
#4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
#5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
或者,如果您愿意冒一点风险,并且确定数据是完整的、各列也按预期的顺序排列:
# Shortcut: subtract the "_c3" columns as a matrix.
# NOTE(review): appears to rely on the matrix values being recycled across
# the data frame block by block, so the columns must be complete and in
# matching order - confirm before using on other data.
mydata - as.matrix(mydata[, endsWith(colnames(mydata), "_c3")])