R 中基于行值和类别的条件计算
Conditional calculation in R based on Row values and categories
我有这个数据框:
# Example data: two regions ("A", "B"); within each region an "a" block and a
# "b" block, each made of three component rows (x1..x3) followed by a "total"
# row. Column `c` simply numbers the rows 1..16.
df <- data.frame(
  a = rep(paste0(rep(c("a", "b"), each = 4), 1:4), times = 2),
  b = rep(c("x1", "x2", "x3", "total"), times = 4),
  reg = rep(c("A", "B"), each = 8),
  c = 1:16
)
看起来像:
a b reg c
1 a1 x1 A 1
2 a2 x2 A 2
3 a3 x3 A 3
4 a4 total A 4
5 b1 x1 A 5
6 b2 x2 A 6
7 b3 x3 A 7
8 b4 total A 8
9 a1 x1 B 9
10 a2 x2 B 10
11 a3 x3 B 11
12 a4 total B 12
13 b1 x1 B 13
14 b2 x2 B 14
15 b3 x3 B 15
16 b4 total B 16
列 'a'、'b' 和 'reg' 是分类变量。我想要做的是创建一个新列：在 'reg' 列和 'a' 列的每个类别内，
将 x(i)（i=1,2,3）以及 'total' 行的值除以该类别的 'total' 值（即 x(i)/total）。
有人可以帮我解决这个问题吗?
假设您的 df 是按照您的示例排序的。
library(zoo)

# Assumes the rows are ordered so that every group's "total" row comes after
# the component rows that belong to it (as in the example data).

# Start from the values and blank out everything except the "total" rows.
df$NEW <- df$c
df$NEW[df$b != "total"] <- NA
# Carry each total backwards (fromLast = TRUE) over the NA rows above it, so
# every row holds the total of its own group. na.rm = FALSE keeps the result
# the same length as the column even if NAs remain that cannot be filled.
df$NEW <- na.locf(df$NEW, fromLast = TRUE, na.rm = FALSE)
# Share of each row in its group's total; the "total" rows themselves become 1.
df$NEW <- df$c / df$NEW
df
a b reg c NEW
1 a1 x1 A 1 0.2500000
2 a2 x2 A 2 0.5000000
3 a3 x3 A 3 0.7500000
4 a4 total A 4 1.0000000
5 b1 x1 A 5 0.6250000
6 b2 x2 A 6 0.7500000
7 b3 x3 A 7 0.8750000
8 b4 total A 8 1.0000000
9 a1 x1 B 9 0.7500000
10 a2 x2 B 10 0.8333333
11 a3 x3 B 11 0.9166667
12 a4 total B 12 1.0000000
13 b1 x1 B 13 0.8125000
14 b2 x2 B 14 0.8750000
15 b3 x3 B 15 0.9375000
16 b4 total B 16 1.0000000
根据 OP 的解释，下面是适用于他/她真实数据的代码。（来自 OP）
# Same backward-fill approach applied to the OP's own data; `na.locf` comes
# from the zoo package, which must already be attached.
# NOTE(review): `data114` is not defined anywhere in the visible code. It looks
# like a garbled reference to the value column of `data1` - confirm what it
# should be before running this.
data1$shares <- NA
# Row positions of the "Total" rows.
id <- which(data1$Occupation == "Total")
# Seed the new column with the totals at those positions only.
data1$shares[id] <- data114[id]
# Carry each total backwards over the NA rows above it.
data1$shares <- na.locf(data1$shares, fromLast = TRUE, na.rm = FALSE)
# Share of each row in the total that follows it.
data1$shares <- data114 / data1$shares
仅使用 R 基础:
# Example data: two regions, each with an "a" and a "b" block of three
# component rows (x1..x3) followed by that block's "total" row.
df <- data.frame(
  a = c("a1", "a2", "a3", "a4", "b1", "b2", "b3", "b4",
        "a1", "a2", "a3", "a4", "b1", "b2", "b3", "b4"),
  b = c("x1", "x2", "x3", "total", "x1", "x2", "x3", "total",
        "x1", "x2", "x3", "total", "x1", "x2", "x3", "total"),
  reg = c("A", "A", "A", "A", "A", "A", "A", "A",
          "B", "B", "B", "B", "B", "B", "B", "B"),
  c = 1:16
)

# Assumes every block's "total" row comes after its component rows.
# Logical mask of the rows that carry a block total.
is_total <- df$b == "total"
# Totals in row order: To[k] is the total of the k-th block.
totals <- data.frame(To = df$c[is_total])
# Block index of each row = number of "total" rows strictly above it, plus 1.
# This works from row positions rather than from the values stored in `c`,
# so it stays correct when `c` is not simply the row number.
group_id <- cumsum(is_total) - is_total + 1L
# Share of each row in its block total; rows after the last "total" row have
# no block and yield NA.
df$NEW <- df$c / totals$To[group_id]
df
输出:
a b reg c NEW
1 a1 x1 A 1 0.2500000
2 a2 x2 A 2 0.5000000
3 a3 x3 A 3 0.7500000
4 a4 total A 4 1.0000000
5 b1 x1 A 5 0.6250000
6 b2 x2 A 6 0.7500000
7 b3 x3 A 7 0.8750000
8 b4 total A 8 1.0000000
9 a1 x1 B 9 0.7500000
10 a2 x2 B 10 0.8333333
11 a3 x3 B 11 0.9166667
12 a4 total B 12 1.0000000
13 b1 x1 B 13 0.8125000
14 b2 x2 B 14 0.8750000
15 b3 x3 B 15 0.9375000
16 b4 total B 16 1.0000000
我有这个数据框:
# Example data: two regions ("A", "B"); within each region an "a" block and a
# "b" block, each made of three component rows (x1..x3) followed by a "total"
# row. Column `c` simply numbers the rows 1..16.
df <- data.frame(
  a = rep(paste0(rep(c("a", "b"), each = 4), 1:4), times = 2),
  b = rep(c("x1", "x2", "x3", "total"), times = 4),
  reg = rep(c("A", "B"), each = 8),
  c = 1:16
)
看起来像:
a b reg c
1 a1 x1 A 1
2 a2 x2 A 2
3 a3 x3 A 3
4 a4 total A 4
5 b1 x1 A 5
6 b2 x2 A 6
7 b3 x3 A 7
8 b4 total A 8
9 a1 x1 B 9
10 a2 x2 B 10
11 a3 x3 B 11
12 a4 total B 12
13 b1 x1 B 13
14 b2 x2 B 14
15 b3 x3 B 15
16 b4 total B 16
列 'a'、'b' 和 'reg' 是分类变量。我想要做的是创建一个新列：在 'reg' 列和 'a' 列的每个类别内，
将 x(i)（i=1,2,3）以及 'total' 行的值除以该类别的 'total' 值（即 x(i)/total）。
有人可以帮我解决这个问题吗?
假设您的 df 是按照您的示例排序的。
library(zoo)

# Assumes the rows are ordered so that every group's "total" row comes after
# the component rows that belong to it (as in the example data).

# Start from the values and blank out everything except the "total" rows.
df$NEW <- df$c
df$NEW[df$b != "total"] <- NA
# Carry each total backwards (fromLast = TRUE) over the NA rows above it, so
# every row holds the total of its own group. na.rm = FALSE keeps the result
# the same length as the column even if NAs remain that cannot be filled.
df$NEW <- na.locf(df$NEW, fromLast = TRUE, na.rm = FALSE)
# Share of each row in its group's total; the "total" rows themselves become 1.
df$NEW <- df$c / df$NEW
df
a b reg c NEW
1 a1 x1 A 1 0.2500000
2 a2 x2 A 2 0.5000000
3 a3 x3 A 3 0.7500000
4 a4 total A 4 1.0000000
5 b1 x1 A 5 0.6250000
6 b2 x2 A 6 0.7500000
7 b3 x3 A 7 0.8750000
8 b4 total A 8 1.0000000
9 a1 x1 B 9 0.7500000
10 a2 x2 B 10 0.8333333
11 a3 x3 B 11 0.9166667
12 a4 total B 12 1.0000000
13 b1 x1 B 13 0.8125000
14 b2 x2 B 14 0.8750000
15 b3 x3 B 15 0.9375000
16 b4 total B 16 1.0000000
根据 OP 的解释，下面是适用于他/她真实数据的代码。（来自 OP）
# Same backward-fill approach applied to the OP's own data; `na.locf` comes
# from the zoo package, which must already be attached.
# NOTE(review): `data114` is not defined anywhere in the visible code. It looks
# like a garbled reference to the value column of `data1` - confirm what it
# should be before running this.
data1$shares <- NA
# Row positions of the "Total" rows.
id <- which(data1$Occupation == "Total")
# Seed the new column with the totals at those positions only.
data1$shares[id] <- data114[id]
# Carry each total backwards over the NA rows above it.
data1$shares <- na.locf(data1$shares, fromLast = TRUE, na.rm = FALSE)
# Share of each row in the total that follows it.
data1$shares <- data114 / data1$shares
仅使用 R 基础:
# Example data: two regions, each with an "a" and a "b" block of three
# component rows (x1..x3) followed by that block's "total" row.
df <- data.frame(
  a = c("a1", "a2", "a3", "a4", "b1", "b2", "b3", "b4",
        "a1", "a2", "a3", "a4", "b1", "b2", "b3", "b4"),
  b = c("x1", "x2", "x3", "total", "x1", "x2", "x3", "total",
        "x1", "x2", "x3", "total", "x1", "x2", "x3", "total"),
  reg = c("A", "A", "A", "A", "A", "A", "A", "A",
          "B", "B", "B", "B", "B", "B", "B", "B"),
  c = 1:16
)

# Assumes every block's "total" row comes after its component rows.
# Logical mask of the rows that carry a block total.
is_total <- df$b == "total"
# Totals in row order: To[k] is the total of the k-th block.
totals <- data.frame(To = df$c[is_total])
# Block index of each row = number of "total" rows strictly above it, plus 1.
# This works from row positions rather than from the values stored in `c`,
# so it stays correct when `c` is not simply the row number.
group_id <- cumsum(is_total) - is_total + 1L
# Share of each row in its block total; rows after the last "total" row have
# no block and yield NA.
df$NEW <- df$c / totals$To[group_id]
df
输出:
a b reg c NEW
1 a1 x1 A 1 0.2500000
2 a2 x2 A 2 0.5000000
3 a3 x3 A 3 0.7500000
4 a4 total A 4 1.0000000
5 b1 x1 A 5 0.6250000
6 b2 x2 A 6 0.7500000
7 b3 x3 A 7 0.8750000
8 b4 total A 8 1.0000000
9 a1 x1 B 9 0.7500000
10 a2 x2 B 10 0.8333333
11 a3 x3 B 11 0.9166667
12 a4 total B 12 1.0000000
13 b1 x1 B 13 0.8125000
14 b2 x2 B 14 0.8750000
15 b3 x3 B 15 0.9375000
16 b4 total B 16 1.0000000