R for loop wise:Rowwise sum on conditions:性能问题
R for loop wise : Rowwise sum on conditions : Performance issue
我有一个数据库,我在其中 运行 根据同一行中先前单元格的总和和后续单元格的总和来更改单元格值的代码。
for (i in 1:row1)
{
for(j in 3:col-1)
{ # for-loop over columns
if (as.numeric(rowSums(e[i,2:j])) == 0 )
{
e1[i,j] <- 0
}
else if (as.numeric(rowSums(e[i,2:j])) > 0 && e[i,j] == 0 && as.numeric(rowSums(e[i,j:col])) > 0 )
{
e1[i,j] <- 1
}
else if (as.numeric(rowSums(e[i,2:j])) > 0 && e[i,j] == 1 && as.numeric(rowSums(e[i,j:col])) > 0 )
{
e1[i,j] <- 0
}
}
}
运行时间非常长。感谢任何提高速度的建议。附加信息:正在将新值复制到数据框中。
谢谢,
桑迪
编辑 2:
示例数据:
structure(list(`Sr no` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19), `2018-01` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-02` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-03` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-04` = c(0,
0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-05` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0), `2018-06` = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0), `2018-07` = c(0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), `2018-08` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1), `2018-09` = c(0,
0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0), `2018-10` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), `2018-11` = c(0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), `2018-12` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2019-01` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2019-02` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-19L), class = c("tbl_df", "tbl", "data.frame"))
我认为您可以使用 matrix
逻辑来做到这一点。取决于您是否有足够的内存。
# creating fake data
# nc <- 300 # number of columns
nc <- 10 # for testing
nn <- 1e6 # rows
e <- sapply(1:nc, function(x) sample.int(2, nn, replace = T) - 1L)
e <- as.data.frame(e)
row1 <- nrow(e)
colc <- ncol(e)
# note that:
3:colc-1
# isnt equal with:
3:(colc-1)
s <- 3:(colc-1) # I assume you meant this
e1 <- matrix(nrow = row1, ncol = length(s)) # empty resulting matrix
s1 <- sapply(s, function(j) rowSums(e[, 2:j])) # sum for each relevant i,j
s2 <- sapply(s, function(j) rowSums(e[, j:colc])) # sum for each relevant i,j
e2 <- as.matrix(e[, s]) # taking relevant columns of e
e1[s1 == 0] <- 0
e1[s1 > 0 & e2 == 0 & s2 > 0] <- 1
e1[s1 > 0 & e2 == 1 & s2 > 0] <- 0
我有一个数据库,我在其中 运行 根据同一行中先前单元格的总和和后续单元格的总和来更改单元格值的代码。
for (i in 1:row1)
{
for(j in 3:col-1)
{ # for-loop over columns
if (as.numeric(rowSums(e[i,2:j])) == 0 )
{
e1[i,j] <- 0
}
else if (as.numeric(rowSums(e[i,2:j])) > 0 && e[i,j] == 0 && as.numeric(rowSums(e[i,j:col])) > 0 )
{
e1[i,j] <- 1
}
else if (as.numeric(rowSums(e[i,2:j])) > 0 && e[i,j] == 1 && as.numeric(rowSums(e[i,j:col])) > 0 )
{
e1[i,j] <- 0
}
}
}
运行时间非常长。感谢任何提高速度的建议。附加信息:正在将新值复制到数据框中。
谢谢, 桑迪
编辑 2:
示例数据:
structure(list(`Sr no` = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19), `2018-01` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-02` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-03` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-04` = c(0,
0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2018-05` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0), `2018-06` = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0), `2018-07` = c(0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), `2018-08` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1), `2018-09` = c(0,
0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0), `2018-10` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), `2018-11` = c(0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), `2018-12` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2019-01` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2019-02` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-19L), class = c("tbl_df", "tbl", "data.frame"))
我认为您可以使用 matrix
逻辑来做到这一点。取决于您是否有足够的内存。
# creating fake data
# nc <- 300 # number of columns
nc <- 10 # for testing
nn <- 1e6 # rows
e <- sapply(1:nc, function(x) sample.int(2, nn, replace = T) - 1L)
e <- as.data.frame(e)
row1 <- nrow(e)
colc <- ncol(e)
# note that:
3:colc-1
# isnt equal with:
3:(colc-1)
s <- 3:(colc-1) # I assume you meant this
e1 <- matrix(nrow = row1, ncol = length(s)) # empty resulting matrix
s1 <- sapply(s, function(j) rowSums(e[, 2:j])) # sum for each relevant i,j
s2 <- sapply(s, function(j) rowSums(e[, j:colc])) # sum for each relevant i,j
e2 <- as.matrix(e[, s]) # taking relevant columns of e
e1[s1 == 0] <- 0
e1[s1 > 0 & e2 == 0 & s2 > 0] <- 1
e1[s1 > 0 & e2 == 1 & s2 > 0] <- 0