循环遍历 R 中的列
Loop through the columns in R
我想统计每一列中 'S' 出现的次数:第 1 到 10 行的计数记为 'downstream',第 15 到 25 行的计数记为 'upstream'。
然后我想将输出保存在一个文本文件中。
我已经针对单独一列(V1)算出了结果,但不知道如何循环遍历所有列并把结果保存下来。本例中有 5 列,但列数可能因文件而异。
# Lookup table: one row per amino-acid letter with its scale value.
# Only 'S' (the 16th letter listed) carries a scale of 1; all others are 0.
S <- data.frame(
  scale = rep(c(0, 1, 0), times = c(15, 1, 4)),
  aa = c('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
         'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
)
#input (example)
V1 V2 V3 V4 V5
1 C D E R N
2 C A M K P
3 V T Q Q E
4 A T S S S
5 C D E R N
6 C A M K P
7 V T Q Q E
8 A T S S S
9 R V D S A
10 W R H I C
11 S N I P T
12 Q A S D E
13 C D E R N
14 C A M K P
15 V T Q Q E
16 A T S S S
17 C D E R N
18 C A M K P
19 V T Q Q E
20 A T S S S
21 R V D S A
22 W R H I C
23 S N I P T
24 G A D S S
25 N T T S A
# matching the data from two data frames
# Reshape df_trial to long form (one row per cell, letter stored in `aa`),
# turn '-' entries into empty strings, attach the `scale` value from the
# lookup table S by letter, and number the rows within each original column
# (`name`).
# NOTE(review): no library() call is visible in this snippet -- the pipeline
# presumably needs dplyr and tidyr attached; confirm.
# NOTE(review): df11 is left grouped by `name`; confirm whether an ungroup()
# is wanted before it is reused.
df11 <- df_trial %>%
pivot_longer(cols = everything(), values_to = 'aa') %>%
mutate(aa = replace(aa, aa == '-', '')) %>%
left_join(S, by = 'aa') %>%
arrange(name) %>%
group_by(name) %>%
mutate(row = row_number())
view(df11)
# Spread back to wide form: one scale_<column> and one aa_<column> column per
# original column, keyed by the `row` index, which is dropped afterwards.
values_for_all <- df11 %>%
pivot_wider(names_from = name, values_from = c(scale, aa)) %>%
select(-row)
view(values_for_all)
#class(values_for_all)
# Open problem: only one column (scale_V1) is handled here; the same steps
# still need to be repeated for every scale_* column.
# sum values from positions 1 to 10 and then from 15 to 25
# (1:10, not 1:11 -- row 11 lies outside the downstream window)
downstream <- sum(values_for_all$scale_V1[1:10])
#view(downstream)
upstream <- sum(values_for_all$scale_V1[15:25])
#view(upstream)
# cbind() yields a 1 x 2 matrix with columns `downstream` and `upstream`
res <- cbind(downstream,upstream)
res_trial<- as.data.frame(t(res))
view(res_trial)
#class(res_trial)
# transposing a second time restores the one-row layout
# (columns downstream / upstream) as a data frame
res_final <- as.data.frame(t(res_trial))
view(res_final)
#class(res_final)
#saving to a text file
write.table(res_final,"~/Desktop/R_work/test.txt",sep="\t",row.names=FALSE)
#expected outcome (example):
downstream upstream
2 0
0 0
感谢您的帮助!
我们可以在对行进行子集化后使用colSums
# Flag every cell equal to 'S' once, then add up the flags per column
# for each row window.
is_s <- df_trial == 'S'
# rows 1-10
colSums(is_s[1:10, ])
# V1 V2 V3 V4 V5
#  0  0  2  3  2
# rows 15-25
colSums(is_s[15:25, ])
# V1 V2 V3 V4 V5
#  1  0  2  5  3
或 dplyr
library(dplyr)
# Count 'S' per column in two row windows. Each column yields two sums, in the
# order rows 1-10 then rows 15-25, so the labels must follow that same order:
# rows 1-10 are 'downstream' and rows 15-25 are 'upstream'.
# NOTE: since dplyr 1.1.0, reframe() is the recommended verb for results with
# more than one row per group; summarise() is kept to match the original answer.
df_trial %>%
  summarise(across(everything(), ~ c(sum(.[1:10] == 'S'),
                                     sum(.[15:25] == 'S')))) %>%
  mutate(categ = c('downstream', 'upstream'), .before = 1)
#       categ V1 V2 V3 V4 V5
#1 downstream  0  0  2  3  2
#2   upstream  1  0  2  5  3
数据
# Example input referenced by the snippets in this file: a plain data.frame
# with 25 rows (row names "1".."25") and five character columns V1-V5, each
# holding single-letter codes.
df_trial <- structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A",
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R",
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T",
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T",
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M",
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q",
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R",
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K",
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S",
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N",
"P", "E", "S", "A", "C", "T", "S", "A")), class = "data.frame",
row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"))
这个有用吗:
> # Total number of 'S' across all columns, for rows 1-10 and rows 15-25.
> # In this answer S is the 25 x 5 letter table, not the scale lookup table.
> # vapply() pins the per-column result to a single integer; FALSE is spelled
> # out because F can be reassigned.
> S_count <- data.frame(downstream = sum(vapply(S[1:10,], function(x) sum(grepl('S',x)), integer(1))),
+ upstream = sum(vapply(S[15:25,], function(x) sum(grepl('S',x)), integer(1))), stringsAsFactors = FALSE)
> S_count
downstream upstream
1 7 11
>
矩阵形式:
> # Per-column 'S' counts: one row per window (downstream = rows 1-10,
> # upstream = rows 15-25), one column per input column.
> # vapply() guarantees an integer vector even for empty or odd input.
> S_count_vec <- rbind(downstream = vapply(S[1:10,], function(x) sum(grepl('S',x)), integer(1)),
+ upstream = vapply(S[15:25,], function(x) sum(grepl('S',x)), integer(1)))
> S_count_vec
V1 V2 V3 V4 V5
downstream 0 0 2 3 2
upstream 1 0 2 5 3
>
使用的数据:
> dput(S)
structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A",
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R",
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T",
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T",
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M",
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q",
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R",
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K",
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S",
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N",
"P", "E", "S", "A", "C", "T", "S", "A")), row.names = c(NA, -25L
), class = c("tbl_df", "tbl", "data.frame"))
我想统计每一列中 'S' 出现的次数:第 1 到 10 行的计数记为 'downstream',第 15 到 25 行的计数记为 'upstream'。
然后我想将输出保存在一个文本文件中。我已经针对单独一列(V1)算出了结果,但不知道如何循环遍历所有列并把结果保存下来。本例中有 5 列,但列数可能因文件而异。
# Lookup table: one row per amino-acid letter with its scale value.
# Only 'S' (the 16th letter listed) carries a scale of 1; all others are 0.
S <- data.frame(
  scale = rep(c(0, 1, 0), times = c(15, 1, 4)),
  aa = c('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
         'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
)
#input (example)
V1 V2 V3 V4 V5
1 C D E R N
2 C A M K P
3 V T Q Q E
4 A T S S S
5 C D E R N
6 C A M K P
7 V T Q Q E
8 A T S S S
9 R V D S A
10 W R H I C
11 S N I P T
12 Q A S D E
13 C D E R N
14 C A M K P
15 V T Q Q E
16 A T S S S
17 C D E R N
18 C A M K P
19 V T Q Q E
20 A T S S S
21 R V D S A
22 W R H I C
23 S N I P T
24 G A D S S
25 N T T S A
# matching the data from two data frames
# Reshape df_trial to long form (one row per cell, letter stored in `aa`),
# turn '-' entries into empty strings, attach the `scale` value from the
# lookup table S by letter, and number the rows within each original column
# (`name`).
# NOTE(review): no library() call is visible in this snippet -- the pipeline
# presumably needs dplyr and tidyr attached; confirm.
# NOTE(review): df11 is left grouped by `name`; confirm whether an ungroup()
# is wanted before it is reused.
df11 <- df_trial %>%
pivot_longer(cols = everything(), values_to = 'aa') %>%
mutate(aa = replace(aa, aa == '-', '')) %>%
left_join(S, by = 'aa') %>%
arrange(name) %>%
group_by(name) %>%
mutate(row = row_number())
view(df11)
# Spread back to wide form: one scale_<column> and one aa_<column> column per
# original column, keyed by the `row` index, which is dropped afterwards.
values_for_all <- df11 %>%
pivot_wider(names_from = name, values_from = c(scale, aa)) %>%
select(-row)
view(values_for_all)
#class(values_for_all)
# Open problem: only one column (scale_V1) is handled here; the same steps
# still need to be repeated for every scale_* column.
# sum values from positions 1 to 10 and then from 15 to 25
# (1:10, not 1:11 -- row 11 lies outside the downstream window)
downstream <- sum(values_for_all$scale_V1[1:10])
#view(downstream)
upstream <- sum(values_for_all$scale_V1[15:25])
#view(upstream)
# cbind() yields a 1 x 2 matrix with columns `downstream` and `upstream`
res <- cbind(downstream,upstream)
res_trial<- as.data.frame(t(res))
view(res_trial)
#class(res_trial)
# transposing a second time restores the one-row layout
# (columns downstream / upstream) as a data frame
res_final <- as.data.frame(t(res_trial))
view(res_final)
#class(res_final)
#saving to a text file
write.table(res_final,"~/Desktop/R_work/test.txt",sep="\t",row.names=FALSE)
#expected outcome (example):
downstream upstream
2 0
0 0
感谢您的帮助!
我们可以在对行进行子集化后使用colSums
# Flag every cell equal to 'S' once, then add up the flags per column
# for each row window.
is_s <- df_trial == 'S'
# rows 1-10
colSums(is_s[1:10, ])
# V1 V2 V3 V4 V5
#  0  0  2  3  2
# rows 15-25
colSums(is_s[15:25, ])
# V1 V2 V3 V4 V5
#  1  0  2  5  3
或 dplyr
library(dplyr)
# Count 'S' per column in two row windows. Each column yields two sums, in the
# order rows 1-10 then rows 15-25, so the labels must follow that same order:
# rows 1-10 are 'downstream' and rows 15-25 are 'upstream'.
# NOTE: since dplyr 1.1.0, reframe() is the recommended verb for results with
# more than one row per group; summarise() is kept to match the original answer.
df_trial %>%
  summarise(across(everything(), ~ c(sum(.[1:10] == 'S'),
                                     sum(.[15:25] == 'S')))) %>%
  mutate(categ = c('downstream', 'upstream'), .before = 1)
#       categ V1 V2 V3 V4 V5
#1 downstream  0  0  2  3  2
#2   upstream  1  0  2  5  3
数据
# Example input referenced by the snippets in this file: a plain data.frame
# with 25 rows (row names "1".."25") and five character columns V1-V5, each
# holding single-letter codes.
df_trial <- structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A",
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R",
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T",
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T",
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M",
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q",
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R",
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K",
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S",
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N",
"P", "E", "S", "A", "C", "T", "S", "A")), class = "data.frame",
row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"))
这个有用吗:
> # Total number of 'S' across all columns, for rows 1-10 and rows 15-25.
> # In this answer S is the 25 x 5 letter table, not the scale lookup table.
> # vapply() pins the per-column result to a single integer; FALSE is spelled
> # out because F can be reassigned.
> S_count <- data.frame(downstream = sum(vapply(S[1:10,], function(x) sum(grepl('S',x)), integer(1))),
+ upstream = sum(vapply(S[15:25,], function(x) sum(grepl('S',x)), integer(1))), stringsAsFactors = FALSE)
> S_count
downstream upstream
1 7 11
>
矩阵形式:
> # Per-column 'S' counts: one row per window (downstream = rows 1-10,
> # upstream = rows 15-25), one column per input column.
> # vapply() guarantees an integer vector even for empty or odd input.
> S_count_vec <- rbind(downstream = vapply(S[1:10,], function(x) sum(grepl('S',x)), integer(1)),
+ upstream = vapply(S[15:25,], function(x) sum(grepl('S',x)), integer(1)))
> S_count_vec
V1 V2 V3 V4 V5
downstream 0 0 2 3 2
upstream 1 0 2 5 3
>
使用的数据:
> dput(S)
structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A",
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R",
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T",
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T",
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M",
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q",
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R",
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K",
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S",
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N",
"P", "E", "S", "A", "C", "T", "S", "A")), row.names = c(NA, -25L
), class = c("tbl_df", "tbl", "data.frame"))