循环遍历 R 中的列

Loop through the columns in R

我想统计每一列中 'S' 出现的次数:第 1 到 10 行的计数记为 'downstream',第 15 到 25 行的计数记为 'upstream'。

然后我想将结果保存到一个文本文件中。我已经能够对单独一列算出结果,但在遍历所有列并保存结果时遇到了问题。本例中有 5 列,但列数可能因文件而异。

# Lookup table: one row per letter code; scale is 1 for "S" and 0 otherwise.
S <- data.frame(
  scale = rep(c(0, 1, 0), times = c(15, 1, 4)),
  aa = c(
    "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
  )
)

    #input  (example)
   V1  V2  V3  V4  V5 
1  C   D   E    R   N  
2  C   A   M    K   P
3  V   T   Q    Q   E 
4  A   T   S    S   S
5  C   D   E    R   N  
6  C   A   M    K   P
7  V   T   Q    Q   E 
8  A   T   S    S   S
9  R   V   D    S   A
10 W   R   H    I   C
11 S   N   I    P   T
12 Q   A   S    D   E
13 C   D   E    R   N  
14 C   A   M    K   P
15 V   T   Q    Q   E 
16 A   T   S    S   S
17 C   D   E    R   N  
18 C   A   M    K   P
19 V   T   Q    Q   E 
20 A   T   S    S   S
21 R   V   D    S   A
22 W   R   H    I   C
23 S   N   I    P   T
24 G   A   D    S   S 
25 N   T   T    S   A




# Match every entry of df_trial against the lookup table S.
# Long format: one row per (column name, letter). `scale` is taken from S and
# is NA for letters that have no row in S (including the "" replacing "-").
# `row` is the running index inside each column group.
df11 <- df_trial %>%
  pivot_longer(cols = everything(), values_to = "aa") %>%
  mutate(aa = replace(aa, aa == "-", "")) %>%
  left_join(S, by = "aa") %>%
  arrange(name) %>%
  group_by(name) %>%
  mutate(row = row_number()) %>%
  # Drop the grouping so later verbs on df11 do not silently run per group.
  ungroup()
view(df11)

# Back to wide format: one scale_<column> and one aa_<column> per input
# column, aligned on `row`, which is dropped once the reshape is done.
values_for_all <- df11 %>%
  pivot_wider(names_from = name, values_from = c(scale, aa)) %>%
  select(-row)
view(values_for_all)
 #class(values_for_all)

# Problem: how to loop over the columns here?

# Sum the scale values of column V1 over rows 1-10 (downstream) and
# rows 15-25 (upstream).
# The downstream window used to be 1:11, which also counted row 11 (an "S"
# in V1) and contradicted the stated 1-10 range.
downstream <- sum(values_for_all$scale_V1[1:10])
upstream <- sum(values_for_all$scale_V1[15:25])

# cbind() gives a one-row matrix with `downstream` and `upstream` columns.
res <- cbind(downstream, upstream)
res_trial <- as.data.frame(t(res))
view(res_trial)
# Transpose back so downstream/upstream are the columns of the data frame.
res_final <- as.data.frame(t(res_trial))
view(res_final)

# saving to a text file
write.table(res_final, "~/Desktop/R_work/test.txt", sep = "\t", row.names = FALSE)

#expected outcome (example):

downstream   upstream
   2            0
   0            0

感谢您的帮助!

我们可以先取出所需的行,再使用 colSums:

# Rows 1-10: number of "S" entries in each column
colSums(df_trial[seq(1, 10), ] == "S")
# V1 V2 V3 V4 V5
#  0  0  2  3  2
# Rows 15-25: number of "S" entries in each column
colSums(df_trial[seq(15, 25), ] == "S")
# V1 V2 V3 V4 V5
#  1  0  2  5  3

dplyr

library(dplyr)
# Count "S" per column in rows 1-10 (downstream) and rows 15-25 (upstream).
# The labels in `categ` must follow the order of the two sums below; they
# were previously swapped relative to the windows defined in the question.
# NOTE(review): summarise() returning several rows is deprecated since
# dplyr 1.1.0; switch to reframe() if that version can be required.
df_trial %>%
  summarise(across(everything(), ~ c(sum(.[1:10] == "S"),
                                     sum(.[15:25] == "S")))) %>%
  mutate(categ = c("downstream", "upstream"), .before = 1)
#       categ V1 V2 V3 V4 V5
#1 downstream  0  0  2  3  2
#2   upstream  1  0  2  5  3

数据

# Example input: a 25-row data.frame with character columns V1..V5 holding
# single-letter codes, and character row names "1".."25".
df_trial <- structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A", 
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R", 
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T", 
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T", 
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M", 
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q", 
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R", 
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K", 
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S", 
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N", 
"P", "E", "S", "A", "C", "T", "S", "A")), class = "data.frame",
row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25"))

这个有用吗:

> # Total number of "S" entries over all columns, per row window.
> # vapply() pins each per-column count to a single integer.
> S_count <- data.frame(
+   downstream = sum(vapply(S[1:10, ], function(x) sum(grepl("S", x)), integer(1))),
+   upstream = sum(vapply(S[15:25, ], function(x) sum(grepl("S", x)), integer(1))),
+   stringsAsFactors = FALSE
+ )
> S_count
  downstream upstream
1          7       11
> 

矩阵形式:

> # One row per window, one column per input column.
> # vapply() pins each per-column count to a single integer.
> S_count_vec <- rbind(
+   downstream = vapply(S[1:10, ], function(x) sum(grepl("S", x)), integer(1)),
+   upstream = vapply(S[15:25, ], function(x) sum(grepl("S", x)), integer(1))
+ )
> S_count_vec
           V1 V2 V3 V4 V5
downstream  0  0  2  3  2
upstream    1  0  2  5  3
> 

使用的数据:

> dput(S)
structure(list(V1 = c("C", "C", "V", "A", "C", "C", "V", "A", 
"R", "W", "S", "Q", "C", "C", "V", "A", "C", "C", "V", "A", "R", 
"W", "S", "G", "N"), V2 = c("D", "A", "T", "T", "D", "A", "T", 
"T", "V", "R", "N", "A", "D", "A", "T", "T", "D", "A", "T", "T", 
"V", "R", "N", "A", "T"), V3 = c("E", "M", "Q", "S", "E", "M", 
"Q", "S", "D", "H", "I", "S", "E", "M", "Q", "S", "E", "M", "Q", 
"S", "D", "H", "I", "D", "T"), V4 = c("R", "K", "Q", "S", "R", 
"K", "Q", "S", "S", "I", "P", "D", "R", "K", "Q", "S", "R", "K", 
"Q", "S", "S", "I", "P", "S", "S"), V5 = c("N", "P", "E", "S", 
"N", "P", "E", "S", "A", "C", "T", "E", "N", "P", "E", "S", "N", 
"P", "E", "S", "A", "C", "T", "S", "A")), row.names = c(NA, -25L
), class = c("tbl_df", "tbl", "data.frame"))