将一组命令转换为用于在 R 中操作数据框的函数

Converting a set of commands into a function for manipulating a data frame in R

我的数据框 (DF1) 如下所示:-

[示例,DF1](https://i.stack.imgur.com/wZjzR.png)

对第 1 行 (MED1) 执行以下操作:

# Take the first row of data_frame_merge and reshape it into a long data frame
# with a Fusion_Type column and a TPM column.
MED1 <- data_frame_merge[1, ]
rownames(MED1) <- NULL

# Transpose so each original column becomes a row. t() returns a matrix, so
# convert back to a data frame and move the row names into a regular column.
MED1 <- t(MED1)
MED1 <- as.data.frame(MED1)
MED1 <- tibble::rownames_to_column(MED1, "Fusion_Type")

# Drop everything from the first literal dot onwards. The backslash has to be
# doubled inside an R string: "\." is an unrecognized escape and does not parse.
MED1$Fusion_Type <- gsub("\\..*", "", MED1$Fusion_Type)
MED1$Fusion_Type <- as.factor(MED1$Fusion_Type)

# Give the value column (V1 after the conversion above) its final name.
names(MED1)[names(MED1) == "V1"] <- "TPM"

我得到:

[MED1, DF](https://i.stack.imgur.com/hhMf6.png)

基本上,我从第一个数据框中提取第一行,并将其转换为上图所示格式的数据框。

我的问题是,如何将我对 MED1 所做的转换为一个函数,以便我可以对第一个数据框中的所有行执行此转换。

我多次尝试使用 for 循环和函数,但总是遇到错误。

我尝试了下面的代码,但我很确定它充满了错误:

# Asker's non-working attempt. It is kept as posted except for the regex
# escape ("\\..*" instead of "\..*", which is an unrecognized escape in an R
# string), so that the snippet at least parses. Remaining problems:
#   - the loop bounds use df_main, but the body indexes df, which is never
#     assigned in this snippet;
#   - df[i] selects column i, whereas the single-row code selects a row with
#     [1, ];
#   - lsEOG(df[i]) <- ... is replacement-function syntax, not list assignment
#     (that would be lsEOG[[i]] <- ...).
lsEOG<-list()

for (i in 1:nrow(df_main)) {
  rownames(df)[i] <- df[i]
  df[i] <- df[i]
  rownames(df)[i] <- NULL
  df[i] <- t(df[i])
  df[i] <- as.data.frame(df[i])
  df[i] <- tibble::rownames_to_column(df[i], "Fusion_Type")
  df[i]$Fusion_Type <- gsub("\\..*", "", df[i]$Fusion_Type)
  df[i]$Fusion_Type <- as.factor(df[i]$Fusion_Type)
  names(df[i])[names(df[i]) == "V1"] <- "TPM"
  lsEOG(df[i])<- df[i]
}

Error in rownames(df)[i] <- NULL : replacement has length zero(以及许多其他错误)

你能帮我把代码变成函数来解决这个问题吗?

也就是说,如何把下面这段代码,

# Take the first row of data_frame_merge and reshape it into a long data frame
# with a Fusion_Type column and a TPM column.
MED1 <- data_frame_merge[1, ]
rownames(MED1) <- NULL

# Transpose so each original column becomes a row. t() returns a matrix, so
# convert back to a data frame and move the row names into a regular column.
MED1 <- t(MED1)
MED1 <- as.data.frame(MED1)
MED1 <- tibble::rownames_to_column(MED1, "Fusion_Type")

# Drop everything from the first literal dot onwards. The backslash has to be
# doubled inside an R string: "\." is an unrecognized escape and does not parse.
MED1$Fusion_Type <- gsub("\\..*", "", MED1$Fusion_Type)
MED1$Fusion_Type <- as.factor(MED1$Fusion_Type)

# Give the value column (V1 after the conversion above) its final name.
names(MED1)[names(MED1) == "V1"] <- "TPM"

转换成一个函数,使其可以应用于数据框的所有行并输出结果?

试试这个:

library(tidyverse)

# Reshape every row at once instead of looping: keep the row names as a MED
# column, pivot the remaining columns into long form, then keep only the part
# of each former column name that follows the "<digit>_" prefix.
as_tibble(data_frame_merge, rownames = "MED") %>%
  pivot_longer(cols = -MED, names_to = "Fusion_Type", values_to = "TMP") %>%
  # The backslash has to be doubled inside an R string: "\d" is an
  # unrecognized escape and does not parse.
  mutate(Fusion_Type = stringr::str_extract(Fusion_Type, "(?<=\\d_).*$"))

输出:

# A tibble: 28 × 3
   MED   Fusion_Type   TMP
   <chr> <chr>       <dbl>
 1 MED1  PML_RARA     57.5
 2 MED1  PML_RARA    178. 
 3 MED1  PML_RARA     20.6
 4 MED1  PML_RARA    139. 
 5 MED10 PML_RARA    158. 
 6 MED10 PML_RARA    110. 
 7 MED10 PML_RARA    180. 
 8 MED10 PML_RARA    128. 
 9 MED11 PML_RARA     81.8
10 MED11 PML_RARA     91.3
# … with 18 more rows

输入:

# Example input: one row per MED gene, one numeric column per sample.
# check.names = FALSE keeps the hyphenated sample names exactly as written.
data_frame_merge <- data.frame(
  `TCGA-AB-2991_PML_RARA` = c(
    57.5155040249228, 157.661027088761, 81.79538436234, 176.603480800986,
    188.093456858769, 9.11129987798631, 105.621097609401
  ),
  `TCGA-AB-3012_PML_RARA` = c(
    178.483808878809, 110.287002893165, 91.3229470606893, 191.366669069976,
    90.6668312381953, 135.514127090573, 114.526680391282
  ),
  `TCGA-AB-2872_PML_RARA` = c(
    20.5849365331233, 179.964994080365, 49.217546870932, 8.41190670616925,
    65.5841438565403, 190.900729829445, 177.907863212749
  ),
  `TCGA-AB-2999_PML_RARA` = c(
    138.56068123132, 128.101362753659, 198.853955324739, 131.141159823164,
    141.706093633547, 108.813204942271, 118.828404089436
  ),
  row.names = c(
    "MED1", "MED10", "MED11", "MED12", "MED12L", "MED13", "MED13L"
  ),
  check.names = FALSE
)

data_frame_merge 看起来像这样:

       TCGA-AB-2991_PML_RARA TCGA-AB-3012_PML_RARA TCGA-AB-2872_PML_RARA TCGA-AB-2999_PML_RARA
MED1                57.51550             178.48381             20.584937              138.5607
MED10              157.66103             110.28700            179.964994              128.1014
MED11               81.79538              91.32295             49.217547              198.8540
MED12              176.60348             191.36667              8.411907              131.1412
MED12L             188.09346              90.66683             65.584144              141.7061
MED13                9.11130             135.51413            190.900730              108.8132
MED13L             105.62110             114.52668            177.907863              118.8284