循环数据帧以创建新变量

Question

我有 30 个数据帧，需要根据一些条件在每个数据帧中创建一个新变量。我尝试用 for 循环来实现，但没有成功。我在这里搜索过一些例子，但还是没能解决。能帮帮我吗？

我所做的是：

# Names of the 30 per-chromosome data frames: "chr1", "chr2", ..., "chr30".
dflist <- paste0("chr", seq_len(30))


# Add a binned distance column `Dist2` to every data frame named in `dflist`.
#
# `dflist` holds object *names* (character strings), not the data frames
# themselves, so each one is looked up with get() and written back with
# assign(). Indexing the name directly (`df[i, ]`) fails because a character
# string has no rows or columns.
#
# Bins are right-closed windows of 10000 on `Dist`:
#   Dist <= 10000 -> 1, (10000, 20000] -> 2, ..., (90000, 100000] -> 10.
# Values above 100000 (and missing `Dist`) fall outside every interval and
# become NA. cut() is vectorized, so no per-row loop is needed.
dist_breaks <- c(-Inf, seq(1e4, 1e5, by = 1e4))
for (df in dflist) {
  chr_data <- get(df)
  # labels = FALSE returns the interval index instead of a factor;
  # as.numeric() keeps the plain numeric codes 1..10 the ifelse() chain used.
  chr_data$Dist2 <- as.numeric(
    cut(chr_data$Dist, breaks = dist_breaks, labels = FALSE)
  )
  assign(df, chr_data)
}

每个文件如下所示：

     Chr   SNP1   SNP2  Dist  Sign   r2
1     26 507478 507479  9727    + 0.789
2     26 507478 507480 13907    - 0.093
3     26 507478 507481 23618    - 0.002
4     26 507478 507482 59349    - 0.245
5     26 507478 507483 62804    + 0.266
6     26 507478 507484 65323    + 0.029

非常感谢。干杯。宝拉

Answer 1

我们可以使用 cut。先把各个 data.frame 保存在一个 list（'lst'）中，再用 lapply 遍历这个 list，并通过 transform 配合 cut 为每个 data.frame 创建另一列 'Dist2'。

# Bin `Dist` into right-closed windows of 10000 for every data.frame in `lst`:
# (-Inf, 1e4] -> 1, (1e4, 2e4] -> 2, ..., (9e4, 1e5] -> 10.
# `labels = FALSE` makes cut() return integer codes instead of a factor, and
# leaving out an upper `Inf` break means distances above 1e5 fall outside
# every interval and become a real NA (detectable with is.na()) rather than a
# factor level that merely prints as <NA>.
# lapply() returns a new list; `lst` itself is not modified.
lapply(lst, transform, Dist2 = cut(Dist,
  breaks = c(-Inf, seq(1e4, 1e5, by = 1e4)), labels = FALSE))
#[[1]]
#  Chr   SNP1   SNP2   Dist Sign    r2 Dist2
#1  26 507478 507479 123300    + 0.789    NA
#2  26 507478 507480  13907    - 0.093     2
#3  26 507478 507481  23618    - 0.002     3
#4  26 507478 507482  59349    - 0.245     6
#5  26 507478 507483  62804    + 0.266     7
#6  26 507478 507484  65323    + 0.029     7

#[[2]]
#  Chr   SNP1   SNP2  Dist Sign    r2 Dist2
#1  26 507478 507479  9727    + 0.789     1
#2  26 507478 507480 13907    - 0.093     2
#3  26 507478 507481 23618    - 0.002     3
#4  26 507478 507482 59349    - 0.245     6
#5  26 507478 507483 62804    + 0.266     7
#6  26 507478 507484 65323    + 0.029     7

数据

# Example input: a list of two data.frames that are identical except for the
# `Dist` column. The first stores `Dist` as doubles and starts with an
# out-of-range value (123300); the second stores `Dist` as integers.
lst <- lapply(
  list(
    c(123300, 13907, 23618, 59349, 62804, 65323),
    c(9727L, 13907L, 23618L, 59349L, 62804L, 65323L)
  ),
  function(dist) {
    data.frame(
      Chr = rep(26L, 6L),
      SNP1 = rep(507478L, 6L),
      SNP2 = 507479:507484,
      Dist = dist,
      Sign = c("+", "-", "-", "-", "+", "+"),
      r2 = c(0.789, 0.093, 0.002, 0.245, 0.266, 0.029),
      # character row names, as in the dput() form of the data
      row.names = as.character(1:6),
      # keep `Sign` as character on every R version
      stringsAsFactors = FALSE
    )
  }
)

Answer 2

使用 akrun 回答中的 lst 数据，下面是使用 ceiling() 的另一种可能方法：

# Bin `Dist` arithmetically for every data.frame in `lst` and return the
# results as a new list (`lst` itself is left unchanged).
lapply(lst, function(x) {
    ## divide 'Dist' by 10,000 and push to the next integer;
    ## pmax() clamps the result at 1 so that Dist <= 0 still lands in the
    ## first bin (ceiling() alone would give 0 or a negative bin there)
    y <- pmax(ceiling(x$Dist / 1e4), 1)
    ## replace the values over 10 with NA
    is.na(y) <- y > 10
    ## bind the data to the new vector
    cbind(x, Dist2 = y)
})
# [[1]]
#   Chr   SNP1   SNP2   Dist Sign    r2 Dist2
# 1  26 507478 507479 123300    + 0.789    NA
# 2  26 507478 507480  13907    - 0.093     2
# 3  26 507478 507481  23618    - 0.002     3
# 4  26 507478 507482  59349    - 0.245     6
# 5  26 507478 507483  62804    + 0.266     7
# 6  26 507478 507484  65323    + 0.029     7
#
# [[2]]
#   Chr   SNP1   SNP2  Dist Sign    r2 Dist2
# 1  26 507478 507479  9727    + 0.789     1
# 2  26 507478 507480 13907    - 0.093     2
# 3  26 507478 507481 23618    - 0.002     3
# 4  26 507478 507482 59349    - 0.245     6
# 5  26 507478 507483 62804    + 0.266     7
# 6  26 507478 507484 65323    + 0.029     7

循环数据帧以创建新变量

Loop over dataframes to create new variable

loops

for-loop

r

lapply

数据