一种提取在 R 中采样的子数组名称的方法

A way to extract the name of subarray that was sampled in R

我有一个已构建的模拟,需要引用从中随机采样的给定子数组,以便使用 for 循环创建一些图。我不太确定如何着手完成这个...

这是我的代码:

K <- 2       # number of subarrays
N <- 100 
Hstar <- 10

perms <- 10000

p <- 0.95

specs <- 1:N 

pop <- array(dim = c(c(perms, N), K))

haps <- as.character(1:Hstar)

probs <- rep(1/Hstar, Hstar) 

for(j in 1:perms){
    for(i in 1:K){ 
        if(i == 1){
            pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
    }
        else{
            K1 <- c(1:5)
            K2 <- c(6:10)
            pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
            pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
        }
    }
}

HAC.mat <- array(dim = c(c(perms, N), K))

for(k in specs){
    for(j in 1:perms){
        for(i in 1:K){ 
            ind.index <- sample(specs, size = k, replace = FALSE)
            hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
            HAC.mat[j, k, i] <- length(unique(hap.plot))
    }
  }
}

means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))

par(mfrow = c(1, 2))

for(i in 1:K){
    if(i == 1){
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)

}
    else{
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(max(HAC.mat)*probs[K1], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
    }
}

问题出在我指出的最后一个else语句中

max(HAC.mat)*probs[K1]

以上,我只是以K1为例。然而,在现实中,我并不知道采样的是K1还是K2的哪一行。有没有一种方法可以通用地指定它?像 K[i]?

这样的东西

谢谢!如果需要更多说明,请告诉我。

您可以使用 getpaste0。我让你的例子有更少的排列,所以它完成得更快(因为它只是为了测试这个)并且我让 probs 不同所以你可以看到它有效。

K <- 2       # number of subarrays
N <- 10 
Hstar <- 10

perms <- 1000

p <- 0.95

specs <- 1:N 

pop <- array(dim = c(c(perms, N), K))

haps <- as.character(1:Hstar)

probs <- seq(1/Hstar, 1,.1) 

for(j in 1:perms){
  for(i in 1:K){ 
    if(i == 1){
      pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
    }
    else{
      K1 <- c(1:5)
      K2 <- c(6:10)
      pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
      pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
      print(paste("j is: ", j))
    }
  }
}

HAC.mat <- array(dim = c(c(perms, N), K))

for(k in specs){
  for(j in 1:perms){
    for(i in 1:K){ 
      ind.index <- sample(specs, size = k, replace = FALSE)
      hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
      HAC.mat[j, k, i] <- length(unique(hap.plot))
    }
  }
}

means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))

par(mfrow = c(1, 2))

for(i in 1:K){
  print(paste("i is:",i))
  print(paste("probs[...] is:", probs[get(paste0("K",i))]))
  if(i == 1){
    plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
    polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
    lines(specs, means, lwd = 2)
    HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)

  }
  else{
    plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
    polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
    lines(specs, means, lwd = 2)
    HAC.bar <- barplot(max(HAC.mat)*probs[get(paste0("K",i))], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
  }
}

我添加了打印语句,以便我们可以看到它有效:

[1] "i is: 1"
[1] "probs[...] is: 0.1" "probs[...] is: 0.2" "probs[...] is: 0.3" "probs[...] is: 0.4" "probs[...] is: 0.5"
[1] "i is: 2"
[1] "probs[...] is: 0.6" "probs[...] is: 0.7" "probs[...] is: 0.8" "probs[...] is: 0.9" "probs[...] is: 1"