嵌套 for 循环 - 标记-重新捕获
Nested for loop - mark-recapture
感谢您花时间阅读本文。
下面的代码创建了一个图表,其中包含 100 个介于 5% 之间的样本
和 15% 的人口 (400)。
但是,我想要做的是向图中添加另外两个部分。它
看起来像这样:
from 1-100 samples 取 100 个样本在 5% 到 15% 之间
人口(400)。从 101-200 取 100 个样本,样本比例在 5% 到 15% 之间
人口(800)。从 201-300 中抽取 100 个样本,这些样本在 5% 之间
和 15% 的人口 (300)。
我假设这需要一个嵌套的 for 循环。有没有人有建议
如何做到这一点?
感谢您的宝贵时间。克尔斯滕
N <- 400
pop <- c(1:N)
lower.bound <- round(x = .05 * N, digits = 0)
lower.bound ## Smallest possible sample size
upper.bound <- round(x = .15 * N, digits = 0)
upper.bound ## Largest possible sample size
length.ss.interval <- length(c(lower.bound:upper.bound))
length.ss.interval ## total possible sample sizes, ranging form lower.bound
to upper.bound
sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval, length.ss.interval)))
n.samples <- 100
dat <- matrix(data = NA,
nrow = length(pop),
ncol = n.samples + 1)
dat[,1] <- pop
for(i in 2:ncol(dat)) {
a.sample <- sample(x = pop,
size = sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval,
length.ss.interval))),
replace = FALSE)
dat[,i] <- dat[,1] %in% a.sample
}
schnabel.comp <- data.frame(sample = 1:n.samples,
n.sampled = apply(X = dat, MARGIN = 2, FUN =
sum)[2:length(apply(X = dat, MARGIN = 2, FUN = sum))]
)
n.prev.sampled <- c(0, rep(NA, n.samples-1))
n.prev.sampled
n.prev.sampled[2] <- sum(ifelse(test = dat[,3] == 1 & dat[,2] == 1,
yes = 1,
no = 0))
for(i in 4:ncol(dat)) {
n.prev.sampled[i-1] <- sum(ifelse(test = dat[,i] == 1 &
rowSums(dat[,2:(i-1)]) > 0,
yes = 1,
no = 0))
}
schnabel.comp$n.prev.sampled <- n.prev.sampled
schnabel.comp$n.newly.sampled <- with(schnabel.comp,
n.sampled - n.prev.sampled)
schnabel.comp$cum.sampled <- c(0,
cumsum(schnabel.comp$n.newly.sampled)[2:n.samples-1])
schnabel.comp$numerator <- with(schnabel.comp,
n.sampled * cum.sampled)
schnabel.comp$pop.estimate <- NA
for(i in 1:length(schnabel.comp$pop.estimate)) {
schnabel.comp$pop.estimate[i] <- sum(schnabel.comp$numerator[1:i]) /
sum(schnabel.comp$n.prev.sampled[1:i])
}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("scales")) {install.packages("scales"); require("scales")}
small.sample.dat <- schnabel.comp
small.sample <- ggplot(data = small.sample.dat,
mapping = aes(x = sample, y = pop.estimate)) +
geom_point(size = 2) +
geom_line() +
geom_hline(yintercept = N, col = "red", lwd = 1) +
coord_cartesian(xlim = c(0:100), ylim = c(300:500)) +
scale_x_continuous(breaks = pretty_breaks(11)) +
scale_y_continuous(breaks = pretty_breaks(11)) +
labs(x = "\nSample", y = "Population estimate\n",
title = "Sample sizes are between 5% and 15%\nof the population") +
theme_bw(base_size = 12) +
theme(aspect.ratio = 1)
我的想法是使用以下代码创建一个嵌套的 ifelse 语句:
N.2 <- 800
N.3 <- 300
pop.2 <- c(401:N.2)
pop.3 <- c(801:N)
lower.bound.2 <- round(x = .05 * N.2, digits = 0)
upper.bound.2 <- round(x = .15 * N.2, digits = 0)
lower.bound.3 <- round(x = .05 * N.3, digits = 0)
upper.bound.3 <- round(x = .15 * N.3, digits = 0)
也许是...的一些排列
dat <- imatrix(ifelse(n.samples ,= 100),
yes = nrow = length(pop),
no = ifelse(n.samples > 100 & > 201),
yes = nrow = length(pop.2),
no = nrow = length(pop.3),
ncol = n.samples + 1)
这是否符合您的要求?我在下面编写的函数 mark_recapture
接受四个参数(样本数、样本的下限和上限以及总体规模),并输出一个矩阵,其中行代表总体中的个体,列代表样本.如果在给定的样本中捕获了一个个体,则它得到 1,否则它得到 0。定义函数后,您可以 运行 它 3 次,具有 3 个不同的种群大小,以获得 3 个不同的矩阵。
# define variables
num_samp <- 100
lower_sampsize <- 0.05
upper_sampsize <- 0.15
# define sampling function that outputs matrix
mark_recapture <- function (num_samp, pop_size, lower_sampsize, upper_sampsize) {
# empty matrix
mat <- matrix(0, pop_size, num_samp)
# min and max sample size
min <- ceiling(lower_sampsize*pop_size)
max <- floor(upper_sampsize*pop_size)
# vector of random sample sizes between min and max
samp_sizes <- sample(min:max, num_samp, replace=TRUE)
# draw the samples and fill in the matrix
for (i in 1:num_samp) {mat[sample(1:pop_size, samp_sizes[i]),i] <- 1}
# return matrix
return(mat)
}
# do the sampling from the 3 populations
mat1 <- mark_recapture(num_samp=num_samp, pop_size=400, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
mat2 <- mark_recapture(num_samp=num_samp, pop_size=800, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
mat3 <- mark_recapture(num_samp=num_samp, pop_size=300, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
虽然超出了这个问题的范围,但我只是提一下,有专门的R包来分析和模拟标记-重新捕获数据,例如multimark。只需 Google "CRAN mark recapture",您就会发现许多选项。我建议仔细阅读这些内容并仔细考虑您要在这里实现的目标,因为您可能正在尝试重新发明轮子。
感谢您花时间阅读本文。
下面的代码创建了一个图表,其中包含 100 个介于 5% 之间的样本 和 15% 的人口 (400)。
但是,我想要做的是向图中添加另外两个部分。它 看起来像这样:
from 1-100 samples 取 100 个样本在 5% 到 15% 之间 人口(400)。从 101-200 取 100 个样本,样本比例在 5% 到 15% 之间 人口(800)。从 201-300 中抽取 100 个样本,这些样本在 5% 之间 和 15% 的人口 (300)。
我假设这需要一个嵌套的 for 循环。有没有人有建议 如何做到这一点?
感谢您的宝贵时间。克尔斯滕
N <- 400
pop <- c(1:N)
lower.bound <- round(x = .05 * N, digits = 0)
lower.bound ## Smallest possible sample size
upper.bound <- round(x = .15 * N, digits = 0)
upper.bound ## Largest possible sample size
length.ss.interval <- length(c(lower.bound:upper.bound))
length.ss.interval ## total possible sample sizes, ranging form lower.bound
to upper.bound
sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval, length.ss.interval)))
n.samples <- 100
dat <- matrix(data = NA,
nrow = length(pop),
ncol = n.samples + 1)
dat[,1] <- pop
for(i in 2:ncol(dat)) {
a.sample <- sample(x = pop,
size = sample(x = c(lower.bound:upper.bound),
size = 1,
prob = c(rep(1/length.ss.interval,
length.ss.interval))),
replace = FALSE)
dat[,i] <- dat[,1] %in% a.sample
}
schnabel.comp <- data.frame(sample = 1:n.samples,
n.sampled = apply(X = dat, MARGIN = 2, FUN =
sum)[2:length(apply(X = dat, MARGIN = 2, FUN = sum))]
)
n.prev.sampled <- c(0, rep(NA, n.samples-1))
n.prev.sampled
n.prev.sampled[2] <- sum(ifelse(test = dat[,3] == 1 & dat[,2] == 1,
yes = 1,
no = 0))
for(i in 4:ncol(dat)) {
n.prev.sampled[i-1] <- sum(ifelse(test = dat[,i] == 1 &
rowSums(dat[,2:(i-1)]) > 0,
yes = 1,
no = 0))
}
schnabel.comp$n.prev.sampled <- n.prev.sampled
schnabel.comp$n.newly.sampled <- with(schnabel.comp,
n.sampled - n.prev.sampled)
schnabel.comp$cum.sampled <- c(0,
cumsum(schnabel.comp$n.newly.sampled)[2:n.samples-1])
schnabel.comp$numerator <- with(schnabel.comp,
n.sampled * cum.sampled)
schnabel.comp$pop.estimate <- NA
for(i in 1:length(schnabel.comp$pop.estimate)) {
schnabel.comp$pop.estimate[i] <- sum(schnabel.comp$numerator[1:i]) /
sum(schnabel.comp$n.prev.sampled[1:i])
}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("scales")) {install.packages("scales"); require("scales")}
small.sample.dat <- schnabel.comp
small.sample <- ggplot(data = small.sample.dat,
mapping = aes(x = sample, y = pop.estimate)) +
geom_point(size = 2) +
geom_line() +
geom_hline(yintercept = N, col = "red", lwd = 1) +
coord_cartesian(xlim = c(0:100), ylim = c(300:500)) +
scale_x_continuous(breaks = pretty_breaks(11)) +
scale_y_continuous(breaks = pretty_breaks(11)) +
labs(x = "\nSample", y = "Population estimate\n",
title = "Sample sizes are between 5% and 15%\nof the population") +
theme_bw(base_size = 12) +
theme(aspect.ratio = 1)
我的想法是使用以下代码创建一个嵌套的 ifelse 语句:
N.2 <- 800
N.3 <- 300
pop.2 <- c(401:N.2)
pop.3 <- c(801:N)
lower.bound.2 <- round(x = .05 * N.2, digits = 0)
upper.bound.2 <- round(x = .15 * N.2, digits = 0)
lower.bound.3 <- round(x = .05 * N.3, digits = 0)
upper.bound.3 <- round(x = .15 * N.3, digits = 0)
也许是...的一些排列
dat <- imatrix(ifelse(n.samples ,= 100),
yes = nrow = length(pop),
no = ifelse(n.samples > 100 & > 201),
yes = nrow = length(pop.2),
no = nrow = length(pop.3),
ncol = n.samples + 1)
这是否符合您的要求?我在下面编写的函数 mark_recapture
接受四个参数(样本数、样本的下限和上限以及总体规模),并输出一个矩阵,其中行代表总体中的个体,列代表样本.如果在给定的样本中捕获了一个个体,则它得到 1,否则它得到 0。定义函数后,您可以 运行 它 3 次,具有 3 个不同的种群大小,以获得 3 个不同的矩阵。
# define variables
num_samp <- 100
lower_sampsize <- 0.05
upper_sampsize <- 0.15
# define sampling function that outputs matrix
mark_recapture <- function (num_samp, pop_size, lower_sampsize, upper_sampsize) {
# empty matrix
mat <- matrix(0, pop_size, num_samp)
# min and max sample size
min <- ceiling(lower_sampsize*pop_size)
max <- floor(upper_sampsize*pop_size)
# vector of random sample sizes between min and max
samp_sizes <- sample(min:max, num_samp, replace=TRUE)
# draw the samples and fill in the matrix
for (i in 1:num_samp) {mat[sample(1:pop_size, samp_sizes[i]),i] <- 1}
# return matrix
return(mat)
}
# do the sampling from the 3 populations
mat1 <- mark_recapture(num_samp=num_samp, pop_size=400, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
mat2 <- mark_recapture(num_samp=num_samp, pop_size=800, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
mat3 <- mark_recapture(num_samp=num_samp, pop_size=300, lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
虽然超出了这个问题的范围,但我只是提一下,有专门的R包来分析和模拟标记-重新捕获数据,例如multimark。只需 Google "CRAN mark recapture",您就会发现许多选项。我建议仔细阅读这些内容并仔细考虑您要在这里实现的目标,因为您可能正在尝试重新发明轮子。