按组创建加起来为 1 的随机比率
Creating random ratios that add up to 1 by group
我有一个数据集如下:
# Pools used to simulate the panel: 50 panel units, two survey years, and the
# category values that the descriptive columns are sampled from.
panelID <- 1:50
year <- c(2005, 2010)
country <- LETTERS[1:10]
urban <- LETTERS[1:3]
indust <- LETTERS[4:6]
sizes <- c(1, 2, 3, 4, 5)
# Number of rows generated for each panel unit.
n <- 2
library(AER)
library(data.table)
library(dplyr)
# Fix the RNG so the simulated panel is reproducible.
set.seed(123)

# Simulate the panel: each panel unit contributes `n` consecutive rows that
# share one sampled country, industry, urbanisation and size, while the years
# are drawn separately for every unit.
DT <- data.table(
  country = rep(sample(country, length(panelID), replace = TRUE), each = n),
  year = c(replicate(length(panelID), sample(year, n))),
  # NOTE(review): only 10 sales values are drawn while the other columns have
  # length(panelID) * n entries, so this relies on data.table() recycling the
  # shorter vector -- confirm that repeating the same 10 values is intended.
  sales = round(rnorm(10, 10, 10), 2),
  industry = rep(sample(indust, length(panelID), replace = TRUE), each = n),
  urbanisation = rep(sample(urban, length(panelID), replace = TRUE), each = n),
  size = rep(sample(sizes, length(panelID), replace = TRUE), each = n)
)

DT <- DT %>%
  # One base rate per country: a uniform draw from [12.5, 37.5], truncated to
  # an integer.
  group_by(country) %>%
  mutate(base_rate = as.integer(runif(1, 12.5, 37.5))) %>%
  # One tax rate per country-year: the base rate shifted by a uniform draw
  # from [-2.5, 2.5] that is truncated toward zero.
  group_by(country, year) %>%
  mutate(taxrate = base_rate + as.integer(runif(1, -2.5, +2.5))) %>%
  # Still grouped by country and year here: one vote flag per country-year,
  # and a "won" flag that can only be 1 when a vote took place.
  mutate(
    vote = sample(c(0, 1), 1),
    votewon = ifelse(vote == 1, sample(c(0, 1), 1), 0)
  )
我想向这个数据集添加一个名为 ratio 的变量。我希望 ratio 是一个介于 0 和 1 之间的随机数,并且每个国家/地区内的这些比率之和为 1。
我该如何创建这样的一列?我唯一能想到的办法,就是手动创建若干个加起来为 1 的向量,然后从这些向量中采样。
编辑:国家/地区的条目不相等:
> table(DT$country)
A B C D E F G H I J
6 10 14 6 14 10 10 8 10 12
# Hand-written pool of six ratios that add up to 1.
ratio_sample_6 <- c(0.1, 0.2, 0.3, 0.05, 0.15, 0.2)
# Attempt: give every country a random permutation of that pool as `ratio`
# (sample() without replacement is the default).
# NOTE(review): the pool has exactly six entries, while the country groups
# listed in the table above have between 6 and 14 rows.
DT[
  ,
  ratio := sample(ratio_sample_6),
  by = "country"
]
但即便是这样,我也没能让它运行起来。有什么建议吗?
选择随机数并按国家归一化:
## data.table version
# `:=` only works on a data.table. Once DT has been piped through dplyr's
# group_by()/mutate() it is a grouped tibble, so convert it by reference
# first; on an object that already is a data.table this changes nothing.
setDT(DT)
# Draw one uniform number per row, then divide by the country total so the
# ratios lie between 0 and 1 and sum to 1 within each country.
DT[, ratio := runif(.N)][, ratio := ratio / sum(ratio), by = "country"]

## dplyr version
# Same idea; assign the result (e.g. `DT <- DT %>% ...`) to keep the column.
DT %>%
  group_by(country) %>%
  mutate(
    ratio = runif(n()),
    ratio = ratio / sum(ratio)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per country.
  ungroup()
我有一个数据集如下:
# Pools used to simulate the panel: 50 panel units, two survey years, and the
# category values that the descriptive columns are sampled from.
panelID <- 1:50
year <- c(2005, 2010)
country <- LETTERS[1:10]
urban <- LETTERS[1:3]
indust <- LETTERS[4:6]
sizes <- c(1, 2, 3, 4, 5)
# Number of rows generated for each panel unit.
n <- 2
library(AER)
library(data.table)
library(dplyr)
# Fix the RNG so the simulated panel is reproducible.
set.seed(123)

# Simulate the panel: each panel unit contributes `n` consecutive rows that
# share one sampled country, industry, urbanisation and size, while the years
# are drawn separately for every unit.
DT <- data.table(
  country = rep(sample(country, length(panelID), replace = TRUE), each = n),
  year = c(replicate(length(panelID), sample(year, n))),
  # NOTE(review): only 10 sales values are drawn while the other columns have
  # length(panelID) * n entries, so this relies on data.table() recycling the
  # shorter vector -- confirm that repeating the same 10 values is intended.
  sales = round(rnorm(10, 10, 10), 2),
  industry = rep(sample(indust, length(panelID), replace = TRUE), each = n),
  urbanisation = rep(sample(urban, length(panelID), replace = TRUE), each = n),
  size = rep(sample(sizes, length(panelID), replace = TRUE), each = n)
)

DT <- DT %>%
  # One base rate per country: a uniform draw from [12.5, 37.5], truncated to
  # an integer.
  group_by(country) %>%
  mutate(base_rate = as.integer(runif(1, 12.5, 37.5))) %>%
  # One tax rate per country-year: the base rate shifted by a uniform draw
  # from [-2.5, 2.5] that is truncated toward zero.
  group_by(country, year) %>%
  mutate(taxrate = base_rate + as.integer(runif(1, -2.5, +2.5))) %>%
  # Still grouped by country and year here: one vote flag per country-year,
  # and a "won" flag that can only be 1 when a vote took place.
  mutate(
    vote = sample(c(0, 1), 1),
    votewon = ifelse(vote == 1, sample(c(0, 1), 1), 0)
  )
我想向这个数据集添加一个名为 ratio 的变量。我希望 ratio 是一个介于 0 和 1 之间的随机数,并且每个国家/地区内的这些比率之和为 1。
我该如何创建这样的一列?我唯一能想到的办法,就是手动创建若干个加起来为 1 的向量,然后从这些向量中采样。
编辑:国家/地区的条目不相等:
> table(DT$country)
A B C D E F G H I J
6 10 14 6 14 10 10 8 10 12
# Hand-written pool of six ratios that add up to 1.
ratio_sample_6 <- c(0.1, 0.2, 0.3, 0.05, 0.15, 0.2)
# Attempt: give every country a random permutation of that pool as `ratio`
# (sample() without replacement is the default).
# NOTE(review): the pool has exactly six entries, while the country groups
# listed in the table above have between 6 and 14 rows.
DT[
  ,
  ratio := sample(ratio_sample_6),
  by = "country"
]
但即便是这样,我也没能让它运行起来。有什么建议吗?
选择随机数并按国家归一化:
## data.table version
# `:=` only works on a data.table. Once DT has been piped through dplyr's
# group_by()/mutate() it is a grouped tibble, so convert it by reference
# first; on an object that already is a data.table this changes nothing.
setDT(DT)
# Draw one uniform number per row, then divide by the country total so the
# ratios lie between 0 and 1 and sum to 1 within each country.
DT[, ratio := runif(.N)][, ratio := ratio / sum(ratio), by = "country"]

## dplyr version
# Same idea; assign the result (e.g. `DT <- DT %>% ...`) to keep the column.
DT %>%
  group_by(country) %>%
  mutate(
    ratio = runif(n()),
    ratio = ratio / sum(ratio)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per country.
  ungroup()