使用限制来定义唯一标识

Question

# Example data: one vector per attribute, one element per person.
Person <- c(1, 2, 3)
Age <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)

# Combine the vectors into a data frame with one row per person.
People <- data.frame(
  Person = Person,
  Age = Age,
  Height = Height,
  Weight = Weight
)

Age_cats_type1 [5-15], [20-30], [35-45]
Age_cats_type2 [8-13], [14-16], [18-40]

Height_cat_Type1 [100-120], [121-140], [141-186]
Height_cat_type2 [110-125], [126-145], [146-190]

Weight_cat_Type1 [50-60], [61-78], [79-85]
Weight_cat_Type2 [55-75], [76-90], [91-100]

对于 People[1,2]（Age=10），它对应 Age_cats_type1==1 和 Age_cats_type2==1。对于 People[1,3]（Height=140），它对应 Height_cat_Type1==2 和 Height_cat_Type2==2。

现在我想为 (Age_cats_type1==1)|(Age_cats_type1==2)、(Height_cats_type1==1)|(Height_cats_type1==2)、(Weight_cats_type1==1)|(Weight_cats_type1==2) 这些区间的每一种唯一的可能结果创建一个 table。

所需的输出应类似于下面的黄色图像。上面的 table 是对每个 interval 的可能性的总结。

这与另一个问题密切相关，但是当您按照 BrodieG 在那里给出的代码进行操作时，第三次迭代会出现错误。在本例中，我们使用 data.table 中的 foverlaps。

我用过下面的代码

library(intervals)
# create our limits

# Category limits for each attribute, stored as Intervals objects that are
# closed on both ends. Each matrix row is one c(lower, upper) pair, so row i
# is category i. type = "Z" selects integer-valued intervals in the
# `intervals` package.

# Age, scheme 1: [5-15], [20-30], [35-45].
# The last upper bound is 45 (not 40) so that it agrees with the
# Age_cats_type1 limits listed in the question text.
AGE_cats_type1 <- Intervals(
  matrix(c(5, 15, 20, 30, 35, 45), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Age, scheme 2: [8-13], [14-16], [18-40].
AGE_cats_type2 <- Intervals(
  matrix(c(8, 13, 14, 16, 18, 40), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 1: [100-120], [121-140], [141-186].
Height_cats_type1 <- Intervals(
  matrix(c(100, 120, 121, 140, 141, 186), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Height, scheme 2: [110-125], [126-145], [146-190].
Height_cats_type2 <- Intervals(
  matrix(c(110, 125, 126, 145, 146, 190), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 1: [50-60], [61-78], [79-85].
Weight_cats_type1 <- Intervals(
  matrix(c(50, 60, 61, 78, 79, 85), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

# Weight, scheme 2: [55-75], [76-90], [91-100].
Weight_cats_type2 <- Intervals(
  matrix(c(55, 75, 76, 90, 91, 100), ncol = 2, byrow = TRUE),
  closed = c(TRUE, TRUE),
  type = "Z"
)

#now format data
# first for age
library(data.table)
# First iteration: match every person against both age category schemes.
# Work on a data.table copy of People, since foverlaps() operates on
# data.tables.
PEOPLE1 <- data.table(People)
# Duplicate Age into A1 so that (Age, A1) can be passed as the start/end
# column pair in by.x below.
PEOPLE1[, A1:=Age]

# One lookup table per age scheme: the interval bounds plus two id columns.
# idX carries the scheme-1 category number (0 in the scheme-2 table) and
# idY carries the scheme-2 category number (0 in the scheme-1 table).
I_age_1 <- data.table(cbind(data.frame(AGE_cats_type1), idX=1:3, idY=0))
I_age_2 <- data.table(cbind(data.frame(AGE_cats_type2), idX=0, idY=1:3))

# Key both lookup tables on X1/X2.
# NOTE(review): X1/X2 are presumably the lower/upper bound columns produced
# by data.frame() on the Intervals object -- confirm.
setkey(I_age_1, X1, X2)
setkey(I_age_2, X1, X2)

# Stack the matches from both schemes; nomatch=0 drops people that fall in
# no interval of a scheme. The result is converted back to a data.frame.
PEOPLE2 <- data.frame(rbind(
  foverlaps(PEOPLE1, I_age_1, by.x=c("Age", "A1"), nomatch=0),
  foverlaps(PEOPLE1, I_age_2, by.x=c("Age", "A1"), nomatch=0)))

####################################################
# second iteration for  height

# Second iteration: take the age-matched rows and match them against both
# height category schemes.
PEOPLE3 <- data.table(PEOPLE2)
# Duplicate Height into B1 so that (Height, B1) can be passed as the
# start/end column pair in by.x below.
PEOPLE3[, B1 := Height]

# One lookup table per height scheme. idXa carries the scheme-1 category
# number (0 in the scheme-2 table) and idYa carries the scheme-2 category
# number (0 in the scheme-1 table).
I_height_1 <- data.table(cbind(data.frame(Height_cats_type1), idXa = 1:3, idYa = 0))
I_height_2 <- data.table(cbind(data.frame(Height_cats_type2), idXa = 0, idYa = 1:3))

setkey(I_height_1, X1, X2)
setkey(I_height_2, X1, X2)

# Stack the matches from both schemes. The second lookup uses I_height_2;
# the earlier version queried I_height_1 twice, which duplicated the
# scheme-1 matches and never consulted scheme 2.
PEOPLE4 <- data.frame(rbind(
  foverlaps(PEOPLE3, I_height_1, by.x = c("Height", "B1"), nomatch = 0),
  foverlaps(PEOPLE3, I_height_2, by.x = c("Height", "B1"), nomatch = 0)))

################################################
# third iteration

# Third iteration: take the age- and height-matched rows and match them
# against both weight category schemes.
PEOPLE5 <- data.table(PEOPLE4)
# Duplicate Weight into C1 so that (Weight, C1) can be passed as the
# start/end column pair in by.x below.
PEOPLE5[, C1 := Weight]

# One lookup table per weight scheme. idXb carries the scheme-1 category
# number (0 in the scheme-2 table) and idYb carries the scheme-2 category
# number (0 in the scheme-1 table).
I_weight_1 <- data.table(cbind(data.frame(Weight_cats_type1), idXb = 1:3, idYb = 0))
I_weight_2 <- data.table(cbind(data.frame(Weight_cats_type2), idXb = 0, idYb = 1:3))

setkey(I_weight_1, X1, X2)
setkey(I_weight_2, X1, X2)

# Match on (Weight, C1). The earlier version passed c("Height", "B1") here,
# which compared heights against the weight limits and left C1 unused.
# NOTE(review): every iteration keys its lookup table on columns named
# X1/X2, and PEOPLE5 already carries bound columns from the previous joins.
# The setcolorder() error reported for this step is presumably caused by
# those repeated column names -- confirm, e.g. by renaming the bound columns
# per iteration before joining.
PEOPLE6 <- data.frame(rbind(
  foverlaps(PEOPLE5, I_weight_1, by.x = c("Weight", "C1"), nomatch = 0),
  foverlaps(PEOPLE5, I_weight_2, by.x = c("Weight", "C1"), nomatch = 0)))

但是在 PEOPLE6 中出现错误。

Error in setcolorder(ans, c(xcols1, ycols, xcols2)) : 
  neworder is length 16 but x has 18 columns.

当我查看 PEOPLE4 时，可以看到 idX、idY、idXa 和 idYa 分别是 Age_cats_type1、Age_cats_type2、Height_cat_Type1 和 Height_cat_Type2 的值。

Answer 1

你的问题的表述有一些问题。让我们尝试重构您的意思。

# Start by building the example data, one vector per attribute
Person <- c(1, 2, 3)
Age <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)

People <- data.frame(
  Person = Person,
  Age = Age,
  Height = Height,
  Weight = Weight
)

# Printing People shows something like:
#   Person Age Height Weight
# 1      1  10    140     65
# 2      2  22    185     80
# 3      3  30    160     75

# Next come the ranges. One possible representation is a list of
# c(lower, upper) pairs, one pair per category:

# Age
Age_cats_type1 <- list(c(5, 15), c(20, 30), c(35, 45))
Age_cats_type2 <- list(c(8, 13), c(14, 16), c(18, 40))

# Height
Height_cat_Type1 <- list(c(100, 120), c(121, 140), c(141, 186))
Height_cat_Type2 <- list(c(110, 125), c(126, 145), c(146, 190))

# Weight
Weight_cat_Type1 <- list(c(50, 60), c(61, 78), c(79, 85))
Weight_cat_Type2 <- list(c(55, 75), c(76, 90), c(91, 100))

# You then wrote something like People[1,1] meaning age == 10.
# I believe that is a mistake: typing People[1, 1] in the console
# returns Person == 1. So I think you meant for the People data frame
# to be constructed without the Person vector.

People <- data.frame(Age = Age, Height = Height, Weight = Weight)

# With that change, People[1, 1] returns age == 10.

# Then you went on to say that you wanted some function that returned Age_cats_type == 1
# Well, it seems that you want the first element of the list of ranges that contains the specified value.
# Then let's build it

# Test whether `value` lies inside a closed range.
#
# range: a c(lower, upper) pair; both ends are inclusive.
# value: the single value to test.
# Returns TRUE when lower <= value <= upper, FALSE otherwise.
contains_value <- function(range, value) {
  range[1] <= value && value <= range[2]
}

# Find the position of the first range in `ranges_list` that contains `value`.
#
# ranges_list: a list of c(lower, upper) pairs, as accepted by contains_value().
# value: the single value to look up.
# Returns the 1-based index of the first matching range, or NA when no range
# contains the value (including when `ranges_list` is empty).
range_index <- function(ranges_list, value) {
  # vapply() always yields a logical vector, even for an empty list, whereas
  # sapply() would return list() there and make which() fail.
  hits <- vapply(ranges_list, contains_value, logical(1), value)
  which(hits)[1]
}

# Example lookups for the first person, using the People data frame that has
# Age in column 1 and Height in column 2.
# Age 10 falls in the first range of both age schemes.
range_index(Age_cats_type1, People[1, 1]) # returns 1
range_index(Age_cats_type2, People[1, 1]) # returns 1
# Height 140 falls in the second range of both height schemes.
range_index(Height_cat_Type1, People[1, 2]) # returns 2
range_index(Height_cat_Type2, People[1, 2]) # returns 2

# Now I didn't understand what the table you were trying to construct was, but maybe these functions will help you build it.

使用限制来定义唯一标识

using limits to define unique identities

r

dataframe

dplyr