使用 lapply 时强制表格具有相同的维度
Forcing tables to have equal dimensions while using lapply
我正在尝试绘制 150 个人的状态间转换频率与次数的关系图。为此,使用 lapply 来为每个人生成转换表。但是,由于有些人只有很少的转换或根本没有转换,因此生成的表在 rows/columns 的数量上彼此不同。因此,当我尝试提取转换数以绘制它们时,出现 "incorrect number of dimensions" 错误。我看过 this 个帖子,但我没有成功将解决方案应用到我的案例中。
这是我当前的代码:
n_STATEs <- 4
data <- read.csv("transitiondata.csv")
transitions <- by(data,data$ID,
function(xx)data.frame(ID=head(xx$ID,-1),
TIME=tail(xx$TIME,-1),
FROM=head(xx$STATE,-1),
TO=tail(xx$STATE,-1)))
transition_table <- lapply(transitions,function(xx)with(xx,table(FROM,TO)))
min_n_transitions <- min(unlist(transition_table))
max_n_transitions <- max(unlist(transition_table))
max_freq <- 150
par(mfrow=rep(n_STATEs,2),mai=c(.4,.4,.4,.1))
for ( from in 1:n_STATEs ) {
for ( to in 1:n_STATEs ) {
sapply(transition_table,"[",from,to)
hist(foo,freq=TRUE,
xlim=c(min_n_transitions,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
}
}
Here is the dataset. I have also tried getting the transition numbers without using lapply (see the answer in my previous thread) 但这种方法还计算了一个人内最后一个时间点和第一个时间点之间的转换,这没有任何意义。
提前致谢!
编辑:代码已修复。
另一种方法是。错误消息消失了,但是,可能是因为行和列的排序不同,转换计数显然是错误的。当前代码如下所示:
n_STATEs <- 4
data <- read.csv("transitiondata.csv")
transitions <- by(data,data$ID,
function(xx)data.frame(ID=head(xx$ID,-1),
TIME=tail(xx$TIME,-1),
FROM=head(xx$STATE,-1),
TO=tail(xx$STATE,-1)))
transition_table <- lapply(transitions,function(xx)with(xx,table(FROM,TO)))
cols <- unique(unlist(sapply(transition_table, colnames)))
rows <- unique(unlist(sapply(transition_table, rownames)))
result <- lapply(transition_table, function(m) {
missingrows <- setdiff(rows, rownames(m))
missingcols <- setdiff(cols, colnames(m))
rbind(cbind(m,
structure(matrix(0, nrow=nrow(m), ncol=length(missingcols)),
dimnames=list(NULL, missingcols))),
structure(matrix(0, nrow=length(missingrows), ncol=length(cols)),
dimnames=list(missingrows)))
})
min_n_transitions <- min(unlist(result))
max_n_transitions <- max(unlist(result))
max_freq <- 150
par(mfrow=rep(n_STATEs,2),mai=c(.4,.4,.4,.1))
for ( from in 1:n_STATEs ) {
for ( to in 1:n_STATEs ) {
sapply(result,"[",from,to)
hist(foo,freq=TRUE,
xlim=c(min_n_transitions,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
}
}
还有其他可能吗?
我找到了替代解决方案。使用 this 代码,我还获得了一个 ID 中最后一个时间点和第一个时间点之间的转换计数。但是,使用 arrange
和 subset
我可以摆脱这种奇怪的行为。当前代码如下所示:
transitions <- data.frame(ID=head(data$ID,-1),
TIME=tail(data$TIME, -1),FROM=head(data$STATE,-1),
TO=tail(data$STATE,-1))
tran1<-arrange(transitions, ID, TIME)
tran2<-subset(tran1, TIME!=0)
transition_table <- with(tran2,table(FROM,TO,ID))
min_n_transitions <- min(unlist(transition_table))
max_n_transitions <- max(unlist(transition_table))
max_freq <- 150
bins <- c(-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
par(mfrow=rep(4,2),mai=c(.3,.3,.3,.1))
for ( from in 1:4 ) {
for ( to in 1:4 ) {
foo <- transition_table[from,to,]
hist(foo,freq=TRUE,
breaks=bins,
xlim=c(-.5,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
mtext('Number of transitions', side = 1, outer = TRUE, line = 2)
mtext('Frequency', side = 2, outer = TRUE, line = 1.5)
}
}
我正在尝试绘制 150 个人的状态间转换频率与次数的关系图。为此,使用 lapply 来为每个人生成转换表。但是,由于有些人只有很少的转换或根本没有转换,因此生成的表在 rows/columns 的数量上彼此不同。因此,当我尝试提取转换数以绘制它们时,出现 "incorrect number of dimensions" 错误。我看过 this 个帖子,但我没有成功将解决方案应用到我的案例中。
这是我当前的代码:
n_STATEs <- 4
data <- read.csv("transitiondata.csv")
transitions <- by(data,data$ID,
function(xx)data.frame(ID=head(xx$ID,-1),
TIME=tail(xx$TIME,-1),
FROM=head(xx$STATE,-1),
TO=tail(xx$STATE,-1)))
transition_table <- lapply(transitions,function(xx)with(xx,table(FROM,TO)))
min_n_transitions <- min(unlist(transition_table))
max_n_transitions <- max(unlist(transition_table))
max_freq <- 150
par(mfrow=rep(n_STATEs,2),mai=c(.4,.4,.4,.1))
for ( from in 1:n_STATEs ) {
for ( to in 1:n_STATEs ) {
sapply(transition_table,"[",from,to)
hist(foo,freq=TRUE,
xlim=c(min_n_transitions,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
}
}
Here is the dataset. I have also tried getting the transition numbers without using lapply (see the answer in my previous thread) 但这种方法还计算了一个人内最后一个时间点和第一个时间点之间的转换,这没有任何意义。
提前致谢!
编辑:代码已修复。
另一种方法是
n_STATEs <- 4
data <- read.csv("transitiondata.csv")
transitions <- by(data,data$ID,
function(xx)data.frame(ID=head(xx$ID,-1),
TIME=tail(xx$TIME,-1),
FROM=head(xx$STATE,-1),
TO=tail(xx$STATE,-1)))
transition_table <- lapply(transitions,function(xx)with(xx,table(FROM,TO)))
cols <- unique(unlist(sapply(transition_table, colnames)))
rows <- unique(unlist(sapply(transition_table, rownames)))
result <- lapply(transition_table, function(m) {
missingrows <- setdiff(rows, rownames(m))
missingcols <- setdiff(cols, colnames(m))
rbind(cbind(m,
structure(matrix(0, nrow=nrow(m), ncol=length(missingcols)),
dimnames=list(NULL, missingcols))),
structure(matrix(0, nrow=length(missingrows), ncol=length(cols)),
dimnames=list(missingrows)))
})
min_n_transitions <- min(unlist(result))
max_n_transitions <- max(unlist(result))
max_freq <- 150
par(mfrow=rep(n_STATEs,2),mai=c(.4,.4,.4,.1))
for ( from in 1:n_STATEs ) {
for ( to in 1:n_STATEs ) {
sapply(result,"[",from,to)
hist(foo,freq=TRUE,
xlim=c(min_n_transitions,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
}
}
还有其他可能吗?
我找到了替代解决方案。使用 this 代码,我还获得了一个 ID 中最后一个时间点和第一个时间点之间的转换计数。但是,使用 arrange
和 subset
我可以摆脱这种奇怪的行为。当前代码如下所示:
transitions <- data.frame(ID=head(data$ID,-1),
TIME=tail(data$TIME, -1),FROM=head(data$STATE,-1),
TO=tail(data$STATE,-1))
tran1<-arrange(transitions, ID, TIME)
tran2<-subset(tran1, TIME!=0)
transition_table <- with(tran2,table(FROM,TO,ID))
min_n_transitions <- min(unlist(transition_table))
max_n_transitions <- max(unlist(transition_table))
max_freq <- 150
bins <- c(-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
par(mfrow=rep(4,2),mai=c(.3,.3,.3,.1))
for ( from in 1:4 ) {
for ( to in 1:4 ) {
foo <- transition_table[from,to,]
hist(foo,freq=TRUE,
breaks=bins,
xlim=c(-.5,max_n_transitions),
ylim=c(0,max_freq),xlab="",ylab="",
main=paste("From",from,"to",to),las=1,col="lightgray")
mtext('Number of transitions', side = 1, outer = TRUE, line = 2)
mtext('Frequency', side = 2, outer = TRUE, line = 1.5)
}
}