通过 ID 和时间间隔合并两个数据帧
Merge two data frames over ID and time interval
我正在处理以下问题:
我有 2 个数据框,a 和 b:
# Data frame `a`: one row per observation, holding an ID and the time stamp
# of the observation.
ID1 <- rep(c("1", "2"), times = c(4L, 3L))
time <- c(
  "2022-04-12 08:52", "2022-04-12 15:34", "2022-04-12 16:45",
  "2022-04-12 22:23", "2022-04-12 02:15", "2022-04-12 05:24",
  "2022-04-12 14:55"
)
# The text time stamps are parsed while the frame is assembled. No `tz` is
# given, so the session's time zone applies. `origin` is carried over from the
# original call unchanged.
a <- data.frame(
  ID1 = ID1,
  time = as.POSIXct(time, origin = "1970-01-01")
)
a
ID1 time
1 2022-04-12 08:52:00
1 2022-04-12 15:34:00
1 2022-04-12 16:45:00
1 2022-04-12 22:23:00
2 2022-04-12 02:15:00
2 2022-04-12 05:24:00
2 2022-04-12 14:55:00
# Data frame `b`: one row per activity interval, holding an ID, the interval
# bounds and the activity label.
ID2 <- rep(c("1", "2"), each = 4L)
start <- c(
  "2022-04-12 00:00", "2022-04-12 08:00", "2022-04-12 17:00",
  "2022-04-12 18:00", "2022-04-12 00:00", "2022-04-12 15:00",
  "2022-04-12 16:00", "2022-04-12 19:00"
)
end <- c(
  "2022-04-12 08:00", "2022-04-12 17:00", "2022-04-12 18:00",
  "2022-04-12 00:00", "2022-04-12 15:00 ", "2022-04-12 16:00",
  "2022-04-12 19:00", "2022-04-12 00:00"
)
activity <- c(
  "At home", "Work", "Travel", "Home",
  "Home", "Travel", "Work", "Home"
)
b <- data.frame(ID2, start, end, activity)
# Both bound columns are parsed into date-times in one pass. `origin` is
# carried over from the original calls unchanged.
bound_cols <- c("start", "end")
b[bound_cols] <- lapply(b[bound_cols], as.POSIXct, origin = "1970-01-01")
b
ID2 start end activity
1 2022-04-12 00:00:00 2022-04-12 08:00:00 At home
1 2022-04-12 08:00:00 2022-04-12 17:00:00 Work
1 2022-04-12 17:00:00 2022-04-12 18:00:00 Travel
1 2022-04-12 18:00:00 2022-04-12 00:00:00 Home
2 2022-04-12 00:00:00 2022-04-12 15:00:00 Home
2 2022-04-12 15:00:00 2022-04-12 16:00:00 Travel
2 2022-04-12 16:00:00 2022-04-12 19:00:00 Work
2 2022-04-12 19:00:00 2022-04-12 00:00:00 Home
我想按 ID 和时间点合并这两个数据框:当 a 中的时间点落在 b 中某一行的 start 和 end 之间时,就取该行的 activity。
我想最终得到一个如下所示的数据框:
# Expected outcome: every observation from `a` paired with the activity whose
# interval in `b` contains it.
ID3 <- rep(c("1", "2"), times = c(4L, 3L))
time3 <- c(
  "2022-04-12 08:52", "2022-04-12 15:34", "2022-04-12 16:45",
  "2022-04-12 22:23", "2022-04-12 02:15", "2022-04-12 05:24",
  "2022-04-12 14:55"
)
activity3 <- rep(c("Work", "Home"), times = c(3L, 4L))
result <- data.frame(ID3 = ID3, time3 = time3, activity3 = activity3)
result
ID3 time3 activity3
1 2022-04-12 08:52 Work
1 2022-04-12 15:34 Work
1 2022-04-12 16:45 Work
1 2022-04-12 22:23 Home
2 2022-04-12 02:15 Home
2 2022-04-12 05:24 Home
2 2022-04-12 14:55 Home
非常感谢有关此主题的任何帮助。
首先,您需要更改数据框 b 的 end 列的第 4 行和第 8 行。这两个时间段跨入了新的一天,因此结束时间不应是 2022-04-12 00:00:00,而应是 2022-04-13 00:00:00。
如果您愿意,也可以把第 4 行和第 8 行的 end 列从 2022-04-12 00:00:00 改为 2022-04-12 23:59:59。两种做法都可以。
修复时间差异后,请遵循以下代码:
# For every observation in `a`, look up the activity in `b` (same ID) whose
# half-open interval [start, end) contains the observation time, and collect
# the matches in `out` (character columns ID3, time3, activt).
#
# Notes on the approach:
#  * An interval whose `end` lies before its `start` is read as running past
#    midnight and is extended by one day for the lookup only; `b` itself is
#    not modified. Intervals that are already consistent are left alone.
#  * Each observation yields exactly one activity: NA when no interval
#    matches, the first match when several overlap. This keeps the three
#    output columns the same length.
#  * Time stamps are rendered with an explicit format() pattern so the text
#    does not depend on how as.character() treats date-times in the running
#    R version.

# Interval ends used for matching. One day is added as 24 * 60 * 60 seconds,
# which assumes no daylight-saving shift falls inside the interval.
interval_end <- b$end
wraps <- !is.na(b$start) & !is.na(b$end) & b$end < b$start
interval_end[wraps] <- interval_end[wraps] + 24 * 60 * 60

# Activity for a single (id, time) pair, or NA when nothing matches.
find_activity <- function(id, when) {
  hits <- which(b$ID2 == id & b$start <= when & interval_end > when)
  if (length(hits) == 0L) {
    return(NA_character_)
  }
  as.character(b$activity[hits[1L]])
}

# Rows are emitted grouped by ID, IDs in order of first appearance and rows
# within an ID in their original order.
row_order <- order(match(a$ID1, unique(a$ID1)))

ID3 <- as.character(a$ID1[row_order])
time3 <- format(a$time[row_order], "%Y-%m-%d %H:%M:%S")
activt <- vapply(
  row_order,
  function(i) find_activity(a$ID1[i], a$time[i]),
  character(1)
)
out <- data.frame(ID3, time3, activt, stringsAsFactors = FALSE)
输出:
> out
ID3 time3 activt
1 1 2022-04-12 08:52:00 Work
2 1 2022-04-12 15:34:00 Work
3 1 2022-04-12 16:45:00 Work
4 1 2022-04-12 22:23:00 Home
5 2 2022-04-12 02:15:00 Home
6 2 2022-04-12 05:24:00 Home
7 2 2022-04-12 14:55:00 Home
>
我正在处理以下问题: 我有 2 个数据框,a 和 b:
# Data frame `a`: one row per observation, holding an ID and the time stamp
# of the observation.
ID1 <- rep(c("1", "2"), times = c(4L, 3L))
time <- c(
  "2022-04-12 08:52", "2022-04-12 15:34", "2022-04-12 16:45",
  "2022-04-12 22:23", "2022-04-12 02:15", "2022-04-12 05:24",
  "2022-04-12 14:55"
)
# The text time stamps are parsed while the frame is assembled. No `tz` is
# given, so the session's time zone applies. `origin` is carried over from the
# original call unchanged.
a <- data.frame(
  ID1 = ID1,
  time = as.POSIXct(time, origin = "1970-01-01")
)
a
ID1 time
1 2022-04-12 08:52:00
1 2022-04-12 15:34:00
1 2022-04-12 16:45:00
1 2022-04-12 22:23:00
2 2022-04-12 02:15:00
2 2022-04-12 05:24:00
2 2022-04-12 14:55:00
# Data frame `b`: one row per activity interval, holding an ID, the interval
# bounds and the activity label.
ID2 <- rep(c("1", "2"), each = 4L)
start <- c(
  "2022-04-12 00:00", "2022-04-12 08:00", "2022-04-12 17:00",
  "2022-04-12 18:00", "2022-04-12 00:00", "2022-04-12 15:00",
  "2022-04-12 16:00", "2022-04-12 19:00"
)
end <- c(
  "2022-04-12 08:00", "2022-04-12 17:00", "2022-04-12 18:00",
  "2022-04-12 00:00", "2022-04-12 15:00 ", "2022-04-12 16:00",
  "2022-04-12 19:00", "2022-04-12 00:00"
)
activity <- c(
  "At home", "Work", "Travel", "Home",
  "Home", "Travel", "Work", "Home"
)
b <- data.frame(ID2, start, end, activity)
# Both bound columns are parsed into date-times in one pass. `origin` is
# carried over from the original calls unchanged.
bound_cols <- c("start", "end")
b[bound_cols] <- lapply(b[bound_cols], as.POSIXct, origin = "1970-01-01")
b
ID2 start end activity
1 2022-04-12 00:00:00 2022-04-12 08:00:00 At home
1 2022-04-12 08:00:00 2022-04-12 17:00:00 Work
1 2022-04-12 17:00:00 2022-04-12 18:00:00 Travel
1 2022-04-12 18:00:00 2022-04-12 00:00:00 Home
2 2022-04-12 00:00:00 2022-04-12 15:00:00 Home
2 2022-04-12 15:00:00 2022-04-12 16:00:00 Travel
2 2022-04-12 16:00:00 2022-04-12 19:00:00 Work
2 2022-04-12 19:00:00 2022-04-12 00:00:00 Home
我想按 ID 和时间点合并这两个数据框:当 a 中的时间点落在 b 中某一行的 start 和 end 之间时,就取该行的 activity。 我想最终得到一个如下所示的数据框:
# Expected outcome: every observation from `a` paired with the activity whose
# interval in `b` contains it.
ID3 <- rep(c("1", "2"), times = c(4L, 3L))
time3 <- c(
  "2022-04-12 08:52", "2022-04-12 15:34", "2022-04-12 16:45",
  "2022-04-12 22:23", "2022-04-12 02:15", "2022-04-12 05:24",
  "2022-04-12 14:55"
)
activity3 <- rep(c("Work", "Home"), times = c(3L, 4L))
result <- data.frame(ID3 = ID3, time3 = time3, activity3 = activity3)
result
ID3 time3 activity3
1 2022-04-12 08:52 Work
1 2022-04-12 15:34 Work
1 2022-04-12 16:45 Work
1 2022-04-12 22:23 Home
2 2022-04-12 02:15 Home
2 2022-04-12 05:24 Home
2 2022-04-12 14:55 Home
非常感谢有关此主题的任何帮助。
首先,您需要更改数据框 b 的 end 列的第 4 行和第 8 行。这两个时间段跨入了新的一天,因此结束时间不应是 2022-04-12 00:00:00,而应是 2022-04-13 00:00:00。
如果您愿意,也可以把第 4 行和第 8 行的 end 列从 2022-04-12 00:00:00 改为 2022-04-12 23:59:59。两种做法都可以。
修复时间差异后,请遵循以下代码:
# For every observation in `a`, look up the activity in `b` (same ID) whose
# half-open interval [start, end) contains the observation time, and collect
# the matches in `out` (character columns ID3, time3, activt).
#
# Notes on the approach:
#  * An interval whose `end` lies before its `start` is read as running past
#    midnight and is extended by one day for the lookup only; `b` itself is
#    not modified. Intervals that are already consistent are left alone.
#  * Each observation yields exactly one activity: NA when no interval
#    matches, the first match when several overlap. This keeps the three
#    output columns the same length.
#  * Time stamps are rendered with an explicit format() pattern so the text
#    does not depend on how as.character() treats date-times in the running
#    R version.

# Interval ends used for matching. One day is added as 24 * 60 * 60 seconds,
# which assumes no daylight-saving shift falls inside the interval.
interval_end <- b$end
wraps <- !is.na(b$start) & !is.na(b$end) & b$end < b$start
interval_end[wraps] <- interval_end[wraps] + 24 * 60 * 60

# Activity for a single (id, time) pair, or NA when nothing matches.
find_activity <- function(id, when) {
  hits <- which(b$ID2 == id & b$start <= when & interval_end > when)
  if (length(hits) == 0L) {
    return(NA_character_)
  }
  as.character(b$activity[hits[1L]])
}

# Rows are emitted grouped by ID, IDs in order of first appearance and rows
# within an ID in their original order.
row_order <- order(match(a$ID1, unique(a$ID1)))

ID3 <- as.character(a$ID1[row_order])
time3 <- format(a$time[row_order], "%Y-%m-%d %H:%M:%S")
activt <- vapply(
  row_order,
  function(i) find_activity(a$ID1[i], a$time[i]),
  character(1)
)
out <- data.frame(ID3, time3, activt, stringsAsFactors = FALSE)
输出:
> out
ID3 time3 activt
1 1 2022-04-12 08:52:00 Work
2 1 2022-04-12 15:34:00 Work
3 1 2022-04-12 16:45:00 Work
4 1 2022-04-12 22:23:00 Home
5 2 2022-04-12 02:15:00 Home
6 2 2022-04-12 05:24:00 Home
7 2 2022-04-12 14:55:00 Home
>