在 R 中将时间序列展开到新的数据框

Expand time series in new dataframe R

我是 R 新手,我收集了具有以下结构的眼动数据:

Participant Trial Condition Fixation.Start  Fixation.End Fixated.Area
P01         T01   Early     4               206          Outside
P01         T01   Early     258             476          Competitor
P01         T01   Early     496             882          Target
P01         T02   Late      4               794          Outside
P01         T02   Late      838             1026         Target
P01         T02   Late      1046            1328         Target
P02         T01   Early     4               168          Outside
P02         T01   Early     232             452          Competitor
P02         T01   Early     494             738          Target
P02         T02   Late      4               176          Outside
P02         T02   Late      238             466          Target
P02         T02   Late      524             632          Competitor

其中,Fixation.Start 和 Fixation.End 分别是一次注视的开始时间和结束时间(单位:毫秒),Fixated.Area 是该次注视所落在的屏幕区域。每一行代表一次注视。

我想做的是把数据重塑为一个新的数据框,以 50 毫秒为一个时间段(time bin),使每个时间段(每一行)反映当时正在注视的区域。换句话说,我希望新数据框看起来像这样:

Participant Trial   Condition   Time.Bin    Fixated.Area
P01         T01     Early       50          Outside
P01         T01     Early       100         Outside
P01         T01     Early       150         Outside
P01         T01     Early       200         Outside
P01         T01     Early       250         Competitor
P01         T01     Early       300         Competitor
P01         T01     Early       350         Competitor
P01         T01     Early       400         Competitor
P01         T01     Early       450         Competitor
P01         T01     Early       500         Target
P01         T01     Early       550         Target
P01         T01     Early       600         Target
P01         T01     Early       650         Target  

我认为这在 R 中应该很容易做到。有什么想法吗?

下面的方法把每次注视的时间范围展开为间隔 50 毫秒(by = 50)的一系列时间段。

基础 R

# For every fixation (row of `dat`), list the 50 ms bin boundaries that fall
# inside [Fixation.Start, Fixation.End]; the result is a list holding one
# numeric vector per row.
Time.Bins <- Map(
  function(a, b) {
    # A fixation too short to contain a bin boundary gets no bins; calling
    # seq() with a > b and a positive `by` would stop with
    # "wrong sign in 'by' argument".
    if (a > b) {
      return(numeric(0))
    }
    seq(a, b, by = 50)
  },
  # First multiple of 50 at or after the fixation start.
  ceiling(dat$Fixation.Start / 50) * 50,
  dat$Fixation.End)

# Repeat each fixation's identifying columns once per bin it covers and attach
# the bin values, giving one row per (fixation, time bin) pair.
out <- cbind(
  dat[
    rep(seq_len(nrow(dat)), times = lengths(Time.Bins)),
    c("Participant", "Trial", "Condition", "Fixated.Area")
  ],
  Time.Bin = unlist(Time.Bins)
)
head(out, n = 20)
#     Participant Trial Condition Fixated.Area Time.Bin
# 1           P01   T01     Early      Outside       50
# 1.1         P01   T01     Early      Outside      100
# 1.2         P01   T01     Early      Outside      150
# 1.3         P01   T01     Early      Outside      200
# 2           P01   T01     Early   Competitor      300
# 2.1         P01   T01     Early   Competitor      350
# 2.2         P01   T01     Early   Competitor      400
# 2.3         P01   T01     Early   Competitor      450
# 3           P01   T01     Early       Target      500
# 3.1         P01   T01     Early       Target      550
# 3.2         P01   T01     Early       Target      600
# 3.3         P01   T01     Early       Target      650
# 3.4         P01   T01     Early       Target      700
# 3.5         P01   T01     Early       Target      750
# 3.6         P01   T01     Early       Target      800
# 3.7         P01   T01     Early       Target      850
# 4           P01   T02      Late      Outside       50
# 4.1         P01   T02      Late      Outside      100
# 4.2         P01   T02      Late      Outside      150
# 4.3         P01   T02      Late      Outside      200

dplyr

library(dplyr)
# Expand each fixation (one input row) into one output row per 50 ms bin
# boundary that falls inside [Fixation.Start, Fixation.End].
out <- dat %>%
  rowwise() %>%
  # NOTE: dplyr >= 1.1.0 deprecates summarize() results that are not exactly
  # one row per group and recommends reframe() instead; summarize() is kept
  # here so the snippet also runs on older dplyr versions.
  summarize(
    Participant, Trial, Condition, Fixated.Area,
    # A fixation too short to contain any bin boundary contributes no rows;
    # an unguarded seq() would stop with "wrong sign in 'by' argument".
    Time.Bin = if (ceiling(Fixation.Start / 50) * 50 <= Fixation.End) {
      seq(ceiling(Fixation.Start / 50) * 50, Fixation.End, by = 50)
    } else {
      numeric(0)
    },
    .groups = "drop"
  )
out
# # A tibble: 64 x 5
#    Participant Trial Condition Fixated.Area Time.Bin
#    <chr>       <chr> <chr>     <chr>           <dbl>
#  1 P01         T01   Early     Outside            50
#  2 P01         T01   Early     Outside           100
#  3 P01         T01   Early     Outside           150
#  4 P01         T01   Early     Outside           200
#  5 P01         T01   Early     Competitor        300
#  6 P01         T01   Early     Competitor        350
#  7 P01         T01   Early     Competitor        400
#  8 P01         T01   Early     Competitor        450
#  9 P01         T01   Early     Target            500
# 10 P01         T01   Early     Target            550
# # ... with 54 more rows

修正 Time.Bin = 250 的情况

您的预期输出在 Time.Bin = 250 处显示 Competitor,但数据并不支持这一点:第一次注视在 206 毫秒结束,而 Competitor 的注视到 258 毫秒才开始。如果您需要保留 250 这一行(无论是否有注视区域),可以像下面这样补全缺失的时间段。

# For every Participant/Trial/Condition combination, build the full 50 ms grid
# between its smallest and largest observed Time.Bin (without Fixated.Area).
expbins <- do.call(
  rbind,
  by(
    out,
    out[, c("Participant", "Trial", "Condition")],
    FUN = function(grp) {
      bin_range <- range(grp$Time.Bin)
      bins <- seq(bin_range[1], bin_range[2], by = 50)
      # Replicate the group's first row once per grid point to carry the
      # identifying columns, then drop the area and overwrite the bin value.
      first_row <- grp[rep(1, length(bins)), ]
      transform(first_row, Fixated.Area = NULL, Time.Bin = bins)
    }
  )
)
# Outer-join the grid with the observed bins; grid points that no fixation
# covers end up with Fixated.Area = NA.
out2 <- merge(
  x = expbins,
  y = out,
  by = c("Participant", "Trial", "Condition", "Time.Bin"),
  all = TRUE
)
head(out2, n = 10)
#    Participant Trial Condition Time.Bin Fixated.Area
# 1          P01   T01     Early       50      Outside
# 2          P01   T01     Early      100      Outside
# 3          P01   T01     Early      150      Outside
# 4          P01   T01     Early      200      Outside
# 5          P01   T01     Early      250         <NA>
# 6          P01   T01     Early      300   Competitor
# 7          P01   T01     Early      350   Competitor
# 8          P01   T01     Early      400   Competitor
# 9          P01   T01     Early      450   Competitor
# 10         P01   T01     Early      500       Target

这样 Time.Bin = 250 会显示为 NA,即未知状态(在我看来这样更好)。

dplyr 的等价写法:

# Build the complete 50 ms grid for every Participant/Trial/Condition group,
# then full-join the observed bins onto it; grid points without a fixation get
# Fixated.Area = NA.
full_join(
  out %>%
    group_by(Participant, Trial, Condition) %>%
    summarise(
      Time.Bin = seq(min(Time.Bin), max(Time.Bin), by = 50),
      .groups = "drop"
    ),
  out,
  by = c("Participant", "Trial", "Condition", "Time.Bin")
)
# # A tibble: 69 x 5
#    Participant Trial Condition Time.Bin Fixated.Area
#    <chr>       <chr> <chr>        <dbl> <chr>       
#  1 P01         T01   Early           50 Outside     
#  2 P01         T01   Early          100 Outside     
#  3 P01         T01   Early          150 Outside     
#  4 P01         T01   Early          200 Outside     
#  5 P01         T01   Early          250 <NA>        
#  6 P01         T01   Early          300 Competitor  
#  7 P01         T01   Early          350 Competitor  
#  8 P01         T01   Early          400 Competitor  
#  9 P01         T01   Early          450 Competitor  
# 10 P01         T01   Early          500 Target      
# # ... with 59 more rows

数据:

# Sample eye-tracking data: one row per fixation, with start/end times stored
# as integers and all other columns as character vectors.
dat <- data.frame(
  Participant = c(
    "P01", "P01", "P01", "P01", "P01", "P01",
    "P02", "P02", "P02", "P02", "P02", "P02"
  ),
  Trial = c(
    "T01", "T01", "T01", "T02", "T02", "T02",
    "T01", "T01", "T01", "T02", "T02", "T02"
  ),
  Condition = c(
    "Early", "Early", "Early", "Late", "Late", "Late",
    "Early", "Early", "Early", "Late", "Late", "Late"
  ),
  Fixation.Start = c(
    4L, 258L, 496L, 4L, 838L, 1046L,
    4L, 232L, 494L, 4L, 238L, 524L
  ),
  Fixation.End = c(
    206L, 476L, 882L, 794L, 1026L, 1328L,
    168L, 452L, 738L, 176L, 466L, 632L
  ),
  Fixated.Area = c(
    "Outside", "Competitor", "Target", "Outside", "Target", "Target",
    "Outside", "Competitor", "Target", "Outside", "Target", "Competitor"
  ),
  stringsAsFactors = FALSE
)