通过插值补全缺失行

Interpolate Missing rows

我有一个时间序列数据集，其采样间隔不规则，大约为 3 到 4 秒。我希望通过插值补全缺失的行，使数据的时间间隔为 1 秒。希望这个问题很容易解决！

       time      lon      lat   Ta Pres
1  14:12:19 12.47767 41.82520 27.4 1017
2  14:12:22 12.47776 41.82518 27.4 1017
3  14:12:25 12.47784 41.82517 27.4 1017
4  14:12:29 12.47792 41.82513 27.4 1017
5  14:12:32 12.47798 41.82507 27.4 1017
6  14:12:36 12.47805 41.82500 27.3 1016
7  14:12:39 12.47809 41.82494 27.1 1017
8  14:12:43 12.47806 41.82488 26.9 1017
9  14:12:46 12.47801 41.82486 27.1 1017
10 14:12:49 12.47795 41.82486 27.1 1017
11 14:12:53 12.47782 41.82484 27.2 1017
12 14:12:56 12.47772 41.82484 27.1 1017
13 14:13:00 12.47759 41.82481 27.1 1017
14 14:13:04 12.47740 41.82478 27.2 1017
15 14:13:07 12.47729 41.82477 27.1 1017
16 14:13:10 12.47718 41.82476 26.9 1017
17 14:13:14 12.47703 41.82478 26.6 1017
18 14:13:17 12.47690 41.82478 26.5 1017
19 14:13:21 12.47674 41.82477 26.4 1017
20 14:13:24 12.47660 41.82475 26.7 1017

其核心是 `approxfun` 函数，它执行分段线性插值。也可以选用其他插值函数，例如 `splinefun`。

# Linearly interpolate an irregularly sampled time series (gaps of roughly
# 3-4 s) onto a regular 1-second grid.

# Sample observations. The leading `row` column only carries the printed row
# labels, so it is dropped right after reading.
dat <- read.table(text = "row time      lon      lat   Ta Pres
           1  14:12:19 12.47767 41.82520 27.4 1017
           2  14:12:22 12.47776 41.82518 27.4 1017
           3  14:12:25 12.47784 41.82517 27.4 1017
           4  14:12:29 12.47792 41.82513 27.4 1017
           5  14:12:32 12.47798 41.82507 27.4 1017
           6  14:12:36 12.47805 41.82500 27.3 1016
           7  14:12:39 12.47809 41.82494 27.1 1017
           8  14:12:43 12.47806 41.82488 26.9 1017
           9  14:12:46 12.47801 41.82486 27.1 1017
           10 14:12:49 12.47795 41.82486 27.1 1017
           11 14:12:53 12.47782 41.82484 27.2 1017
           12 14:12:56 12.47772 41.82484 27.1 1017
           13 14:13:00 12.47759 41.82481 27.1 1017
           14 14:13:04 12.47740 41.82478 27.2 1017
           15 14:13:07 12.47729 41.82477 27.1 1017
           16 14:13:10 12.47718 41.82476 26.9 1017
           17 14:13:14 12.47703 41.82478 26.6 1017
           18 14:13:17 12.47690 41.82478 26.5 1017
           19 14:13:21 12.47674 41.82477 26.4 1017
           20 14:13:24 12.47660 41.82475 26.7 1017",
           header = TRUE)
dat$row <- NULL

# The input only has clock times. Anchor them to a fixed date in UTC so the
# result does not depend on the current date or the local time zone (and can
# never straddle a daylight-saving change).
dat$time <- as.POSIXct(paste("1970-01-01", dat$time),
                       format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

# Every whole second between the first and last observation, minus the ones
# that were actually observed. Comparing the underlying second counts keeps
# the POSIXct class intact, so no conversion back from numeric is needed.
all_times <- seq(min(dat$time), max(dat$time), by = 1)
obs_secs <- as.numeric(dat$time)
missing_times <- all_times[!as.numeric(all_times) %in% obs_secs]

# Interpolate each measured column at the missing seconds. `approxfun()`
# returns a piecewise-linear function through the observed points.
# Note: `Pres` is read as integer, but interpolated values are doubles
# (e.g. 1016.75 between 1017 and 1016), so the combined column is double.
newdat <- data.frame(time = missing_times)
new_secs <- as.numeric(missing_times)
value_cols <- setdiff(names(dat), "time")
newdat[value_cols] <- lapply(
  dat[value_cols],
  function(y) approxfun(obs_secs, y)(new_secs)
)

# Merge observed and interpolated rows and restore chronological order.
# Interpolated rows keep row names 21, 22, ... which marks them as added.
dat <- rbind(dat, newdat)
dat <- dat[order(dat$time), ]

# Show clock times only, as in the input.
dat$time <- format(dat$time, "%H:%M:%S")
head(dat, 7)
      time      lon      lat   Ta Pres
1  14:12:19 12.47767 41.82520 27.4 1017
21 14:12:20 12.47770 41.82519 27.4 1017
22 14:12:21 12.47773 41.82519 27.4 1017
2  14:12:22 12.47776 41.82518 27.4 1017
23 14:12:23 12.47779 41.82518 27.4 1017
24 14:12:24 12.47781 41.82517 27.4 1017
3  14:12:25 12.47784 41.82517 27.4 1017