插入缺失行
Interpolate Missing rows
我有一个时间序列数据集,其随机间隔大约为 3 到 4 秒。我希望以 1 秒的间隔插入缺失值。希望这个问题很容易解决!
time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017
4 14:12:29 12.47792 41.82513 27.4 1017
5 14:12:32 12.47798 41.82507 27.4 1017
6 14:12:36 12.47805 41.82500 27.3 1016
7 14:12:39 12.47809 41.82494 27.1 1017
8 14:12:43 12.47806 41.82488 26.9 1017
9 14:12:46 12.47801 41.82486 27.1 1017
10 14:12:49 12.47795 41.82486 27.1 1017
11 14:12:53 12.47782 41.82484 27.2 1017
12 14:12:56 12.47772 41.82484 27.1 1017
13 14:13:00 12.47759 41.82481 27.1 1017
14 14:13:04 12.47740 41.82478 27.2 1017
15 14:13:07 12.47729 41.82477 27.1 1017
16 14:13:10 12.47718 41.82476 26.9 1017
17 14:13:14 12.47703 41.82478 26.6 1017
18 14:13:17 12.47690 41.82478 26.5 1017
19 14:13:21 12.47674 41.82477 26.4 1017
20 14:13:24 12.47660 41.82475 26.7 1017
其核心是 `approxfun` 函数,它进行分段线性插值。还有其他选项,例如 `splinefun`。
# Sample track with irregularly spaced (3-4 s) observations. The leading
# `row` column only repeats the row numbers, so it is dropped right away.
dat <- read.table(text = "row time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017
4 14:12:29 12.47792 41.82513 27.4 1017
5 14:12:32 12.47798 41.82507 27.4 1017
6 14:12:36 12.47805 41.82500 27.3 1016
7 14:12:39 12.47809 41.82494 27.1 1017
8 14:12:43 12.47806 41.82488 26.9 1017
9 14:12:46 12.47801 41.82486 27.1 1017
10 14:12:49 12.47795 41.82486 27.1 1017
11 14:12:53 12.47782 41.82484 27.2 1017
12 14:12:56 12.47772 41.82484 27.1 1017
13 14:13:00 12.47759 41.82481 27.1 1017
14 14:13:04 12.47740 41.82478 27.2 1017
15 14:13:07 12.47729 41.82477 27.1 1017
16 14:13:10 12.47718 41.82476 26.9 1017
17 14:13:14 12.47703 41.82478 26.6 1017
18 14:13:17 12.47690 41.82478 26.5 1017
19 14:13:21 12.47674 41.82477 26.4 1017
20 14:13:24 12.47660 41.82475 26.7 1017",
header = TRUE)
dat$row <- NULL

# The input holds a time of day only; parse it into POSIXct so that the
# observations can be placed on a regular 1-second grid.
dat$time <- as.POSIXct(strptime(dat$time, "%H:%M:%S"))

# Every whole second between the first and last observation, minus the
# seconds that already have a measurement. Indexing the grid (rather than
# calling setdiff()) keeps the POSIXct class, so no conversion back from a
# bare number via an origin is needed.
grid <- seq(min(dat$time), max(dat$time), by = 1)
missing_times <- grid[!(as.numeric(grid) %in% as.numeric(dat$time))]
newdat <- data.frame(time = missing_times)

# Piecewise-linear interpolation of every measured column at the missing
# seconds. splinefun() could be swapped in for a smoother curve.
value_cols <- setdiff(names(dat), "time")
for (col in value_cols) {
  newdat[[col]] <- approxfun(dat$time, dat[[col]])(newdat$time)
}

# Append the interpolated rows and restore chronological order. The
# interpolated rows keep row names 21, 22, ... so they stay identifiable.
dat <- rbind(dat, newdat)
dat <- dat[order(dat$time), ]

# Show only the time of day again. format() extracts it directly instead of
# slicing the last 8 characters of the printed date-time, which is only
# correct when that string happens to end in HH:MM:SS.
dat$time <- format(dat$time, "%H:%M:%S")
head(dat, 7)
time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
21 14:12:20 12.47770 41.82519 27.4 1017
22 14:12:21 12.47773 41.82519 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
23 14:12:23 12.47779 41.82518 27.4 1017
24 14:12:24 12.47781 41.82517 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017
我有一个时间序列数据集,其随机间隔大约为 3 到 4 秒。我希望以 1 秒的间隔插入缺失值。希望这个问题很容易解决!
time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017
4 14:12:29 12.47792 41.82513 27.4 1017
5 14:12:32 12.47798 41.82507 27.4 1017
6 14:12:36 12.47805 41.82500 27.3 1016
7 14:12:39 12.47809 41.82494 27.1 1017
8 14:12:43 12.47806 41.82488 26.9 1017
9 14:12:46 12.47801 41.82486 27.1 1017
10 14:12:49 12.47795 41.82486 27.1 1017
11 14:12:53 12.47782 41.82484 27.2 1017
12 14:12:56 12.47772 41.82484 27.1 1017
13 14:13:00 12.47759 41.82481 27.1 1017
14 14:13:04 12.47740 41.82478 27.2 1017
15 14:13:07 12.47729 41.82477 27.1 1017
16 14:13:10 12.47718 41.82476 26.9 1017
17 14:13:14 12.47703 41.82478 26.6 1017
18 14:13:17 12.47690 41.82478 26.5 1017
19 14:13:21 12.47674 41.82477 26.4 1017
20 14:13:24 12.47660 41.82475 26.7 1017
其核心是 `approxfun` 函数,它进行分段线性插值。还有其他选项,例如 `splinefun`。
# Sample track with irregularly spaced (3-4 s) observations. The leading
# `row` column only repeats the row numbers, so it is dropped right away.
dat <- read.table(text = "row time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017
4 14:12:29 12.47792 41.82513 27.4 1017
5 14:12:32 12.47798 41.82507 27.4 1017
6 14:12:36 12.47805 41.82500 27.3 1016
7 14:12:39 12.47809 41.82494 27.1 1017
8 14:12:43 12.47806 41.82488 26.9 1017
9 14:12:46 12.47801 41.82486 27.1 1017
10 14:12:49 12.47795 41.82486 27.1 1017
11 14:12:53 12.47782 41.82484 27.2 1017
12 14:12:56 12.47772 41.82484 27.1 1017
13 14:13:00 12.47759 41.82481 27.1 1017
14 14:13:04 12.47740 41.82478 27.2 1017
15 14:13:07 12.47729 41.82477 27.1 1017
16 14:13:10 12.47718 41.82476 26.9 1017
17 14:13:14 12.47703 41.82478 26.6 1017
18 14:13:17 12.47690 41.82478 26.5 1017
19 14:13:21 12.47674 41.82477 26.4 1017
20 14:13:24 12.47660 41.82475 26.7 1017",
header = TRUE)
dat$row <- NULL

# The input holds a time of day only; parse it into POSIXct so that the
# observations can be placed on a regular 1-second grid.
dat$time <- as.POSIXct(strptime(dat$time, "%H:%M:%S"))

# Every whole second between the first and last observation, minus the
# seconds that already have a measurement. Indexing the grid (rather than
# calling setdiff()) keeps the POSIXct class, so no conversion back from a
# bare number via an origin is needed.
grid <- seq(min(dat$time), max(dat$time), by = 1)
missing_times <- grid[!(as.numeric(grid) %in% as.numeric(dat$time))]
newdat <- data.frame(time = missing_times)

# Piecewise-linear interpolation of every measured column at the missing
# seconds. splinefun() could be swapped in for a smoother curve.
value_cols <- setdiff(names(dat), "time")
for (col in value_cols) {
  newdat[[col]] <- approxfun(dat$time, dat[[col]])(newdat$time)
}

# Append the interpolated rows and restore chronological order. The
# interpolated rows keep row names 21, 22, ... so they stay identifiable.
dat <- rbind(dat, newdat)
dat <- dat[order(dat$time), ]

# Show only the time of day again. format() extracts it directly instead of
# slicing the last 8 characters of the printed date-time, which is only
# correct when that string happens to end in HH:MM:SS.
dat$time <- format(dat$time, "%H:%M:%S")
head(dat, 7)
time lon lat Ta Pres
1 14:12:19 12.47767 41.82520 27.4 1017
21 14:12:20 12.47770 41.82519 27.4 1017
22 14:12:21 12.47773 41.82519 27.4 1017
2 14:12:22 12.47776 41.82518 27.4 1017
23 14:12:23 12.47779 41.82518 27.4 1017
24 14:12:24 12.47781 41.82517 27.4 1017
3 14:12:25 12.47784 41.82517 27.4 1017