当 DST 夏令时是重复时间戳的原因时,如何使用 R 从 Excel 中读出 CET/CEST 时间序列?
How to read out a CET/CEST time series from Excel with R when DST daylight saving time is cause of repeated timestamps?
我有以下 Excel 来自电力供应商的信息...
- 列:Excel 2.列中值之间的时间差
- 列:瑞士电力供应商提供的日期时间显然采用 CET/CEST 格式
- 列:Excel 格式的日期时间
- 列:第 3 列的结果日期时间
- 列及以下:值
0.010416667; 25.10.2015 00:00; 42302.00; 25.10.2015 00:00; 1353500.886
0.010416667; 25.10.2015 00:15; 42302.01; 25.10.2015 00:15; 1358036.992
0.010416667; 25.10.2015 00:30; 42302.02; 25.10.2015 00:30; 1336141.202
0.010416667; 25.10.2015 00:45; 42302.03; 25.10.2015 00:45; 1339588.722
0.010416667; 25.10.2015 01:00; 42302.04; 25.10.2015 01:00; 1309298.517
0.010416667; 25.10.2015 01:15; 42302.05; 25.10.2015 01:15; 1312979.406
0.010416667; 25.10.2015 01:30; 42302.06; 25.10.2015 01:30; 1263912.381
0.010416667; 25.10.2015 01:45; 42302.07; 25.10.2015 01:45; 1263811.518
0.010416667; 25.10.2015 02:00; 42302.08; 25.10.2015 02:00; 1232290.791
0.010416667; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1258868.764
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1237087.155
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1231009.94
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1234285.809
-0.03125; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1220952.562
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1184250.718
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1169521.089
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1149841.273
0.010416667; 25.10.2015 03:15; 42302.14; 25.10.2015 03:15; 1157376.444
0.010416667; 25.10.2015 03:30; 42302.15; 25.10.2015 03:30; 1159568.082
0.010416667; 25.10.2015 03:45; 42302.16; 25.10.2015 03:45; 1176631.296
0.010416667; 25.10.2015 04:00; 42302.17; 25.10.2015 04:00; 1170186.402
0.010416667; 25.10.2015 04:15; 42302.18; 25.10.2015 04:15; 1174296.3
0.010416667; 25.10.2015 04:30; 42302.19; 25.10.2015 04:30; 1170491.442
0.010416667; 25.10.2015 04:45; 42302.20; 25.10.2015 04:45; 1168962.826
0.010416667; 25.10.2015 05:00; 42302.21; 25.10.2015 05:00; 1155760.463
0.010416667; 25.10.2015 05:15; 42302.22; 25.10.2015 05:15; 1184186.085
0.010416667; 25.10.2015 05:30; 42302.23; 25.10.2015 05:30; 1188887.774
完整的 Excel 数据可以从 Swissgrid 主页下载:Swissgrid Data
夏令时时移时记录数据。
我用下面的代码读出了数据:
read_SwissgridExcel_v2015 <- function(path,sheet=3){
wb1 <- read_excel(path, sheet=sheet, col_names=TRUE, skip=1)
names(wb1)[1] <-"Col_1"
wb1$Col_1 <- as.POSIXct(round.POSIXt(wb1$Col_1),tz="Europe/Berlin",usetz=TRUE)
SwissgridDat <- wb1
attr(SwissgridDat,"description")<-names(SwissgridDat)
names(SwissgridDat) <- c("Col_1",
"Cons_Total_Enduse_Swiss_ControlBlock",
"Prod_Total_Swiss_ControlBlock",
"Cons_Total_Swiss_ControlBlock",
"Net_Outflow_Swiss_Transm_Grid",
"Grid_FeedIn_Swiss_Transm_Grid",
"Control_Energy_Pos_Sec",
"Control_Energy_Neg_Sec",
"Control_Energy_Pos_Ter",
"Control_Energy_Neg_Ter",
"Cross_Border_Exchange_CH_AT",
"Cross_Border_Exchange_AT_CH",
"Cross_Border_Exchange_CH_DE",
"Cross_Border_Exchange_DE_CH",
"Cross_Border_Exchange_CH_FR",
"Cross_Border_Exchange_FR_CH",
"Cross_Border_Exchange_CH_IT",
"Cross_Border_Exchange_IT_CH",
"Transit",
"Import",
"Export",
"Prices_Avg_Pos_Sec_Control_Energy",
"Prices_Avg_Neg_Sec_Control_Energy",
"Prices_Avg_Pos_Ter_Control_Energy",
"Prices_Avg_Neg_Ter_Control_Energy",
"Prod_Canton_AG",
"Cons_Canton_AG",
"Prod_Canton_FR",
"Cons_Canton_FR",
"Prod_Canton_GL",
"Cons_Canton_GL",
"Prod_Canton_GR",
"Cons_Canton_GR",
"Prod_Canton_LU",
"Cons_Canton_LU",
"Prod_Canton_NE",
"Cons_Canton_NE",
"Prod_Canton_SO",
"Cons_Canton_SO",
"Prod_Canton_SG",
"Cons_Canton_SG",
"Prod_Canton_TI",
"Cons_Canton_TI",
"Prod_Canton_TG",
"Cons_Canton_TG",
"Prod_Canton_VS",
"Cons_Canton_VS",
"Prod_Canton_AI_AR",
"Cons_Canton_AI_AR",
"Prod_Canton_BL_BS",
"Cons_Canton_BL_BS",
"Prod_Canton_BE_JU",
"Cons_Canton_BE_JU",
"Prod_Canton_SZ_ZG",
"Cons_Canton_SZ_ZG",
"Prod_Canton_OW_NW_UR",
"Cons_Canton_OW_NW_UR",
"Prod_Canton_GE_VD",
"Cons_Canton_GE_VD",
"Prod_Canton_SH_ZH",
"Cons_Canton_SH_ZH",
"Prod_Cantons",
"Cons_Cantons",
"Prod_Foreign_Territories_SwissControlZone",
"Cons_Foreign_Territories_SwissControlZone"
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdEndUse=pmin(Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock))
SwissgridDat <- SwissgridDat %>% transform(ImportEndUse= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0))
SwissgridDat <- SwissgridDat %>% transform(ImportNonEndUsed= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0) +
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(Export2= ifelse(Prod_Total_Swiss_ControlBlock >= Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdOwnUse=
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<=Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)+
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock>Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)
)
return(SwissgridDat)
}
DataPath <- getwd()
SwissgridDat <- read_SwissgridExcel_v2015(paste(DataPath, "/EnergieUebersichtCH_2015.xlsx", sep = ""))
打印时间戳如下
SwissgridDat$Col_1[28510:28525]
结果
[1] "2015-10-25 00:30:00 CEST" "2015-10-25 00:45:00 CEST" "2015-10-25 01:00:00 CEST" "2015-10-25 01:15:00 CEST" "2015-10-25 01:30:00 CEST" "2015-10-25 01:45:00 CEST"
[7] "2015-10-25 02:00:00 CET" "2015-10-25 02:15:00 CET" "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 02:15:00 CET"
[13] "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 03:15:00 CET"
问题:如您所见,轮班时间均以 CET 时间打印,尽管逻辑上说第一个小时 02:00 到 03:00 必须是 CEST,然后第二个小时从02:15 到 03:00(CET 格式)。
我是不是做错了什么或者这是一个错误?你会如何进行读数?
我的目标是无论如何都以 UTC 格式转换所有内容,但遗憾的是数据不是 UTC 格式。
非常感谢你在这方面的帮助。
我想到了一种更简单的方法,因为您只需要 UTC:
start_posix <- as.POSIXct("2014-01-01 00:00:00", tz="Europe/Zurich") #this is your first time stamp change as appropriate (e.g SwissgridDat$Col_1[1])
attr(start_posix, "tzone")<-"UTC"
len_out<-nrow(SwissgridDat$Col_1)
len_out<-1000 # remove this line in your case
seq.POSIXt(from=start_posix,by=15*60,length.out = len_out)
只要没有数据丢失,这会给你一个很好的常规 15 分钟 UTC 时间戳
编辑:
这里只是对 DLS 小时数的更正。如果需要,请随意概括为几年的数据...
测试数据:
25.10.2015 00:00;42302
25.10.2015 00:15;42302.01
25.10.2015 00:30;42302.02
25.10.2015 00:45;42302.03
25.10.2015 01:00;42302.04
25.10.2015 01:15;42302.05
25.10.2015 01:30;42302.06
25.10.2015 01:45;42302.07
25.10.2015 02:00;42302.08
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 03:15;42302.14
25.10.2015 03:30;42302.15
25.10.2015 03:45;42302.16
25.10.2015 04:00;42302.17
25.10.2015 04:15;42302.18
25.10.2015 04:30;42302.19
25.10.2015 04:45;42302.2
25.10.2015 05:00;42302.21
25.10.2015 05:15;42302.22
25.10.2015 05:30;42302.23
代码:
data <- read.delim("test.csv", sep=";", dec=".", header =F, col.names = c("time", "somedata"))
data$time <- as.POSIXct(data$time, "%d.%m.%Y %H:%M", tz="Europe/Zurich")
target <- which(duplicated(data$time))
len_out <- length(target)*2-1 #need to run over duplicated and originals
last_target <- max(target)
for (i in seq(0,len_out)){
diff_increment <- as.numeric(difftime(data$time[last_target-i+2],data$time[last_target-i+1], units="secs")) #get correct increment from a little further out
data$time[last_target-i]<-data$time[last_target-i+1]-diff_increment
}
此代码段将在读入后更正您数据中的夏令时。
我有以下 Excel 来自电力供应商的信息...
- 列:Excel 2.列中值之间的时间差
- 列:瑞士电力供应商提供的日期时间显然采用 CET/CEST 格式
- 列:Excel 格式的日期时间
- 列:第 3 列的结果日期时间
- 列及以下:值
0.010416667; 25.10.2015 00:00; 42302.00; 25.10.2015 00:00; 1353500.886
0.010416667; 25.10.2015 00:15; 42302.01; 25.10.2015 00:15; 1358036.992
0.010416667; 25.10.2015 00:30; 42302.02; 25.10.2015 00:30; 1336141.202
0.010416667; 25.10.2015 00:45; 42302.03; 25.10.2015 00:45; 1339588.722
0.010416667; 25.10.2015 01:00; 42302.04; 25.10.2015 01:00; 1309298.517
0.010416667; 25.10.2015 01:15; 42302.05; 25.10.2015 01:15; 1312979.406
0.010416667; 25.10.2015 01:30; 42302.06; 25.10.2015 01:30; 1263912.381
0.010416667; 25.10.2015 01:45; 42302.07; 25.10.2015 01:45; 1263811.518
0.010416667; 25.10.2015 02:00; 42302.08; 25.10.2015 02:00; 1232290.791
0.010416667; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1258868.764
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1237087.155
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1231009.94
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1234285.809
-0.03125; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1220952.562
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1184250.718
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1169521.089
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1149841.273
0.010416667; 25.10.2015 03:15; 42302.14; 25.10.2015 03:15; 1157376.444
0.010416667; 25.10.2015 03:30; 42302.15; 25.10.2015 03:30; 1159568.082
0.010416667; 25.10.2015 03:45; 42302.16; 25.10.2015 03:45; 1176631.296
0.010416667; 25.10.2015 04:00; 42302.17; 25.10.2015 04:00; 1170186.402
0.010416667; 25.10.2015 04:15; 42302.18; 25.10.2015 04:15; 1174296.3
0.010416667; 25.10.2015 04:30; 42302.19; 25.10.2015 04:30; 1170491.442
0.010416667; 25.10.2015 04:45; 42302.20; 25.10.2015 04:45; 1168962.826
0.010416667; 25.10.2015 05:00; 42302.21; 25.10.2015 05:00; 1155760.463
0.010416667; 25.10.2015 05:15; 42302.22; 25.10.2015 05:15; 1184186.085
0.010416667; 25.10.2015 05:30; 42302.23; 25.10.2015 05:30; 1188887.774
完整的 Excel 数据可以从 Swissgrid 主页下载:Swissgrid Data
夏令时时移时记录数据。 我用下面的代码读出了数据:
read_SwissgridExcel_v2015 <- function(path,sheet=3){
wb1 <- read_excel(path, sheet=sheet, col_names=TRUE, skip=1)
names(wb1)[1] <-"Col_1"
wb1$Col_1 <- as.POSIXct(round.POSIXt(wb1$Col_1),tz="Europe/Berlin",usetz=TRUE)
SwissgridDat <- wb1
attr(SwissgridDat,"description")<-names(SwissgridDat)
names(SwissgridDat) <- c("Col_1",
"Cons_Total_Enduse_Swiss_ControlBlock",
"Prod_Total_Swiss_ControlBlock",
"Cons_Total_Swiss_ControlBlock",
"Net_Outflow_Swiss_Transm_Grid",
"Grid_FeedIn_Swiss_Transm_Grid",
"Control_Energy_Pos_Sec",
"Control_Energy_Neg_Sec",
"Control_Energy_Pos_Ter",
"Control_Energy_Neg_Ter",
"Cross_Border_Exchange_CH_AT",
"Cross_Border_Exchange_AT_CH",
"Cross_Border_Exchange_CH_DE",
"Cross_Border_Exchange_DE_CH",
"Cross_Border_Exchange_CH_FR",
"Cross_Border_Exchange_FR_CH",
"Cross_Border_Exchange_CH_IT",
"Cross_Border_Exchange_IT_CH",
"Transit",
"Import",
"Export",
"Prices_Avg_Pos_Sec_Control_Energy",
"Prices_Avg_Neg_Sec_Control_Energy",
"Prices_Avg_Pos_Ter_Control_Energy",
"Prices_Avg_Neg_Ter_Control_Energy",
"Prod_Canton_AG",
"Cons_Canton_AG",
"Prod_Canton_FR",
"Cons_Canton_FR",
"Prod_Canton_GL",
"Cons_Canton_GL",
"Prod_Canton_GR",
"Cons_Canton_GR",
"Prod_Canton_LU",
"Cons_Canton_LU",
"Prod_Canton_NE",
"Cons_Canton_NE",
"Prod_Canton_SO",
"Cons_Canton_SO",
"Prod_Canton_SG",
"Cons_Canton_SG",
"Prod_Canton_TI",
"Cons_Canton_TI",
"Prod_Canton_TG",
"Cons_Canton_TG",
"Prod_Canton_VS",
"Cons_Canton_VS",
"Prod_Canton_AI_AR",
"Cons_Canton_AI_AR",
"Prod_Canton_BL_BS",
"Cons_Canton_BL_BS",
"Prod_Canton_BE_JU",
"Cons_Canton_BE_JU",
"Prod_Canton_SZ_ZG",
"Cons_Canton_SZ_ZG",
"Prod_Canton_OW_NW_UR",
"Cons_Canton_OW_NW_UR",
"Prod_Canton_GE_VD",
"Cons_Canton_GE_VD",
"Prod_Canton_SH_ZH",
"Cons_Canton_SH_ZH",
"Prod_Cantons",
"Cons_Cantons",
"Prod_Foreign_Territories_SwissControlZone",
"Cons_Foreign_Territories_SwissControlZone"
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdEndUse=pmin(Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock))
SwissgridDat <- SwissgridDat %>% transform(ImportEndUse= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0))
SwissgridDat <- SwissgridDat %>% transform(ImportNonEndUsed= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0) +
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(Export2= ifelse(Prod_Total_Swiss_ControlBlock >= Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdOwnUse=
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<=Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)+
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock>Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)
)
return(SwissgridDat)
}
DataPath <- getwd()
SwissgridDat <- read_SwissgridExcel_v2015(paste(DataPath, "/EnergieUebersichtCH_2015.xlsx", sep = ""))
打印时间戳如下
SwissgridDat$Col_1[28510:28525]
结果
[1] "2015-10-25 00:30:00 CEST" "2015-10-25 00:45:00 CEST" "2015-10-25 01:00:00 CEST" "2015-10-25 01:15:00 CEST" "2015-10-25 01:30:00 CEST" "2015-10-25 01:45:00 CEST"
[7] "2015-10-25 02:00:00 CET" "2015-10-25 02:15:00 CET" "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 02:15:00 CET"
[13] "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 03:15:00 CET"
问题:如您所见,轮班时间均以 CET 时间打印,尽管逻辑上说第一个小时 02:00 到 03:00 必须是 CEST,然后第二个小时从02:15 到 03:00(CET 格式)。
我是不是做错了什么或者这是一个错误?你会如何进行读数?
我的目标是无论如何都以 UTC 格式转换所有内容,但遗憾的是数据不是 UTC 格式。
非常感谢你在这方面的帮助。
我想到了一种更简单的方法,因为您只需要 UTC:
start_posix <- as.POSIXct("2014-01-01 00:00:00", tz="Europe/Zurich") #this is your first time stamp change as appropriate (e.g SwissgridDat$Col_1[1])
attr(start_posix, "tzone")<-"UTC"
len_out<-nrow(SwissgridDat$Col_1)
len_out<-1000 # remove this line in your case
seq.POSIXt(from=start_posix,by=15*60,length.out = len_out)
只要没有数据丢失,这会给你一个很好的常规 15 分钟 UTC 时间戳
编辑:
这里只是对 DLS 小时数的更正。如果需要,请随意概括为几年的数据...
测试数据:
25.10.2015 00:00;42302
25.10.2015 00:15;42302.01
25.10.2015 00:30;42302.02
25.10.2015 00:45;42302.03
25.10.2015 01:00;42302.04
25.10.2015 01:15;42302.05
25.10.2015 01:30;42302.06
25.10.2015 01:45;42302.07
25.10.2015 02:00;42302.08
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 03:15;42302.14
25.10.2015 03:30;42302.15
25.10.2015 03:45;42302.16
25.10.2015 04:00;42302.17
25.10.2015 04:15;42302.18
25.10.2015 04:30;42302.19
25.10.2015 04:45;42302.2
25.10.2015 05:00;42302.21
25.10.2015 05:15;42302.22
25.10.2015 05:30;42302.23
代码:
data <- read.delim("test.csv", sep=";", dec=".", header =F, col.names = c("time", "somedata"))
data$time <- as.POSIXct(data$time, "%d.%m.%Y %H:%M", tz="Europe/Zurich")
target <- which(duplicated(data$time))
len_out <- length(target)*2-1 #need to run over duplicated and originals
last_target <- max(target)
for (i in seq(0,len_out)){
diff_increment <- as.numeric(difftime(data$time[last_target-i+2],data$time[last_target-i+1], units="secs")) #get correct increment from a little further out
data$time[last_target-i]<-data$time[last_target-i+1]-diff_increment
}
此代码段将在读入后更正您数据中的夏令时。