用脏数据格式化 R 时间
R time formatting with dirty data
我正在使用 R 从数据库生成 CZML 文件。
数据库有脏数据。
我需要一种方法来确保时间的格式为“%H:%M:%S”。
数据可能已经是正确的 %H:%M:%S 格式,也可能在小时前缺少前导零,例如 8:30:00;这不符合 ISO 8601,会导致 CZML 解析完全失败。
在 24 小时格式中需要始终像这样 08:30:00 或 07:09:00。
出错的原因是有些时间写成了 8:30:00 或 7:09:00 这样的形式(仍是 24 小时制,但小时缺少前导零)。我还没有检查分钟或秒是否也有同样的问题,目前先假设它们是正确的,唯一的问题出在小时上。
例如,我有一个这样的 csv 文件:
"Date","Time","TZ","Jul.Time","BirdID","Species","Sex","Age","SiteID","Latitude","Longitude"
"4-Mar-13","08:30:00","America/Costa_Rica",2456356.187500,"test2","GREH","M","AHY","56scr25",8.71191178,-82.96866316
"4-Mar-13","8:30:00","America/Costa_Rica",2456356.187500,"test2","GREH","M","AHY","56scr25",8.71191178,-82.96866316
我需要像这样生成 CZML:
"point": {
"color": {
"rgba": [
"2013-03-04T08:30:00Z",225,50,50,196,"2013-03-04T08:30:01Z",50,50,225,196,"2013-03-04T13:30:00Z",225,50,50,196,"2013-03-04T13:30:01Z",50,50,225,196,"2013-03-04T16:00:00Z",225,50,50,196,"2013-03-04T16:00:01Z",50,50,225,196
]
},
"pixelSize": { "number": 10 }
}
我的代码是这样的:
# Print the CZML "rgba" sample list: for each row of visitedTimes, emit the
# time of passage with one colour, then the instant one second later with a
# second colour. Entries are comma-separated, with no trailing comma.
j=1
numVisits=nrow(visitedTimes)
while(j<=numVisits){
# Parse dates such as "4-Mar-13" (%b month names are locale-dependent).
date=as.Date(visitedTimes$Date[j], format="%d-%b-%y")
# NOTE(review): presumably Time is a character (or factor) column; in that
# case format() does not parse it as a clock time, so "8:30:00" passes through
# without a leading zero. Confirm the column type.
time=format(visitedTimes$Time[j], format="%H:%M:%S")
# NOTE(review): the literal "Z" labels the stamp as UTC, while the sample data
# carries its own TZ column. Confirm the times really are UTC.
timeOfPassage=paste0(date,"T",time,"Z")
# Re-parse the stamp so that one second can be added to it.
timeAfter=as.POSIXlt(timeOfPassage, format="%Y-%m-%dT%H:%M:%SZ")
timeAfter$sec=timeAfter$sec+1
timeAfter=format(timeAfter, format="%Y-%m-%dT%H:%M:%SZ")
# First sample: quoted timestamp followed by its RGBA values.
cat(paste0("\"",timeOfPassage,"\","))
cat("225,50,50,196,")
# Second sample, one second later, with the other colour.
cat(paste0("\"",timeAfter,"\","))
cat("50,50,225,196")
# Separate entries with a comma, except after the last one.
if(j<numVisits){
cat(",")
}
j=j+1
}
但是由于脏数据,它没有产生所需的输出..
有什么想法吗?
我们可以使用 chron 包中的 times
# chron is a CRAN package, not part of base R; it must be installed first.
library(chron)
# times() parses the h:m:s strings; the result prints with every field
# zero-padded to two digits, as the output below shows.
times(v1)
#[1] 08:30:00 08:30:00 07:09:00 07:09:00
或使用base R
# Base R alternative: parse each string as a clock time, then print it back
# with the same pattern so that every field is zero-padded to two digits.
format(
  strptime(v2, format = "%H:%M:%S"),
  format = "%H:%M:%S"
)
#[1] "08:30:00" "08:30:00" "07:09:00" "07:09:00" "07:09:05" "11:10:00"
使用 OP 的更新数据集
# Replace the character Time column with its chron `times` equivalent.
df1$Time <- times(df1$Time)
# Printing the column shows both rows in the zero-padded form.
df1$Time
#[1] 08:30:00 08:30:00
或使用regex
# Pad a single-digit hour: one character followed by ":" at the start of the
# string gets a "0" in front. The backslash of the backreference must be
# doubled inside an R string literal; a bare '\1' is the control character
# \001, not capture group 1.
sub('^(.:)', '0\\1', df1$Time)
# Pad every single-digit field (hour, minute or second). Any run of two
# non-colon characters is consumed and discarded by (*SKIP)(*F), so only an
# isolated digit reaches the (\d) branch and receives a "0" prefix. These
# verbs are PCRE-only, hence perl=TRUE.
gsub('[^:]{2}(*SKIP)(*F)|(\\d)', '0\\1', v2, perl=TRUE)
#[1] "08:30:00" "08:30:00" "07:09:00" "07:09:00" "07:09:05" "11:10:00"
数据
# Sample inputs for the snippets above.
# v1: times whose hour or minute field may lack a leading zero.
v1 <- c("8:30:00", "08:30:00", "7:09:00", "7:9:00")
# v2: v1 plus entries with unpadded minute and second fields.
v2 <- c(v1, "7:9:5", "11:10:0")
# df1: the two CSV rows from the question, as a plain data frame with
# character (not factor) text columns.
df1 <- data.frame(
  Date = c("4-Mar-13", "4-Mar-13"),
  Time = c("08:30:00", "8:30:00"),
  TZ = c("America/Costa_Rica", "America/Costa_Rica"),
  Jul.Time = c(2456356.1875, 2456356.1875),
  BirdID = c("test2", "test2"),
  Species = c("GREH", "GREH"),
  Sex = c("M", "M"),
  Age = c("AHY", "AHY"),
  SiteID = c("56scr25", "56scr25"),
  Latitude = c(8.71191178, 8.71191178),
  Longitude = c(-82.96866316, -82.96866316),
  stringsAsFactors = FALSE
)
我正在使用 R 从数据库生成 CZML 文件。
数据库有脏数据。
我需要一种方法来确保时间的格式为“%H:%M:%S”。
数据可能已经是正确的 %H:%M:%S 格式,也可能在小时前缺少前导零,例如 8:30:00;这不符合 ISO 8601,会导致 CZML 解析完全失败。
在 24 小时格式中需要始终像这样 08:30:00 或 07:09:00。
出错的原因是有些时间写成了 8:30:00 或 7:09:00 这样的形式(仍是 24 小时制,但小时缺少前导零)。我还没有检查分钟或秒是否也有同样的问题,目前先假设它们是正确的,唯一的问题出在小时上。
例如,我有一个这样的 csv 文件:
"Date","Time","TZ","Jul.Time","BirdID","Species","Sex","Age","SiteID","Latitude","Longitude"
"4-Mar-13","08:30:00","America/Costa_Rica",2456356.187500,"test2","GREH","M","AHY","56scr25",8.71191178,-82.96866316
"4-Mar-13","8:30:00","America/Costa_Rica",2456356.187500,"test2","GREH","M","AHY","56scr25",8.71191178,-82.96866316
我需要像这样生成 CZML:
"point": {
"color": {
"rgba": [
"2013-03-04T08:30:00Z",225,50,50,196,"2013-03-04T08:30:01Z",50,50,225,196,"2013-03-04T13:30:00Z",225,50,50,196,"2013-03-04T13:30:01Z",50,50,225,196,"2013-03-04T16:00:00Z",225,50,50,196,"2013-03-04T16:00:01Z",50,50,225,196
]
},
"pixelSize": { "number": 10 }
}
我的代码是这样的:
# Print the CZML "rgba" sample list: for each row of visitedTimes, emit the
# time of passage with one colour, then the instant one second later with a
# second colour. Entries are comma-separated, with no trailing comma.
j=1
numVisits=nrow(visitedTimes)
while(j<=numVisits){
# Parse dates such as "4-Mar-13" (%b month names are locale-dependent).
date=as.Date(visitedTimes$Date[j], format="%d-%b-%y")
# NOTE(review): presumably Time is a character (or factor) column; in that
# case format() does not parse it as a clock time, so "8:30:00" passes through
# without a leading zero. Confirm the column type.
time=format(visitedTimes$Time[j], format="%H:%M:%S")
# NOTE(review): the literal "Z" labels the stamp as UTC, while the sample data
# carries its own TZ column. Confirm the times really are UTC.
timeOfPassage=paste0(date,"T",time,"Z")
# Re-parse the stamp so that one second can be added to it.
timeAfter=as.POSIXlt(timeOfPassage, format="%Y-%m-%dT%H:%M:%SZ")
timeAfter$sec=timeAfter$sec+1
timeAfter=format(timeAfter, format="%Y-%m-%dT%H:%M:%SZ")
# First sample: quoted timestamp followed by its RGBA values.
cat(paste0("\"",timeOfPassage,"\","))
cat("225,50,50,196,")
# Second sample, one second later, with the other colour.
cat(paste0("\"",timeAfter,"\","))
cat("50,50,225,196")
# Separate entries with a comma, except after the last one.
if(j<numVisits){
cat(",")
}
j=j+1
}
但是由于脏数据,它没有产生所需的输出.. 有什么想法吗?
我们可以使用 chron 包中的 times
# chron is a CRAN package, not part of base R; it must be installed first.
library(chron)
# times() parses the h:m:s strings; the result prints with every field
# zero-padded to two digits, as the output below shows.
times(v1)
#[1] 08:30:00 08:30:00 07:09:00 07:09:00
或使用base R
# Base R alternative: parse each string as a clock time, then print it back
# with the same pattern so that every field is zero-padded to two digits.
format(
  strptime(v2, format = "%H:%M:%S"),
  format = "%H:%M:%S"
)
#[1] "08:30:00" "08:30:00" "07:09:00" "07:09:00" "07:09:05" "11:10:00"
使用 OP 的更新数据集
# Replace the character Time column with its chron `times` equivalent.
df1$Time <- times(df1$Time)
# Printing the column shows both rows in the zero-padded form.
df1$Time
#[1] 08:30:00 08:30:00
或使用regex
# Pad a single-digit hour: one character followed by ":" at the start of the
# string gets a "0" in front. The backslash of the backreference must be
# doubled inside an R string literal; a bare '\1' is the control character
# \001, not capture group 1.
sub('^(.:)', '0\\1', df1$Time)
# Pad every single-digit field (hour, minute or second). Any run of two
# non-colon characters is consumed and discarded by (*SKIP)(*F), so only an
# isolated digit reaches the (\d) branch and receives a "0" prefix. These
# verbs are PCRE-only, hence perl=TRUE.
gsub('[^:]{2}(*SKIP)(*F)|(\\d)', '0\\1', v2, perl=TRUE)
#[1] "08:30:00" "08:30:00" "07:09:00" "07:09:00" "07:09:05" "11:10:00"
数据
# Sample inputs for the snippets above.
# v1: times whose hour or minute field may lack a leading zero.
v1 <- c("8:30:00", "08:30:00", "7:09:00", "7:9:00")
# v2: v1 plus entries with unpadded minute and second fields.
v2 <- c(v1, "7:9:5", "11:10:0")
# df1: the two CSV rows from the question, as a plain data frame with
# character (not factor) text columns.
df1 <- data.frame(
  Date = c("4-Mar-13", "4-Mar-13"),
  Time = c("08:30:00", "8:30:00"),
  TZ = c("America/Costa_Rica", "America/Costa_Rica"),
  Jul.Time = c(2456356.1875, 2456356.1875),
  BirdID = c("test2", "test2"),
  Species = c("GREH", "GREH"),
  Sex = c("M", "M"),
  Age = c("AHY", "AHY"),
  SiteID = c("56scr25", "56scr25"),
  Latitude = c(8.71191178, 8.71191178),
  Longitude = c(-82.96866316, -82.96866316),
  stringsAsFactors = FALSE
)