从 UTC 时间戳中提取日期、小时和工作日 -R
To extract date, hour and weekday from a UTC timestamp -R
我有一个数据库,其中包含 UTC 中创建时间的列,例如
created_utc
1 1430438400
2 1430438410
3 1430438430
4 1430438455
5 1430438470
6 1430438480
我想将日期、小时和 is.weekend 提取到单独的列中。我试过了,
# Attempt from the question: derive the hour through POSIXlt inside mutate().
db_subset %>% mutate(hour = as.POSIXlt(created_utc, origin ='1970-01-01')$hour)
但无法识别 created_utc
对象。
我尝试将其强制转换为数据框,然后
# Second attempt from the question: select the column, collect() the result,
# coerce it to a data.frame, then derive the hour through POSIXlt.
df_comments <- db_subset %>%
select(created_utc) %>%
collect() %>%
data.frame() %>%
mutate(hour = as.POSIXlt(created_utc, origin ='1970-01-01')$hour)
但失败并出现错误:invalid subscript type 'closure'
有人能帮我看看问题出在哪里吗?我该如何提取小时、日期等信息?
我首先建议将您的 created_utc 转换为 POSIXct 类(而不是 POSIXlt),然后再提取您需要的所有数据。下面是使用 data.table 包的一个简单示例:
library(data.table)
# created_utc holds seconds since 1970-01-01 in UTC, so pin tz = "UTC".
# Without it the hour and weekday are computed in the session's local time
# zone while as.Date() works in UTC, and the derived columns can disagree.
setDT(df)[, created_utc := as.POSIXct(created_utc,
                                      origin = "1970-01-01", tz = "UTC")]
df[, `:=`(Date = as.Date(created_utc, tz = "UTC"),
          Hour = hour(created_utc),
          # data.table::wday() numbers the days 1 (Sunday) to 7 (Saturday)
          isWeekend = wday(created_utc) %in% c(7L, 1L))]
df
# created_utc Date Hour isWeekend
# 1: 2015-05-01 00:00:00 2015-05-01 0 FALSE
# 2: 2015-05-01 00:00:10 2015-05-01 0 FALSE
# 3: 2015-05-01 00:00:30 2015-05-01 0 FALSE
# 4: 2015-05-01 00:00:55 2015-05-01 0 FALSE
# 5: 2015-05-01 00:01:10 2015-05-01 0 FALSE
# 6: 2015-05-01 00:01:20 2015-05-01 0 FALSE
如果我们使用 dplyr,一个选项是先转换为 POSIXct(因为 dplyr 不支持 POSIXlt 类),然后使用 lubridate 提取小时。
library(lubridate)
library(dplyr)
# The values are seconds since 1970-01-01 in UTC; pin tz = "UTC" so the
# extracted hour does not depend on the time zone of the machine running it.
db_subset %>%
  mutate(hour = hour(as.POSIXct(created_utc,
                                origin = "1970-01-01", tz = "UTC")))
# created_utc hour
#1 1430438400 0
#2 1430438410 0
#3 1430438430 0
#4 1430438455 0
#5 1430438470 0
#6 1430438480 0
数据
# Example data: six creation times stored as integer epoch seconds in a
# one-column data.frame with character row names "1".."6".
db_subset <- structure(
  list(
    c(
      1430438400L, 1430438410L, 1430438430L,
      1430438455L, 1430438470L, 1430438480L
    )
  ),
  names = "created_utc",
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6")
)
所有这些都可以用 base R 来完成:
R> df <- data.frame(created_utc=c(1430438400, 1430438410, 1430438430,
+ 1430438455, 1430438470, 1430438480))
R> df
created_utc
1 1430438400
2 1430438410
3 1430438430
4 1430438455
5 1430438470
6 1430438480
R>
R> # so far so good -- we just have the data
R> # so let's make it a date time object; the values are seconds since the
R> # epoch in UTC, so pin tz="UTC" instead of relying on the local time zone
R>
R> df[,1] <- as.POSIXct(df[,1], origin="1970-01-01", tz="UTC")
R> df
created_utc
1 2015-05-01 00:00:00
2 2015-05-01 00:00:10
3 2015-05-01 00:00:30
4 2015-05-01 00:00:55
5 2015-05-01 00:01:10
6 2015-05-01 00:01:20
R>
R> ## we can use this to extract Date, Hour and Weekend computations
R> ## (POSIXlt wday runs 0 = Sunday ... 6 = Saturday; %in% is vectorised,
R> ## whereas || only accepts length-one operands)
R>
R> df[,"date"] <- as.Date(df[,1], tz="UTC")
R> df[,"hour"] <- as.POSIXlt(df[,1])$hour
R> df[,"isWeekend"] <- as.POSIXlt(df[,1])$wday %in% c(0L, 6L)
R> df
created_utc date hour isWeekend
1 2015-05-01 00:00:00 2015-05-01 0 FALSE
2 2015-05-01 00:00:10 2015-05-01 0 FALSE
3 2015-05-01 00:00:30 2015-05-01 0 FALSE
4 2015-05-01 00:00:55 2015-05-01 0 FALSE
5 2015-05-01 00:01:10 2015-05-01 0 FALSE
6 2015-05-01 00:01:20 2015-05-01 0 FALSE
R>
我有一个数据库,其中包含 UTC 中创建时间的列,例如
created_utc
1 1430438400
2 1430438410
3 1430438430
4 1430438455
5 1430438470
6 1430438480
我想将日期、小时和 is.weekend 提取到单独的列中。我试过了,
# Attempt from the question: derive the hour through POSIXlt inside mutate().
db_subset %>% mutate(hour = as.POSIXlt(created_utc, origin ='1970-01-01')$hour)
但无法识别 created_utc
对象。
我尝试将其强制转换为数据框,然后
# Second attempt from the question: select the column, collect() the result,
# coerce it to a data.frame, then derive the hour through POSIXlt.
df_comments <- db_subset %>%
select(created_utc) %>%
collect() %>%
data.frame() %>%
mutate(hour = as.POSIXlt(created_utc, origin ='1970-01-01')$hour)
但失败并出现错误:invalid subscript type 'closure'
有人能帮我看看问题出在哪里吗?我该如何提取小时、日期等信息?
我首先建议将您的 created_utc 转换为 POSIXct 类(而不是 POSIXlt),然后再提取您需要的所有数据。下面是使用 data.table 包的一个简单示例:
library(data.table)
# created_utc holds seconds since 1970-01-01 in UTC, so pin tz = "UTC".
# Without it the hour and weekday are computed in the session's local time
# zone while as.Date() works in UTC, and the derived columns can disagree.
setDT(df)[, created_utc := as.POSIXct(created_utc,
                                      origin = "1970-01-01", tz = "UTC")]
df[, `:=`(Date = as.Date(created_utc, tz = "UTC"),
          Hour = hour(created_utc),
          # data.table::wday() numbers the days 1 (Sunday) to 7 (Saturday)
          isWeekend = wday(created_utc) %in% c(7L, 1L))]
df
# created_utc Date Hour isWeekend
# 1: 2015-05-01 00:00:00 2015-05-01 0 FALSE
# 2: 2015-05-01 00:00:10 2015-05-01 0 FALSE
# 3: 2015-05-01 00:00:30 2015-05-01 0 FALSE
# 4: 2015-05-01 00:00:55 2015-05-01 0 FALSE
# 5: 2015-05-01 00:01:10 2015-05-01 0 FALSE
# 6: 2015-05-01 00:01:20 2015-05-01 0 FALSE
如果我们使用 dplyr,一个选项是先转换为 POSIXct(因为 dplyr 不支持 POSIXlt 类),然后使用 lubridate 提取小时。
library(lubridate)
library(dplyr)
# The values are seconds since 1970-01-01 in UTC; pin tz = "UTC" so the
# extracted hour does not depend on the time zone of the machine running it.
db_subset %>%
  mutate(hour = hour(as.POSIXct(created_utc,
                                origin = "1970-01-01", tz = "UTC")))
# created_utc hour
#1 1430438400 0
#2 1430438410 0
#3 1430438430 0
#4 1430438455 0
#5 1430438470 0
#6 1430438480 0
数据
# Example data: six creation times stored as integer epoch seconds in a
# one-column data.frame with character row names "1".."6".
db_subset <- structure(
  list(
    c(
      1430438400L, 1430438410L, 1430438430L,
      1430438455L, 1430438470L, 1430438480L
    )
  ),
  names = "created_utc",
  class = "data.frame",
  row.names = c("1", "2", "3", "4", "5", "6")
)
所有这些都可以用 base R 来完成:
R> df <- data.frame(created_utc=c(1430438400, 1430438410, 1430438430,
+ 1430438455, 1430438470, 1430438480))
R> df
created_utc
1 1430438400
2 1430438410
3 1430438430
4 1430438455
5 1430438470
6 1430438480
R>
R> # so far so good -- we just have the data
R> # so let's make it a date time object; the values are seconds since the
R> # epoch in UTC, so pin tz="UTC" instead of relying on the local time zone
R>
R> df[,1] <- as.POSIXct(df[,1], origin="1970-01-01", tz="UTC")
R> df
created_utc
1 2015-05-01 00:00:00
2 2015-05-01 00:00:10
3 2015-05-01 00:00:30
4 2015-05-01 00:00:55
5 2015-05-01 00:01:10
6 2015-05-01 00:01:20
R>
R> ## we can use this to extract Date, Hour and Weekend computations
R> ## (POSIXlt wday runs 0 = Sunday ... 6 = Saturday; %in% is vectorised,
R> ## whereas || only accepts length-one operands)
R>
R> df[,"date"] <- as.Date(df[,1], tz="UTC")
R> df[,"hour"] <- as.POSIXlt(df[,1])$hour
R> df[,"isWeekend"] <- as.POSIXlt(df[,1])$wday %in% c(0L, 6L)
R> df
created_utc date hour isWeekend
1 2015-05-01 00:00:00 2015-05-01 0 FALSE
2 2015-05-01 00:00:10 2015-05-01 0 FALSE
3 2015-05-01 00:00:30 2015-05-01 0 FALSE
4 2015-05-01 00:00:55 2015-05-01 0 FALSE
5 2015-05-01 00:01:10 2015-05-01 0 FALSE
6 2015-05-01 00:01:20 2015-05-01 0 FALSE
R>