尝试从字符列创建日期列后出错
Error after trying to make a date column from a character column
使用 sparklyr 库,我尝试以如下方式在 Spark 数据框中创建一个日期变量(这段代码在普通 R 中是有效的):
# Researching SPARK --------------------------------------------------------------------------
#library(data.table)
library(sparklyr)
library(dplyr)

# NOTE(review): hard-coded working directory — acceptable only for a
# throwaway reproducible example.
setwd('C:/Users/aburnakov/Desktop')
#spark_install(version = "2.1.0")

# Open a local Spark session.
r_spark_connection <- spark_connect(master = "local")

# Sample timestamps kept as character strings (millisecond precision).
sample_dat <- data.frame(
  When = as.character(c(
    "2018-01-15 03:05:02.177",
    "2018-01-15 00:54:31.133",
    "2018-01-15 21:24:06.013",
    "2018-01-15 15:44:26.047",
    "2018-01-15 05:17:06.040",
    "2018-01-15 06:41:08.183",
    "2018-01-15 15:09:40.137",
    "2018-01-15 03:15:43.820",
    "2018-01-15 11:02:27.180",
    "2018-01-15 18:23:42.047"
  ))
)

# Persist to CSV so Spark can read it back; write.csv emits the row names
# as an unnamed first column — presumably this is the `_c0` column seen
# later in the output (TODO confirm).
write.csv(x = sample_dat, file = 'sample_dat.csv')
## write raw data from csv to spark env ------------------------------------------
## write raw data from csv to spark env ------------------------------------------
# Read the CSV into Spark with schema inference disabled so every column,
# including `When`, stays a plain string.
sample_spark_df <- spark_read_csv(
  sc = r_spark_connection
  , name = 'sample_dat'
  , path = 'sample_dat.csv'
  , header = TRUE        # was `T` — use TRUE/FALSE; T and F are reassignable
  , infer_schema = FALSE # keep `When` as character
  , delimiter = ","
  , quote = '"'
  , escape = '`'
  , charset = "UTF-8"
  , null_value = NULL
  , repartition = 10
  , memory = FALSE       # do not cache the table into memory
  , overwrite = TRUE
)
## try either of two
# NOTE(review): the original calls fail on collect() because dbplyr cannot
# translate `as.Date(When, format = ..., tz = ...)` or
# `as.POSIXct(strptime(...))` into Spark SQL. Function calls that dbplyr
# does not recognize are passed through verbatim to Spark, so use Spark
# SQL's own datetime functions instead.
sample_spark_df <- sample_spark_df %>%
  mutate(
    # Spark SQL to_date(): parses the leading yyyy-MM-dd part into a DATE.
    Date = to_date(When)
  )
sample_spark_df <- sample_spark_df %>%
  mutate(
    # Spark SQL unix_timestamp(): epoch seconds, parsed with a Java
    # SimpleDateFormat pattern (session time zone applies).
    datetime_when = unix_timestamp(When, "yyyy-MM-dd HH:mm:ss.SSS")
  )
## collect now succeeds; convert the epoch seconds locally if needed:
## as.POSIXct(x$datetime_when, origin = "1970-01-01")
x <- collect(sample_spark_df)
这是为什么?我还能用指示格式和时区制作日期吗?
类似问题:Converting string/chr to date using sparklyr
我无法在 sparklyr 中直接解析你的日期,但如果你想把它们读取为 POSIXct,可以使用 DBI 包:
# Run the parse on the Spark side via Spark SQL: unix_timestamp() turns the
# character column into epoch seconds using a Java SimpleDateFormat pattern.
res <- DBI::dbGetQuery(r_spark_connection, 'Select _c0, When,
unix_timestamp(When, "yyyy-MM-dd HH:mm:ss.SSS") as dateTS FROM sample_dat')
# Back in R, restore a POSIXct from the epoch seconds.
mutate(res, dateRestored = as.POSIXct(dateTS, origin = "1970-01-01"))
_c0 When dateTS dateRestored
1 10 2018-01-15 18:23:42.047 1516037022 2018-01-15 18:23:42
2 1 2018-01-15 03:05:02.177 1515981902 2018-01-15 03:05:02
3 2 2018-01-15 00:54:31.133 1515974071 2018-01-15 00:54:31
4 3 2018-01-15 21:24:06.013 1516047846 2018-01-15 21:24:06
5 4 2018-01-15 15:44:26.047 1516027466 2018-01-15 15:44:26
6 5 2018-01-15 05:17:06.040 1515989826 2018-01-15 05:17:06
7 6 2018-01-15 06:41:08.183 1515994868 2018-01-15 06:41:08
8 7 2018-01-15 15:09:40.137 1516025380 2018-01-15 15:09:40
9 8 2018-01-15 03:15:43.820 1515982543 2018-01-15 03:15:43
10 9 2018-01-15 11:02:27.180 1516010547 2018-01-15 11:02:27
想法是使用此包通过 Spark SQL 读取数据,并使用函数 unix_timestamp
从字符日期创建时间戳。然后,您可以对时间戳使用 as.POSIXct
函数。
希望对您有所帮助。
使用 sparklyr 库,我尝试以如下方式在 Spark 数据框中创建一个日期变量(这段代码在普通 R 中是有效的):
# Researching SPARK --------------------------------------------------------------------------
#library(data.table)
library(sparklyr)
library(dplyr)

# NOTE(review): hard-coded working directory — acceptable only for a
# throwaway reproducible example.
setwd('C:/Users/aburnakov/Desktop')
#spark_install(version = "2.1.0")

# Open a local Spark session.
r_spark_connection <- spark_connect(master = "local")

# Sample timestamps kept as character strings (millisecond precision).
sample_dat <- data.frame(
  When = as.character(c(
    "2018-01-15 03:05:02.177",
    "2018-01-15 00:54:31.133",
    "2018-01-15 21:24:06.013",
    "2018-01-15 15:44:26.047",
    "2018-01-15 05:17:06.040",
    "2018-01-15 06:41:08.183",
    "2018-01-15 15:09:40.137",
    "2018-01-15 03:15:43.820",
    "2018-01-15 11:02:27.180",
    "2018-01-15 18:23:42.047"
  ))
)

# Persist to CSV so Spark can read it back; write.csv emits the row names
# as an unnamed first column — presumably this is the `_c0` column seen
# later in the output (TODO confirm).
write.csv(x = sample_dat, file = 'sample_dat.csv')
## write raw data from csv to spark env ------------------------------------------
## write raw data from csv to spark env ------------------------------------------
# Read the CSV into Spark with schema inference disabled so every column,
# including `When`, stays a plain string.
sample_spark_df <- spark_read_csv(
  sc = r_spark_connection
  , name = 'sample_dat'
  , path = 'sample_dat.csv'
  , header = TRUE        # was `T` — use TRUE/FALSE; T and F are reassignable
  , infer_schema = FALSE # keep `When` as character
  , delimiter = ","
  , quote = '"'
  , escape = '`'
  , charset = "UTF-8"
  , null_value = NULL
  , repartition = 10
  , memory = FALSE       # do not cache the table into memory
  , overwrite = TRUE
)
## try either of two
# NOTE(review): the original calls fail on collect() because dbplyr cannot
# translate `as.Date(When, format = ..., tz = ...)` or
# `as.POSIXct(strptime(...))` into Spark SQL. Function calls that dbplyr
# does not recognize are passed through verbatim to Spark, so use Spark
# SQL's own datetime functions instead.
sample_spark_df <- sample_spark_df %>%
  mutate(
    # Spark SQL to_date(): parses the leading yyyy-MM-dd part into a DATE.
    Date = to_date(When)
  )
sample_spark_df <- sample_spark_df %>%
  mutate(
    # Spark SQL unix_timestamp(): epoch seconds, parsed with a Java
    # SimpleDateFormat pattern (session time zone applies).
    datetime_when = unix_timestamp(When, "yyyy-MM-dd HH:mm:ss.SSS")
  )
## collect now succeeds; convert the epoch seconds locally if needed:
## as.POSIXct(x$datetime_when, origin = "1970-01-01")
x <- collect(sample_spark_df)
这是为什么?我还能用指示格式和时区制作日期吗?
类似问题:Converting string/chr to date using sparklyr
我无法在 sparklyr 中直接解析你的日期,但如果你想把它们读取为 POSIXct,可以使用 DBI 包:
# Run the parse on the Spark side via Spark SQL: unix_timestamp() turns the
# character column into epoch seconds using a Java SimpleDateFormat pattern.
res <- DBI::dbGetQuery(r_spark_connection, 'Select _c0, When,
unix_timestamp(When, "yyyy-MM-dd HH:mm:ss.SSS") as dateTS FROM sample_dat')
# Back in R, restore a POSIXct from the epoch seconds.
mutate(res, dateRestored = as.POSIXct(dateTS, origin = "1970-01-01"))
_c0 When dateTS dateRestored
1 10 2018-01-15 18:23:42.047 1516037022 2018-01-15 18:23:42
2 1 2018-01-15 03:05:02.177 1515981902 2018-01-15 03:05:02
3 2 2018-01-15 00:54:31.133 1515974071 2018-01-15 00:54:31
4 3 2018-01-15 21:24:06.013 1516047846 2018-01-15 21:24:06
5 4 2018-01-15 15:44:26.047 1516027466 2018-01-15 15:44:26
6 5 2018-01-15 05:17:06.040 1515989826 2018-01-15 05:17:06
7 6 2018-01-15 06:41:08.183 1515994868 2018-01-15 06:41:08
8 7 2018-01-15 15:09:40.137 1516025380 2018-01-15 15:09:40
9 8 2018-01-15 03:15:43.820 1515982543 2018-01-15 03:15:43
10 9 2018-01-15 11:02:27.180 1516010547 2018-01-15 11:02:27
想法是使用此包通过 Spark SQL 读取数据,并使用函数 unix_timestamp
从字符日期创建时间戳。然后,您可以对时间戳使用 as.POSIXct
函数。
希望对您有所帮助。