使用 POSIX 转换日期和时间
Converting date and time with POSIX
我有包含位置、日期和时间的动物追踪数据。
我需要绘制“一天中的时间”相对于“日期”的图,但我在用 POSIX 函数把时间和日期转换成正确格式时遇到了困难。下面是我在尝试各种 POSIX 用法之前的脚本:
library(chron)
# Work on plain strings rather than on the factor column.
dtimes <- as.character(df$TimeWhen)
# Split every stamp at the "T" and arrange the pieces as one row per stamp.
dtparts <- t(as.data.frame(strsplit(dtimes, "T")))
# Drop the generated row names and label the two character columns.
dimnames(dtparts) <- list(NULL, c("Date", "Time"))
...
我需要一列日期和一列格式正确的时间(h:m:s),以便把它们相互对照绘图。dtparts 表中的两列都不是可识别的日期/时间格式(只是字符串)。我知道这应该很简单,但这里有些地方我没弄明白……
DPUT
我的一些 df:
structure(list(Lat = c(71.05946, 71.05946, 71.05946, 71.05946,
71.05946, 69.29127, 69.29127, 69.22796, 69.22796, 69.31701, 69.22796,
69.31701, 69.31701, 69.31701, 69.31701, 69.31701, 69.32129, 69.31983,
69.31983, 69.31983, 69.31983, 69.31983, 69.31366, 69.31366, 69.31366,
69.31366, 69.18893, 69.18893, 69.18893, 69.17569, 69.17569, 69.17569,
69.17569, 69.1555, 69.07846, 69.07564, 69.07846, 69.10216, 69.10216,
68.67609, 68.67609, 68.67609, 68.66437, 68.58191, 68.58191, 68.58191,
68.58787, 68.58787, 68.58787, 68.61008, 68.61008, 68.6273, 68.6273,
68.6273, 68.6273, 68.53913, 68.54474, 68.54474, 68.6173, 68.6173,
68.5852, 68.55022, 68.55022, 68.55022, 68.56043, 68.56043, 68.56043,
68.52243, 68.52243, 68.41181, 68.18226, 68.27806, 68.27806, 68.27806,
68.27806, 68.27806, 68.25424, 68.24761, 68.25424, 68.24761, 68.26075,
68.26075, 68.25436, 68.25436, 68.25436, 68.25882, 68.25882, 68.25358,
68.25882, 68.25358, 68.25358, 68.24089, 68.38403, 68.39622, 68.39622,
68.39622, 68.39622, 68.37399, 68.40941, 68.40941, 68.40941, 68.40941,
68.37438, 68.40941, 68.40941, 68.40941, 68.37438, 68.37438, 68.58219,
69.01418, 68.42632, 68.42632, 69.02445, 69.02445, 69.02445, 69.02445,
69.07674, 69.07674, 69.07674), Long = c(25.7908, 25.7908, 25.7908,
25.7908, 25.7908, 16.0598, 16.0598, 15.65899, 15.65899, 16.05636,
15.65899, 16.05636, 16.05636, 16.05636, 16.05636, 16.05636, 16.05741,
16.1268, 16.1268, 16.1268, 16.1268, 16.1268, 16.13471, 16.13471,
16.13471, 16.13471, 16.17577, 16.17577, 16.17577, 15.7561, 15.7561,
15.7561, 15.7561, 15.77912, 15.39518, 15.40282, 15.39518, 15.02001,
15.02001, 13.87795, 13.87795, 13.87795, 13.8701, 13.793, 13.793,
13.793, 13.87167, 13.87167, 13.87167, 13.87077, 13.87077, 13.74609,
13.74609, 13.74609, 13.74609, 13.50139, 13.50402, 13.50402, 13.53906,
13.53906, 13.45174, 13.30427, 13.30427, 13.30427, 13.33807, 13.33807,
13.33807, 13.08382, 13.08382, 12.97972, 12.59982, 11.19096, 11.19096,
11.19096, 11.19096, 11.19096, 11.08302, 11.12658, 11.08302, 11.12658,
11.08545, 11.08545, 11.06807, 11.06807, 11.06807, 10.89586, 10.89586,
10.93601, 10.89586, 10.93601, 10.93601, 11.00023, 9.6183, 9.49806,
9.49806, 9.49806, 9.49806, 9.48007, 9.47633, 9.47633, 9.47633,
9.47633, 9.4306, 9.47633, 9.47633, 9.47633, 9.4306, 9.4306, 7.1224,
8.58967, 7.2006, 7.2006, 8.57089, 8.57089, 8.57089, 8.57089,
8.52519, 8.52519, 8.52519), TimeWhen = structure(c(1L, 1L, 1L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
11L, 11L, 11L, 12L, 12L, 13L, 13L, 13L, 14L, 15L, 15L, 15L, 16L,
16L, 16L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 20L, 20L, 21L, 21L,
22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 26L, 27L, 28L, 28L,
28L, 28L, 28L, 30L, 29L, 30L, 29L, 31L, 31L, 32L, 32L, 32L, 33L,
33L, 34L, 33L, 34L, 34L, 35L, 36L, 37L, 37L, 37L, 37L, 37L, 38L,
38L, 38L, 38L, 39L, 38L, 38L, 38L, 39L, 39L, 40L, 41L, 41L, 41L,
42L, 42L, 42L, 42L, 43L, 43L, 43L), .Label = c("2017-07-16T15:13:50",
"2017-07-16T15:37:55", "2017-07-16T16:21:29", "2017-07-16T16:24:44",
"2017-07-16T16:53:58", "2017-07-16T17:15:02", "2017-07-16T18:00:24",
"2017-07-17T03:55:34", "2017-07-17T04:29:40", "2017-07-17T04:53:00",
"2017-07-17T09:36:13", "2017-07-18T03:17:54", "2017-07-18T04:23:26",
"2017-07-18T04:26:44", "2017-07-18T05:59:29", "2017-07-18T06:02:41",
"2017-07-18T06:10:32", "2017-07-18T06:36:17", "2017-07-18T07:39:46",
"2017-07-18T07:43:26", "2017-07-18T07:48:25", "2017-07-18T09:19:02",
"2017-07-18T09:28:46", "2017-07-18T10:00:38", "2017-07-18T11:00:03",
"2017-07-18T11:03:49", "2017-07-19T04:08:44", "2017-07-19T04:24:14",
"2017-07-19T05:45:59", "2017-07-19T05:47:37", "2017-07-19T05:56:30",
"2017-07-19T06:05:26", "2017-07-19T07:24:12", "2017-07-19T07:24:44",
"2017-07-19T07:51:08", "2017-07-20T07:22:15", "2017-07-20T08:42:35",
"2017-07-20T08:56:43", "2017-07-20T09:01:53", "2017-07-21T03:08:33",
"2017-07-21T03:31:17", "2017-07-21T03:46:05", "2017-07-21T05:03:12"),
class = "factor")), .Names = c("Lat", "Long", "TimeWhen"), row.names =
c(NA,-119L), class = "data.frame")
使用 dplyr 和 stringr:
library(dplyr)

# Split TimeWhen into a Date column and an "HH:MM:SS" character column.
df %>%
  mutate(
    # Use the TimeWhen column of the piped data frame. The earlier
    # `x$TimeWhen` referred to an object `x` that this example never creates.
    # as.character() makes the factor-to-string conversion explicit.
    TimeWhenClean = stringr::str_replace(as.character(TimeWhen), "T", " "),
    date = as.Date(strftime(TimeWhenClean, format = "%F")),
    time = strftime(TimeWhenClean, format = "%T")
  ) %>%
  # The helper column is only needed while deriving date and time.
  select(-TimeWhenClean)
这段代码以您的 df 为输入,添加一个临时变量 TimeWhenClean(把 T 替换为空格),使日期时间字符串变成 strftime 可以直接解析的“日期 时间”格式。然后用 strftime 分别提取日期部分和时间部分(前者再转换为 Date),最后删除这个临时变量。
OP 已请求:
I need a column with dates and one with h:m:s in the right time formats in order to plot them against each other.
如果正确的格式意味着日期和时间被格式化为字符串,这可能会导致绘制时出现意外结果,因为字符变量将绘制在离散轴上。
所以我建议确保日期和一天中的时间都被视为连续变量。
借助基础 R 和 lubridate 包:
library(lubridate)

# Replace the raw TimeWhen column with its parsed date-time (POSIXct) version.
df[["TimeWhen"]] <- ymd_hms(df[["TimeWhen"]])
# Derive the calendar date of every observation from the parsed stamp.
df[["date"]] <- as.Date(df[["TimeWhen"]])
现在,有两种方式可以把一天中的时间表示为连续变量:
- 作为数值变量(0 到 24 之间的小时数),或
- 作为 POSIXct 变量,其中所有时间戳都被平移(映射)到同一个参考日。
# Midnight at the start of each observation's own day.
day_start <- floor_date(df$TimeWhen, unit = "day")

# Hour of day as a numeric value between 0 and 24.
df$hour.of.day <- hour(df$TimeWhen) + minute(df$TimeWhen) / 60 +
  second(df$TimeWhen) / 3600

# Time of day as POSIXct: every stamp is shifted onto a single reference day.
# The reference midnight is taken from TimeWhen itself so that it carries the
# same time zone as the data, rather than going through a Date -> POSIXct
# conversion (whose time-zone handling appears to differ between R versions).
reference_midnight <- min(day_start)
df$time.of.day <- reference_midnight + (df$TimeWhen - day_start)
绘制时这会有所不同:
library(ggplot2)

# Numeric y-axis: hour of day plotted as a plain number.
ggplot(df, aes(x = date, y = hour.of.day)) +
  geom_point()

# Date-time y-axis: labels are shown in hh:mm format.
ggplot(df, aes(x = date, y = time.of.day)) +
  geom_point()
注意 y 轴的不同格式。
我有包含位置、日期和时间的动物追踪数据。
我需要绘制“一天中的时间”相对于“日期”的图,但我在用 POSIX 函数把时间和日期转换成正确格式时遇到了困难。下面是我在尝试各种 POSIX 用法之前的脚本:
library(chron)
# Work on plain strings rather than on the factor column.
dtimes <- as.character(df$TimeWhen)
# Split every stamp at the "T" and arrange the pieces as one row per stamp.
dtparts <- t(as.data.frame(strsplit(dtimes, "T")))
# Drop the generated row names and label the two character columns.
dimnames(dtparts) <- list(NULL, c("Date", "Time"))
...
我需要一列日期和一列格式正确的时间(h:m:s),以便把它们相互对照绘图。dtparts 表中的两列都不是可识别的日期/时间格式(只是字符串)。我知道这应该很简单,但这里有些地方我没弄明白……
DPUT
我的一些 df:
structure(list(Lat = c(71.05946, 71.05946, 71.05946, 71.05946,
71.05946, 69.29127, 69.29127, 69.22796, 69.22796, 69.31701, 69.22796,
69.31701, 69.31701, 69.31701, 69.31701, 69.31701, 69.32129, 69.31983,
69.31983, 69.31983, 69.31983, 69.31983, 69.31366, 69.31366, 69.31366,
69.31366, 69.18893, 69.18893, 69.18893, 69.17569, 69.17569, 69.17569,
69.17569, 69.1555, 69.07846, 69.07564, 69.07846, 69.10216, 69.10216,
68.67609, 68.67609, 68.67609, 68.66437, 68.58191, 68.58191, 68.58191,
68.58787, 68.58787, 68.58787, 68.61008, 68.61008, 68.6273, 68.6273,
68.6273, 68.6273, 68.53913, 68.54474, 68.54474, 68.6173, 68.6173,
68.5852, 68.55022, 68.55022, 68.55022, 68.56043, 68.56043, 68.56043,
68.52243, 68.52243, 68.41181, 68.18226, 68.27806, 68.27806, 68.27806,
68.27806, 68.27806, 68.25424, 68.24761, 68.25424, 68.24761, 68.26075,
68.26075, 68.25436, 68.25436, 68.25436, 68.25882, 68.25882, 68.25358,
68.25882, 68.25358, 68.25358, 68.24089, 68.38403, 68.39622, 68.39622,
68.39622, 68.39622, 68.37399, 68.40941, 68.40941, 68.40941, 68.40941,
68.37438, 68.40941, 68.40941, 68.40941, 68.37438, 68.37438, 68.58219,
69.01418, 68.42632, 68.42632, 69.02445, 69.02445, 69.02445, 69.02445,
69.07674, 69.07674, 69.07674), Long = c(25.7908, 25.7908, 25.7908,
25.7908, 25.7908, 16.0598, 16.0598, 15.65899, 15.65899, 16.05636,
15.65899, 16.05636, 16.05636, 16.05636, 16.05636, 16.05636, 16.05741,
16.1268, 16.1268, 16.1268, 16.1268, 16.1268, 16.13471, 16.13471,
16.13471, 16.13471, 16.17577, 16.17577, 16.17577, 15.7561, 15.7561,
15.7561, 15.7561, 15.77912, 15.39518, 15.40282, 15.39518, 15.02001,
15.02001, 13.87795, 13.87795, 13.87795, 13.8701, 13.793, 13.793,
13.793, 13.87167, 13.87167, 13.87167, 13.87077, 13.87077, 13.74609,
13.74609, 13.74609, 13.74609, 13.50139, 13.50402, 13.50402, 13.53906,
13.53906, 13.45174, 13.30427, 13.30427, 13.30427, 13.33807, 13.33807,
13.33807, 13.08382, 13.08382, 12.97972, 12.59982, 11.19096, 11.19096,
11.19096, 11.19096, 11.19096, 11.08302, 11.12658, 11.08302, 11.12658,
11.08545, 11.08545, 11.06807, 11.06807, 11.06807, 10.89586, 10.89586,
10.93601, 10.89586, 10.93601, 10.93601, 11.00023, 9.6183, 9.49806,
9.49806, 9.49806, 9.49806, 9.48007, 9.47633, 9.47633, 9.47633,
9.47633, 9.4306, 9.47633, 9.47633, 9.47633, 9.4306, 9.4306, 7.1224,
8.58967, 7.2006, 7.2006, 8.57089, 8.57089, 8.57089, 8.57089,
8.52519, 8.52519, 8.52519), TimeWhen = structure(c(1L, 1L, 1L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
11L, 11L, 11L, 12L, 12L, 13L, 13L, 13L, 14L, 15L, 15L, 15L, 16L,
16L, 16L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 20L, 20L, 21L, 21L,
22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 26L, 27L, 28L, 28L,
28L, 28L, 28L, 30L, 29L, 30L, 29L, 31L, 31L, 32L, 32L, 32L, 33L,
33L, 34L, 33L, 34L, 34L, 35L, 36L, 37L, 37L, 37L, 37L, 37L, 38L,
38L, 38L, 38L, 39L, 38L, 38L, 38L, 39L, 39L, 40L, 41L, 41L, 41L,
42L, 42L, 42L, 42L, 43L, 43L, 43L), .Label = c("2017-07-16T15:13:50",
"2017-07-16T15:37:55", "2017-07-16T16:21:29", "2017-07-16T16:24:44",
"2017-07-16T16:53:58", "2017-07-16T17:15:02", "2017-07-16T18:00:24",
"2017-07-17T03:55:34", "2017-07-17T04:29:40", "2017-07-17T04:53:00",
"2017-07-17T09:36:13", "2017-07-18T03:17:54", "2017-07-18T04:23:26",
"2017-07-18T04:26:44", "2017-07-18T05:59:29", "2017-07-18T06:02:41",
"2017-07-18T06:10:32", "2017-07-18T06:36:17", "2017-07-18T07:39:46",
"2017-07-18T07:43:26", "2017-07-18T07:48:25", "2017-07-18T09:19:02",
"2017-07-18T09:28:46", "2017-07-18T10:00:38", "2017-07-18T11:00:03",
"2017-07-18T11:03:49", "2017-07-19T04:08:44", "2017-07-19T04:24:14",
"2017-07-19T05:45:59", "2017-07-19T05:47:37", "2017-07-19T05:56:30",
"2017-07-19T06:05:26", "2017-07-19T07:24:12", "2017-07-19T07:24:44",
"2017-07-19T07:51:08", "2017-07-20T07:22:15", "2017-07-20T08:42:35",
"2017-07-20T08:56:43", "2017-07-20T09:01:53", "2017-07-21T03:08:33",
"2017-07-21T03:31:17", "2017-07-21T03:46:05", "2017-07-21T05:03:12"),
class = "factor")), .Names = c("Lat", "Long", "TimeWhen"), row.names =
c(NA,-119L), class = "data.frame")
使用 dplyr 和 stringr:
library(dplyr)

# Split TimeWhen into a Date column and an "HH:MM:SS" character column.
df %>%
  mutate(
    # Use the TimeWhen column of the piped data frame. The earlier
    # `x$TimeWhen` referred to an object `x` that this example never creates.
    # as.character() makes the factor-to-string conversion explicit.
    TimeWhenClean = stringr::str_replace(as.character(TimeWhen), "T", " "),
    date = as.Date(strftime(TimeWhenClean, format = "%F")),
    time = strftime(TimeWhenClean, format = "%T")
  ) %>%
  # The helper column is only needed while deriving date and time.
  select(-TimeWhenClean)
这段代码以您的 df 为输入,添加一个临时变量 TimeWhenClean(把 T 替换为空格),使日期时间字符串变成 strftime 可以直接解析的“日期 时间”格式。然后用 strftime 分别提取日期部分和时间部分(前者再转换为 Date),最后删除这个临时变量。
OP 已请求:
I need a column with dates and one with h:m:s in the right time formats in order to plot them against each other.
如果正确的格式意味着日期和时间被格式化为字符串,这可能会导致绘制时出现意外结果,因为字符变量将绘制在离散轴上。
所以我建议确保日期和一天中的时间都被视为连续变量。
借助基础 R 和 lubridate 包:
library(lubridate)

# Replace the raw TimeWhen column with its parsed date-time (POSIXct) version.
df[["TimeWhen"]] <- ymd_hms(df[["TimeWhen"]])
# Derive the calendar date of every observation from the parsed stamp.
df[["date"]] <- as.Date(df[["TimeWhen"]])
现在,有两种方式可以把一天中的时间表示为连续变量:
- 作为数值变量(0 到 24 之间的小时数),或
- 作为 POSIXct 变量,其中所有时间戳都被平移(映射)到同一个参考日。
# Midnight at the start of each observation's own day.
day_start <- floor_date(df$TimeWhen, unit = "day")

# Hour of day as a numeric value between 0 and 24.
df$hour.of.day <- hour(df$TimeWhen) + minute(df$TimeWhen) / 60 +
  second(df$TimeWhen) / 3600

# Time of day as POSIXct: every stamp is shifted onto a single reference day.
# The reference midnight is taken from TimeWhen itself so that it carries the
# same time zone as the data, rather than going through a Date -> POSIXct
# conversion (whose time-zone handling appears to differ between R versions).
reference_midnight <- min(day_start)
df$time.of.day <- reference_midnight + (df$TimeWhen - day_start)
绘制时这会有所不同:
library(ggplot2)

# Numeric y-axis: hour of day plotted as a plain number.
ggplot(df, aes(x = date, y = hour.of.day)) +
  geom_point()

# Date-time y-axis: labels are shown in hh:mm format.
ggplot(df, aes(x = date, y = time.of.day)) +
  geom_point()
注意 y 轴的不同格式。