循环以创建虚拟变量 R
looping in order to create dummy variables R
我想制作一个带有虚拟变量的数据框,当事件发生时间在一个时间范围(小时)之间时显示 1。这就是我做的:
library(chron)
library(stringr)
# Sample events: a label and an "HH:MM:SS" time string for each row.
var <- c("A", "B", "C", "D", "E", "F")
times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
df <- data.frame(var, times)

# Hour boundaries "00:00:00" ... "23:00:00", echoed to the console and then
# converted to chron times so they compare directly with the event times.
hours <- paste(
  stringr::str_pad(c(0:23), 2, side = "left", pad = 0),
  "00", "00",
  sep = ":"
)
print(hours)
hours <- chron(times = hours)
df$times <- chron(times = df$times)

# Hand-written indicator per hour: 1 when hours[k] <= time < hours[k + 1].
df$zero <- as.numeric(df$times >= hours[1] & df$times < hours[2])
df$one <- as.numeric(df$times >= hours[2] & df$times < hours[3])
依此类推,直到:
# Last hour bucket: times at or after hours[24] have no upper boundary to
# compare against, so only the lower bound is tested.
df$twentytree <- 0
df$twentytree[df$times >= hours[24]] <- 1
var times zero one
1 A 22:40:20 0 0
2 B 10:36:29 0 0
3 C 09:23:27 0 0
4 D 12:33:27 0 0
5 E 12:22:22 0 0
6 F 00:17:44 1 0 # in column zero is one because the time event is between 00:00:00 and 01:00:00
但我很确定一定有其他更自动化的方式。
有什么想法吗?
提前致谢。
附注:这个方法我试过了,但还是不行:
# Attempted shortcut (does not work as written): `c4` is not defined in this
# snippet, and `times >= x & times < x` compares each time against the same
# boundary on both sides, so the condition can never be TRUE.
df <- cbind(df, sapply(hours, function(x) as.integer(c4$times >=x & c4$times<x) ) )
我相信这是一个使用 lubridate 和 model.matrix 的解决方案。您需要重命名列。
> library(lubridate)
> # Sample data: labels plus "HH:MM:SS" time strings.
> var <- c("A", "B", "C", "D", "E", "F")
> times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
>
> df <- data.frame(var, times)
> # Parse the time strings, then pull out the hour of each event.
> df$times <- hms(df$times)
> df$hour <- hour(df$times)
> indicators <- model.matrix(~ -1 + factor(df$hour))  # -1 drops the intercept: one 0/1 column per observed hour
> df <- cbind(df, indicators)  # indicator columns keep the "factor(df$hour)<h>" names
> df
var times hour factor(df$hour)0 factor(df$hour)9 factor(df$hour)10 factor(df$hour)12 factor(df$hour)22
1 A 22H 40M 20S 22 0 0 0 0 1
2 B 10H 36M 29S 10 0 0 1 0 0
3 C 9H 23M 27S 9 0 1 0 0 0
4 D 12H 33M 27S 12 0 0 0 1 0
5 E 12H 22M 22S 12 0 0 0 1 0
6 F 17M 44S 0 1 0 0 0 0
最后我甚至不需要转换为日期。我是这样解决的:
library(stringr)
# Build one 0/1 indicator column per hour of the day ("00" ... "23") straight
# from the "HH:MM:SS" strings; no date-time conversion is required.
var <- c("A", "B", "C", "D", "E", "F")
times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
df <- data.frame(var, times)

# Zero-padded hour labels; they double as the indicator column names.
hours <- sprintf("%02d", 0:23)

# The first two characters of each time string are its hour.
df$hour <- substr(df$times, 1, 2)

# outer() always yields an nrow(df) x 24 matrix, so the result keeps one
# column per hour even for a one-row or empty data frame (sapply() would
# simplify those cases to a plain vector or a list instead of a matrix).
indicators <- outer(df$hour, hours, "==")
storage.mode(indicators) <- "integer"
colnames(indicators) <- hours
df <- cbind(df, indicators)
结果:
var times hour 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21
1 A 22:40:20 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 B 10:36:29 10 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
3 C 09:23:27 09 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
4 D 12:33:27 12 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
5 E 12:22:22 12 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
6 F 00:17:44 00 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
我想制作一个带有虚拟变量的数据框,当事件发生时间在一个时间范围(小时)之间时显示 1。这就是我做的:
library(chron)
library(stringr)
# Sample events: a label and an "HH:MM:SS" time string for each row.
var <- c("A", "B", "C", "D", "E", "F")
times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
df <- data.frame(var, times)

# Hour boundaries "00:00:00" ... "23:00:00", echoed to the console and then
# converted to chron times so they compare directly with the event times.
hours <- paste(
  stringr::str_pad(c(0:23), 2, side = "left", pad = 0),
  "00", "00",
  sep = ":"
)
print(hours)
hours <- chron(times = hours)
df$times <- chron(times = df$times)

# Hand-written indicator per hour: 1 when hours[k] <= time < hours[k + 1].
df$zero <- as.numeric(df$times >= hours[1] & df$times < hours[2])
df$one <- as.numeric(df$times >= hours[2] & df$times < hours[3])
依此类推,直到:
# Last hour bucket: times at or after hours[24] have no upper boundary to
# compare against, so only the lower bound is tested.
df$twentytree <- 0
df$twentytree[df$times >= hours[24]] <- 1
var times zero one
1 A 22:40:20 0 0
2 B 10:36:29 0 0
3 C 09:23:27 0 0
4 D 12:33:27 0 0
5 E 12:22:22 0 0
6 F 00:17:44 1 0 # in column zero is one because the time event is between 00:00:00 and 01:00:00
但我很确定一定有其他更自动化的方式。有什么想法吗?
提前致谢。
附注:这个方法我试过了,但还是不行:
# Attempted shortcut (does not work as written): `c4` is not defined in this
# snippet, and `times >= x & times < x` compares each time against the same
# boundary on both sides, so the condition can never be TRUE.
df <- cbind(df, sapply(hours, function(x) as.integer(c4$times >=x & c4$times<x) ) )
我相信这是一个使用 lubridate 和 model.matrix 的解决方案。您需要重命名列。
> library(lubridate)
> # Sample data: labels plus "HH:MM:SS" time strings.
> var <- c("A", "B", "C", "D", "E", "F")
> times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
>
> df <- data.frame(var, times)
> # Parse the time strings, then pull out the hour of each event.
> df$times <- hms(df$times)
> df$hour <- hour(df$times)
> indicators <- model.matrix(~ -1 + factor(df$hour))  # -1 drops the intercept: one 0/1 column per observed hour
> df <- cbind(df, indicators)  # indicator columns keep the "factor(df$hour)<h>" names
> df
var times hour factor(df$hour)0 factor(df$hour)9 factor(df$hour)10 factor(df$hour)12 factor(df$hour)22
1 A 22H 40M 20S 22 0 0 0 0 1
2 B 10H 36M 29S 10 0 0 1 0 0
3 C 9H 23M 27S 9 0 1 0 0 0
4 D 12H 33M 27S 12 0 0 0 1 0
5 E 12H 22M 22S 12 0 0 0 1 0
6 F 17M 44S 0 1 0 0 0 0
最后我甚至不需要转换为日期。我是这样解决的:
library(stringr)
# Build one 0/1 indicator column per hour of the day ("00" ... "23") straight
# from the "HH:MM:SS" strings; no date-time conversion is required.
var <- c("A", "B", "C", "D", "E", "F")
times <- c("22:40:20", "10:36:29", "09:23:27", "12:33:27", "12:22:22", "00:17:44")
df <- data.frame(var, times)

# Zero-padded hour labels; they double as the indicator column names.
hours <- sprintf("%02d", 0:23)

# The first two characters of each time string are its hour.
df$hour <- substr(df$times, 1, 2)

# outer() always yields an nrow(df) x 24 matrix, so the result keeps one
# column per hour even for a one-row or empty data frame (sapply() would
# simplify those cases to a plain vector or a list instead of a matrix).
indicators <- outer(df$hour, hours, "==")
storage.mode(indicators) <- "integer"
colnames(indicators) <- hours
df <- cbind(df, indicators)
结果:
var times hour 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21
1 A 22:40:20 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 B 10:36:29 10 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
3 C 09:23:27 09 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
4 D 12:33:27 12 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
5 E 12:22:22 12 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
6 F 00:17:44 00 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0