R - 创建的函数不能正确输出
R - created function does not output correctly
我试图找到两个日期之间的差异,然后将该值分组到因子水平中。我以前用其他数值而不是日期做过这个,无法弄清楚我做错了什么。我在创建函数时没有遇到任何错误,但尝试了两种不同的方法来应用它。
我最初是按天计算的,因为稍后我需要一个天的值。以周为单位分组,是为了以后可视化创建一些层次。
# Lead_Time: how far in advance each appointment was booked, in days
# (appointment date/time minus the date the appointment was created).
# as.integer() drops the difftime class and truncates toward zero. It does NOT
# remove negatives: an appointment created after its start time still yields
# a negative lead time.
df7$Lead_Time <- as.integer(
  difftime(df7$Appointment_Date_Time, df7$appt_create_date, units = "days")
)
# Group a single lead time (in days) into a week-bucket label.
#
# Scalar helper: the if/else chain below inspects exactly one value, so it must
# be applied element by element rather than being handed a whole column.
# A missing value (NA) is not handled and makes the first `if` fail.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each branch only tests its upper bound because every earlier branch has
# already claimed the smaller values. That leaves no gaps between buckets, so
# boundary values such as 29, 57, 85, 113 and 141 (or 28.5) always get a label
# instead of matching no branch and returning NULL.
#
# Lead_Time: one non-missing number of days.
# Returns:   a length-one character label.
group_Lead_Time <- function(Lead_Time) {
  if (Lead_Time <= 28) {
    "0-4 Weeks"
  } else if (Lead_Time <= 56) {
    "5-8 Weeks"
  } else if (Lead_Time <= 84) {
    "8-12 Weeks"
  } else if (Lead_Time <= 112) {
    "12-16 Weeks"
  } else if (Lead_Time <= 140) {
    "16-20 Weeks"
  } else if (Lead_Time <= 168) {
    "20-24 Weeks"
  } else {
    "24+ Weeks"
  }
}
# Attempt 1: pass the whole Lead_Time column to group_Lead_Time() in one call
# and convert the result to a factor. group_Lead_Time() branches with a scalar
# `if`, so only the first element of the column is tested (see the
# "condition has length > 1" warnings quoted below).
df7$Lead_Time_Grouped <- as.factor(group_Lead_Time(df7$Lead_Time))
# Attempt 2: call group_Lead_Time() once per element with sapply(). This
# assigns to the same column as attempt 1. It stops with "missing value where
# TRUE/FALSE needed" (quoted below), i.e. an `if` condition evaluated to NA.
df7$Lead_Time_Grouped <- sapply(df7$Lead_Time,group_Lead_Time)
如果有人有更好的方法来处理负值,我也愿意接受。
这些是我收到的错误消息:
> df7$Lead_Time_Grouped <- as.factor(group_Lead_Time(df7$Lead_Time))
Warning messages:
1: In if (Lead_Time <= 28) { :
the condition has length > 1 and only the first element will be used
2: In if (Lead_Time > 29 & Lead_Time <= 56) { :
the condition has length > 1 and only the first element will be used
3: In if (Lead_Time > 57 & Lead_Time <= 84) { :
the condition has length > 1 and only the first element will be used
4: In if (Lead_Time > 85 & Lead_Time <= 112) { :
the condition has length > 1 and only the first element will be used
> df7$Lead_Time_Grouped <- sapply(df7$Lead_Time,group_Lead_Time)
Error in if (Lead_Time <= 28) { : missing value where TRUE/FALSE needed
UPDATE/EDIT:感谢您为我指明了 ifelse 的方向。能够使用以下代码解决我的挑战。
# Group lead times (in days) into week-bucket labels.
#
# Vectorised via nested ifelse(): accepts a whole column and returns one label
# per element. An NA lead time yields NA rather than a label.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each level only tests its upper bound; anything smaller was already labelled
# by an outer ifelse(). Testing a lower bound as well (e.g. `> 29`) would leave
# gaps, and values in a gap (29, 57, 85, 113, 141, or 28.5) would fall through
# to the final "24+ Weeks" default.
#
# appt_lead_time: numeric vector of lead times in days.
# Returns:        character vector of bucket labels, same length as the input.
group_Lead_Time <- function(appt_lead_time) {
  ifelse(appt_lead_time <= 28, "0-4 Weeks",
    ifelse(appt_lead_time <= 56, "5-8 Weeks",
      ifelse(appt_lead_time <= 84, "8-12 Weeks",
        ifelse(appt_lead_time <= 112, "12-16 Weeks",
          ifelse(appt_lead_time <= 140, "16-20 Weeks",
            ifelse(appt_lead_time <= 168, "20-24 Weeks",
              "24+ Weeks"))))))
}
# Label every lead time in df7 with its week bucket and store the labels in
# the appt_lead_time_weeks column.
# NOTE(review): the earlier snippet stores the day count in df7$Lead_Time;
# confirm that df7 really has an appt_lead_time column.
df7$appt_lead_time_weeks <- group_Lead_Time(df7$appt_lead_time)
这是一个小的玩具示例,或许能帮助您了解如何使用 cut()。这里假设您已经把提前期计算成了整数。
# Toy data: 100 lead times drawn uniformly between -10 and 200 days,
# rounded to whole days
set.seed(1234)
lead_times <- as.integer(round(runif(n = 100, min = -10, max = 200)))
# Optional, shown for illustration only: clamp negative lead times to 0
# (i.e. treat them as "no lead time")
lead_times_adj <- pmax(lead_times, 0)
# Days -> weeks
lead_times_weeks <- lead_times_adj / 7
# Bin the week counts into labelled factor levels, one level per 4-week span
group_lead_time <- cut(
  lead_times_weeks,
  breaks = c(-Inf, seq(from = 4, to = 24, by = 4), Inf),
  labels = c(
    "0-4 Weeks", "4-8 Weeks", "8-12 Weeks", "12-16 Weeks",
    "16-20 Weeks", "20-24 Weeks", "24+ Weeks"
  )
)
# Show the original day counts next to their bucket
data.frame(
  Lead_Time = lead_times,
  Lead_Time_Grouped = group_lead_time
)
所以你会得到这样的东西...
Lead_Time Lead_Time_Grouped
1 14 0-4 Weeks
2 121 16-20 Weeks
3 118 16-20 Weeks
4 121 16-20 Weeks
5 171 24+ Weeks
6 124 16-20 Weeks
7 -8 0-4 Weeks
8 39 4-8 Weeks
9 130 16-20 Weeks
...
在评论的帮助下,我得出了以下解决方案:
# Group lead times (in days) into week-bucket labels.
#
# Vectorised via nested ifelse(): accepts a whole column and returns one label
# per element. An NA lead time yields NA rather than a label.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each level only tests its upper bound; anything smaller was already labelled
# by an outer ifelse(). Testing a lower bound as well (e.g. `> 29`) would leave
# gaps, and values in a gap (29, 57, 85, 113, 141, or 28.5) would fall through
# to the final "24+ Weeks" default.
#
# appt_lead_time: numeric vector of lead times in days.
# Returns:        character vector of bucket labels, same length as the input.
group_Lead_Time <- function(appt_lead_time) {
  ifelse(appt_lead_time <= 28, "0-4 Weeks",
    ifelse(appt_lead_time <= 56, "5-8 Weeks",
      ifelse(appt_lead_time <= 84, "8-12 Weeks",
        ifelse(appt_lead_time <= 112, "12-16 Weeks",
          ifelse(appt_lead_time <= 140, "16-20 Weeks",
            ifelse(appt_lead_time <= 168, "20-24 Weeks",
              "24+ Weeks"))))))
}
# Label every lead time in df7 with its week bucket and store the labels in
# the appt_lead_time_weeks column.
# NOTE(review): the earlier snippet stores the day count in df7$Lead_Time;
# confirm that df7 really has an appt_lead_time column.
df7$appt_lead_time_weeks <- group_Lead_Time(df7$appt_lead_time)
我试图找到两个日期之间的差异,然后将该值分组到因子水平中。我以前用其他数值而不是日期做过这个,无法弄清楚我做错了什么。我在创建函数时没有遇到任何错误,但尝试了两种不同的方法来应用它。
我最初是按天计算的,因为稍后我需要一个天的值。以周为单位分组,是为了以后可视化创建一些层次。
# Lead_Time: how far in advance each appointment was booked, in days
# (appointment date/time minus the date the appointment was created).
# as.integer() drops the difftime class and truncates toward zero. It does NOT
# remove negatives: an appointment created after its start time still yields
# a negative lead time.
df7$Lead_Time <- as.integer(
  difftime(df7$Appointment_Date_Time, df7$appt_create_date, units = "days")
)
# Group a single lead time (in days) into a week-bucket label.
#
# Scalar helper: the if/else chain below inspects exactly one value, so it must
# be applied element by element rather than being handed a whole column.
# A missing value (NA) is not handled and makes the first `if` fail.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each branch only tests its upper bound because every earlier branch has
# already claimed the smaller values. That leaves no gaps between buckets, so
# boundary values such as 29, 57, 85, 113 and 141 (or 28.5) always get a label
# instead of matching no branch and returning NULL.
#
# Lead_Time: one non-missing number of days.
# Returns:   a length-one character label.
group_Lead_Time <- function(Lead_Time) {
  if (Lead_Time <= 28) {
    "0-4 Weeks"
  } else if (Lead_Time <= 56) {
    "5-8 Weeks"
  } else if (Lead_Time <= 84) {
    "8-12 Weeks"
  } else if (Lead_Time <= 112) {
    "12-16 Weeks"
  } else if (Lead_Time <= 140) {
    "16-20 Weeks"
  } else if (Lead_Time <= 168) {
    "20-24 Weeks"
  } else {
    "24+ Weeks"
  }
}
# Attempt 1: pass the whole Lead_Time column to group_Lead_Time() in one call
# and convert the result to a factor. group_Lead_Time() branches with a scalar
# `if`, so only the first element of the column is tested (see the
# "condition has length > 1" warnings quoted below).
df7$Lead_Time_Grouped <- as.factor(group_Lead_Time(df7$Lead_Time))
# Attempt 2: call group_Lead_Time() once per element with sapply(). This
# assigns to the same column as attempt 1. It stops with "missing value where
# TRUE/FALSE needed" (quoted below), i.e. an `if` condition evaluated to NA.
df7$Lead_Time_Grouped <- sapply(df7$Lead_Time,group_Lead_Time)
如果有人有更好的方法来处理负值,我也愿意接受。 这些是我收到的错误消息:
> df7$Lead_Time_Grouped <- as.factor(group_Lead_Time(df7$Lead_Time))
Warning messages:
1: In if (Lead_Time <= 28) { :
the condition has length > 1 and only the first element will be used
2: In if (Lead_Time > 29 & Lead_Time <= 56) { :
the condition has length > 1 and only the first element will be used
3: In if (Lead_Time > 57 & Lead_Time <= 84) { :
the condition has length > 1 and only the first element will be used
4: In if (Lead_Time > 85 & Lead_Time <= 112) { :
the condition has length > 1 and only the first element will be used
> df7$Lead_Time_Grouped <- sapply(df7$Lead_Time,group_Lead_Time)
Error in if (Lead_Time <= 28) { : missing value where TRUE/FALSE needed
UPDATE/EDIT:感谢您为我指明了 ifelse 的方向。能够使用以下代码解决我的挑战。
# Group lead times (in days) into week-bucket labels.
#
# Vectorised via nested ifelse(): accepts a whole column and returns one label
# per element. An NA lead time yields NA rather than a label.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each level only tests its upper bound; anything smaller was already labelled
# by an outer ifelse(). Testing a lower bound as well (e.g. `> 29`) would leave
# gaps, and values in a gap (29, 57, 85, 113, 141, or 28.5) would fall through
# to the final "24+ Weeks" default.
#
# appt_lead_time: numeric vector of lead times in days.
# Returns:        character vector of bucket labels, same length as the input.
group_Lead_Time <- function(appt_lead_time) {
  ifelse(appt_lead_time <= 28, "0-4 Weeks",
    ifelse(appt_lead_time <= 56, "5-8 Weeks",
      ifelse(appt_lead_time <= 84, "8-12 Weeks",
        ifelse(appt_lead_time <= 112, "12-16 Weeks",
          ifelse(appt_lead_time <= 140, "16-20 Weeks",
            ifelse(appt_lead_time <= 168, "20-24 Weeks",
              "24+ Weeks"))))))
}
# Label every lead time in df7 with its week bucket and store the labels in
# the appt_lead_time_weeks column.
# NOTE(review): the earlier snippet stores the day count in df7$Lead_Time;
# confirm that df7 really has an appt_lead_time column.
df7$appt_lead_time_weeks <- group_Lead_Time(df7$appt_lead_time)
这是一个小的玩具示例,或许能帮助您了解如何使用 cut()。这里假设您已经把提前期计算成了整数。
# Toy data: 100 lead times drawn uniformly between -10 and 200 days,
# rounded to whole days
set.seed(1234)
lead_times <- as.integer(round(runif(n = 100, min = -10, max = 200)))
# Optional, shown for illustration only: clamp negative lead times to 0
# (i.e. treat them as "no lead time")
lead_times_adj <- pmax(lead_times, 0)
# Days -> weeks
lead_times_weeks <- lead_times_adj / 7
# Bin the week counts into labelled factor levels, one level per 4-week span
group_lead_time <- cut(
  lead_times_weeks,
  breaks = c(-Inf, seq(from = 4, to = 24, by = 4), Inf),
  labels = c(
    "0-4 Weeks", "4-8 Weeks", "8-12 Weeks", "12-16 Weeks",
    "16-20 Weeks", "20-24 Weeks", "24+ Weeks"
  )
)
# Show the original day counts next to their bucket
data.frame(
  Lead_Time = lead_times,
  Lead_Time_Grouped = group_lead_time
)
所以你会得到这样的东西...
Lead_Time Lead_Time_Grouped
1 14 0-4 Weeks
2 121 16-20 Weeks
3 118 16-20 Weeks
4 121 16-20 Weeks
5 171 24+ Weeks
6 124 16-20 Weeks
7 -8 0-4 Weeks
8 39 4-8 Weeks
9 130 16-20 Weeks
...
在评论的帮助下,我得出了以下解决方案:
# Group lead times (in days) into week-bucket labels.
#
# Vectorised via nested ifelse(): accepts a whole column and returns one label
# per element. An NA lead time yields NA rather than a label.
#
# Buckets are contiguous and closed on the right:
#   (-Inf, 28], (28, 56], (56, 84], (84, 112], (112, 140], (140, 168], (168, Inf)
# Each level only tests its upper bound; anything smaller was already labelled
# by an outer ifelse(). Testing a lower bound as well (e.g. `> 29`) would leave
# gaps, and values in a gap (29, 57, 85, 113, 141, or 28.5) would fall through
# to the final "24+ Weeks" default.
#
# appt_lead_time: numeric vector of lead times in days.
# Returns:        character vector of bucket labels, same length as the input.
group_Lead_Time <- function(appt_lead_time) {
  ifelse(appt_lead_time <= 28, "0-4 Weeks",
    ifelse(appt_lead_time <= 56, "5-8 Weeks",
      ifelse(appt_lead_time <= 84, "8-12 Weeks",
        ifelse(appt_lead_time <= 112, "12-16 Weeks",
          ifelse(appt_lead_time <= 140, "16-20 Weeks",
            ifelse(appt_lead_time <= 168, "20-24 Weeks",
              "24+ Weeks"))))))
}
# Label every lead time in df7 with its week bucket and store the labels in
# the appt_lead_time_weeks column.
# NOTE(review): the earlier snippet stores the day count in df7$Lead_Time;
# confirm that df7 really has an appt_lead_time column.
df7$appt_lead_time_weeks <- group_Lead_Time(df7$appt_lead_time)