使用基于最大差异的值查找对数差异
Find log difference using values based on max difference
这看起来应该很简单,但我一直没能做到。我使用基础函数 diff() 创建了一个汇总输出,其中的汇总列 max_predicted_diff 是间隔 15 个值 (lag = 15) 的最大差值。现在,我想用计算 max_predicted_diff 时所用的那两个值,来计算它们之间的对数差。我在 dse 包中找到了 diffLog,但在 dplyr 中,我不知道如何让它使用计算 max_predicted_diff 时所用的相同值。
例子
print(head(df,10))
Sample Predicted
1 apple 0.7356986
2 apple 0.7388222
3 apple 0.7419447
4 apple 0.7450658
5 apple 0.7481857
6 apple 0.7513042
7 apple 0.7544212
8 apple 0.7575368
9 apple 0.7606509
10 apple 0.7637635
library(dplyr)
# Largest difference between Predicted values that are 15 rows apart,
# returned as a one-row summary.
df %>%
  summarise(
    max_predicted_diff = Predicted %>% diff(lag = 15) %>% max()
  )
max_predicted_diff
1 0.04670478
如何找出得到 0.04670478 这个结果所用的两个值?然后,如何对这两个值的对数之差进行汇总?我已经用 max() 得到了 max_predicted_diff,但要计算对数值之差,应该使用什么汇总函数?我认为 max() 在这里行不通,因为 diffLog 未必会使用与 max_predicted_diff 相同的值(只是取了对数)?
使用 dse 包中的 diffLog(),我可以轻松计算对数差,但我不知道它使用了哪些值,也不知道如何让它使用求 max_predicted_diff 时所用的那两个值。
library(dse)
# Largest log difference (as computed by dse's diffLog()) between Predicted
# values that are 15 rows apart, returned as a one-row summary.
df %>%
  summarise(
    max_predicted_diff_log = Predicted %>% diffLog(lag = 15) %>% max()
  )
max_predicted_diff_log
1 0.06154992
可重现数据
# Reproducible example data (dput() output): 64 rows with a constant Sample
# label and a numeric Predicted series.
df <- structure(list(Sample = c("apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple"), Predicted = c(0.735698569365871,
0.738822222617743, 0.741944657028027, 0.74506582323819, 0.748185672193904,
0.751304155146149, 0.754421223652273, 0.75753682957702, 0.760650925093515,
0.76376346268421, 0.766874395141795, 0.76998367557007, 0.773091257384776,
0.776197094314395, 0.779301140400904, 0.782403350000502, 0.785503677784295,
0.788602078738943, 0.791698508167276, 0.794792921688872, 0.797885275240596,
0.800975525077113, 0.804063627771357, 0.807149540214967, 0.810233219618698,
0.813314623512785, 0.81639370974728, 0.819470436492359, 0.822544762238589,
0.825616645797166, 0.828686046300123, 0.831752923200501, 0.83481723627249,
0.837878945611542, 0.84093801163445, 0.843994395079395, 0.847048057005967,
0.850098958795148, 0.853147062149278, 0.856192329091979, 0.859234721968058,
0.862274203443374, 0.865310736504688, 0.868344284459473, 0.871374810935701,
0.874402279881605, 0.877426655565415, 0.880447902575054, 0.883465985817829,
0.886480870520078, 0.889492522226799, 0.892500906801256, 0.895505990424551,
0.898507739595181, 0.901506121128565, 0.904501102156547, 0.907492650126881,
0.910480732802683, 0.913465318261867, 0.91644637489656, 0.919423871412485,
0.922397776828334, 0.925368060475109, 0.928334691995449)), row.names = c(NA,
64L), class = "data.frame")
这就是你想要的吗?
library(dplyr, warn.conflicts = FALSE)
# Pair every Predicted value with the value 15 rows earlier, compute both the
# plain and the log difference of each pair, and keep the row(s) with the
# largest plain difference. The kept row shows the two values behind
# max_predicted_diff together with their log difference.
df %>%
  mutate(
    lag15 = lag(Predicted, n = 15),
    lag_diff = Predicted - lag15,
    # Log difference of the same pair of values.
    log_diff = log(Predicted) - log(lag15)
  ) %>%
  # Rows without a value 15 rows earlier have NA in lag15, hence na.rm = TRUE.
  filter(lag_diff == max(lag_diff, na.rm = TRUE))
#>   Sample Predicted     lag15   lag_diff   log_diff
#> 1  apple 0.7824034 0.7356986 0.04670478 0.06154992
这看起来应该很简单,但我一直没能做到。我使用基础函数 diff() 创建了一个汇总输出,其中的汇总列 max_predicted_diff 是间隔 15 个值 (lag = 15) 的最大差值。现在,我想用计算 max_predicted_diff 时所用的那两个值,来计算它们之间的对数差。我在 dse 包中找到了 diffLog,但在 dplyr 中,我不知道如何让它使用计算 max_predicted_diff 时所用的相同值。
例子
print(head(df,10))
Sample Predicted
1 apple 0.7356986
2 apple 0.7388222
3 apple 0.7419447
4 apple 0.7450658
5 apple 0.7481857
6 apple 0.7513042
7 apple 0.7544212
8 apple 0.7575368
9 apple 0.7606509
10 apple 0.7637635
library(dplyr)
# Largest difference between Predicted values that are 15 rows apart,
# returned as a one-row summary.
df %>%
  summarise(
    max_predicted_diff = Predicted %>% diff(lag = 15) %>% max()
  )
max_predicted_diff
1 0.04670478
如何找出得到 0.04670478 这个结果所用的两个值?然后,如何对这两个值的对数之差进行汇总?我已经用 max() 得到了 max_predicted_diff,但要计算对数值之差,应该使用什么汇总函数?我认为 max() 在这里行不通,因为 diffLog 未必会使用与 max_predicted_diff 相同的值(只是取了对数)?
使用 dse 包中的 diffLog(),我可以轻松计算对数差,但我不知道它使用了哪些值,也不知道如何让它使用求 max_predicted_diff 时所用的那两个值。
library(dse)
# Largest log difference (as computed by dse's diffLog()) between Predicted
# values that are 15 rows apart, returned as a one-row summary.
df %>%
  summarise(
    max_predicted_diff_log = Predicted %>% diffLog(lag = 15) %>% max()
  )
max_predicted_diff_log
1 0.06154992
可重现数据
# Reproducible example data (dput() output): 64 rows with a constant Sample
# label and a numeric Predicted series.
df <- structure(list(Sample = c("apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple", "apple", "apple", "apple",
"apple", "apple", "apple", "apple"), Predicted = c(0.735698569365871,
0.738822222617743, 0.741944657028027, 0.74506582323819, 0.748185672193904,
0.751304155146149, 0.754421223652273, 0.75753682957702, 0.760650925093515,
0.76376346268421, 0.766874395141795, 0.76998367557007, 0.773091257384776,
0.776197094314395, 0.779301140400904, 0.782403350000502, 0.785503677784295,
0.788602078738943, 0.791698508167276, 0.794792921688872, 0.797885275240596,
0.800975525077113, 0.804063627771357, 0.807149540214967, 0.810233219618698,
0.813314623512785, 0.81639370974728, 0.819470436492359, 0.822544762238589,
0.825616645797166, 0.828686046300123, 0.831752923200501, 0.83481723627249,
0.837878945611542, 0.84093801163445, 0.843994395079395, 0.847048057005967,
0.850098958795148, 0.853147062149278, 0.856192329091979, 0.859234721968058,
0.862274203443374, 0.865310736504688, 0.868344284459473, 0.871374810935701,
0.874402279881605, 0.877426655565415, 0.880447902575054, 0.883465985817829,
0.886480870520078, 0.889492522226799, 0.892500906801256, 0.895505990424551,
0.898507739595181, 0.901506121128565, 0.904501102156547, 0.907492650126881,
0.910480732802683, 0.913465318261867, 0.91644637489656, 0.919423871412485,
0.922397776828334, 0.925368060475109, 0.928334691995449)), row.names = c(NA,
64L), class = "data.frame")
这就是你想要的吗?
library(dplyr, warn.conflicts = FALSE)
# Pair every Predicted value with the value 15 rows earlier, compute both the
# plain and the log difference of each pair, and keep the row(s) with the
# largest plain difference. The kept row shows the two values behind
# max_predicted_diff together with their log difference.
df %>%
  mutate(
    lag15 = lag(Predicted, n = 15),
    lag_diff = Predicted - lag15,
    # Log difference of the same pair of values.
    log_diff = log(Predicted) - log(lag15)
  ) %>%
  # Rows without a value 15 rows earlier have NA in lag15, hence na.rm = TRUE.
  filter(lag_diff == max(lag_diff, na.rm = TRUE))
#>   Sample Predicted     lag15   lag_diff   log_diff
#> 1  apple 0.7824034 0.7356986 0.04670478 0.06154992