计算 R 中数据框某一列里数值之间 NaN 的平均个数
Calculate the average number of NaNs between numeric values in a data frame column in R
我的数据是这样的:
dput(head(dataf,40))
structure(list(Time = c(20000224, 20000225, 20000226, 20000227,
20000228, 20000229, 20000301, 20000302, 20000303, 20000304, 20000305,
20000306, 20000307, 20000308, 20000309, 20000310, 20000311, 20000312,
20000313, 20000314, 20000315, 20000316, 20000317, 20000318, 20000319,
20000320, 20000321, 20000322, 20000323, 20000324, 20000325, 20000326,
20000327, 20000328, 20000329, 20000330, 20000331, 20000401, 20000402,
20000403), NDVI = c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, 0.562256741, 0.560792828, 0.559272321, 0.554727235,
NaN, NaN, NaN, NaN, NaN, NaN, NaN, 0.506596306, 0.504768168,
0.536223787, 0.5476394, 0.520311992, 0.52862363, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, NaN, NaN)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
我想知道数据框的列 (NDVI) 中两个数值之间 NaN 的平均个数。
参见下面的示例:
Date
NDVI
20000224
NaN
20000225
NaN
20000226
NaN
20000227
NaN
20000228
NaN
20000229
NaN
20000301
NaN
20000302
NaN
20000303
NaN
20000304
NaN
20000305
NaN
20000306
NaN
20000307
NaN
20000308
0.562256741
20000309
0.560792828
20000310
0.559272321
20000311
0.554727235
20000312
NaN
20000313
NaN
20000314
NaN
20000315
NaN
20000316
NaN
20000317
NaN
20000318
NaN
20000319
0.506596306
20000320
0.504768168
20000321
0.536223787
20000322
0.5476394
20000323
0.520311992
20000324
0.52862363
平均值为:
包括初始 NaN:10 NaN
不包括第一个 NaN:7 NaN
请注意,我特意选用了这个数据框,因为它以 NaN 开头。我希望得到两种解决方案:一种把开头的 NaN 也计算在内,求数值之间 NaN 的平均个数;另一种则只从第一个数值之后才开始统计 NaN。
任何帮助将不胜感激。
library(dplyr)
# List the length of every run of consecutive missing NDVI values.
# Each non-missing NDVI value increments the running counter, so all the
# missing rows that follow it share one group id (is.na() is TRUE for NaN).
dataf %>%
  group_by(run_id = cumsum(!is.na(NDVI))) %>%
  # Keep only the missing rows; groups without any missing row disappear.
  filter(is.na(NDVI)) %>%
  # The number of remaining rows per group is the length of that run.
  summarise(consecutive_NaNs = n()) %>%
  select(consecutive_NaNs)
Returns:
consecutive_NaNs
<int>
1 13
2 7
3 10
如果你只想要平均值:
# Average length of the runs of consecutive missing NDVI values.
# Each non-missing NDVI value increments the running counter, so all the
# missing rows that follow it share one group id (is.na() is TRUE for NaN).
dataf %>%
  group_by(run_id = cumsum(!is.na(NDVI))) %>%
  # Keep only the missing rows; groups without any missing row disappear.
  filter(is.na(NDVI)) %>%
  # One row per run, holding the length of that run.
  summarise(consecutive_NaNs = n()) %>%
  # The previous summarise() dropped the grouping, so this averages all runs.
  summarise(mean_run_length = mean(consecutive_NaNs))
Returns:
mean_run_length
<dbl>
1 10
P.S.
如果您使用给出所有游程 (run) 长度的解决方案,您可以自行决定是否要包含开头的那一段游程。
我的数据是这样的:
dput(head(dataf,40))
structure(list(Time = c(20000224, 20000225, 20000226, 20000227,
20000228, 20000229, 20000301, 20000302, 20000303, 20000304, 20000305,
20000306, 20000307, 20000308, 20000309, 20000310, 20000311, 20000312,
20000313, 20000314, 20000315, 20000316, 20000317, 20000318, 20000319,
20000320, 20000321, 20000322, 20000323, 20000324, 20000325, 20000326,
20000327, 20000328, 20000329, 20000330, 20000331, 20000401, 20000402,
20000403), NDVI = c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, 0.562256741, 0.560792828, 0.559272321, 0.554727235,
NaN, NaN, NaN, NaN, NaN, NaN, NaN, 0.506596306, 0.504768168,
0.536223787, 0.5476394, 0.520311992, 0.52862363, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, NaN, NaN)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
我想知道数据框的列 (NDVI) 中两个数值之间 NaN 的平均个数。
参见下面的示例:
Date | NDVI |
---|---|
20000224 | NaN |
20000225 | NaN |
20000226 | NaN |
20000227 | NaN |
20000228 | NaN |
20000229 | NaN |
20000301 | NaN |
20000302 | NaN |
20000303 | NaN |
20000304 | NaN |
20000305 | NaN |
20000306 | NaN |
20000307 | NaN |
20000308 | 0.562256741 |
20000309 | 0.560792828 |
20000310 | 0.559272321 |
20000311 | 0.554727235 |
20000312 | NaN |
20000313 | NaN |
20000314 | NaN |
20000315 | NaN |
20000316 | NaN |
20000317 | NaN |
20000318 | NaN |
20000319 | 0.506596306 |
20000320 | 0.504768168 |
20000321 | 0.536223787 |
20000322 | 0.5476394 |
20000323 | 0.520311992 |
20000324 | 0.52862363 |
平均值为:
包括初始 NaN:10 NaN
不包括第一个 NaN:7 NaN
请注意,我特意选用了这个数据框,因为它以 NaN 开头。我希望得到两种解决方案:一种把开头的 NaN 也计算在内,求数值之间 NaN 的平均个数;另一种则只从第一个数值之后才开始统计 NaN。
任何帮助将不胜感激。
library(dplyr)
# List the length of every run of consecutive missing NDVI values.
# Each non-missing NDVI value increments the running counter, so all the
# missing rows that follow it share one group id (is.na() is TRUE for NaN).
dataf %>%
  group_by(run_id = cumsum(!is.na(NDVI))) %>%
  # Keep only the missing rows; groups without any missing row disappear.
  filter(is.na(NDVI)) %>%
  # The number of remaining rows per group is the length of that run.
  summarise(consecutive_NaNs = n()) %>%
  select(consecutive_NaNs)
Returns:
consecutive_NaNs
<int>
1 13
2 7
3 10
如果你只想要平均值:
# Average length of the runs of consecutive missing NDVI values.
# Each non-missing NDVI value increments the running counter, so all the
# missing rows that follow it share one group id (is.na() is TRUE for NaN).
dataf %>%
  group_by(run_id = cumsum(!is.na(NDVI))) %>%
  # Keep only the missing rows; groups without any missing row disappear.
  filter(is.na(NDVI)) %>%
  # One row per run, holding the length of that run.
  summarise(consecutive_NaNs = n()) %>%
  # The previous summarise() dropped the grouping, so this averages all runs.
  summarise(mean_run_length = mean(consecutive_NaNs))
Returns:
mean_run_length
<dbl>
1 10
P.S.
如果您使用给出所有游程 (run) 长度的解决方案,您可以自行决定是否要包含开头的那一段游程。