我可以在 rollapply 中使用什么函数来检查 window 中每个值的条件?

What function can I use in rollapply to check a condition for each value in the window?

我有一个与下面类似的 data.table。这里的 Returns 完全是编造的,只是用来展示数据的大致格式。

> print(dt, n=144)
     ID Period   Assets Returns
  1:  a 200601        0    <NA>
  2:  a 200602        0    <NA>
  3:  a 200603        0    <NA>
  4:  a 200604 40000000    <NA>
  5:  a 200605 45000000     0.3
  6:  a 200606 48000000    1.43
  7:  a 200607 52000000    1.54
  8:  a 200608 55000000    0.09
  9:  a 200609 57000000   -0.88
 10:  a 200610 49000000   -0.67
 11:  a 200611 16000000    2.13
 12:  a 200612 15000000    2.11
 13:  a 200701 27000000    -0.9
 14:  a 200702 90000000   -0.85
 15:  a 200703 85000000    1.22
 16:  a 200704 58000000    3.22
 17:  a 200705 24000000    1.86
 18:  a 200706 16000000    1.55
 19:  a 200707 57000000    0.91
 20:  a 200708 57000000    0.03
 21:  a 200709 59000000   -0.05
 22:  a 200710 57450000   -2.55
 23:  a 200711 56000000    2.13
 24:  a 200712 65000000    3.16
 25:  a 200801 57000000    0.91
 26:  a 200802 65000000    0.56
 27:  a 200803 70000000    0.64
 28:  a 200804 70000000    0.43
 29:  a 200805 78000000    0.99
 30:  a 200806 43000000   -0.04
 31:  a 200807 56000000       1
 32:  a 200808 33000000    1.87
 33:  a 200809 23000000   -0.33
 34:  a 200810 21000000   -1.98
 35:  a 200811 24000000   -1.05
 36:  a 200812 23000000     0.2
 37:  a 200901 77000000    0.65
 38:  a 200902 78000000    0.66
 39:  a 200903 65000000    0.45
 40:  a 200904 40000000    1.33
 41:  a 200905 45000000   -0.93
 42:  a 200906 48000000   -2.33
 43:  a 200907 52000000   -0.65
 44:  a 200908 55000000   -1.23
 45:  a 200909 57000000    0.02
 46:  a 200910 49000000    0.22
 47:  a 200911 56000000    0.45
 48:  a 200912 45000000    0.19
 49:  a 201001 27000000   -0.92
 50:  a 201002 40000000   -1.78
 51:  a 201003 45000000    0.22
 52:  a 201004 58000000    0.34
 53:  a 201005 24000000    2.12
 54:  a 201006 16000000    0.94
 55:  a 201007 47000000    0.46
 56:  a 201008 57000000   -0.04
 57:  a 201009 59000000    0.67
 58:  a 201010 57450000    0.19
 59:  a 201011 56000000    0.34
 60:  a 201012 65000000    4.12
 61:  a 201101 57000000    2.98
 62:  a 201102 65000000    0.69
 63:  a 201103 70000000    1.21
 64:  a 201104 70000000    0.96
 65:  a 201105        0    <NA>
 66:  a 201106        0    <NA>
 67:  a 201107        0    <NA>
 68:  a 201108        0    <NA>
 69:  a 201109        0    <NA>
 70:  a 201110        0    <NA>
 71:  a 201111        0    <NA>
 72:  a 201112        0    <NA>
 73:  b 200601 29000000    <NA>
 74:  b 200602 40000000    1.34
 75:  b 200603 45000000    2.13
 76:  b 200604 40000000    0.97
 77:  b 200605 45000000     0.3
 78:  b 200606 48000000    1.43
 79:  b 200607 52000000    1.54
 80:  b 200608 55000000    0.09
 81:  b 200609 57000000   -0.88
 82:  b 200610 49000000   -0.67
 83:  b 200611 16000000    2.13
 84:  b 200612 15000000    2.11
 85:  b 200701 27000000    -0.9
 86:  b 200702 90000000   -0.85
 87:  b 200703 85000000    1.22
 88:  b 200704 58000000    3.22
 89:  b 200705 24000000    1.86
 90:  b 200706 16000000    1.55
 91:  b 200707 57000000    0.91
 92:  b 200708 57000000    0.03
 93:  b 200709 59000000   -0.05
 94:  b 200710 57450000   -2.55
 95:  b 200711 56000000    2.13
 96:  b 200712 65000000    3.16
 97:  b 200801 57000000    0.91
 98:  b 200802 65000000    0.56
 99:  b 200803 70000000    0.64
100:  b 200804 70000000    0.43
101:  b 200805 78000000    0.99
102:  b 200806 43000000   -0.04
103:  b 200807 56000000       1
104:  b 200808 33000000    1.87
105:  b 200809 23000000   -0.33
106:  b 200810 21000000   -1.98
107:  b 200811 24000000   -1.05
108:  b 200812 23000000     0.2
109:  b 200901 46000000    0.65
110:  b 200902 47000000    0.66
111:  b 200903 48000000    0.45
112:  b 200904 40000000    1.33
113:  b 200905 45000000   -0.93
114:  b 200906 48000000   -2.33
115:  b 200907 52000000   -0.65
116:  b 200908 55000000   -1.23
117:  b 200909 57000000    0.02
118:  b 200910 49000000    0.22
119:  b 200911 16000000    0.45
120:  b 200912 15000000    0.19
121:  b 201001 27000000   -0.92
122:  b 201002 90000000   -1.78
123:  b 201003 85000000    0.22
124:  b 201004 58000000    0.34
125:  b 201005 24000000    2.12
126:  b 201006 16000000    0.94
127:  b 201007 57000000    0.46
128:  b 201008 57000000   -0.04
129:  b 201009 59000000    0.67
130:  b 201010 57450000    0.19
131:  b 201011 56000000    0.34
132:  b 201012 65000000    4.12
133:  b 201101 57000000    2.98
134:  b 201102 65000000    0.69
135:  b 201103 70000000    1.21
136:  b 201104 70000000    0.96
137:  b 201105 78000000   -0.12
138:  b 201106 43000000   -0.91
139:  b 201107 56000000    1.27
140:  b 201108 33000000    0.45
141:  b 201109 23000000    0.94
142:  b 201110 21000000    0.09
143:  b 201111 24000000   -0.92
144:  b 201112 23000000   -0.92

我想创建一个新列,按每个 ID 检查最近 36 个期间(包括当前期间)内的所有 Returns 值是否都不为 NA。我看到的大多数 rollapply 例子都是用来求和或求标准差的。我并不想对 window 中的值做这类运算;我只是想知道它们是否全部满足“不为 NA”这一条件。

这是我的想法,但我不确定应该使用什么函数,也不确定语法是否正确:

# Asker's attempt, kept exactly as posted.
# NOTE(review): `dt[,Returns]` inside `j` re-reads the whole column instead of
# the rows of the current ID; `.N` appears to land in the slot that is handed
# on as the rolling function even though it is a row count; and the outer
# all() collapses the rolling result to a single logical per ID.
dt[, check1 := all(!is.na(rollapplyr(dt[,Returns], width=36,.N, na.pad=TRUE))), by=ID]

这里是要使用的数据:

library(data.table)
library(zoo)

# Sample data: two IDs ("a" and "b"), each observed monthly from 200601 to
# 201112 (72 periods per ID, 144 rows in total).

# 72 rows of "a" followed by 72 rows of "b".
ID <- rep(c("a", "b"), each = 72L)

# YYYYMM integers: year * 100 + month, months 1..12 recycled within each year,
# and the whole 72-period calendar repeated once per ID.
Period <- rep(rep(2006:2011, each = 12L) * 100L + 1:12, times = 2L)

# Assets per row; each pair of lines below is one calendar year.
Assets <- c(
  # ID "a", 2006-2008
  0L, 0L, 0L, 40000000L, 45000000L, 48000000L,
  52000000L, 55000000L, 57000000L, 49000000L, 16000000L, 15000000L,
  27000000L, 90000000L, 85000000L, 58000000L, 24000000L, 16000000L,
  57000000L, 57000000L, 59000000L, 57450000L, 56000000L, 65000000L,
  57000000L, 65000000L, 70000000L, 70000000L, 78000000L, 43000000L,
  56000000L, 33000000L, 23000000L, 21000000L, 24000000L, 23000000L,
  # ID "a", 2009-2011
  77000000L, 78000000L, 65000000L, 40000000L, 45000000L, 48000000L,
  52000000L, 55000000L, 57000000L, 49000000L, 56000000L, 45000000L,
  27000000L, 40000000L, 45000000L, 58000000L, 24000000L, 16000000L,
  47000000L, 57000000L, 59000000L, 57450000L, 56000000L, 65000000L,
  57000000L, 65000000L, 70000000L, 70000000L, 0L, 0L,
  0L, 0L, 0L, 0L, 0L, 0L,
  # ID "b", 2006-2008
  29000000L, 40000000L, 45000000L, 40000000L, 45000000L, 48000000L,
  52000000L, 55000000L, 57000000L, 49000000L, 16000000L, 15000000L,
  27000000L, 90000000L, 85000000L, 58000000L, 24000000L, 16000000L,
  57000000L, 57000000L, 59000000L, 57450000L, 56000000L, 65000000L,
  57000000L, 65000000L, 70000000L, 70000000L, 78000000L, 43000000L,
  56000000L, 33000000L, 23000000L, 21000000L, 24000000L, 23000000L,
  # ID "b", 2009-2011
  46000000L, 47000000L, 48000000L, 40000000L, 45000000L, 48000000L,
  52000000L, 55000000L, 57000000L, 49000000L, 16000000L, 15000000L,
  27000000L, 90000000L, 85000000L, 58000000L, 24000000L, 16000000L,
  57000000L, 57000000L, 59000000L, 57450000L, 56000000L, 65000000L,
  57000000L, 65000000L, 70000000L, 70000000L, 78000000L, 43000000L,
  56000000L, 33000000L, 23000000L, 21000000L, 24000000L, 23000000L
)

# Returns are doubles, so they are written without the integer suffix `L`
# (a decimal literal such as 0.30L makes R emit a parse warning).
Returns <- c(
  # ID "a", 2006-2008
  NA, NA, NA, NA, 0.30, 1.43, 1.54, 0.09, -0.88, -0.67, 2.13, 2.11,
  -0.90, -0.85, 1.22, 3.22, 1.86, 1.55, 0.91, 0.03, -0.05, -2.55, 2.13, 3.16,
  0.91, 0.56, 0.64, 0.43, 0.99, -0.04, 1.00, 1.87, -0.33, -1.98, -1.05, 0.20,
  # ID "a", 2009-2011
  0.65, 0.66, 0.45, 1.33, -0.93, -2.33, -0.65, -1.23, 0.02, 0.22, 0.45, 0.19,
  -0.92, -1.78, 0.22, 0.34, 2.12, 0.94, 0.46, -0.04, 0.67, 0.19, 0.34, 4.12,
  2.98, 0.69, 1.21, 0.96, NA, NA, NA, NA, NA, NA, NA, NA,
  # ID "b", 2006-2008
  NA, 1.34, 2.13, 0.97, 0.30, 1.43, 1.54, 0.09, -0.88, -0.67, 2.13, 2.11,
  -0.90, -0.85, 1.22, 3.22, 1.86, 1.55, 0.91, 0.03, -0.05, -2.55, 2.13, 3.16,
  0.91, 0.56, 0.64, 0.43, 0.99, -0.04, 1.00, 1.87, -0.33, -1.98, -1.05, 0.20,
  # ID "b", 2009-2011
  0.65, 0.66, 0.45, 1.33, -0.93, -2.33, -0.65, -1.23, 0.02, 0.22, 0.45, 0.19,
  -0.92, -1.78, 0.22, 0.34, 2.12, 0.94, 0.46, -0.04, 0.67, 0.19, 0.34, 4.12,
  2.98, 0.69, 1.21, 0.96, -0.12, -0.91, 1.27, 0.45, 0.94, 0.09, -0.92, -0.92
)

 
# Build the table directly from the vectors so each column keeps its own type
# (character ID, integer Period and Assets, numeric Returns). Routing them
# through cbind() first produces a character matrix, which turns every column,
# including Returns, into strings (its NAs then print as <NA>).
dt <- data.table(ID, Period, Assets, Returns)

谢谢!

你已经很接近了。你需要把 any(is.na(x)) 包装成匿名函数传给 rollapplyr。注意:这样得到的 check1 在窗口内存在 NA 时为 TRUE(不足 36 期的行由 fill=TRUE 填充为 TRUE);如果需要“全部不为 NA”的标记,对结果取反即可:

# Per ID, set check1 to TRUE for every row whose trailing 36-row window of
# Returns holds at least one NA. Rows that do not yet have a full window are
# filled with TRUE as well.
dt[, check1 := rollapplyr(
  Returns,
  width = 36,
  FUN = function(window) any(is.na(window)),
  fill = TRUE
), by = ID]
dt
#>      ID Period   Assets Returns check1
#>   1:  a 200601        0    <NA>   TRUE
#>   2:  a 200602        0    <NA>   TRUE
#>   3:  a 200603        0    <NA>   TRUE
#>   4:  a 200604 40000000    <NA>   TRUE
#>   5:  a 200605 45000000     0.3   TRUE
#>  ---                                  
#> 140:  b 201108 33000000    0.45  FALSE
#> 141:  b 201109 23000000    0.94  FALSE
#> 142:  b 201110 21000000    0.09  FALSE
#> 143:  b 201111 24000000   -0.92  FALSE
#> 144:  b 201112 23000000   -0.92  FALSE

或者,更简洁的写法(正如 G Grothendieck 所指出的):

# Same rolling check as with any(is.na(x)), using base R's anyNA() as the
# window function: TRUE when the trailing 36-row window of Returns contains an
# NA, with incomplete windows filled with TRUE.
dt[, check1 := rollapplyr(
  Returns,
  width = 36,
  FUN = anyNA,
  fill = TRUE
), by = ID]