R:删除不完整的数据序列

R: Delete incomplete data sequences

我记录了三个土壤 CO2 采样点的测量值,它们通过三个阀门进入同一台测量仪器。每个阀门有三个测量值(来自每个采样点)。这台仪器由太阳能供电,因此测量过程中经常断电。电源恢复后,测量会从阀门 1、测量 1 重新开始。有时,但并非总是如此(例如第 27 行),会出现一行 batt. batt. batt. 标志(表示电池电量耗尽)。我只想选出从阀门 1 到阀门 2 序列完整的数据。

我有这样的数据:

# Measurement   Valve   Value
#1         1        1   0.123
#2         2        1   0.124
#3         3        1   0.125
#4         1        2   0.126
#5         2        2   0.127
#6         3        2   0.128
#7         1        3   0.129
#8         2        3   0.13
#9         3        3   0.131
#10    batt.    batt.   batt.
#11        1        1   0.132
#12        2        1   0.133
#13        3        1   0.134
#14        1        2   0.135
#15        2        2   0.136
#16        3        2   0.137
#17        1        3   0.138
#18        2        3   0.139
#19     bat.     bat.    bat.
#20        1        1   0.141
#21     bat.     bat.    bat.
#22        1        1   0.141
#23        2        1   0.142
#24        3        1   0.143
#25     bat.     bat.    bat.
#26        1        1   0.141
#27        1        1   0.141
#28        2        1   0.142
#29        3        1   0.143
#30        1        2   0.144
#31        2        2   0.145
#32        3        2   0.146
#33        1        3   0.147
#34        2        3   0.148
#35        3        3   0.149

我想获取这样的数据:

#   Measurement Valve   Value
#1            1     1   0.123
#2            2     1   0.124
#3            3     1   0.125
#4            1     2   0.126
#5            2     2   0.127
#6            3     2   0.128
#7            1     1   0.132
#8            2     1   0.133
#9            3     1   0.134
#10           1     2   0.135
#11           2     2   0.136
#12           3     2   0.137
#13           1     1   0.141
#14           2     1   0.142
#15           3     1   0.143
#16           1     2   0.144
#17           2     2   0.145
#18           3     2   0.146

输入数据:

# Example input: 35 rows of logger output, every column stored as character
# because the battery-flag rows ("batt." / "bat.") are mixed in with the
# numeric readings. Each line below is one stretch of rows between flags.
DF <- data.frame(
  Measurement = c(
    "1", "2", "3", "1", "2", "3", "1", "2", "3",  # rows 1-9
    "batt.",                                      # row 10
    "1", "2", "3", "1", "2", "3", "1", "2",       # rows 11-18
    "bat.",                                       # row 19
    "1",                                          # row 20
    "bat.",                                       # row 21
    "1", "2", "3",                                # rows 22-24
    "bat.",                                       # row 25
    "1",                                          # row 26
    "1", "2", "3", "1", "2", "3", "1", "2", "3"   # rows 27-35
  ),
  Valve = c(
    "1", "1", "1", "2", "2", "2", "3", "3", "3",  # rows 1-9
    "batt.",                                      # row 10
    "1", "1", "1", "2", "2", "2", "3", "3",       # rows 11-18
    "bat.",                                       # row 19
    "1",                                          # row 20
    "bat.",                                       # row 21
    "1", "1", "1",                                # rows 22-24
    "bat.",                                       # row 25
    "1",                                          # row 26
    "1", "1", "1", "2", "2", "2", "3", "3", "3"   # rows 27-35
  ),
  Value = c(
    "0.123", "0.124", "0.125", "0.126", "0.127",
    "0.128", "0.129", "0.13", "0.131",            # rows 1-9
    "batt.",                                      # row 10
    "0.132", "0.133", "0.134", "0.135",
    "0.136", "0.137", "0.138", "0.139",           # rows 11-18
    "bat.",                                       # row 19
    "0.141",                                      # row 20
    "bat.",                                       # row 21
    "0.141", "0.142", "0.143",                    # rows 22-24
    "bat.",                                       # row 25
    "0.141",                                      # row 26
    "0.141", "0.142", "0.143", "0.144", "0.145",
    "0.146", "0.147", "0.148", "0.149"            # rows 27-35
  ),
  stringsAsFactors = FALSE
)

那么,您可以创建一个分组列:

library(data.table)
# Convert DF to a data.table in place so the `:=` and `by` syntax below works.
setDT(DF)

# Each row whose Value contains "bat" is a battery flag. The running count of
# those flags gives every stretch of rows between flags its own group id `g`
# (the flag row itself opens the new group).
# `Value` is referenced as a bare column name: inside `DF[...]` columns are
# already in scope, so `DF$Value` is unnecessary and would break if the table
# were renamed or the expression reused with `i`/`by`.
# fixed = TRUE matches "bat" as a literal substring rather than a regex.
DF[, g := cumsum(grepl("bat", Value, fixed = TRUE))]

    Measurement Valve Value g
 1:           1     1 0.123 0
 2:           2     1 0.124 0
 3:           3     1 0.125 0
 4:           1     2 0.126 0
 5:           2     2 0.127 0
 6:           3     2 0.128 0
 7:           1     3 0.129 0
 8:           2     3  0.13 0
 9:           3     3 0.131 0
10:       batt. batt. batt. 1
11:           1     1 0.132 1
12:           2     1 0.133 1
13:           3     1 0.134 1
14:           1     2 0.135 1
15:           2     2 0.136 1
16:           3     2 0.137 1
17:           1     3 0.138 1
18:           2     3 0.139 1
19:        bat.  bat.  bat. 2
20:           1     1 0.141 2
21:        bat.  bat.  bat. 3
22:           1     1 0.141 3
23:           2     1 0.142 3
24:           3     1 0.143 3
25:        bat.  bat.  bat. 4
26:           1     1 0.141 4
27:           1     1 0.141 4
28:           2     1 0.142 4
29:           3     1 0.143 4
30:           1     2 0.144 4
31:           2     2 0.145 4
32:           3     2 0.146 4
33:           1     3 0.147 4
34:           2     3 0.148 4
35:           3     3 0.149 4
    Measurement Valve Value g

然后选出符合条件的分组:

# Process each group `g` separately. A group is kept only when both valve "1"
# and valve "2" occur in it; from such a group the distinct rows belonging to
# those two valves are returned. Groups that fail the check yield NULL and
# therefore contribute no rows to DF2.
DF2 <- DF[
  ,
  if ("1" %in% Valve && "2" %in% Valve) {
    unique(.SD[Valve %in% c("1", "2")])
  },
  by = g
]

    g Measurement Valve Value
 1: 0           1     1 0.123
 2: 0           2     1 0.124
 3: 0           3     1 0.125
 4: 0           1     2 0.126
 5: 0           2     2 0.127
 6: 0           3     2 0.128
 7: 1           1     1 0.132
 8: 1           2     1 0.133
 9: 1           3     1 0.134
10: 1           1     2 0.135
11: 1           2     2 0.136
12: 1           3     2 0.137
13: 4           1     1 0.141
14: 4           2     1 0.142
15: 4           3     1 0.143
16: 4           1     2 0.144
17: 4           2     2 0.145
18: 4           3     2 0.146

如果第 26 行和第 27 行的值可能不同,您可以只保留每个(Measurement, Valve)组合的第一行:unique(.SD[Valve %in% c("1","2")], by=c("Measurement", "Valve"))