使用awk查找最大值的线性趋势
Find linear trend up to the maximum value using awk
我有一个数据文件如下:
ifile.txt
-10 /
-9 /
-8 /
-7 3
-6 4
-5 13
-4 16
-3 17
-2 23
-1 26
0 29
1 32
2 35
3 38
4 41
5 40
6 35
7 30
8 25
9 /
10 /
此处“/”为缺失值。我想计算直到 y 轴最大值的线性趋势(即直到第二列中的值“41”)。所以它应该根据以下数据计算趋势:
-7 3
-6 4
-5 13
-4 16
-3 17
-2 23
-1 26
0 29
1 32
2 35
3 38
4 41
其他(x, y)不考虑,因为(4, 41)后y值小于41
以下脚本适用于所有值:
# Least-squares slope of column 2 against column 1, using every line that
# contains no "/" (the missing-value marker). Prints DIV0 when the
# denominator c*sxx - sx*sx is zero.
awk '!/\//{sx+=$1; sy+=$2; c++;
           sxx+=$1*$1; sxy+=$1*$2}
     END {det=c*sxx-sx*sx;
          print (det?(c*sxy-sx*sy)/det:"DIV0")}' ifile.txt
但我无法让它只计算到最大值为止。
For the given example the result will be 3.486
根据您的评论进行了更新。我假设您的趋势计算很好并使用了它们:
$ awk '
# Slope of the least-squares line through the rows up to and including the
# row that holds the largest column-2 value. Rows whose column 2 is "/"
# (missing value) are skipped.
$2!="/" {
    b1[++j]=$1                     # buffer them up until or if used
    b2[j]=$2
    if(max=="" || $2>max) {        # once a bigger than current max found
        max=$2                     # new champion
        for(i=1;i<=j;i++) {        # use all so far buffered values
            # print b1[i], b2[i]   # debug to see values used
            sx+=b1[i]              # Your code from here on
            sy+=b2[i]
            c++
            sxx+=b1[i]*b1[i]
            sxy+=b1[i]*b2[i]
        }
        j=0                        # buffer reset
        delete b1
        delete b2
    }
}
END {
    det=c*sxx-sx*sx
    print (det?(c*sxy-sx*sy)/det:"DIV0")   # DIV0 when the denominator is zero
}' file
对于数据:
0 /
1 1
2 2
3 4
4 3
5 5
6 10
7 7
8 8
取消调试用 print 语句的注释后,程序将输出:
1 1
2 2
3 4
4 3
5 5
6 10
1.51429
可以只在 $2 > max 时才把相关行计入统计,并把中间的行先暂存到变量中。例如使用关联数组:
awk '
  # Slope of the least-squares line through the rows up to and including
  # the row that holds the largest column-2 value.
  # a/b hold the accepted x/y values keyed by NR; a1/b1 hold rows seen
  # since the last maximum, which are accepted only if a larger value follows.

  # skip rows whose value is missing
  $2 == "/" {next}

  # max == "" accepts the first valid row even when its value is <= 0;
  # an unset max would otherwise compare as 0.
  max == "" || $2 > max {
    # update max if $2 > max
    max = $2;
    # add all elements of a1 to a and b1 to b
    for (k in a1) {
      a[k] = a1[k]; b[k] = b1[k]
    }
    # add the current row to a, b
    a[NR] = $1; b[NR] = $2;
    # reset a1, b1
    delete a1; delete b1;
    next;
  }

  # if $2 <= max, then set a1, b1
  { a1[NR] = $1; b1[NR] = $2 }

  END{
    for (k in a) {
      #print k, a[k], b[k]
      sx += a[k]; sy += b[k]; sxx += a[k]*a[k]; sxy += a[k]*b[k]; c++
    }
    det=c*sxx-sx*sx;
    # DIV0 when the denominator is zero
    print (det?(c*sxy-sx*sy)/det:"DIV0")
  }
' ifile.txt
#3.48601
或者不使用数组直接计算sx,sy等:
awk '
  # Slope of the least-squares line through the rows up to and including
  # the row that holds the largest column-2 value, without storing rows.
  # sx, sy, sxx, sxy, c are the accepted sums; the *1 variables cache the
  # sums of rows seen since the last maximum.

  # skip rows whose value is missing
  $2 == "/" {next}

  # max == "" accepts the first valid row even when its value is <= 0;
  # an unset max would otherwise compare as 0.
  max == "" || $2 > max {
    # update max if $2 > max
    max = $2;
    # add the current row plus the cached values
    sx += $1+sx1; sy += $2+sy1; sxx += $1*$1+sxx1; sxy += $1*$2+sxy1; c += 1+c1
    # reset the cached variables
    sx1 = 0; sy1 = 0; sxx1 = 0; sxy1 = 0; c1 = 0;
    next;
  }

  # if $2 <= max, then calculate and cache the values
  { sx1 += $1; sy1 += $2; sxx1 += $1*$1; sxy1 += $1*$2; c1++ }

  END{
    det=c*sxx-sx*sx;
    # DIV0 when the denominator is zero
    print (det?(c*sxy-sx*sy)/det:"DIV0")
  }
' ifile.txt
我有一个数据文件如下:
ifile.txt
-10 /
-9 /
-8 /
-7 3
-6 4
-5 13
-4 16
-3 17
-2 23
-1 26
0 29
1 32
2 35
3 38
4 41
5 40
6 35
7 30
8 25
9 /
10 /
此处“/”为缺失值。我想计算直到 y 轴最大值的线性趋势(即直到第二列中的值“41”)。所以它应该根据以下数据计算趋势:
-7 3
-6 4
-5 13
-4 16
-3 17
-2 23
-1 26
0 29
1 32
2 35
3 38
4 41
其他(x, y)不考虑,因为(4, 41)后y值小于41
以下脚本适用于所有值:
# Least-squares slope of column 2 against column 1, using every line that
# contains no "/" (the missing-value marker). Prints DIV0 when the
# denominator c*sxx - sx*sx is zero.
awk '!/\//{sx+=$1; sy+=$2; c++;
           sxx+=$1*$1; sxy+=$1*$2}
     END {det=c*sxx-sx*sx;
          print (det?(c*sxy-sx*sy)/det:"DIV0")}' ifile.txt
但我无法让它只计算到最大值为止。
For the given example the result will be 3.486
根据您的评论进行了更新。我假设您的趋势计算很好并使用了它们:
$ awk '
# Slope of the least-squares line through the rows up to and including the
# row that holds the largest column-2 value. Rows whose column 2 is "/"
# (missing value) are skipped.
$2!="/" {
    b1[++j]=$1                     # buffer them up until or if used
    b2[j]=$2
    if(max=="" || $2>max) {        # once a bigger than current max found
        max=$2                     # new champion
        for(i=1;i<=j;i++) {        # use all so far buffered values
            # print b1[i], b2[i]   # debug to see values used
            sx+=b1[i]              # Your code from here on
            sy+=b2[i]
            c++
            sxx+=b1[i]*b1[i]
            sxy+=b1[i]*b2[i]
        }
        j=0                        # buffer reset
        delete b1
        delete b2
    }
}
END {
    det=c*sxx-sx*sx
    print (det?(c*sxy-sx*sy)/det:"DIV0")   # DIV0 when the denominator is zero
}' file
对于数据:
0 /
1 1
2 2
3 4
4 3
5 5
6 10
7 7
8 8
取消调试用 print 语句的注释后,程序将输出:
1 1
2 2
3 4
4 3
5 5
6 10
1.51429
可以只在 $2 > max 时才把相关行计入统计,并把中间的行先暂存到变量中。例如使用关联数组:
awk '
  # Slope of the least-squares line through the rows up to and including
  # the row that holds the largest column-2 value.
  # a/b hold the accepted x/y values keyed by NR; a1/b1 hold rows seen
  # since the last maximum, which are accepted only if a larger value follows.

  # skip rows whose value is missing
  $2 == "/" {next}

  # max == "" accepts the first valid row even when its value is <= 0;
  # an unset max would otherwise compare as 0.
  max == "" || $2 > max {
    # update max if $2 > max
    max = $2;
    # add all elements of a1 to a and b1 to b
    for (k in a1) {
      a[k] = a1[k]; b[k] = b1[k]
    }
    # add the current row to a, b
    a[NR] = $1; b[NR] = $2;
    # reset a1, b1
    delete a1; delete b1;
    next;
  }

  # if $2 <= max, then set a1, b1
  { a1[NR] = $1; b1[NR] = $2 }

  END{
    for (k in a) {
      #print k, a[k], b[k]
      sx += a[k]; sy += b[k]; sxx += a[k]*a[k]; sxy += a[k]*b[k]; c++
    }
    det=c*sxx-sx*sx;
    # DIV0 when the denominator is zero
    print (det?(c*sxy-sx*sy)/det:"DIV0")
  }
' ifile.txt
#3.48601
或者不使用数组直接计算sx,sy等:
awk '
  # Slope of the least-squares line through the rows up to and including
  # the row that holds the largest column-2 value, without storing rows.
  # sx, sy, sxx, sxy, c are the accepted sums; the *1 variables cache the
  # sums of rows seen since the last maximum.

  # skip rows whose value is missing
  $2 == "/" {next}

  # max == "" accepts the first valid row even when its value is <= 0;
  # an unset max would otherwise compare as 0.
  max == "" || $2 > max {
    # update max if $2 > max
    max = $2;
    # add the current row plus the cached values
    sx += $1+sx1; sy += $2+sy1; sxx += $1*$1+sxx1; sxy += $1*$2+sxy1; c += 1+c1
    # reset the cached variables
    sx1 = 0; sy1 = 0; sxx1 = 0; sxy1 = 0; c1 = 0;
    next;
  }

  # if $2 <= max, then calculate and cache the values
  { sx1 += $1; sy1 += $2; sxx1 += $1*$1; sxy1 += $1*$2; c1++ }

  END{
    det=c*sxx-sx*sx;
    # DIV0 when the denominator is zero
    print (det?(c*sxy-sx*sy)/det:"DIV0")
  }
' ifile.txt