脚本没有达到 awk 中的任何验证步骤
script is not hitting any validation steps in awk
此脚本应该以以下格式读入 csv
Name,Date,ID,Number
John Smith,09/05/2015,s,999-999-99
Mike Smith,09/06/2015,s,989-979-99
Fred Smith,09/03/2015,s,781-999-99
第一行是 header 它应该被跳过。因此,当脚本运行时,每个 .csv 文件似乎都在移动到 GoodFile 目录,我认为这是误报,我对验证步骤进行了篡改,例如第 3 个,并输入了 QE 而不是 SE(它必须是 S 或 E)它没有甚至敲代码?我不知道为什么.. for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,3]) == 0){
printf "Failed 3rd a validation"
exit 1
#!/bin/sh
for file in test/*.csv ; do
awk -F',' '
# skip the header and blank lines
NR = 1 || NF == 0 {next}
#save the data in to a 2d array called dataArr
{ for (i=1; i <= NF; i++) dataArr[++nr,i] = $i }
END {
STATUS = "GOOD"
#verify coulmn 1
for( linenum=1; linenum <= nr; linenum++) {
if (length(dataArr[linenum,1]) == 0){
printf "Failed 1st validation"
exit 1
}
}
printf "file: %s, verify column 1, STATUS: %s\n", FILENAME, STATUS
#verify coulmn 2
for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,2]) == 0){
printf "Failed 2nd a validation"
exit 1
}
if ((dataArr[linenum,2]) !~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/){
printf "Failed 2nd b validation"
exit 1
}
}
#verify coulmn 3
for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,3]) == 0){
printf "Failed 3rd a validation"
exit 1
}
# has to be either S or E
if ((dataArr[linenum,3]) !~ /^[SE]$/){
printf "Failed 3rd b validation"
exit 1
}
}
#verify coulmn 4
for(linenum = 1; linenum <nr; linenum++) {
#lenght has to between 9 AND 11
if ((length(dataArr[linenum,4])) < 9 || (length(dataArr[linenum,4]) > 11)){
printf "Failed 4th validation"
exit 1
}
}
}' "$file"
if [[ $? -eq 0 ]]; then
# "good" status
mv ${file} test1/goodFile
else
# "bad" status
mv ${file} test1/badFile
fi
完成
您不需要将文件保存在数组中,您只需要:
awk -F',' '
# skip the header and blank lines
NR == 1 || NF == 0 {next}
== "" { fails1++ }
== "" { fails2a++ }
!~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/) { fails2b++ }
== "" { fails3a++ }
!~ /^[SE]$/ { fails3b++ }
length() < 9 || length() > 11 { fails4++ }
END {
if (fails1) { print "Failed 1st validation"; exit 1 }
if (fails2a) { print "Failed 2nd a validation"; exit 1 }
if (fails2b) { print "Failed 2nd b validation"; exit 1 }
if (fails3a) { print "Failed 3rd a validation"; exit 1 }
if (fails3b) { print "Failed 3rd b validation"; exit 1 }
if (fails4) { print "Failed 4th validation"; exit 1 }
}' "$file"
要将失败消息打印到 stderr 而不是 stdout,顺便说一句,可移植的是:
if (fails4) { print "Failed 4th validation" | "cat>&2"; exit 1 }
如果您不关心文件包含多个错误时先报告哪个错误,请使用以下版本:
awk -F',' '
# skip the header and blank lines
NR == 1 || NF == 0 {next}
== "" { print "Failed 1st validation"; exit 1 }
== "" { print "Failed 2nd a validation"; exit 1 }
!~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/) { print "Failed 2nd b validation"; exit 1 }
== "" { print "Failed 3rd a validation"; exit 1 }
!~ /^[SE]$/ { print "Failed 3rd b validation"; exit 1 }
length() < 9 || length() > 11 { print "Failed 4th validation"; exit 1 }
' "$file"
此脚本应该以以下格式读入 csv
Name,Date,ID,Number
John Smith,09/05/2015,s,999-999-99
Mike Smith,09/06/2015,s,989-979-99
Fred Smith,09/03/2015,s,781-999-99
第一行是 header 它应该被跳过。因此,当脚本运行时,每个 .csv 文件似乎都在移动到 GoodFile 目录,我认为这是误报,我对验证步骤进行了篡改,例如第 3 个,并输入了 QE 而不是 SE(它必须是 S 或 E)它没有甚至敲代码?我不知道为什么.. for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,3]) == 0){
printf "Failed 3rd a validation"
exit 1
#!/bin/sh
for file in test/*.csv ; do
awk -F',' '
# skip the header and blank lines
NR = 1 || NF == 0 {next}
#save the data in to a 2d array called dataArr
{ for (i=1; i <= NF; i++) dataArr[++nr,i] = $i }
END {
STATUS = "GOOD"
#verify coulmn 1
for( linenum=1; linenum <= nr; linenum++) {
if (length(dataArr[linenum,1]) == 0){
printf "Failed 1st validation"
exit 1
}
}
printf "file: %s, verify column 1, STATUS: %s\n", FILENAME, STATUS
#verify coulmn 2
for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,2]) == 0){
printf "Failed 2nd a validation"
exit 1
}
if ((dataArr[linenum,2]) !~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/){
printf "Failed 2nd b validation"
exit 1
}
}
#verify coulmn 3
for(linenum = 1; linenum <nr; linenum++) {
if (length(dataArr[linenum,3]) == 0){
printf "Failed 3rd a validation"
exit 1
}
# has to be either S or E
if ((dataArr[linenum,3]) !~ /^[SE]$/){
printf "Failed 3rd b validation"
exit 1
}
}
#verify coulmn 4
for(linenum = 1; linenum <nr; linenum++) {
#lenght has to between 9 AND 11
if ((length(dataArr[linenum,4])) < 9 || (length(dataArr[linenum,4]) > 11)){
printf "Failed 4th validation"
exit 1
}
}
}' "$file"
if [[ $? -eq 0 ]]; then
# "good" status
mv ${file} test1/goodFile
else
# "bad" status
mv ${file} test1/badFile
fi
完成
您不需要将文件保存在数组中,您只需要:
awk -F',' '
# skip the header and blank lines
NR == 1 || NF == 0 {next}
== "" { fails1++ }
== "" { fails2a++ }
!~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/) { fails2b++ }
== "" { fails3a++ }
!~ /^[SE]$/ { fails3b++ }
length() < 9 || length() > 11 { fails4++ }
END {
if (fails1) { print "Failed 1st validation"; exit 1 }
if (fails2a) { print "Failed 2nd a validation"; exit 1 }
if (fails2b) { print "Failed 2nd b validation"; exit 1 }
if (fails3a) { print "Failed 3rd a validation"; exit 1 }
if (fails3b) { print "Failed 3rd b validation"; exit 1 }
if (fails4) { print "Failed 4th validation"; exit 1 }
}' "$file"
要将失败消息打印到 stderr 而不是 stdout,顺便说一句,可移植的是:
if (fails4) { print "Failed 4th validation" | "cat>&2"; exit 1 }
如果您不关心文件包含多个错误时先报告哪个错误,请使用以下版本:
awk -F',' '
# skip the header and blank lines
NR == 1 || NF == 0 {next}
== "" { print "Failed 1st validation"; exit 1 }
== "" { print "Failed 2nd a validation"; exit 1 }
!~ /^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9][0-9]$/) { print "Failed 2nd b validation"; exit 1 }
== "" { print "Failed 3rd a validation"; exit 1 }
!~ /^[SE]$/ { print "Failed 3rd b validation"; exit 1 }
length() < 9 || length() > 11 { print "Failed 4th validation"; exit 1 }
' "$file"