在 perl 中,为什么当散列包含长字符串时 sprintf(Dumper \%hash) 会发出警告?
in perl, why does sprintf(Dumper \%hash) throw a warning when the hash contains a long string?
几个月来我一直在使用如下语法,但没有触发警告:
die join('', sprintf(Dumper [@stack]), sprintf(Dumper {%oprAtnNOW}), 'opt tojudge not specified');
也就是说,我把 sprintf 和 Dumper 一起使用,但没有给 sprintf 指定格式。
从下面的代码可以看到,这种写法在一定范围内可以正常工作;但当 %oprAtnNOW 包含长字符串时,就会触发警告。
(在所有情况下,字符串都编译为正则表达式;但在编译之前,它只是一个字符串。)
是什么导致长字符串触发了警告?为什么会出现“Missing argument”(缺少参数)?
当然,按照 https://perldoc.perl.org/functions/sprintf 的说明,sprintf 应该带有一个格式参数。
但是,为什么只有把较短的字符串换成较长的字符串时,才会强制要求提供格式?
#!/usr/bin/perl
# Minimal reproduction: dump %oprAtnNOW through sprintf with and without an
# explicit format, first for a short pattern string and then for a long one.
use strict; use warnings;
use Data::Dumper qw(Dumper);
# Sort hash keys so the Dumper output appears in a stable order.
$Data::Dumper::Sortkeys = 1;
print "Perl version: $^V\n";
my %oprAtnNOW;
# Sample text that tryit() matches the stored pattern against.
my $string='~~~~~1983-10-21 Fri 13:01:13, today we went to the movie.';
# Case 1: a short pattern string that contains no '%' character.
%oprAtnNOW = (
Vv => {
v=>[ '(?<a>a)',],
},
);
tryit();
# Case 2: a long pattern string. Note that it contains a literal '%' (the "%:"
# inside the HHcMM_pct_cSS alternative), which matters once the dumped text is
# handed to sprintf as its format.
%oprAtnNOW = (
Vv => {
v=>[
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))',
],
},
);
tryit();
# tryit: compile the pattern string stored at $oprAtnNOW{Vv}{v}[0], print the
# named captures if it matches $string, then print a dump of %oprAtnNOW twice:
# once through sprintf('%s', ...) and once with the dump as sprintf's only
# argument. Takes no arguments; reads the file-level %oprAtnNOW and $string.
sub tryit
{
my $rgx=qr/$oprAtnNOW{Vv}->{v}->[0]/;
if($string=~$rgx)
{
# %+ holds the named capture groups of the last successful match.
print Dumper \%+;
}
print "with format:\n";
# Explicit '%s' format: the dumped text is passed as data, not as the format.
print sprintf('%s', Dumper \%oprAtnNOW);
print "WITHOUT format:\n";
# Intentionally no format here (this is the case being demonstrated): the
# dumped text itself becomes the sprintf format string, so any '%' inside it
# is parsed as a conversion specification and can trigger sprintf warnings.
print sprintf(Dumper \%oprAtnNOW);
}
输出:
Perl version: v5.18.4
$VAR1 = {
'a' => 'a'
};
with format:
$VAR1 = {
'Vv' => {
'v' => [
'(?<a>a)'
]
}
};
WITHOUT format:
$VAR1 = {
'Vv' => {
'v' => [
'(?<a>a)'
]
}
};
$VAR1 = {
'HH' => '13',
'HHcMMcSS' => '13:01:13',
'MM' => '01',
'SS' => '13',
'YYYY' => '1983',
'boundjour2009' => '~~~~~1983-10-21 Fri 13:01:13',
'dateISO1' => '1983-10-21',
'dateISO1mbeWeekday' => '1983-10-21 Fri',
'dateISO1mbeWeekdaymbeTIME' => '1983-10-21 Fri 13:01:13',
'nMonth2' => '10',
'nMonthDay2' => '21',
'nTIMEdiverse' => '13:01:13',
'tilde5' => '~~~~~',
'wWeekdayAllor3' => 'Fri'
};
with format:
$VAR1 = {
'Vv' => {
'v' => [
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))'
]
}
};
WITHOUT format:
Missing argument in sprintf at /Users/kpr/u/kh/bin/z.pl line 38.
Invalid conversion in sprintf: "%:" at /Users/kpr/u/kh/bin/z.pl line 38.
$VAR1 = {
'Vv' => {
'v' => [
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))'
]
}
};
原因不在于长度,而在于长字符串中包含百分号:
...(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))...
                    ^
因为这个字符串是传给 sprintf 的唯一参数,所以它被当作格式字符串来解释。
您可以使用更短的字符串来演示相同的行为,例如
sprintf '%';
如果不需要格式化,直接使用 print 即可:
print Dumper \%oprAtnNOW;
几个月来我一直在使用如下语法,但没有触发警告:
die join('', sprintf(Dumper [@stack]), sprintf(Dumper {%oprAtnNOW}), 'opt tojudge not specified');
也就是说,我把 sprintf 和 Dumper 一起使用,但没有给 sprintf 指定格式。
从下面的代码可以看到,这种写法在一定范围内可以正常工作;但当 %oprAtnNOW 包含长字符串时,就会触发警告。
(在所有情况下,字符串都编译为正则表达式;但在编译之前,它只是一个字符串。)
是什么导致长字符串触发了警告?为什么会出现“Missing argument”(缺少参数)?
当然,按照 https://perldoc.perl.org/functions/sprintf 的说明,sprintf 应该带有一个格式参数。
但是,为什么只有把较短的字符串换成较长的字符串时,才会强制要求提供格式?
#!/usr/bin/perl
# Minimal reproduction: dump %oprAtnNOW through sprintf with and without an
# explicit format, first for a short pattern string and then for a long one.
use strict; use warnings;
use Data::Dumper qw(Dumper);
# Sort hash keys so the Dumper output appears in a stable order.
$Data::Dumper::Sortkeys = 1;
print "Perl version: $^V\n";
my %oprAtnNOW;
# Sample text that tryit() matches the stored pattern against.
my $string='~~~~~1983-10-21 Fri 13:01:13, today we went to the movie.';
# Case 1: a short pattern string that contains no '%' character.
%oprAtnNOW = (
Vv => {
v=>[ '(?<a>a)',],
},
);
tryit();
# Case 2: a long pattern string. Note that it contains a literal '%' (the "%:"
# inside the HHcMM_pct_cSS alternative), which matters once the dumped text is
# handed to sprintf as its format.
%oprAtnNOW = (
Vv => {
v=>[
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))',
],
},
);
tryit();
# tryit: compile the pattern string stored at $oprAtnNOW{Vv}{v}[0], print the
# named captures if it matches $string, then print a dump of %oprAtnNOW twice:
# once through sprintf('%s', ...) and once with the dump as sprintf's only
# argument. Takes no arguments; reads the file-level %oprAtnNOW and $string.
sub tryit
{
my $rgx=qr/$oprAtnNOW{Vv}->{v}->[0]/;
if($string=~$rgx)
{
# %+ holds the named capture groups of the last successful match.
print Dumper \%+;
}
print "with format:\n";
# Explicit '%s' format: the dumped text is passed as data, not as the format.
print sprintf('%s', Dumper \%oprAtnNOW);
print "WITHOUT format:\n";
# Intentionally no format here (this is the case being demonstrated): the
# dumped text itself becomes the sprintf format string, so any '%' inside it
# is parsed as a conversion specification and can trigger sprintf warnings.
print sprintf(Dumper \%oprAtnNOW);
}
输出:
Perl version: v5.18.4
$VAR1 = {
'a' => 'a'
};
with format:
$VAR1 = {
'Vv' => {
'v' => [
'(?<a>a)'
]
}
};
WITHOUT format:
$VAR1 = {
'Vv' => {
'v' => [
'(?<a>a)'
]
}
};
$VAR1 = {
'HH' => '13',
'HHcMMcSS' => '13:01:13',
'MM' => '01',
'SS' => '13',
'YYYY' => '1983',
'boundjour2009' => '~~~~~1983-10-21 Fri 13:01:13',
'dateISO1' => '1983-10-21',
'dateISO1mbeWeekday' => '1983-10-21 Fri',
'dateISO1mbeWeekdaymbeTIME' => '1983-10-21 Fri 13:01:13',
'nMonth2' => '10',
'nMonthDay2' => '21',
'nTIMEdiverse' => '13:01:13',
'tilde5' => '~~~~~',
'wWeekdayAllor3' => 'Fri'
};
with format:
$VAR1 = {
'Vv' => {
'v' => [
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))'
]
}
};
WITHOUT format:
Missing argument in sprintf at /Users/kpr/u/kh/bin/z.pl line 38.
Invalid conversion in sprintf: "%:" at /Users/kpr/u/kh/bin/z.pl line 38.
$VAR1 = {
'Vv' => {
'v' => [
'(?m)^(?<boundjour2009>(?<tilde5>[~]{5})[\x20\t]*(?<dateISO1mbeWeekdaymbeTIME>(?<dateISO1mbeWeekday>(?<dateISO1>(?<YYYY>[1-9]\d\d\d)[-](?<nMonth2>0[1-9]|1[0-2])[-](?<nMonthDay2>3[01]|[0-2][0-9]))([\x20\t]+(?<wWeekdayAllor3>Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat))?)([\x20\t]+(?<nTIMEdiverse>(at[\x20\t]+)?((?<HHcMMcSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9]):(?<SS>[0-5][0-9]))|(?<HHMMmbeSS>(?<HHMM>(?<HH>0[0-9]|1[0-9]|2[0-3])(?<MM>[0-5][0-9]))(?<SS>[0-5][0-9])?)|(?<HHcMM_pct_cSS>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))|(?<HHcMM_stop>(?<HH>0[0-9]|1[0-9]|2[0-3]):(?<MM>[0-5][0-9])(?![:][0-5][0-9])))))?))'
]
}
};
原因不在于长度,而在于长字符串中包含百分号:
...(?<MM>[0-5][0-9])%:(?<SS>[0-5][0-9]))...
                    ^
因为这个字符串是传给 sprintf 的唯一参数,所以它被当作格式字符串来解释。
您可以使用更短的字符串来演示相同的行为,例如
sprintf '%';
如果不需要格式化,直接使用 print 即可:
print Dumper \%oprAtnNOW;