使用开始和结束模式的正则表达式提取子字符串
Regular Expression Extract Substring using Start and End Pattern
例如我有一个包含 png 文件列表的 QT RCC 文件
默认情况下,每个 PNG 都有:
起始模式:
++++++++++++++++++++++++++++++++++++++++
PNG 文件头 = (89 50 4E 47 0D 0A 1A 0A)
在解码文本中是:‰PNG....
======================================
结束模式:
++++++++++++++++++++++++++++++++++++++++
PNG 文件尾 = (49 45 4E 44 AE 42 60 82)
在解码文本中是:IEND®B`‚
======================================
我的问题是:
我想将每个成功的匹配范围保存为“.png”文件,就像我手动使用 HEX 编辑器一样。
我使用以下代码将文件以十六进制文本的形式加载到 SynEdit 中:
const
// Start pattern: the 8 bytes 89 50 4E 47 0D 0A 1A 0A written as 16 hex digits
// (no separators), matching the format of the hex dump that is searched.
PngHeader = '89504E470D0A1A0A';
// End pattern: ASCII 'IEND' (49 45 4E 44) followed by the bytes AE 42 60 82,
// written as 16 hex digits.
PngFooter = '49454E44AE426082';
implementation
uses
System.IOUtils,
System.RegularExpressions,
Vcl.Imaging.pngimage,
System.Generics.Collections;
{ Loads the file FileName and fills AMemo with its content rendered as
  upper-case hexadecimal text: 16 bytes (32 hex digits) per line, with no
  separator between bytes. Any previous content of AMemo is discarded.

  AMemo    - editor control that receives the hex dump.
  FileName - path of the file to read.

  Raises EInOutError (with I/O checking enabled) if the file cannot be
  opened or read. The file handle is always released. }
procedure FileViewHex(AMemo: TSynEdit; FileName: string);// use SynEdit instead of TMemo
const
  BytesPerLine = 16;
  MaxLineLength = BytesPerLine * 2; // each byte is displayed with 2 hex digits
  BufferSize = 4096;
var
  DataFile: File;
  Buffer: array[1..BufferSize] of Byte;
  BytesRead, I: Integer;
  Line: string;
  SavedFileMode: Byte;
begin
  AssignFile(DataFile, FileName);
  // Reset honours the global FileMode, which defaults to read/write and makes
  // read-only files fail to open. Only read access is needed here.
  SavedFileMode := FileMode;
  FileMode := fmOpenRead;
  try
    Reset(DataFile, 1); // record size 1 so that BlockRead counts in bytes
  finally
    FileMode := SavedFileMode;
  end;
  try
    AMemo.Clear;
    // Suspend change notifications while potentially thousands of lines are added.
    AMemo.Lines.BeginUpdate;
    try
      // Initialised once, outside the read loop, so that digits buffered from a
      // short read are carried over into the next block instead of being dropped.
      Line := '';
      while not Eof(DataFile) do
      begin
        BlockRead(DataFile, Buffer, BufferSize, BytesRead);
        for I := 1 to BytesRead do
        begin
          Line := Line + IntToHex(Buffer[I], 2); // always two digits, zero padded
          if Length(Line) >= MaxLineLength then
          begin
            AMemo.Lines.Add(Line);
            Line := '';
          end;
        end;
      end;
      // Flush the last, incomplete line (file size not a multiple of 16 bytes).
      if Line <> '' then
        AMemo.Lines.Add(Line);
    finally
      AMemo.Lines.EndUpdate;
    end;
  finally
    CloseFile(DataFile);
  end;
end;
{ Searches the hex text Stream for every range that starts with PngHeader and
  ends with the next PngFooter, and adds one row per hit to PngList: the
  caption is the zero-based hit index, the first sub-item is the matched text.

  Stream  - hex dump text; it may be wrapped over several lines.
  PngList - list view that receives the rows (existing rows are kept).

  The dump is wrapped at a fixed width, so a line break can fall anywhere,
  including inside the header or footer digits. The pattern therefore allows
  one optional whitespace character between consecutive signature digits and
  uses [\s\S]*? (instead of '.', which stops at line breaks) for the data in
  between. Match.Value consequently still contains those line breaks.

  NOTE(review): the search works on hex digits, not on bytes, so a hit could
  in principle start on the second digit of a byte - confirm whether byte
  alignment has to be enforced for the files being processed. }
procedure Get_PngList(Stream: string; PngList: TListView);

  // Returns HexDigits with '\s?' inserted between every two adjacent digits.
  function AllowLineBreaks(const HexDigits: string): string;
  var
    K: Integer;
  begin
    Result := '';
    for K := 1 to Length(HexDigits) do
    begin
      if K > 1 then
        Result := Result + '\s?';
      Result := Result + HexDigits[K];
    end;
  end;

var
  Matches: TMatchCollection;
  Match: TMatch;
  I: Integer;
  Item: TListItem;
  Pattern: string;
begin
  I := 0;
  Pattern := AllowLineBreaks(PngHeader) + '[\s\S]*?' + AllowLineBreaks(PngFooter);
  // roMultiLine only changes the meaning of ^ and $, which are not used here.
  Matches := TRegEx.Matches(Stream, Pattern, [roIgnoreCase]);
  PngList.Items.BeginUpdate; // avoid repainting the control for every row
  try
    for Match in Matches do
    begin
      if Match.Success then
      begin
        Item := PngList.Items.Add;
        Item.Caption := I.ToString;
        Item.SubItems.Add(Match.Value);
        Inc(I);
      end;
    end;
  finally
    PngList.Items.EndUpdate;
  end;
end;
我需要知道正确的模式..
在每个字符之间加上 \s?,这样即使匹配内容被换行符“打断”,也能匹配成功:
8\s?9\s?5\s?0\s?4\s?E\s?4\s?7\s?0\s?D\s?0\s?A\s?1\s?A\s?0\s?A([\s\S]*?)4\s?9\s?4\s?5\s?4\s?E\s?4\s?4\s?A\s?E\s?4\s?2\s?6\s?0\s?8\s?2
解释
--------------------------------------------------------------------------------
8 '8'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
9 '9'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
5 '5'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
7 '7'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
D 'D'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
1 '1'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
( group and capture to \1:
--------------------------------------------------------------------------------
[\s\S]*? any character of: whitespace (\n, \r,
\t, \f, and " "), non-whitespace (all
but \n, \r, \t, \f, and " ") (0 or more
times (matching the least amount
possible))
--------------------------------------------------------------------------------
) end of \1
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
9 '9'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
5 '5'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
2 '2'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
6 '6'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
8 '8'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
2 '2'
例如我有一个包含 png 文件列表的 QT RCC 文件
默认情况下,每个 PNG 都有:
起始模式:
++++++++++++++++++++++++++++++++++++++++
PNG 文件头 = (89 50 4E 47 0D 0A 1A 0A)
在解码文本中是:‰PNG....
======================================
结束模式:
++++++++++++++++++++++++++++++++++++++++
PNG 文件尾 = (49 45 4E 44 AE 42 60 82)
在解码文本中是:IEND®B`‚
======================================
我的问题是:
我想将每个成功的匹配范围保存为“.png”文件,就像我手动使用 HEX 编辑器一样。
我使用以下代码将文件以十六进制文本的形式加载到 SynEdit 中:
const
// Start pattern: the 8 bytes 89 50 4E 47 0D 0A 1A 0A written as 16 hex digits
// (no separators), matching the format of the hex dump that is searched.
PngHeader = '89504E470D0A1A0A';
// End pattern: ASCII 'IEND' (49 45 4E 44) followed by the bytes AE 42 60 82,
// written as 16 hex digits.
PngFooter = '49454E44AE426082';
implementation
uses
System.IOUtils,
System.RegularExpressions,
Vcl.Imaging.pngimage,
System.Generics.Collections;
{ Loads the file FileName and fills AMemo with its content rendered as
  upper-case hexadecimal text: 16 bytes (32 hex digits) per line, with no
  separator between bytes. Any previous content of AMemo is discarded.

  AMemo    - editor control that receives the hex dump.
  FileName - path of the file to read.

  Raises EInOutError (with I/O checking enabled) if the file cannot be
  opened or read. The file handle is always released. }
procedure FileViewHex(AMemo: TSynEdit; FileName: string);// use SynEdit instead of TMemo
const
  BytesPerLine = 16;
  MaxLineLength = BytesPerLine * 2; // each byte is displayed with 2 hex digits
  BufferSize = 4096;
var
  DataFile: File;
  Buffer: array[1..BufferSize] of Byte;
  BytesRead, I: Integer;
  Line: string;
  SavedFileMode: Byte;
begin
  AssignFile(DataFile, FileName);
  // Reset honours the global FileMode, which defaults to read/write and makes
  // read-only files fail to open. Only read access is needed here.
  SavedFileMode := FileMode;
  FileMode := fmOpenRead;
  try
    Reset(DataFile, 1); // record size 1 so that BlockRead counts in bytes
  finally
    FileMode := SavedFileMode;
  end;
  try
    AMemo.Clear;
    // Suspend change notifications while potentially thousands of lines are added.
    AMemo.Lines.BeginUpdate;
    try
      // Initialised once, outside the read loop, so that digits buffered from a
      // short read are carried over into the next block instead of being dropped.
      Line := '';
      while not Eof(DataFile) do
      begin
        BlockRead(DataFile, Buffer, BufferSize, BytesRead);
        for I := 1 to BytesRead do
        begin
          Line := Line + IntToHex(Buffer[I], 2); // always two digits, zero padded
          if Length(Line) >= MaxLineLength then
          begin
            AMemo.Lines.Add(Line);
            Line := '';
          end;
        end;
      end;
      // Flush the last, incomplete line (file size not a multiple of 16 bytes).
      if Line <> '' then
        AMemo.Lines.Add(Line);
    finally
      AMemo.Lines.EndUpdate;
    end;
  finally
    CloseFile(DataFile);
  end;
end;
{ Searches the hex text Stream for every range that starts with PngHeader and
  ends with the next PngFooter, and adds one row per hit to PngList: the
  caption is the zero-based hit index, the first sub-item is the matched text.

  Stream  - hex dump text; it may be wrapped over several lines.
  PngList - list view that receives the rows (existing rows are kept).

  The dump is wrapped at a fixed width, so a line break can fall anywhere,
  including inside the header or footer digits. The pattern therefore allows
  one optional whitespace character between consecutive signature digits and
  uses [\s\S]*? (instead of '.', which stops at line breaks) for the data in
  between. Match.Value consequently still contains those line breaks.

  NOTE(review): the search works on hex digits, not on bytes, so a hit could
  in principle start on the second digit of a byte - confirm whether byte
  alignment has to be enforced for the files being processed. }
procedure Get_PngList(Stream: string; PngList: TListView);

  // Returns HexDigits with '\s?' inserted between every two adjacent digits.
  function AllowLineBreaks(const HexDigits: string): string;
  var
    K: Integer;
  begin
    Result := '';
    for K := 1 to Length(HexDigits) do
    begin
      if K > 1 then
        Result := Result + '\s?';
      Result := Result + HexDigits[K];
    end;
  end;

var
  Matches: TMatchCollection;
  Match: TMatch;
  I: Integer;
  Item: TListItem;
  Pattern: string;
begin
  I := 0;
  Pattern := AllowLineBreaks(PngHeader) + '[\s\S]*?' + AllowLineBreaks(PngFooter);
  // roMultiLine only changes the meaning of ^ and $, which are not used here.
  Matches := TRegEx.Matches(Stream, Pattern, [roIgnoreCase]);
  PngList.Items.BeginUpdate; // avoid repainting the control for every row
  try
    for Match in Matches do
    begin
      if Match.Success then
      begin
        Item := PngList.Items.Add;
        Item.Caption := I.ToString;
        Item.SubItems.Add(Match.Value);
        Inc(I);
      end;
    end;
  finally
    PngList.Items.EndUpdate;
  end;
end;
我需要知道正确的模式..
在每个字符之间加上 \s?,这样即使匹配内容被换行符“打断”,也能匹配成功:
8\s?9\s?5\s?0\s?4\s?E\s?4\s?7\s?0\s?D\s?0\s?A\s?1\s?A\s?0\s?A([\s\S]*?)4\s?9\s?4\s?5\s?4\s?E\s?4\s?4\s?A\s?E\s?4\s?2\s?6\s?0\s?8\s?2
解释
--------------------------------------------------------------------------------
8 '8'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
9 '9'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
5 '5'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
7 '7'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
D 'D'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
1 '1'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
( group and capture to \1:
--------------------------------------------------------------------------------
[\s\S]*? any character of: whitespace (\n, \r,
\t, \f, and " "), non-whitespace (all
but \n, \r, \t, \f, and " ") (0 or more
times (matching the least amount
possible))
--------------------------------------------------------------------------------
) end of \1
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
9 '9'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
5 '5'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
A 'A'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
E 'E'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
4 '4'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
2 '2'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
6 '6'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
0 '0'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
8 '8'
--------------------------------------------------------------------------------
\s? whitespace (\n, \r, \t, \f, and " ")
(optional (matching the most amount
possible))
--------------------------------------------------------------------------------
2 '2'