Python-正则表达式:从文本文件中提取关键字后的元组列表
Python-Regular expressions: extract a list of tuples after a keyword from a text file
我想实现 here 所建议方案的一个简化版本:从 OpenFOAM 的 blockMeshDict 文件中导入一些顶点,然后用 FreeCAD 将它们可视化。
我感兴趣的文件部分是由浮点数元组 (xi yi zi) 组成的列表,位于 vertices 关键字之后的圆括号之间。该文件如下所示:
vertices
(
(1 2 3)
(3 4 5)
...
)
我可以从与 python 脚本相同的文件夹中读取文件:
import os

# Work relative to the directory that contains this script. abspath() is
# needed because __file__ may be a bare file name; dirname() would then
# return "" and os.chdir("") raises an error.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
with open("blockMeshDict", "r") as f:
    # Read the whole dictionary file into one string for regex searching.
    s = f.read()
但是当我尝试提取 vertices
之后括号之间的内容时:
import re
# NOTE: this is the failing attempt the question asks about. Without
# re.DOTALL, "." does not match a newline. In the sample file the character
# right after "vertices\n(" is a newline, so "(.*?)" can match nothing there
# and "\)" cannot follow; the search therefore finds no match.
r1=re.search(r'vertices\n\((.*?)\)', s)
print r1.group(1)
我收到错误:
<type 'exceptions.IndexError'>: no such group
我不知道该如何解决。最终我想要得到形如 [(x1,y1,z1),(x2,y2,z2)...] 的元组列表;如果您能告诉我如何在 Python 2.7 中实现,我将不胜感激。
P.S. 可在 this GitHub Gist
中找到有关此工作的摘要
这将是查找外部结构的主要正则表达式:\bvertices\s*\((\s*(?:\([^)]+\)\s*)+)\)
在此之前,我们将删除所有注释。
然后是一个额外的正则表达式来提取顶点结构内的所有内容:\([^)]+\)
查看演示 here。
代码:
import re

# Demo input: a complete blockMeshDict. A raw string is used because the
# OpenFOAM banner contains backslashes (one of them at the very end of a
# line); in a normal string literal these would be read as escape sequences
# or as a line continuation.
test_str = r"""
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \ / O peration | Version: 5 |
| \ / A nd | Web: www.OpenFOAM.org |
| \/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class dictionary;
object blockMeshDict;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
convertToMeters 0.001;
vertices
(
(-20.6 0 -0.5)
(-20.6 25.4 -0.5) /* Some comment */
(0 -25.4 -0.5)
(0 0 -0.5)
(0 25.4 -0.5)
(206 -25.4 -0.5)
(206 0 -0.5)
(206 25.4 -0.5)
(290 -16.6 -0.5)
(290 0 -0.5)
(290 16.6 -0.5)
(-20.6 0 0.5)
(-20.6 25.4 0.5)
(0 -25.4 0.5)
(0 0 0.5)
(0 25.4 0.5)
(206 -25.4 0.5)
(206 0 0.5)
(206 25.4 0.5)
(290 -16.6 0.5)
(290 0 0.5)
(290 16.6 0.5)
/*(1 2 3 4)*/ // Commented tuple
//(1 2 3 4)
);
/* vertices commented
vertices
(
(-20.6 0 -0.5)
(-20.6 25.4 -0.5)
(0 -25.4 -0.5)
(0 0 -0.5)
(0 25.4 -0.5)
(206 -25.4 -0.5)
(206 0 -0.5)
(206 25.4 -0.5)
(290 -16.6 -0.5)
(290 0 -0.5)
(290 16.6 -0.5)
)
*/
negY
(
(2 4 1)
(1 3 0.3)
);
posY
(
(1 4 2)
(2 3 4)
(2 4 0.25)
);
posYR
(
(2 1 1)
(1 1 0.25)
);
blocks
(
hex (0 3 4 1 11 14 15 12)
(18 30 1)
simpleGrading (0.5 $posY 1)
hex (2 5 6 3 13 16 17 14)
(180 27 1)
edgeGrading (4 4 4 4 $negY 1 1 $negY 1 1 1 1)
hex (3 6 7 4 14 17 18 15)
(180 30 1)
edgeGrading (4 4 4 4 $posY $posYR $posYR $posY 1 1 1 1)
hex (5 8 9 6 16 19 20 17)
(25 27 1)
simpleGrading (2.5 1 1)
hex (6 9 10 7 17 20 21 18)
(25 30 1)
simpleGrading (2.5 $posYR 1)
);
edges
(
);
boundary
(
inlet
{
type patch;
faces
(
(0 1 12 11)
);
}
outlet
{
type patch;
faces
(
(8 9 20 19)
(9 10 21 20)
);
}
upperWall
{
type wall;
faces
(
(1 4 15 12)
(4 7 18 15)
(7 10 21 18)
);
}
lowerWall
{
type wall;
faces
(
(0 3 14 11)
(3 2 13 14)
(2 5 16 13)
(5 8 19 16)
);
}
frontAndBack
{
type empty;
faces
(
(0 3 4 1)
(2 5 6 3)
(3 6 7 4)
(5 8 9 6)
(6 9 10 7)
(11 14 15 12)
(13 16 17 14)
(14 17 18 15)
(16 19 20 17)
(17 20 21 18)
);
}
);
// ************************************************************************* //
"""

# Clean comments: "//" line comments first, then "/* ... */" blocks. Block
# comments may span several lines, hence re.DOTALL (passed by keyword so it
# cannot be mistaken for the positional "count" argument of re.sub).
test_str = re.sub(r"//.*", '', test_str)
test_str = re.sub(r"/\*.*?\*/", '', test_str, flags=re.DOTALL)

# Match main group: everything between the parentheses that follow the
# "vertices" keyword. The pattern uses neither ".", "^" nor "$", so no regex
# flags are required.
matches = re.findall(r"\bvertices\s*\((\s*(?:\([^)]+\)\s*)+)\)", test_str)
if not matches:
    # Fail with a clear message instead of an IndexError on matches[0].
    raise ValueError("no 'vertices' block found")

# Fetch tuples: every "( ... )" group inside the captured vertices block.
matches2 = re.findall(r"\([^)]+\)", matches[0])
# print(...) with one argument behaves the same on Python 2.7 and Python 3.
print(matches2)
解释:
\b # word boundary
vertices # literal 'vertices'
\s* # 0 or more spaces (includes line feed/carriage return)
\( # literal '('
( # First capturing group
\s* # Some spaces
(?: # Group
\([^)]+\) # literal '(' + any non-')' character 1 or more times + literal ')'
\s* # extra spaces
)+ # repeated one or more times
)
\) # literal ')'
然后在捕获到的分组中搜索 \([^)]+\),即可找到各个顶点。
vertices
(
(1 2 3)
(3 4 5)
...
)
添加 re.DOTALL 标志,例如:
# [\w\s] alone cannot match "-" or ".", so negative or decimal coordinates
# such as (-20.6 0 -0.5) would be skipped; the class below also admits
# signs and decimal points.
# re.DOTALL is kept as suggested, but it only changes the meaning of ".",
# which does not occur as a wildcard in this pattern.
r1 = re.search(r'(?:vertices\s+)?(\([-+.\w\s]+\))', s, re.DOTALL)
# print(...) with one argument behaves the same on Python 2.7 and Python 3.
print(r1.group(1))
>>> (1 2 3)
如果您希望将所有结果存储为一个列表,可以使用 re.findall:
# [\w\s] alone cannot match "-" or ".", so negative or decimal coordinates
# such as (-20.6 0 -0.5) would be skipped; the class below also admits
# signs and decimal points.
r1 = re.findall(r'(?:vertices\s+)?(\([-+.\w\s]+\))', s, re.DOTALL)
# print(...) with one argument behaves the same on Python 2.7 and Python 3.
print(r1)
>>> ['(1 2 3)', '(3 4 5)']
我的测试数据文件 blockMeshDict:
vertices // comment 1
(
(1 2 3) // comment 2
/* :) */ (3 4 5) /* multi line...
...comment */
(65.71 72.8 2.0)
)
代码:
import re

# One decimal number, optionally signed and with an exponent (-20.6, 1e-3).
_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
# A parenthesised triple "(x y z)" of such numbers.
_TRIPLE = re.compile(r'\(\s*(%s)\s+(%s)\s+(%s)\s*\)' % (_NUMBER, _NUMBER, _NUMBER))


def parse_vertices(text):
    """Return the vertices of a blockMeshDict as a list of float triples.

    text   -- full contents of the dictionary file, comments included.
    return -- [(x1, y1, z1), (x2, y2, z2), ...] in file order.
    raises -- ValueError if no "vertices ( ... )" list is found.
    """
    # Remove comments like "//" until end of line
    text = re.sub(r'//.*', '', text)
    # Remove comments between /* and */. The flag must be passed by keyword:
    # the fourth positional argument of re.sub is "count", not "flags".
    # A comment is replaced by a space so the tokens around it stay separate.
    text = re.sub(r'/\*.*?\*/', ' ', text, flags=re.DOTALL)
    # Capture only the list that directly follows the "vertices" keyword:
    # a run of "( ... )" groups closed by the list's own ")". A greedy
    # "(.*)" would run on to the last ")" of the file and pick up triples
    # from later sections as well.
    block = re.search(r'\bvertices\s*\(((?:\s*\([^()]*\))*)\s*\)', text)
    if block is None:
        raise ValueError("no 'vertices' list found")
    return [tuple(float(c) for c in triple)
            for triple in _TRIPLE.findall(block.group(1))]


if __name__ == "__main__":
    with open("blockMeshDict", "r") as f:
        s = f.read()
    vertices = parse_vertices(s)
    print(vertices)
输出是(浮点数的)元组列表:
[(1.0, 2.0, 3.0), (3.0, 4.0, 5.0), (65.71, 72.8, 2.0)]
我想实现 here 所建议方案的一个简化版本:从 OpenFOAM 的 blockMeshDict 文件中导入一些顶点,然后用 FreeCAD 将它们可视化。
我感兴趣的文件部分是由浮点数元组 (xi yi zi) 组成的列表,位于 vertices 关键字之后的圆括号之间。该文件如下所示:
vertices
(
(1 2 3)
(3 4 5)
...
)
我可以从与 python 脚本相同的文件夹中读取文件:
import os

# Work relative to the directory that contains this script. abspath() is
# needed because __file__ may be a bare file name; dirname() would then
# return "" and os.chdir("") raises an error.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
with open("blockMeshDict", "r") as f:
    # Read the whole dictionary file into one string for regex searching.
    s = f.read()
但是当我尝试提取 vertices
之后括号之间的内容时:
import re
# NOTE: this is the failing attempt the question asks about. Without
# re.DOTALL, "." does not match a newline. In the sample file the character
# right after "vertices\n(" is a newline, so "(.*?)" can match nothing there
# and "\)" cannot follow; the search therefore finds no match.
r1=re.search(r'vertices\n\((.*?)\)', s)
print r1.group(1)
我收到错误:
<type 'exceptions.IndexError'>: no such group
我不知道该如何解决。最终我想要得到形如 [(x1,y1,z1),(x2,y2,z2)...] 的元组列表;如果您能告诉我如何在 Python 2.7 中实现,我将不胜感激。
P.S. 可在 this GitHub Gist 中找到有关此工作的摘要。
这将是查找外部结构的主要正则表达式:\bvertices\s*\((\s*(?:\([^)]+\)\s*)+)\)
在此之前,我们将删除所有注释。
然后是一个额外的正则表达式来提取顶点结构内的所有内容:\([^)]+\)
查看演示 here。
代码:
import re

# Demo input: a complete blockMeshDict. A raw string is used because the
# OpenFOAM banner contains backslashes (one of them at the very end of a
# line); in a normal string literal these would be read as escape sequences
# or as a line continuation.
test_str = r"""
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \ / O peration | Version: 5 |
| \ / A nd | Web: www.OpenFOAM.org |
| \/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class dictionary;
object blockMeshDict;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
convertToMeters 0.001;
vertices
(
(-20.6 0 -0.5)
(-20.6 25.4 -0.5) /* Some comment */
(0 -25.4 -0.5)
(0 0 -0.5)
(0 25.4 -0.5)
(206 -25.4 -0.5)
(206 0 -0.5)
(206 25.4 -0.5)
(290 -16.6 -0.5)
(290 0 -0.5)
(290 16.6 -0.5)
(-20.6 0 0.5)
(-20.6 25.4 0.5)
(0 -25.4 0.5)
(0 0 0.5)
(0 25.4 0.5)
(206 -25.4 0.5)
(206 0 0.5)
(206 25.4 0.5)
(290 -16.6 0.5)
(290 0 0.5)
(290 16.6 0.5)
/*(1 2 3 4)*/ // Commented tuple
//(1 2 3 4)
);
/* vertices commented
vertices
(
(-20.6 0 -0.5)
(-20.6 25.4 -0.5)
(0 -25.4 -0.5)
(0 0 -0.5)
(0 25.4 -0.5)
(206 -25.4 -0.5)
(206 0 -0.5)
(206 25.4 -0.5)
(290 -16.6 -0.5)
(290 0 -0.5)
(290 16.6 -0.5)
)
*/
negY
(
(2 4 1)
(1 3 0.3)
);
posY
(
(1 4 2)
(2 3 4)
(2 4 0.25)
);
posYR
(
(2 1 1)
(1 1 0.25)
);
blocks
(
hex (0 3 4 1 11 14 15 12)
(18 30 1)
simpleGrading (0.5 $posY 1)
hex (2 5 6 3 13 16 17 14)
(180 27 1)
edgeGrading (4 4 4 4 $negY 1 1 $negY 1 1 1 1)
hex (3 6 7 4 14 17 18 15)
(180 30 1)
edgeGrading (4 4 4 4 $posY $posYR $posYR $posY 1 1 1 1)
hex (5 8 9 6 16 19 20 17)
(25 27 1)
simpleGrading (2.5 1 1)
hex (6 9 10 7 17 20 21 18)
(25 30 1)
simpleGrading (2.5 $posYR 1)
);
edges
(
);
boundary
(
inlet
{
type patch;
faces
(
(0 1 12 11)
);
}
outlet
{
type patch;
faces
(
(8 9 20 19)
(9 10 21 20)
);
}
upperWall
{
type wall;
faces
(
(1 4 15 12)
(4 7 18 15)
(7 10 21 18)
);
}
lowerWall
{
type wall;
faces
(
(0 3 14 11)
(3 2 13 14)
(2 5 16 13)
(5 8 19 16)
);
}
frontAndBack
{
type empty;
faces
(
(0 3 4 1)
(2 5 6 3)
(3 6 7 4)
(5 8 9 6)
(6 9 10 7)
(11 14 15 12)
(13 16 17 14)
(14 17 18 15)
(16 19 20 17)
(17 20 21 18)
);
}
);
// ************************************************************************* //
"""

# Clean comments: "//" line comments first, then "/* ... */" blocks. Block
# comments may span several lines, hence re.DOTALL (passed by keyword so it
# cannot be mistaken for the positional "count" argument of re.sub).
test_str = re.sub(r"//.*", '', test_str)
test_str = re.sub(r"/\*.*?\*/", '', test_str, flags=re.DOTALL)

# Match main group: everything between the parentheses that follow the
# "vertices" keyword. The pattern uses neither ".", "^" nor "$", so no regex
# flags are required.
matches = re.findall(r"\bvertices\s*\((\s*(?:\([^)]+\)\s*)+)\)", test_str)
if not matches:
    # Fail with a clear message instead of an IndexError on matches[0].
    raise ValueError("no 'vertices' block found")

# Fetch tuples: every "( ... )" group inside the captured vertices block.
matches2 = re.findall(r"\([^)]+\)", matches[0])
# print(...) with one argument behaves the same on Python 2.7 and Python 3.
print(matches2)
解释:
\b # word boundary
vertices # literal 'vertices'
\s* # 0 or more spaces (includes line feed/carriage return)
\( # literal '('
( # First capturing group
\s* # Some spaces
(?: # Group
\([^)]+\) # literal '(' + any non-')' character 1 or more times + literal ')'
\s* # extra spaces
)+ # repeated one or more times
)
\) # literal ')'
然后在捕获到的分组中搜索 \([^)]+\),即可找到各个顶点。
vertices
(
(1 2 3)
(3 4 5)
...
)
添加 re.DOTALL 标志,例如:
# [\w\s] alone cannot match "-" or ".", so negative or decimal coordinates
# such as (-20.6 0 -0.5) would be skipped; the class below also admits
# signs and decimal points.
# re.DOTALL is kept as suggested, but it only changes the meaning of ".",
# which does not occur as a wildcard in this pattern.
r1 = re.search(r'(?:vertices\s+)?(\([-+.\w\s]+\))', s, re.DOTALL)
# print(...) with one argument behaves the same on Python 2.7 and Python 3.
print(r1.group(1))
>>> (1 2 3)
如果您希望将所有结果存储为一个列表,可以使用 re.findall:
r1 = re.findall(r'(?:vertices\s+)?(\([\w\s]+\))', s, re.DOTALL)
print r1
>>> ['(1 2 3)', '(3 4 5)']
我的测试数据文件 blockMeshDict:
vertices // comment 1
(
(1 2 3) // comment 2
/* :) */ (3 4 5) /* multi line...
...comment */
(65.71 72.8 2.0)
)
代码:
import re

# One decimal number, optionally signed and with an exponent (-20.6, 1e-3).
_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
# A parenthesised triple "(x y z)" of such numbers.
_TRIPLE = re.compile(r'\(\s*(%s)\s+(%s)\s+(%s)\s*\)' % (_NUMBER, _NUMBER, _NUMBER))


def parse_vertices(text):
    """Return the vertices of a blockMeshDict as a list of float triples.

    text   -- full contents of the dictionary file, comments included.
    return -- [(x1, y1, z1), (x2, y2, z2), ...] in file order.
    raises -- ValueError if no "vertices ( ... )" list is found.
    """
    # Remove comments like "//" until end of line
    text = re.sub(r'//.*', '', text)
    # Remove comments between /* and */. The flag must be passed by keyword:
    # the fourth positional argument of re.sub is "count", not "flags".
    # A comment is replaced by a space so the tokens around it stay separate.
    text = re.sub(r'/\*.*?\*/', ' ', text, flags=re.DOTALL)
    # Capture only the list that directly follows the "vertices" keyword:
    # a run of "( ... )" groups closed by the list's own ")". A greedy
    # "(.*)" would run on to the last ")" of the file and pick up triples
    # from later sections as well.
    block = re.search(r'\bvertices\s*\(((?:\s*\([^()]*\))*)\s*\)', text)
    if block is None:
        raise ValueError("no 'vertices' list found")
    return [tuple(float(c) for c in triple)
            for triple in _TRIPLE.findall(block.group(1))]


if __name__ == "__main__":
    with open("blockMeshDict", "r") as f:
        s = f.read()
    vertices = parse_vertices(s)
    print(vertices)
输出是(浮点数的)元组列表:
[(1.0, 2.0, 3.0), (3.0, 4.0, 5.0), (65.71, 72.8, 2.0)]