按块解析文件并使用 Python 从每个块中提取信息
Parsing a file by blocks and extracting information from each of the blocks with Python
我正在尝试解析 VRML 文件中的一些文本块。具体来说,我对 IndexedFaceSet 中包含的信息感兴趣:它表示一系列三角形,由坐标点(point 字段)和点之间的连接关系(coordIndex 字段)定义。
我已经成功编写了一个函数来解析单个(第一个)IndexedFaceSet 的文本,但我不知道如何修改代码,才能为每个 IndexedFaceSet 都存储列表 [translate,verts,facets,normals]。
我的函数是:
def extractDataFromVRML(root):
    """
    Parse the first 'point [' / triangle-index section of a VRML file.

    The file ``root + '.wrl'`` is read in three phases:

    1. lines are skipped until a line whose tokens are exactly 'point [';
       every 'translation x y z' line met on the way is recorded;
    2. each following line that contains a comma contributes the text before
       its first comma as one vertex string;
    3. each following line with at least three comma/space separated tokens
       is read as a triangle whose first three tokens index into the
       vertices, until a line equal to '}' is met.

    Every phase also stops at end of file.

    :param root: path of the VRML file without its '.wrl' extension
    :return: the list [translate, verts, facets, normals] where translate is
        a list of [x, y, z] floats, verts a list of 'x y z' strings, facets
        a list of [p1, p2, p3] triangles (each point a list of three floats)
        and normals an empty list (normal computation is not enabled)
    """
    # Lists to be populated
    translate = []
    verts = []
    facets = []
    normals = []

    # The with-statement closes the file even when a conversion below raises.
    with open(root + '.wrl') as f:
        # Skip through the initial stuff.  Iterating over the file (rather
        # than looping on readline()) ends the loop at end of file.
        for line in f:
            linney = line.split()
            # A translation line is not present in the example VRML file
            if len(linney) == 4 and linney[0] == 'translation':
                translate.append(
                    [float(linney[1]), float(linney[2]), float(linney[3])])
            if linney == ['point', '[']:
                break

        # Reading vertex coordinates: build a list of 'x y z' strings
        for xyz in f:
            comma = xyz.find(',')
            if comma < 0:  # end of vertex coordinates
                break
            verts.append(xyz[:comma])
        # Every collected entry is a vertex, so report the full length.
        print('We have', len(verts), 'vertices.')

        print('Reading triangles.')
        for line in f:
            if line == '}\n':
                break  # end of the triangle section
            abc = line.replace(',', ' ').split()  # list of vertex ids
            if len(abc) < 3:
                continue  # separation between groups of triangles
            # Look up each vertex in the list to generate a triangle
            tri = []
            for vertex_id in abc[:3]:
                xyz = verts[int(vertex_id)].split()
                tri.append([float(xyz[0]), float(xyz[1]), float(xyz[2])])
            facets.append(tri)

    print('END')
    return [translate, verts, facets, normals]
vrml中的立方体示例如下:
#VRML V2.0 utf8
WorldInfo {
info [ "File created using CATIA" ]
}
NavigationInfo {
type [ "EXAMINE" , "WALK" , "FLY" ]
}
Background {
skyColor [ 1 1 1 ]
}
Viewpoint {
position 0.411502 0.183945 0.216403
orientation 0.326678 0.502925 0.800218 2.185925
fieldOfView 0.471225
description "Main Viewpoint"
}
Viewpoint {
position 0.288675 0.288675 0.288675
orientation 0.187053 0.451587 0.872399 2.448076
fieldOfView 0.471225
description "Iso View"
}
Viewpoint {
position 0.500000 0.000000 0.000000
orientation 0.577350 0.577350 0.577350 2.094395
fieldOfView 0.471225
description "Front View"
}
Viewpoint {
position -0.500000 0.000000 0.000000
orientation 0.577350 -0.577350 -0.577350 2.094395
fieldOfView 0.471225
description "Back View"
}
Viewpoint {
position 0.000000 -0.500000 0.000000
orientation 1.000000 -0.000173 0.000173 1.570796
fieldOfView 0.471225
description "Left View"
}
Viewpoint {
position 0.000000 0.500000 0.000000
orientation -0.000122 -0.707107 -0.707107 3.141348
fieldOfView 0.471225
description "Right View"
}
Viewpoint {
position 0.000000 0.000000 0.500000
orientation 0.000000 0.000000 1.000000 1.570796
fieldOfView 0.471225
description "Top View"
}
Viewpoint {
position 0.000000 0.000000 -0.500000
orientation 0.707107 0.707107 0.000000 3.141593
fieldOfView 0.471225
description "Bottom View"
}
Transform {
scale 0.001 0.001 0.001
children [
Group {
children [
Group {
children [
DEF _000000002346A9A0 Group {
children [
]
}
]
}
Group {
children [
DEF _0000000023470B20 Group {
children [
]
}
]
}
Group {
children [
DEF _00000000234700D0 Group {
children [
]
}
]
}
Group {
children [
Group {
children [
DEF _0000000020D0F390 Group {
children [
Shape {
appearance Appearance {
material DEF _material0 Material {
diffuseColor 0.823529 0.823529 1
}
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 50,
50 50 50,
50 0 50,
0 0 50,
]
}
coordIndex [
3,2,0,-1,
2,1,0,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 0,
50 50 0,
50 0 0,
0 0 0,
]
}
coordIndex [
3,0,2,-1,
0,1,2,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 0 50,
0 0 0,
0 50 0,
0 50 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 50,
0 50 0,
50 50 0,
50 50 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
50 50 50,
50 50 0,
50 0 0,
50 0 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
50 0 50,
50 0 0,
0 0 0,
0 0 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
]
}
]
}
]
}
]
}
]
}
这是一个并不简单的解析问题。如果你能找到现成的 Python 包来解析,那是最好的。否则,我的方法是:逐行扫描文件;一旦遇到我能识别的内容,就把剩余的行交给对应块的解析器处理。例如,我写了一个解析器,用来解析以 "geometry IndexedFaceSet" 行开头的块。
import itertools
import json
import logging
from pprint import pprint
import os
# The log level is read from the LOGLEVEL environment variable and falls
# back to logging.WARN when it is unset: use logging.WARN to turn off the
# debug print, use logging.DEBUG to turn it on.
logging.basicConfig(level=os.getenv('LOGLEVEL', logging.WARN))
# Module-level logger used for the debug output of the parsing code.
logger = logging.getLogger(__name__)
def skip_pass(marker, lines):
    """
    Skip lines until one containing the marker is reached, then skip the
    marker line as well.

    :param marker: substring identifying the line to skip past
    :param lines: iterable of lines, e.g. an open file
    :return: an iterator over the lines that follow the marker line; it is
        empty when no line contains the marker
    """
    result = itertools.dropwhile(
        lambda line: marker not in line,  # Condition
        lines)  # The lines
    # Skip past the marker line itself.  The default value keeps a missing
    # marker from raising StopIteration out of this function.
    next(result, None)
    return result
def take(marker, lines):
    """
    Take the leading run of lines which contain the marker.

    Iteration stops at the first line that does not contain the marker;
    later lines are not returned even if they contain it.  That first
    non-matching line is consumed from ``lines`` and discarded.

    :param marker: substring every taken line must contain
    :param lines: iterable of lines, e.g. an open file
    :return: a lazy iterator over the matching lines
    """
    return itertools.takewhile(
        lambda line: marker in line,  # Condition
        lines)  # The lines
def parse_indexed_face_set(translate, lines):
    """
    Parse one block of 'geometry IndexedFaceSet'.

    :param translate: value stored unchanged under the 'translate' key
    :param lines: iterator of lines positioned after the
        'geometry IndexedFaceSet' line; it is advanced past the block's
        'point' and 'coordIndex' lists
    :return: a dict with the keys 'vert' (list of [x, y, z] floats),
        'facets' (one list of vertices per coordIndex line), 'translate'
        and 'normals' (always an empty list)
    """
    # Parse the "point" structure: one "x y z," entry per line
    lines = skip_pass('point', lines)
    point_lines = take(',', lines)
    # Strip surrounding whitespace before the trailing comma so that a space
    # after the comma does not end up in the last token.
    verts = [
        [float(token) for token in line.strip().rstrip(',').split()]
        for line in point_lines
    ]
    logger.debug('verts: %r', verts)

    # Parse the "coordIndex" structure: one "a,b,c,-1," entry per line
    lines = skip_pass('coordIndex', lines)
    coor_lines = take(',', lines)
    # Blank tokens (after the trailing comma) are skipped instead of being
    # handed to int().
    coord_index = [
        tuple(int(token) for token in line.split(',') if token.strip())
        for line in coor_lines
    ]
    logger.debug('coord_index: %r', coord_index)

    facets = []
    for indices in coord_index:
        # -1 terminates a face; cut there so it is never used as a Python
        # negative index into verts.
        if -1 in indices:
            indices = indices[:indices.index(-1)]
        # Only the first three vertices of a face are kept.
        facets.append([verts[i] for i in indices[:3]])
    logger.debug('facets: %r', facets)

    return dict(vert=verts, facets=facets, translate=translate, normals=[])
def parse_translate(line):
    """
    Given a line such as: "translation 5 6 7", return [5.0, 6.0, 7.0]

    The first token (the keyword) is ignored; the next three tokens at most
    are converted to float.
    """
    return [float(x) for x in line.split()[1:4]]
def extractDataFromVRML(root):
    """
    Parse every 'geometry IndexedFaceSet' block of a VRML file.

    The file ``root + '.wrl'`` is scanned line by line.  When a line
    contains 'geometry IndexedFaceSet', the rest of the file iterator is
    handed to parse_indexed_face_set(), which consumes the lines of that
    block; scanning then resumes after it.  A line whose first token is
    'translation' updates the translation passed to the blocks parsed
    after it.

    :param root: path of the VRML file without its '.wrl' extension
    :return: a list with one parse_indexed_face_set() result per block, in
        file order
    """
    indexed_face_sets = []
    translate = []  # stays empty until a translation line is seen
    with open(root + '.wrl') as infile:
        for line in infile:
            if 'geometry IndexedFaceSet' in line:
                # The block parser shares the file iterator with this loop.
                a_set = parse_indexed_face_set(
                    translate=translate, lines=infile)
                indexed_face_sets.append(a_set)
            elif line.split()[:1] == ['translation']:
                translate = parse_translate(line)
    return indexed_face_sets
# main
# Parse root.wrl and print the vertices and facets of each face set found.
indexed_face_sets = extractDataFromVRML('root')
for a_set in indexed_face_sets:
    print('vert:', a_set['vert'])
    print('facets:', a_set['facets'])
    print('---')
输出
vert: [[0.0, 50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 0.0, 50.0], [0.0, 0.0, 50.0]]
facets: [[[0.0, 0.0, 50.0], [50.0, 0.0, 50.0], [0.0, 50.0, 50.0]], [[50.0, 0.0, 50.0], [50.0, 50.0, 50.0], [0.0, 50.0, 50.0]]]
---
vert: [[0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
facets: [[[0.0, 0.0, 0.0], [0.0, 50.0, 0.0], [50.0, 0.0, 0.0]], [[0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0]]]
---
vert: [[0.0, 0.0, 50.0], [0.0, 0.0, 0.0], [0.0, 50.0, 0.0], [0.0, 50.0, 50.0]]
facets: [[[0.0, 50.0, 0.0], [0.0, 0.0, 0.0], [0.0, 50.0, 50.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 50.0], [0.0, 50.0, 50.0]]]
---
vert: [[0.0, 50.0, 50.0], [0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 50.0, 50.0]]
facets: [[[50.0, 50.0, 0.0], [0.0, 50.0, 0.0], [50.0, 50.0, 50.0]], [[0.0, 50.0, 0.0], [0.0, 50.0, 50.0], [50.0, 50.0, 50.0]]]
---
vert: [[50.0, 50.0, 50.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0], [50.0, 0.0, 50.0]]
facets: [[[50.0, 0.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 50.0]], [[50.0, 50.0, 0.0], [50.0, 50.0, 50.0], [50.0, 0.0, 50.0]]]
---
vert: [[50.0, 0.0, 50.0], [50.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 50.0]]
facets: [[[0.0, 0.0, 0.0], [50.0, 0.0, 0.0], [0.0, 0.0, 50.0]], [[50.0, 0.0, 0.0], [50.0, 0.0, 50.0], [0.0, 0.0, 50.0]]]
---
备注
- 在开发过程中,我经常需要打印一些值用于调试。为此,我使用了由 logging 库创建的 logger 对象
- 我使用 itertools.takewhile() 和 itertools.dropwhile() 函数来简化一些任务
我正在尝试解析 VRML 文件中的一些文本块。具体来说,我对 IndexedFaceSet 中包含的信息感兴趣:它表示一系列三角形,由坐标点(point 字段)和点之间的连接关系(coordIndex 字段)定义。
我已经成功编写了一个函数来解析单个(第一个)IndexedFaceSet 的文本,但我不知道如何修改代码,才能为每个 IndexedFaceSet 都存储列表 [translate,verts,facets,normals]。
我的函数是:
def extractDataFromVRML(root):
    """
    Parse the first 'point [' / triangle-index section of a VRML file.

    The file ``root + '.wrl'`` is read in three phases:

    1. lines are skipped until a line whose tokens are exactly 'point [';
       every 'translation x y z' line met on the way is recorded;
    2. each following line that contains a comma contributes the text before
       its first comma as one vertex string;
    3. each following line with at least three comma/space separated tokens
       is read as a triangle whose first three tokens index into the
       vertices, until a line equal to '}' is met.

    Every phase also stops at end of file.

    :param root: path of the VRML file without its '.wrl' extension
    :return: the list [translate, verts, facets, normals] where translate is
        a list of [x, y, z] floats, verts a list of 'x y z' strings, facets
        a list of [p1, p2, p3] triangles (each point a list of three floats)
        and normals an empty list (normal computation is not enabled)
    """
    # Lists to be populated
    translate = []
    verts = []
    facets = []
    normals = []

    # The with-statement closes the file even when a conversion below raises.
    with open(root + '.wrl') as f:
        # Skip through the initial stuff.  Iterating over the file (rather
        # than looping on readline()) ends the loop at end of file.
        for line in f:
            linney = line.split()
            # A translation line is not present in the example VRML file
            if len(linney) == 4 and linney[0] == 'translation':
                translate.append(
                    [float(linney[1]), float(linney[2]), float(linney[3])])
            if linney == ['point', '[']:
                break

        # Reading vertex coordinates: build a list of 'x y z' strings
        for xyz in f:
            comma = xyz.find(',')
            if comma < 0:  # end of vertex coordinates
                break
            verts.append(xyz[:comma])
        # Every collected entry is a vertex, so report the full length.
        print('We have', len(verts), 'vertices.')

        print('Reading triangles.')
        for line in f:
            if line == '}\n':
                break  # end of the triangle section
            abc = line.replace(',', ' ').split()  # list of vertex ids
            if len(abc) < 3:
                continue  # separation between groups of triangles
            # Look up each vertex in the list to generate a triangle
            tri = []
            for vertex_id in abc[:3]:
                xyz = verts[int(vertex_id)].split()
                tri.append([float(xyz[0]), float(xyz[1]), float(xyz[2])])
            facets.append(tri)

    print('END')
    return [translate, verts, facets, normals]
vrml中的立方体示例如下:
#VRML V2.0 utf8
WorldInfo {
info [ "File created using CATIA" ]
}
NavigationInfo {
type [ "EXAMINE" , "WALK" , "FLY" ]
}
Background {
skyColor [ 1 1 1 ]
}
Viewpoint {
position 0.411502 0.183945 0.216403
orientation 0.326678 0.502925 0.800218 2.185925
fieldOfView 0.471225
description "Main Viewpoint"
}
Viewpoint {
position 0.288675 0.288675 0.288675
orientation 0.187053 0.451587 0.872399 2.448076
fieldOfView 0.471225
description "Iso View"
}
Viewpoint {
position 0.500000 0.000000 0.000000
orientation 0.577350 0.577350 0.577350 2.094395
fieldOfView 0.471225
description "Front View"
}
Viewpoint {
position -0.500000 0.000000 0.000000
orientation 0.577350 -0.577350 -0.577350 2.094395
fieldOfView 0.471225
description "Back View"
}
Viewpoint {
position 0.000000 -0.500000 0.000000
orientation 1.000000 -0.000173 0.000173 1.570796
fieldOfView 0.471225
description "Left View"
}
Viewpoint {
position 0.000000 0.500000 0.000000
orientation -0.000122 -0.707107 -0.707107 3.141348
fieldOfView 0.471225
description "Right View"
}
Viewpoint {
position 0.000000 0.000000 0.500000
orientation 0.000000 0.000000 1.000000 1.570796
fieldOfView 0.471225
description "Top View"
}
Viewpoint {
position 0.000000 0.000000 -0.500000
orientation 0.707107 0.707107 0.000000 3.141593
fieldOfView 0.471225
description "Bottom View"
}
Transform {
scale 0.001 0.001 0.001
children [
Group {
children [
Group {
children [
DEF _000000002346A9A0 Group {
children [
]
}
]
}
Group {
children [
DEF _0000000023470B20 Group {
children [
]
}
]
}
Group {
children [
DEF _00000000234700D0 Group {
children [
]
}
]
}
Group {
children [
Group {
children [
DEF _0000000020D0F390 Group {
children [
Shape {
appearance Appearance {
material DEF _material0 Material {
diffuseColor 0.823529 0.823529 1
}
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 50,
50 50 50,
50 0 50,
0 0 50,
]
}
coordIndex [
3,2,0,-1,
2,1,0,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 0,
50 50 0,
50 0 0,
0 0 0,
]
}
coordIndex [
3,0,2,-1,
0,1,2,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 0 50,
0 0 0,
0 50 0,
0 50 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
0 50 50,
0 50 0,
50 50 0,
50 50 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
50 50 50,
50 50 0,
50 0 0,
50 0 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
Shape {
appearance Appearance {
material USE _material0
}
geometry IndexedFaceSet {
solid FALSE
coord Coordinate {
point [
50 0 50,
50 0 0,
0 0 0,
0 0 50,
]
}
coordIndex [
2,1,3,-1,
1,0,3,-1,
]
}
}
]
}
]
}
]
}
]
}
]
}
这是一个并不简单的解析问题。如果你能找到现成的 Python 包来解析,那是最好的。否则,我的方法是:逐行扫描文件;一旦遇到我能识别的内容,就把剩余的行交给对应块的解析器处理。例如,我写了一个解析器,用来解析以 "geometry IndexedFaceSet" 行开头的块。
import itertools
import json
import logging
from pprint import pprint
import os
# The log level is read from the LOGLEVEL environment variable and falls
# back to logging.WARN when it is unset: use logging.WARN to turn off the
# debug print, use logging.DEBUG to turn it on.
logging.basicConfig(level=os.getenv('LOGLEVEL', logging.WARN))
# Module-level logger used for the debug output of the parsing code.
logger = logging.getLogger(__name__)
def skip_pass(marker, lines):
    """
    Skip lines until one containing the marker is reached, then skip the
    marker line as well.

    :param marker: substring identifying the line to skip past
    :param lines: iterable of lines, e.g. an open file
    :return: an iterator over the lines that follow the marker line; it is
        empty when no line contains the marker
    """
    result = itertools.dropwhile(
        lambda line: marker not in line,  # Condition
        lines)  # The lines
    # Skip past the marker line itself.  The default value keeps a missing
    # marker from raising StopIteration out of this function.
    next(result, None)
    return result
def take(marker, lines):
    """
    Take the leading run of lines which contain the marker.

    Iteration stops at the first line that does not contain the marker;
    later lines are not returned even if they contain it.  That first
    non-matching line is consumed from ``lines`` and discarded.

    :param marker: substring every taken line must contain
    :param lines: iterable of lines, e.g. an open file
    :return: a lazy iterator over the matching lines
    """
    return itertools.takewhile(
        lambda line: marker in line,  # Condition
        lines)  # The lines
def parse_indexed_face_set(translate, lines):
    """
    Parse one block of 'geometry IndexedFaceSet'.

    :param translate: value stored unchanged under the 'translate' key
    :param lines: iterator of lines positioned after the
        'geometry IndexedFaceSet' line; it is advanced past the block's
        'point' and 'coordIndex' lists
    :return: a dict with the keys 'vert' (list of [x, y, z] floats),
        'facets' (one list of vertices per coordIndex line), 'translate'
        and 'normals' (always an empty list)
    """
    # Parse the "point" structure: one "x y z," entry per line
    lines = skip_pass('point', lines)
    point_lines = take(',', lines)
    # Strip surrounding whitespace before the trailing comma so that a space
    # after the comma does not end up in the last token.
    verts = [
        [float(token) for token in line.strip().rstrip(',').split()]
        for line in point_lines
    ]
    logger.debug('verts: %r', verts)

    # Parse the "coordIndex" structure: one "a,b,c,-1," entry per line
    lines = skip_pass('coordIndex', lines)
    coor_lines = take(',', lines)
    # Blank tokens (after the trailing comma) are skipped instead of being
    # handed to int().
    coord_index = [
        tuple(int(token) for token in line.split(',') if token.strip())
        for line in coor_lines
    ]
    logger.debug('coord_index: %r', coord_index)

    facets = []
    for indices in coord_index:
        # -1 terminates a face; cut there so it is never used as a Python
        # negative index into verts.
        if -1 in indices:
            indices = indices[:indices.index(-1)]
        # Only the first three vertices of a face are kept.
        facets.append([verts[i] for i in indices[:3]])
    logger.debug('facets: %r', facets)

    return dict(vert=verts, facets=facets, translate=translate, normals=[])
def parse_translate(line):
    """
    Given a line such as: "translation 5 6 7", return [5.0, 6.0, 7.0]

    The first token (the keyword) is ignored; the next three tokens at most
    are converted to float.
    """
    return [float(x) for x in line.split()[1:4]]
def extractDataFromVRML(root):
    """
    Parse every 'geometry IndexedFaceSet' block of a VRML file.

    The file ``root + '.wrl'`` is scanned line by line.  When a line
    contains 'geometry IndexedFaceSet', the rest of the file iterator is
    handed to parse_indexed_face_set(), which consumes the lines of that
    block; scanning then resumes after it.  A line whose first token is
    'translation' updates the translation passed to the blocks parsed
    after it.

    :param root: path of the VRML file without its '.wrl' extension
    :return: a list with one parse_indexed_face_set() result per block, in
        file order
    """
    indexed_face_sets = []
    translate = []  # stays empty until a translation line is seen
    with open(root + '.wrl') as infile:
        for line in infile:
            if 'geometry IndexedFaceSet' in line:
                # The block parser shares the file iterator with this loop.
                a_set = parse_indexed_face_set(
                    translate=translate, lines=infile)
                indexed_face_sets.append(a_set)
            elif line.split()[:1] == ['translation']:
                translate = parse_translate(line)
    return indexed_face_sets
# main
# Parse root.wrl and print the vertices and facets of each face set found.
indexed_face_sets = extractDataFromVRML('root')
for a_set in indexed_face_sets:
    print('vert:', a_set['vert'])
    print('facets:', a_set['facets'])
    print('---')
输出
vert: [[0.0, 50.0, 50.0], [50.0, 50.0, 50.0], [50.0, 0.0, 50.0], [0.0, 0.0, 50.0]]
facets: [[[0.0, 0.0, 50.0], [50.0, 0.0, 50.0], [0.0, 50.0, 50.0]], [[50.0, 0.0, 50.0], [50.0, 50.0, 50.0], [0.0, 50.0, 50.0]]]
---
vert: [[0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
facets: [[[0.0, 0.0, 0.0], [0.0, 50.0, 0.0], [50.0, 0.0, 0.0]], [[0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0]]]
---
vert: [[0.0, 0.0, 50.0], [0.0, 0.0, 0.0], [0.0, 50.0, 0.0], [0.0, 50.0, 50.0]]
facets: [[[0.0, 50.0, 0.0], [0.0, 0.0, 0.0], [0.0, 50.0, 50.0]], [[0.0, 0.0, 0.0], [0.0, 0.0, 50.0], [0.0, 50.0, 50.0]]]
---
vert: [[0.0, 50.0, 50.0], [0.0, 50.0, 0.0], [50.0, 50.0, 0.0], [50.0, 50.0, 50.0]]
facets: [[[50.0, 50.0, 0.0], [0.0, 50.0, 0.0], [50.0, 50.0, 50.0]], [[0.0, 50.0, 0.0], [0.0, 50.0, 50.0], [50.0, 50.0, 50.0]]]
---
vert: [[50.0, 50.0, 50.0], [50.0, 50.0, 0.0], [50.0, 0.0, 0.0], [50.0, 0.0, 50.0]]
facets: [[[50.0, 0.0, 0.0], [50.0, 50.0, 0.0], [50.0, 0.0, 50.0]], [[50.0, 50.0, 0.0], [50.0, 50.0, 50.0], [50.0, 0.0, 50.0]]]
---
vert: [[50.0, 0.0, 50.0], [50.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 50.0]]
facets: [[[0.0, 0.0, 0.0], [50.0, 0.0, 0.0], [0.0, 0.0, 50.0]], [[50.0, 0.0, 0.0], [50.0, 0.0, 50.0], [0.0, 0.0, 50.0]]]
---
备注
- 在开发过程中,我经常需要打印一些值用于调试。为此,我使用了由 logging 库创建的 logger 对象
- 我使用 itertools.takewhile() 和 itertools.dropwhile() 函数来简化一些任务