全局变量减慢 PyParsing
Global variables slowing down PyParsing
使用 PyParsing 解析同一个文件时,Ex1 大约需要 60 秒,而 Ex2 只需要约 15 秒。两者唯一的区别是:在 Ex1 中,PyParsing 使用的语法变量在 class 之前声明为全局变量;而在 Ex2 中,这些变量在各个方法内部单独声明(例如在 def parse_components(self) 中)。这是预期的行为吗?如果是,原因是什么?
Ex1(解析时间:~60s):
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
# GLOBALS for PyParsing
EOL = pp.LineEnd().suppress()
linebreak = pp.Suppress(";" + pp.LineEnd())
identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
number = pp.pyparsing_common.number
word = pp.Word(pp.alphas)
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')
ORIENT = (pp.Keyword('N')
| pp.Keyword('S')
| pp.Keyword('E')
| pp.Keyword('W')
| pp.Keyword('FN')
| pp.Keyword('FS')
| pp.Keyword('FE')
| pp.Keyword('FW'))
pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR # pair of x,y
class DEF():
def __init__(self, Base):
self.mydict = lambda: defaultdict(self.mydict)
...
...
self.ignore_nets = True
self.ignore_nets_route = False
# Each list is a new process. Careful with dependencies.
# eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
['components'],
]
if not self.ignore_nets:
self.sections_grp.append(['nets'])
self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
self.counter = modules.SharedCounter(0)
self.events = [Event()]
self.design = ''
modules.debug_log.print_repr([self.__repr__()])
#
def run(self):
for curr_file in self.def_file_design:
ifile = open(curr_file,'r')
file_string = ifile.read()
ifile.close()
self.parse_all(file_string)
# Create a process for each section to parse
def parse_all(self, file_string):
manager = Manager()
shared_dict = manager.dict()
jobs = []
for sections in self.sections_grp:
p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
jobs.append(p)
p.start()
# Wait for the workers to finish
for job in jobs:
job.join()
for sections in self.sections_grp:
for section in sections:
getattr(self, 'handle_' + section)(shared_dict)
# Spawn the processes from each group of self.sections_grp
def parse_sections(self, sections, file_string, shared_dict):
for section in sections:
to_parse = getattr(self, 'parse_' + section)
for t, s, e in to_parse().scanString(file_string):
shared_dict.update(t.asDict())
break
# Parse the DESIGN section of a .DEF file
def parse_design(self):
...
return design
# Parse the UNITS DISTANCE MICRONS section of a .DEF file
def parse_dbuPerMicron(self):
...
return dbuPerMicron
# Parse the DIEAREA section of a .DEF file
def parse_diearea(self):
...
return diearea
# Parse the COMPONENTS section of a .DEF file
def parse_components(self):
self.events[0].wait() # Wait for event[0] to finish
components_id = pp.Keyword('COMPONENTS')
end_components_id = pp.Keyword("END COMPONENTS").suppress()
begin_comp = pp.Suppress(pp.Keyword('-'))
ws_comp = pp.Suppress(pp.Keyword('+')) # parameter division in components
# compName
compName = (identifier('comp_name') + identifier('cell')
).setResultsName('compName')
...
...
...
subcomponent = pp.Group(begin_comp
+ compName
+ pp.Optional(EEQMASTER)
+ pp.Optional(SOURCE) # & because it can be in any order
+ pp.Optional(PLACEMENT)
+ pp.Optional(MASKSHIFT)
+ pp.Optional(HALO)
+ pp.Optional(ROUTEHALO)
+ pp.Optional(WEIGHT)
+ pp.Optional(REGION)
+ pp.ZeroOrMore(PROPERTY)
+ pp.Suppress(linebreak)
).setResultsName('subcomponents', listAllMatches=True)
components = pp.Group(pp.Suppress(components_id)
+ number('numComps')
+ pp.Suppress(linebreak)
+ pp.OneOrMore(subcomponent)
+ pp.Suppress(end_components_id)
).setResultsName('COMPONENTS')
return components
Ex2(解析时间:~15s):
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
class DEF():
def __init__(self, Base):
self.mydict = lambda: defaultdict(self.mydict)
...
...
self.ignore_nets = True
self.ignore_nets_route = False
# Each list is a new process. Careful with dependencies.
# eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
['components'],
]
if not self.ignore_nets:
self.sections_grp.append(['nets'])
self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
self.counter = modules.SharedCounter(0)
self.events = [Event()]
self.design = ''
modules.debug_log.print_repr([self.__repr__()])
#
def run(self):
for curr_file in self.def_file_design:
ifile = open(curr_file,'r')
file_string = ifile.read()
ifile.close()
self.parse_all(file_string)
# Create a process for each section to parse
def parse_all(self, file_string):
manager = Manager()
shared_dict = manager.dict()
jobs = []
for sections in self.sections_grp:
p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
jobs.append(p)
p.start()
# Wait for the workers to finish
for job in jobs:
job.join()
for sections in self.sections_grp:
for section in sections:
getattr(self, 'handle_' + section)(shared_dict)
# Spawn the processes from each group of self.sections_grp
def parse_sections(self, sections, file_string, shared_dict):
for section in sections:
to_parse = getattr(self, 'parse_' + section)
for t, s, e in to_parse().scanString(file_string):
shared_dict.update(t.asDict())
break
# Parse the DESIGN section of a .DEF file
def parse_design(self):
...
return design
# Parse the UNITS DISTANCE MICRONS section of a .DEF file
def parse_dbuPerMicron(self):
...
return dbuPerMicron
# Parse the DIEAREA section of a .DEF file
def parse_diearea(self):
...
return diearea
# Parse the COMPONENTS section of a .DEF file
def parse_components(self):
self.events[0].wait() # Wait for event[0] to finish
EOL = pp.LineEnd().suppress()
linebreak = pp.Suppress(";" + pp.LineEnd())
identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
number = pp.pyparsing_common.number
word = pp.Word(pp.alphas)
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')
ORIENT = (pp.Keyword('N')
| pp.Keyword('S')
| pp.Keyword('E')
| pp.Keyword('W')
| pp.Keyword('FN')
| pp.Keyword('FS')
| pp.Keyword('FE')
| pp.Keyword('FW'))
pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR # pair of x,y
components_id = pp.Keyword('COMPONENTS')
end_components_id = pp.Keyword("END COMPONENTS").suppress()
begin_comp = pp.Suppress(pp.Keyword('-'))
ws_comp = pp.Suppress(pp.Keyword('+')) # parameter division in components
# compName
compName = (identifier('comp_name') + identifier('cell')
).setResultsName('compName')
...
...
...
subcomponent = pp.Group(begin_comp
+ compName
+ pp.Optional(EEQMASTER)
+ pp.Optional(SOURCE) # & because it can be in any order
+ pp.Optional(PLACEMENT)
+ pp.Optional(MASKSHIFT)
+ pp.Optional(HALO)
+ pp.Optional(ROUTEHALO)
+ pp.Optional(WEIGHT)
+ pp.Optional(REGION)
+ pp.ZeroOrMore(PROPERTY)
+ pp.Suppress(linebreak)
).setResultsName('subcomponents', listAllMatches=True)
components = pp.Group(pp.Suppress(components_id)
+ number('numComps')
+ pp.Suppress(linebreak)
+ pp.OneOrMore(subcomponent)
+ pp.Suppress(end_components_id)
).setResultsName('COMPONENTS')
return components
一个可能的罪魁祸首似乎就明摆在眼前:
... in Ex1 the variables used by PyParsing are declared as globals before the class.
from multiprocessing import (Process, Manager, Event)
multiprocessing 可能会以一种出人意料的方式重新导入模块,或者与这些全局变量相互影响。在你的计时测试中,DEF.sections_grp 是否始终只包含一个列表(即只有 1 个进程)?
使用 PyParsing 解析同一个文件时,Ex1 大约需要 60 秒,而 Ex2 只需要约 15 秒。两者唯一的区别是:在 Ex1 中,PyParsing 使用的语法变量在 class 之前声明为全局变量;而在 Ex2 中,这些变量在各个方法内部单独声明(例如在 def parse_components(self) 中)。这是预期的行为吗?如果是,原因是什么?
Ex1(解析时间:~60s):
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
# GLOBALS for PyParsing
EOL = pp.LineEnd().suppress()
linebreak = pp.Suppress(";" + pp.LineEnd())
identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
number = pp.pyparsing_common.number
word = pp.Word(pp.alphas)
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')
ORIENT = (pp.Keyword('N')
| pp.Keyword('S')
| pp.Keyword('E')
| pp.Keyword('W')
| pp.Keyword('FN')
| pp.Keyword('FS')
| pp.Keyword('FE')
| pp.Keyword('FW'))
pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR # pair of x,y
class DEF():
def __init__(self, Base):
self.mydict = lambda: defaultdict(self.mydict)
...
...
self.ignore_nets = True
self.ignore_nets_route = False
# Each list is a new process. Careful with dependencies.
# eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
['components'],
]
if not self.ignore_nets:
self.sections_grp.append(['nets'])
self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
self.counter = modules.SharedCounter(0)
self.events = [Event()]
self.design = ''
modules.debug_log.print_repr([self.__repr__()])
#
def run(self):
for curr_file in self.def_file_design:
ifile = open(curr_file,'r')
file_string = ifile.read()
ifile.close()
self.parse_all(file_string)
# Create a process for each section to parse
def parse_all(self, file_string):
manager = Manager()
shared_dict = manager.dict()
jobs = []
for sections in self.sections_grp:
p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
jobs.append(p)
p.start()
# Wait for the workers to finish
for job in jobs:
job.join()
for sections in self.sections_grp:
for section in sections:
getattr(self, 'handle_' + section)(shared_dict)
# Spawn the processes from each group of self.sections_grp
def parse_sections(self, sections, file_string, shared_dict):
for section in sections:
to_parse = getattr(self, 'parse_' + section)
for t, s, e in to_parse().scanString(file_string):
shared_dict.update(t.asDict())
break
# Parse the DESIGN section of a .DEF file
def parse_design(self):
...
return design
# Parse the UNITS DISTANCE MICRONS section of a .DEF file
def parse_dbuPerMicron(self):
...
return dbuPerMicron
# Parse the DIEAREA section of a .DEF file
def parse_diearea(self):
...
return diearea
# Parse the COMPONENTS section of a .DEF file
def parse_components(self):
self.events[0].wait() # Wait for event[0] to finish
components_id = pp.Keyword('COMPONENTS')
end_components_id = pp.Keyword("END COMPONENTS").suppress()
begin_comp = pp.Suppress(pp.Keyword('-'))
ws_comp = pp.Suppress(pp.Keyword('+')) # parameter division in components
# compName
compName = (identifier('comp_name') + identifier('cell')
).setResultsName('compName')
...
...
...
subcomponent = pp.Group(begin_comp
+ compName
+ pp.Optional(EEQMASTER)
+ pp.Optional(SOURCE) # & because it can be in any order
+ pp.Optional(PLACEMENT)
+ pp.Optional(MASKSHIFT)
+ pp.Optional(HALO)
+ pp.Optional(ROUTEHALO)
+ pp.Optional(WEIGHT)
+ pp.Optional(REGION)
+ pp.ZeroOrMore(PROPERTY)
+ pp.Suppress(linebreak)
).setResultsName('subcomponents', listAllMatches=True)
components = pp.Group(pp.Suppress(components_id)
+ number('numComps')
+ pp.Suppress(linebreak)
+ pp.OneOrMore(subcomponent)
+ pp.Suppress(end_components_id)
).setResultsName('COMPONENTS')
return components
Ex2(解析时间:~15s):
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
class DEF():
def __init__(self, Base):
self.mydict = lambda: defaultdict(self.mydict)
...
...
self.ignore_nets = True
self.ignore_nets_route = False
# Each list is a new process. Careful with dependencies.
# eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
['components'],
]
if not self.ignore_nets:
self.sections_grp.append(['nets'])
self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
self.counter = modules.SharedCounter(0)
self.events = [Event()]
self.design = ''
modules.debug_log.print_repr([self.__repr__()])
#
def run(self):
for curr_file in self.def_file_design:
ifile = open(curr_file,'r')
file_string = ifile.read()
ifile.close()
self.parse_all(file_string)
# Create a process for each section to parse
def parse_all(self, file_string):
manager = Manager()
shared_dict = manager.dict()
jobs = []
for sections in self.sections_grp:
p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
jobs.append(p)
p.start()
# Wait for the workers to finish
for job in jobs:
job.join()
for sections in self.sections_grp:
for section in sections:
getattr(self, 'handle_' + section)(shared_dict)
# Spawn the processes from each group of self.sections_grp
def parse_sections(self, sections, file_string, shared_dict):
for section in sections:
to_parse = getattr(self, 'parse_' + section)
for t, s, e in to_parse().scanString(file_string):
shared_dict.update(t.asDict())
break
# Parse the DESIGN section of a .DEF file
def parse_design(self):
...
return design
# Parse the UNITS DISTANCE MICRONS section of a .DEF file
def parse_dbuPerMicron(self):
...
return dbuPerMicron
# Parse the DIEAREA section of a .DEF file
def parse_diearea(self):
...
return diearea
# Parse the COMPONENTS section of a .DEF file
def parse_components(self):
self.events[0].wait() # Wait for event[0] to finish
EOL = pp.LineEnd().suppress()
linebreak = pp.Suppress(";" + pp.LineEnd())
identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
number = pp.pyparsing_common.number
word = pp.Word(pp.alphas)
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')
ORIENT = (pp.Keyword('N')
| pp.Keyword('S')
| pp.Keyword('E')
| pp.Keyword('W')
| pp.Keyword('FN')
| pp.Keyword('FS')
| pp.Keyword('FE')
| pp.Keyword('FW'))
pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR # pair of x,y
components_id = pp.Keyword('COMPONENTS')
end_components_id = pp.Keyword("END COMPONENTS").suppress()
begin_comp = pp.Suppress(pp.Keyword('-'))
ws_comp = pp.Suppress(pp.Keyword('+')) # parameter division in components
# compName
compName = (identifier('comp_name') + identifier('cell')
).setResultsName('compName')
...
...
...
subcomponent = pp.Group(begin_comp
+ compName
+ pp.Optional(EEQMASTER)
+ pp.Optional(SOURCE) # & because it can be in any order
+ pp.Optional(PLACEMENT)
+ pp.Optional(MASKSHIFT)
+ pp.Optional(HALO)
+ pp.Optional(ROUTEHALO)
+ pp.Optional(WEIGHT)
+ pp.Optional(REGION)
+ pp.ZeroOrMore(PROPERTY)
+ pp.Suppress(linebreak)
).setResultsName('subcomponents', listAllMatches=True)
components = pp.Group(pp.Suppress(components_id)
+ number('numComps')
+ pp.Suppress(linebreak)
+ pp.OneOrMore(subcomponent)
+ pp.Suppress(end_components_id)
).setResultsName('COMPONENTS')
return components
一个可能的罪魁祸首似乎就明摆在眼前:
... in Ex1 the variables used by PyParsing are declared as globals before the class.
from multiprocessing import (Process, Manager, Event)
multiprocessing 可能会以一种出人意料的方式重新导入模块,或者与这些全局变量相互影响。在你的计时测试中,DEF.sections_grp 是否始终只包含一个列表(即只有 1 个进程)?