无法腌制 <class 'abc.class_name'>:abc 上的属性查找 class_name 失败

Can't pickle <class 'abc.class_name'>: attribute lookup class_name on abc failed

我在尝试根据字典中定义的依赖关系("cmdList). For instance, "BDX010" 是 "BDX020" 的依赖项)创建依赖项(子任务)时遇到上述错误。我是使用 Python 3.7.

请查看底部的堆栈跟踪以了解确切的错误消息。

import luigi
from helpers import SQLTask
import helpers
import logging 
import time

acctDate = '201904'
ssisDate = '201905'
runDesc0xx = 'prod period 4 test2'  
runDesc9xx = 'test2'  

YY = acctDate[:4]
MM = acctDate[4:6]


bdx_sql = 'r:\1.SQL\BDX_SQL\'
cmdList = {
        'BDX010': (f'"{bdx_sql}BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 ',''),
        'BDX020': (f'"{bdx_sql}BDX_001_NI_DM 020.sql"  ','BDX010'),
        'BDX022a': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 010.sql"  -S LWVPDBSQLC070 ','BDX020'),
        'BDX022b': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 020.sql"  -S LWVPDBSQLC070  -v Year1={YY} MM={MM}','BDX022a'),
        'BDX022c': (f'"{bdx_sql}BDX_022_P038_All_Final_CatAdj 030.sql"  -v Year={YY} Month={MM}', 'BDX022b'),
    }

class BDX_Task(SQLTask):
    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter(default=None)
    queryKey = luigi.Parameter()
    queryCmd = luigi.Parameter()
    runDesc = luigi.Parameter()
    dependQry = luigi.Parameter()


    def __init__(self, *args, **kwargs):
        super(BDX_Task, self).__init__(*args, **kwargs)
        self.trans_id = f"00903_BDX_Query_{self.queryKey}__{self.runDesc}"

    def requires(self):
        cmdListComb = dict(cmdList)

        if self.dependQry != '' and self.dependQry in cmdListComb:
            dep_cmd, dep_dep_key = cmdListComb[self.dependQry]

            klass = globals()[self.dependQry]
            return [klass(         
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = self.dependQry,
                queryCmd = dep_cmd,
                runDesc = self.runDesc,
                dependQry = dep_dep_key
            )]
        else:
            return []

    def run(self):

        strQuery_and_args = f""" -i {self.queryCmd} """
        time.sleep(5)
        print(strQuery_and_args)
        self.get_target().touch()


class BDX_Query_0XX(SQLTask):
    acctDate = luigi.Parameter()
    ssisDate = luigi.Parameter()  
    runDesc = luigi.Parameter()


    def __init__(self, *args, **kwargs):
        super(BDX_Query_0XX, self).__init__(*args, **kwargs)

        self.trans_id =  "00902_BDX_Query_0XX" + "__" + self.runDesc  # static.


    def requires(self):
        for queryKey, (queryCmd, dependQry) in cmdList.items():
            klass = type(queryKey, (BDX_Task,),{})
            globals()[queryKey] = klass
            yield klass(
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = queryKey,
                queryCmd = queryCmd,
                runDesc = self.runDesc,  
                dependQry = dependQry
            )

    def run(self):
        self.get_target().touch()



class BDX_Query_Main(SQLTask):
    acctDate = luigi.Parameter(default=acctDate)
    ssisDate = luigi.Parameter(default=ssisDate)  # one month lag/later than acctDate
    # runDesc = globals().runDesc

    trans_id = "09000_Metaclass test" + "__" + runDesc9xx  # static.

    def requires(self):
        YY = self.acctDate[:4]
        MM = self.acctDate[4:6]
        acctDate = self.acctDate
        ssisDate = self.ssisDate

        return [BDX_Query_0XX( acctDate=self.acctDate, ssisDate = self.ssisDate, runDesc = runDesc0xx )
               ]

    def run(self):
        self.get_target().touch()


if __name__ == '__main__':
    luigi.run()

堆栈跟踪:

DEBUG: Checking if BDX_Query_Main(acctDate=201904, ssisDate=201905) is complete
DEBUG: Checking if BDX_Query_0XX(acctDate=201904, ssisDate=201905, runDesc=prod period 4 test2) is complete
INFO: Informed scheduler that task   BDX_Query_Main_201904_201905_444c47aebc   has status   PENDING
DEBUG: BDX_Task.__init__ called for queryKey ="BDX010"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX020"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022a"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022b"
DEBUG: BDX_Task.__init__ called for queryKey ="BDX022c"
DEBUG: Checking if BDX010(acctDate=201904, ssisDate=201905, queryKey=BDX010, queryCmd="r:.SQL\BDX_SQL\BDX_001_NI_DM 010.sql" -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=) is complete
DEBUG: Checking if BDX020(acctDate=201904, ssisDate=201905, queryKey=BDX020, queryCmd="r:.SQL\BDX_SQL\BDX_001_NI_DM 020.sql"  , runDesc=prod period 4 test2, dependQry=BDX010) is complete
DEBUG: Checking if BDX022a(acctDate=201904, ssisDate=201905, queryKey=BDX022a, queryCmd="r:.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 010.sql"  -S LWVPDBSQLC070 , runDesc=prod period 4 test2, dependQry=BDX020) is complete
DEBUG: Checking if BDX022b(acctDate=201904, ssisDate=201905, queryKey=BDX022b, queryCmd="r:.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 020.sql"  -S LWVPDBSQLC070  -v Year1=2019 MM=04, runDesc=prod period 4 test2, dependQry=BDX022a) is complete
DEBUG: Checking if BDX022c(acctDate=201904, ssisDate=201905, queryKey=BDX022c, queryCmd="r:.SQL\BDX_SQL\BDX_022_P038_All_Final_CatAdj 030.sql"  -v Year=2019 Month=04, runDesc=prod period 4 test2, dependQry=BDX022b) is complete
INFO: Informed scheduler that task   BDX_Query_0XX_201904_prod_period_4_te_201905_73ccfa7be3   has status   PENDING
INFO: Informed scheduler that task   BDX022c_201904_BDX022b__r__1_SQL_BDX_SQ_5c6660ab25   has status   PENDING
INFO: Informed scheduler that task   BDX022b_201904_BDX022a__r__1_SQL_BDX_SQ_c0677e7954   has status   PENDING
INFO: Informed scheduler that task   BDX022a_201904_BDX020__r__1_SQL_BDX_SQ_784cf5b40a   has status   PENDING
INFO: Informed scheduler that task   BDX020_201904_BDX010__r__1_SQL_BDX_SQ_d37e4e46a2   has status   PENDING
INFO: Informed scheduler that task   BDX010_201904___r__1_SQL_BDX_SQ_9d353a8cd3   has status   PENDING
INFO: Done scheduling tasks
INFO: Running Worker with 5 processes
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 7
INFO: Worker Worker(salt=751624561, workers=5, host=LWVPWEACT001, username=i805649, pid=4108) was stopped. Shutting down Keep-Alive thread
Traceback (most recent call last):
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1664, in <module>
    main()
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1658, in main
    globals = debugger.run(setup['file'], None, None, is_module)
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\pydevd.py", line 1068, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "R:/1.PY/DataPipeLine/run_test.py", line 178, in <module>
    luigi.run()
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 192, in run
    return _run(*args, **kwargs)['success']
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 209, in _run
    return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\interface.py", line 172, in _schedule_and_run
    success &= worker.run()
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 1184, in run
    self._run_task(get_work_response.task_id)
  File "C:\ProgramData\Anaconda3\lib\site-packages\luigi\worker.py", line 996, in _run_task
    task_process.start()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 112, in start
    self._popen = self._Popen(self)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
    return Popen(process_obj)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
**_pickle.PicklingError: Can't pickle <class 'abc.BDX010'>: attribute lookup BDX010 on abc failed**

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spawn_main
    exitcode = _main(fd)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 115, in _main
    self = reduction.pickle.load(from_parent)
EOFError: Ran out of input

当使用 ABC 的元 class 动态创建 classes 时,模块变为 abc,并且当 worker 试图找到任务时,它会转到抽象基 class 模块并尝试在那里找到它,但它当然不存在。

要解决此问题,请通过手动重置 __module__ 变量确保 luigi 知道在哪里可以找到构建 class 的代码。

将行更改为:

klass = type(queryKey, (BDX_Task,),{'__module__':__name__})

据我所知,这只是 Windows 上的一个问题。

编辑:对不起,我真蠢。如果新进程只导入模块,还必须确保所有自定义 classes 都被重新创建和添加。

# Run this first outside any other logic so it gets run if someone imports the module:
for queryKey in cmdList.keys():
    globals()[queryKey] = type(queryKey,(BDX_Task,){'__module__':__name__})

#Then you requires function can look like:
class BDX_Query_0XX(SQLTask):

    # ...

    def requires(self):
        for queryKey, (queryCmd, dependQry) in cmdList.items():
            yield globals()[queryKey](
                acctDate = self.acctDate,
                ssisDate = self.ssisDate,
                queryKey = queryKey,
                queryCmd = queryCmd,
                runDesc = self.runDesc,  
                dependQry = dependQry
            )