当 threading.active_count() 返回 1 时,我可以认为我的线程都已完成吗?
Can I assume my threads are done when threading.active_count() returns 1?
给出以下 class:
from abc import ABCMeta, abstractmethod
from time import sleep
import threading
from threading import active_count, Thread
class ScraperPool(metaclass=ABCMeta):
    """Abstract pool of worker threads that drain a shared work queue.

    Subclasses implement ``scraperMethod`` (called repeatedly while the queue
    is non-empty) and are expected to provide ``HandleResults``, which
    ``run`` calls after its wait loop exits.
    """

    # Class-level defaults kept for backward compatibility. Each instance
    # receives its own lists in __init__.
    Queue = []
    ResultList = []

    def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
        """Store the pool configuration and the work queue.

        Args:
            Queue: Sequence of work items; workers consume it in place.
            MaxNumWorkers: Stored on the instance; not read by this class.
            ItemsPerWorker: Stored on the instance; not read by this class.
        """
        self.MaxNumWorkers = MaxNumWorkers
        self.ItemsPerWorker = ItemsPerWorker
        self.Queue = Queue  # For testing purposes.
        # Shadow the class attribute: appending to the class-level list would
        # leak one pool's results into every other pool instance.
        self.ResultList = []

    def initWorkerPool(self, PrintIDs=True):
        """Start ``NumWorkers()`` threads, each running ``worker``.

        Args:
            PrintIDs: Forwarded to ``worker`` to enable start/quit messages.
        """
        for w in range(self.NumWorkers()):
            Thread(target=self.worker, args=(w + 1, PrintIDs,)).start()
            sleep(1)  # Explicitly wait one second for this worker to start.

    def run(self):
        """Start the workers, poll until they are gone, then handle results."""
        self.initWorkerPool()
        # Wait until all workers (i.e. threads) are done.
        # NOTE: active_count() counts every live thread in the process,
        # including any started by other modules, not only this pool's.
        while active_count() > 1:
            print("Active threads: " + str(active_count()))
            sleep(5)
        self.HandleResults()

    def worker(self, id, printID):
        """Thread target: call ``scraperMethod`` until the queue is empty.

        Args:
            id: Worker number used only in the log messages.
            printID: When true, print a message on start and on exit.
        """
        if printID:
            print("Starting worker " + str(id) + ".")
        while (len(self.Queue) > 0):
            self.scraperMethod()
        if printID:
            print("Worker " + str(id) + " is quiting.")
        # Returning from the thread target ends the thread; no explicit kill
        # is required.
        return

    def NumWorkers(self):
        """Return the number of worker threads to start."""
        return 1  # Simplified for testing purposes.

    @abstractmethod
    def scraperMethod(self):
        """Process one unit of work; must be implemented by subclasses."""
        pass
class TestScraper(ScraperPool):
    """Minimal concrete pool that moves queue items into ``ResultList``."""

    def scraperMethod(self):
        """Remove the last queue item and record it as a result."""
        # print("I am scraping.")
        # print("Scraping. Threads#: " + str(active_count()))
        # Take the item with a single pop(): reading Queue[-1] and popping in
        # two separate steps lets two workers record the same item.
        try:
            temp_item = self.Queue.pop()
        except IndexError:
            # Another worker emptied the queue after the caller's length
            # check; there is nothing left to do.
            return
        self.ResultList.append(temp_item)

    def HandleResults(self):
        """Print the collected results."""
        print(self.ResultList)
# TestScraper already subclasses ScraperPool directly, so this virtual-subclass
# registration is redundant (but harmless).
ScraperPool.register(TestScraper)
# Build a pool over a two-item queue; run() returns once its wait loop exits.
scraper = TestScraper(Queue=["Jaap", "Piet"])
scraper.run()
# Report how many threads are still alive after run() has returned.
print(threading.active_count())
# print(scraper.ResultList)
当所有线程都完成后,仍然有一个活动线程 - 最后一行的 threading.active_count()
得到了那个数字。
活动线程是 <_MainThread(MainThread, started 12960)>
- 如 threading.enumerate()
所示。
我可以假设我的所有线程都在 active_count() == 1
时完成了吗?
或者,例如,导入的模块是否可能启动额外的线程,使得我的线程实际上在 active_count() > 1
时就已经完成了?而这正是我在 run 方法中使用的循环条件。
根据文档,active_count()
的计数包括主线程,所以如果计数为 1,那么你的线程很可能已经全部完成;但如果程序中还有其他会创建线程的来源,那么你的线程可能在 active_count()
降到 1 之前就已经完成了。
我建议在您的 ScraperPool
上实现一个显式的 join
方法,记录所创建的工作线程,并在需要时显式地将它们 join 到主线程,而不是通过调用 active_count() 来检查是否已完成。
还记得 GIL...
你可以假设你的线程在 active_count()
达到 1 时完成。问题是,如果任何其他模块创建线程,你永远不会达到 1。你应该明确地管理你的线程。
示例:您可以将线程放在一个列表中,然后一次加入一个。您的代码的相关更改是:
def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
    """Record the pool settings and start with no tracked worker threads."""
    # Handles of the started workers; initWorkerPool appends to this list.
    self.WorkerThreads = []
    self.Queue = Queue  # For testing purposes.
    self.ItemsPerWorker = ItemsPerWorker
    self.MaxNumWorkers = MaxNumWorkers
def initWorkerPool(self, PrintIDs=True):
    """Start one thread per worker slot and keep a handle to each of them."""
    worker_total = self.NumWorkers()
    for worker_id in range(1, worker_total + 1):
        handle = Thread(target=self.worker, args=(worker_id, PrintIDs))
        # Remember the handle so the pool can join this thread later.
        self.WorkerThreads.append(handle)
        handle.start()
        sleep(1)  # Explicitly wait one second for this worker to start.
def run(self):
    """Start the workers, wait for every one to finish, then handle results."""
    self.initWorkerPool()
    # Join the workers in list order. A later worker may already have
    # finished when we reach it; join() then returns immediately.
    # Each handle is popped before joining so the list shrinks: joining
    # element 0 repeatedly without removing it would never end the loop.
    while self.WorkerThreads:
        self.WorkerThreads.pop(0).join()
    self.HandleResults()
给出以下 class:
from abc import ABCMeta, abstractmethod
from time import sleep
import threading
from threading import active_count, Thread
class ScraperPool(metaclass=ABCMeta):
    """Abstract pool of worker threads that drain a shared work queue.

    Subclasses implement ``scraperMethod`` (called repeatedly while the queue
    is non-empty) and are expected to provide ``HandleResults``, which
    ``run`` calls after its wait loop exits.
    """

    # Class-level defaults kept for backward compatibility. Each instance
    # receives its own lists in __init__.
    Queue = []
    ResultList = []

    def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
        """Store the pool configuration and the work queue.

        Args:
            Queue: Sequence of work items; workers consume it in place.
            MaxNumWorkers: Stored on the instance; not read by this class.
            ItemsPerWorker: Stored on the instance; not read by this class.
        """
        self.MaxNumWorkers = MaxNumWorkers
        self.ItemsPerWorker = ItemsPerWorker
        self.Queue = Queue  # For testing purposes.
        # Shadow the class attribute: appending to the class-level list would
        # leak one pool's results into every other pool instance.
        self.ResultList = []

    def initWorkerPool(self, PrintIDs=True):
        """Start ``NumWorkers()`` threads, each running ``worker``.

        Args:
            PrintIDs: Forwarded to ``worker`` to enable start/quit messages.
        """
        for w in range(self.NumWorkers()):
            Thread(target=self.worker, args=(w + 1, PrintIDs,)).start()
            sleep(1)  # Explicitly wait one second for this worker to start.

    def run(self):
        """Start the workers, poll until they are gone, then handle results."""
        self.initWorkerPool()
        # Wait until all workers (i.e. threads) are done.
        # NOTE: active_count() counts every live thread in the process,
        # including any started by other modules, not only this pool's.
        while active_count() > 1:
            print("Active threads: " + str(active_count()))
            sleep(5)
        self.HandleResults()

    def worker(self, id, printID):
        """Thread target: call ``scraperMethod`` until the queue is empty.

        Args:
            id: Worker number used only in the log messages.
            printID: When true, print a message on start and on exit.
        """
        if printID:
            print("Starting worker " + str(id) + ".")
        while (len(self.Queue) > 0):
            self.scraperMethod()
        if printID:
            print("Worker " + str(id) + " is quiting.")
        # Returning from the thread target ends the thread; no explicit kill
        # is required.
        return

    def NumWorkers(self):
        """Return the number of worker threads to start."""
        return 1  # Simplified for testing purposes.

    @abstractmethod
    def scraperMethod(self):
        """Process one unit of work; must be implemented by subclasses."""
        pass
class TestScraper(ScraperPool):
    """Minimal concrete pool that moves queue items into ``ResultList``."""

    def scraperMethod(self):
        """Remove the last queue item and record it as a result."""
        # print("I am scraping.")
        # print("Scraping. Threads#: " + str(active_count()))
        # Take the item with a single pop(): reading Queue[-1] and popping in
        # two separate steps lets two workers record the same item.
        try:
            temp_item = self.Queue.pop()
        except IndexError:
            # Another worker emptied the queue after the caller's length
            # check; there is nothing left to do.
            return
        self.ResultList.append(temp_item)

    def HandleResults(self):
        """Print the collected results."""
        print(self.ResultList)
# TestScraper already subclasses ScraperPool directly, so this virtual-subclass
# registration is redundant (but harmless).
ScraperPool.register(TestScraper)
# Build a pool over a two-item queue; run() returns once its wait loop exits.
scraper = TestScraper(Queue=["Jaap", "Piet"])
scraper.run()
# Report how many threads are still alive after run() has returned.
print(threading.active_count())
# print(scraper.ResultList)
当所有线程都完成后,仍然有一个活动线程 - 最后一行的 threading.active_count()
得到了那个数字。
活动线程是 <_MainThread(MainThread, started 12960)>
- 如 threading.enumerate()
所示。
我可以假设我的所有线程都在 active_count() == 1
时完成了吗?
或者,例如,导入的模块是否可能启动额外的线程,使得我的线程实际上在 active_count() > 1
时就已经完成了?而这正是我在 run 方法中使用的循环条件。
根据文档,active_count()
的计数包括主线程,所以如果计数为 1,那么你的线程很可能已经全部完成;但如果程序中还有其他会创建线程的来源,那么你的线程可能在 active_count()
降到 1 之前就已经完成了。
我建议在您的 ScraperPool
上实现一个显式的 join
方法,记录所创建的工作线程,并在需要时显式地将它们 join 到主线程,而不是通过调用 active_count() 来检查是否已完成。
还记得 GIL...
你可以假设你的线程在 active_count()
达到 1 时完成。问题是,如果任何其他模块创建线程,你永远不会达到 1。你应该明确地管理你的线程。
示例:您可以将线程放在一个列表中,然后一次加入一个。您的代码的相关更改是:
def __init__(self, Queue, MaxNumWorkers=0, ItemsPerWorker=50):
    """Record the pool settings and start with no tracked worker threads."""
    # Handles of the started workers; initWorkerPool appends to this list.
    self.WorkerThreads = []
    self.Queue = Queue  # For testing purposes.
    self.ItemsPerWorker = ItemsPerWorker
    self.MaxNumWorkers = MaxNumWorkers
def initWorkerPool(self, PrintIDs=True):
    """Start one thread per worker slot and keep a handle to each of them."""
    worker_total = self.NumWorkers()
    for worker_id in range(1, worker_total + 1):
        handle = Thread(target=self.worker, args=(worker_id, PrintIDs))
        # Remember the handle so the pool can join this thread later.
        self.WorkerThreads.append(handle)
        handle.start()
        sleep(1)  # Explicitly wait one second for this worker to start.
def run(self):
    """Start the workers, wait for every one to finish, then handle results."""
    self.initWorkerPool()
    # Join the workers in list order. A later worker may already have
    # finished when we reach it; join() then returns immediately.
    # Each handle is popped before joining so the list shrinks: joining
    # element 0 repeatedly without removing it would never end the loop.
    while self.WorkerThreads:
        self.WorkerThreads.pop(0).join()
    self.HandleResults()