在 Python 中将多线程转换为多处理

Convert multithreading to multiprocessing in Python

如何将这个东西从多线程转换为多处理?使用多线程,它实际上运行得更慢,而使用的 CPU 并不多。所以我希望多处理可能有所帮助。

  def multiprocess(sentences):
      """Process the first two sentences on worker threads.

      Each worker calls processthesentence(...) on one nine-field
      sentence and appends an (original, processed) tuple to a shared
      result list.  list.append is atomic under the GIL, so no explicit
      lock is taken.  Returns the shared list after all workers finish.
      """
      results = []

      # runs on its own thread
      def worker(asentence, idx):
          processed = processthesentence(asentence[0], asentence[1], asentence[2],
                                         asentence[3], asentence[4], asentence[5],
                                         asentence[6], asentence[7], asentence[8])
          results.append((asentence, processed))

      # fan out one thread per sentence (only the first two, as before)
      workers = []
      for idx in range(2):
          th = Thread(target=worker, args=(sentences[idx], idx))
          workers.append(th)
          th.start()

      # wait for every worker before handing the results back
      for th in workers:
          th.join()

      return results

我试过了(只是把 Thread 一词替换成 Process,但这不起作用):

  from multiprocessing import Process 

  def processthesentence(asentence):
      """Return *asentence* with the suffix " done" appended."""
      suffix = " done"
      return asentence + suffix

  def multiprocess(sentences):
      """Process the first two sentences in parallel worker processes.

      Returns a list of (sentence, processed_sentence) tuples.

      Bug fixed: the original appended results to a closure-local list
      from inside the child processes, but each child mutates its own
      copy of that list, so the parent always returned [].  A Pool is
      used instead so results are sent back to the parent.  The worker
      must be a module-level function (processthesentence) because a
      nested function cannot be pickled under the 'spawn' start method
      (the default on Windows/macOS).
      """
      from multiprocessing import Pool  # local: the file only imports Process at top

      todo = sentences[:2]  # the original code only handled the first two
      if not todo:
          return []
      with Pool(processes=len(todo)) as pool:
          processed = pool.map(processthesentence, todo)
      return list(zip(todo, processed))


  # Bug fixed: multiprocessing requires the entry point to be guarded.
  # Under the 'spawn' start method (default on Windows/macOS) each child
  # re-imports this module, and unguarded top-level code would try to
  # spawn children recursively.
  if __name__ == "__main__":
      sentences = []
      sentences.append("I like apples.")
      sentences.append("Green apples are bad.")
      multiprocess(sentences)

尝试使用 gevent,但出现了一些错误:

import greenlet
import gevent

def dotheprocess(sentences):
    """Run processsentence over every sentence on gevent greenlets.

    Returns a list of (sentence, processed) tuples.  NOTE(review):
    greenlets provide cooperative concurrency on a single OS thread,
    not CPU parallelism — confirm this actually helps your workload.
    """
    collected = []

    # one greenlet per sentence; unpack the nine sentence fields
    def task(asentence):
        processed = processsentence(asentence[0], asentence[1], asentence[2],
                                    asentence[3], asentence[4], asentence[5],
                                    asentence[6], asentence[7], asentence[8])
        collected.append((asentence, processed))

    # spawn everything, then wait for the whole batch at once
    jobs = [gevent.spawn(task, asentence) for asentence in sentences]
    gevent.joinall(jobs)

    return collected

尝试使用 gevent 生成多个 greenlet,这样您就可以利用其他的 CPU。下面是根据您的示例改写的代码。可以看到,这里使用了一个 Queue,以便在 gevent 的上下文切换中正常收集结果:
import greenlet
import gevent
from gevent import monkey
monkey.patch_all()

def dotheprocess(sentences):
    """Fan each sentence out to a gevent greenlet; collect via a Queue.

    Returns the Queue, which holds one (sentence, processed) tuple per
    input sentence.
    """
    # Bug fixed: `import gevent` alone does not reliably make the
    # gevent.queue submodule available as an attribute, so
    # gevent.queue.Queue() can raise AttributeError.  Import the class
    # explicitly instead.
    from gevent.queue import Queue

    queue = Queue()

    # called by each greenlet: process one nine-field sentence and
    # report the result through the shared queue
    def task(asentence):
        thesentence = processsentence(asentence[0], asentence[1], asentence[2],
                                      asentence[3], asentence[4], asentence[5],
                                      asentence[6], asentence[7], asentence[8])
        queue.put((asentence, thesentence))

    threads = [gevent.spawn(task, asentence) for asentence in sentences]
    gevent.joinall(threads)

    return queue
#call the dotheprocess function with your sentences

除非线程中有一些等待函数(I/O 实现),否则线程不会使函数更快。 Multiprocessing 在理论上会有帮助,但由于开销,简单的函数不会从中获益太多,因此请谨慎使用。使用 Manager 作为共享变量。

from multiprocessing import Process, Manager, freeze_support

class multiProcess():
    """Reverse each sentence's first nine characters in worker processes.

    Results are collected through a multiprocessing Manager list so the
    parent can see what the children appended.

    NOTE(review): Manager() is created anonymously and only its list
    proxy is kept; if the manager object is garbage-collected its server
    process shuts down and the proxy dies — consider storing the manager.
    NOTE(review): the Process target is a bound method, so the whole
    instance (including processList with already-started Process
    objects) must be pickled under the 'spawn' start method — confirm
    this works on Windows/macOS.
    """

    def __init__(self, sentences):
        # shared, process-safe result list (proxy to the Manager server)
        self.responseList = Manager().list()
        # handles of the spawned worker processes
        self.processList = []
        self.sentences = sentences

    def processSentence(self,a0,a1,a2,a3,a4,a5,a6,a7,a8):
        """Concatenate the nine fields in reverse order (a8..a0)."""
        reversedValue = a8+a7+a6+a5+a4+a3+a2+a1+a0
        return reversedValue

    #called by each process
    def processFunction(self,asentence):
        # unpack the first nine characters/fields of the sentence
        pro_sentence = self.processSentence(asentence[0],asentence[1],asentence[2],asentence[3],asentence[4],asentence[5],asentence[6],asentence[7],asentence[8])
        mytuple = (asentence,pro_sentence)
        # append travels through the Manager proxy back to the parent
        self.responseList.append(mytuple)
        return

    def run(self):
        """Start one process per sentence (first two only), wait, and
        return the shared result list (a ListProxy, not a plain list)."""
        for i in range(2):
            asentence = self.sentences[i]
            p = Process(target=self.processFunction, args=(asentence,))
            self.processList.append(p)
            p.start()

        # block until every worker has finished
        for pro in self.processList:
            pro.join()

        return self.responseList


if __name__ == "__main__":
    # Required so frozen Windows executables can spawn worker processes.
    freeze_support()
    sentences = ['interesting', 'wonderful']
    worker = multiProcess(sentences)
    output = worker.run()
    print(output)

这对我来说效果最好 - 比不使用它快 50% 左右:

def processthesentence(asentence):
    """Identity transform: hand the sentence back unchanged."""
    result = asentence
    return result

import multiprocessing as mympro 
if __name__ == "__main__":
    sentences = ['I like something','Great cool']
    numberofprocesses = 3
    # Bug fixed: the pool was never closed or joined, leaking worker
    # processes.  The with-block terminates the pool on exit.
    with mympro.Pool(processes=numberofprocesses) as thepool:
        results = [thepool.apply_async(processthesentence, args=(asent,))
                   for asent in sentences]
        # .get() must happen inside the with-block: leaving it
        # terminates the workers and pending results would fail.
        output = [item.get() for item in results]