为什么多线程不能让遍历 numpy 数组变得更快?
Why doesn't threading make iterating over a numpy array faster?
我的问题与 Python 中的多线程有关。我要解决的问题是:从 1000 万个数组中,找出与给定数组(长度同为 64)相似度达到 80% 的数组。问题在于,当我在 while 循环中逐个线性迭代时,代码的执行时间是 12.148 秒;而改用多线程后,至少需要 28-30 秒才能执行完。两种实现都列在下面。欢迎任何建议,也请指点一下:为什么在这种情况下多线程反而更慢?
第一个代码:
import timeit
import numpy as np
# Data set to scan, loaded from disk; the loop below compares each row ph[i]
# against myPhoto1.
ph = np.load('newDataPhoto.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
# Time a plain single-threaded scan: visit rows 0..9999999 of ``ph`` one by one
# and count those that differ from ``myPhoto1`` in at most 14 positions.
start = timeit.default_timer()
kk = 0  # number of rows within the allowed distance
i = 0
while i < 10000000:
    # Number of positions where row i disagrees with the query vector.
    u = np.count_nonzero(ph[i] != myPhoto1)
    if u <= 14:
        kk += 1
    i += 1
print(kk)
stop = timeit.default_timer()
# print() with a single argument behaves the same on Python 2 and Python 3.
print(stop - start)
第二个(多线程):
from threading import Thread
import numpy as np
import timeit
# Module-level timestamp taken before the data is loaded.
# NOTE(review): nothing in the visible code reads it -- hamming_dist() keeps
# its own local ``start``.
start = timeit.default_timer()
# Data set scanned by hamming_dist(), which indexes it one row at a time.
ph = np.load('newDataPhoto.npy')
# NOTE(review): loaded but never referenced in the visible code.
pc = np.load('newDataPopCount.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
from threading import Lock

# Running total of matching rows, shared by every worker thread.  Defined at
# module level so hamming_dist() works even if nothing has initialised it yet.
kk = 0
# Guards updates to ``kk``: ``kk += n`` is a read-modify-write that concurrent
# threads can interleave, silently losing counts.
_kk_lock = Lock()


def hamming_dist(left, right, name):
    """Count rows of ``ph`` in ``[left, right]`` that are close to ``myPhoto1``.

    A row matches when it differs from ``myPhoto1`` in at most 14 positions.
    The number of matches is added to the module-level tally ``kk`` and the
    elapsed time of the scan is printed after ``name``.

    Args:
        left: Index of the first row to examine (inclusive).
        right: Index of the last row to examine (inclusive).
        name: Label printed before the elapsed time.

    Returns:
        The number of matching rows found in the range.
    """
    global kk
    start = timeit.default_timer()
    matches = 0  # counted locally so the shared tally is touched only once
    while left <= right:
        if np.count_nonzero(ph[left] != myPhoto1) <= 14:
            matches += 1
        left += 1
    stop = timeit.default_timer()
    with _kk_lock:
        kk += matches
    # print() with a single argument behaves the same on Python 2 and 3.
    print(name)
    print(stop - start)
    return matches
def Main():
    """Scan the whole data set with four worker threads.

    Resets the shared tally ``kk``, starts one ``hamming_dist`` thread per
    quarter of the row range (bounds are inclusive on both ends), prints
    ``main done`` once all threads have been started, then waits for them.

    Returns:
        The value of ``kk`` after every worker has finished.
    """
    global kk
    kk = 0
    ranges = (
        (0, 2500000, 't1'),
        (2500001, 5000000, 't2'),
        (5000001, 7500000, 't3'),
        (7500001, 9999999, 't4'),
    )
    workers = [Thread(target=hamming_dist, args=bounds) for bounds in ranges]
    for worker in workers:
        worker.start()
    # Printed right after start-up, i.e. before the workers report their times.
    print('main done')
    # Wait for the workers so that ``kk`` is final when this function returns.
    for worker in workers:
        worker.join()
    return kk


if __name__ == "__main__":
    Main()
它们的输出顺序是:
38
12.148679018
#####
main done
t4
26.4695241451
t2
27.4959039688
t3
27.5113890171
t1
27.5896160603
我解决了这个问题。我发现线程受到 GIL(全局解释器锁)的限制:同一时刻只有一个线程能执行 Python 字节码,因此多线程无法同时利用多个处理器核心。不过改用 multiprocessing(多进程)模块就可以了。这是我所做的修改:
import numpy as np
import multiprocessing
import timeit
# Module-level timestamp taken before the data is loaded.
# NOTE(review): nothing in the visible code reads it -- hamming_dist() keeps
# its own local ``start``.
start = timeit.default_timer()
# Data set scanned by hamming_dist(), which indexes it one row at a time.
ph = np.load('newDataPhoto.npy')
# NOTE(review): loaded but never referenced in the visible code.
pc = np.load('newDataPopCount.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
# In-process tally of matching rows.  Every child process works on its own
# copy of module globals, so this is only meaningful for in-process calls;
# Main() collects the cross-process total through a shared counter instead.
kk = 0


def hamming_dist(left, right, name, counter=None):
    """Count rows of ``ph`` in ``[left, right]`` that are close to ``myPhoto1``.

    A row matches when it differs from ``myPhoto1`` in at most 14 positions.
    The elapsed time of the scan is printed after ``name``.

    Args:
        left: Index of the first row to examine (inclusive).
        right: Index of the last row to examine (inclusive).
        name: Label printed before the elapsed time.
        counter: Optional ``multiprocessing.Value`` shared with the parent
            process; the match count is added to it under its lock.  When
            omitted, the count is added to the module-level ``kk`` instead.

    Returns:
        The number of matching rows found in the range.
    """
    global kk
    start = timeit.default_timer()
    matches = 0
    while left <= right:
        if np.count_nonzero(ph[left] != myPhoto1) <= 14:
            matches += 1
        left += 1
    stop = timeit.default_timer()
    if counter is None:
        kk += matches
    else:
        # A plain global would be invisible to the parent process, so the
        # result is published through the shared counter.
        with counter.get_lock():
            counter.value += matches
    # print() with a single argument behaves the same on Python 2 and 3.
    print(name)
    print(stop - start)
    return matches


def Main():
    """Scan the whole data set with four worker processes.

    Starts one ``hamming_dist`` process per quarter of the row range (bounds
    are inclusive on both ends), prints ``main done`` once all processes have
    been started, waits for them, and stores the combined match count in the
    module-level ``kk``.

    Returns:
        The total number of matching rows reported by the workers.
    """
    global kk
    kk = 0
    # Shared integer that the children add their partial counts to.
    total = multiprocessing.Value('l', 0)
    ranges = (
        (0, 2500000, 't1'),
        (2500001, 5000000, 't2'),
        (5000001, 7500000, 't3'),
        (7500001, 9999999, 't4'),
    )
    workers = [
        multiprocessing.Process(target=hamming_dist, args=(lo, hi, label, total))
        for lo, hi, label in ranges
    ]
    for worker in workers:
        worker.start()
    # Printed right after start-up, i.e. before the workers report their times.
    print('main done')
    for worker in workers:
        worker.join()
    kk = total.value
    return kk


if __name__ == "__main__":
    Main()
我的问题与 Python 中的多线程有关。我要解决的问题是:从 1000 万个数组中,找出与给定数组(长度同为 64)相似度达到 80% 的数组。问题在于,当我在 while 循环中逐个线性迭代时,代码的执行时间是 12.148 秒;而改用多线程后,至少需要 28-30 秒才能执行完。两种实现都列在下面。欢迎任何建议,也请指点一下:为什么在这种情况下多线程反而更慢? 第一个代码:
import timeit
import numpy as np
# Data set to scan, loaded from disk; the loop below compares each row ph[i]
# against myPhoto1.
ph = np.load('newDataPhoto.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
# Time a plain single-threaded scan: visit rows 0..9999999 of ``ph`` one by one
# and count those that differ from ``myPhoto1`` in at most 14 positions.
start = timeit.default_timer()
kk = 0  # number of rows within the allowed distance
i = 0
while i < 10000000:
    # Number of positions where row i disagrees with the query vector.
    u = np.count_nonzero(ph[i] != myPhoto1)
    if u <= 14:
        kk += 1
    i += 1
print(kk)
stop = timeit.default_timer()
# print() with a single argument behaves the same on Python 2 and Python 3.
print(stop - start)
第二个(多线程):
from threading import Thread
import numpy as np
import timeit
# Module-level timestamp taken before the data is loaded.
# NOTE(review): nothing in the visible code reads it -- hamming_dist() keeps
# its own local ``start``.
start = timeit.default_timer()
# Data set scanned by hamming_dist(), which indexes it one row at a time.
ph = np.load('newDataPhoto.npy')
# NOTE(review): loaded but never referenced in the visible code.
pc = np.load('newDataPopCount.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
from threading import Lock

# Running total of matching rows, shared by every worker thread.  Defined at
# module level so hamming_dist() works even if nothing has initialised it yet.
kk = 0
# Guards updates to ``kk``: ``kk += n`` is a read-modify-write that concurrent
# threads can interleave, silently losing counts.
_kk_lock = Lock()


def hamming_dist(left, right, name):
    """Count rows of ``ph`` in ``[left, right]`` that are close to ``myPhoto1``.

    A row matches when it differs from ``myPhoto1`` in at most 14 positions.
    The number of matches is added to the module-level tally ``kk`` and the
    elapsed time of the scan is printed after ``name``.

    Args:
        left: Index of the first row to examine (inclusive).
        right: Index of the last row to examine (inclusive).
        name: Label printed before the elapsed time.

    Returns:
        The number of matching rows found in the range.
    """
    global kk
    start = timeit.default_timer()
    matches = 0  # counted locally so the shared tally is touched only once
    while left <= right:
        if np.count_nonzero(ph[left] != myPhoto1) <= 14:
            matches += 1
        left += 1
    stop = timeit.default_timer()
    with _kk_lock:
        kk += matches
    # print() with a single argument behaves the same on Python 2 and 3.
    print(name)
    print(stop - start)
    return matches
def Main():
    """Scan the whole data set with four worker threads.

    Resets the shared tally ``kk``, starts one ``hamming_dist`` thread per
    quarter of the row range (bounds are inclusive on both ends), prints
    ``main done`` once all threads have been started, then waits for them.

    Returns:
        The value of ``kk`` after every worker has finished.
    """
    global kk
    kk = 0
    ranges = (
        (0, 2500000, 't1'),
        (2500001, 5000000, 't2'),
        (5000001, 7500000, 't3'),
        (7500001, 9999999, 't4'),
    )
    workers = [Thread(target=hamming_dist, args=bounds) for bounds in ranges]
    for worker in workers:
        worker.start()
    # Printed right after start-up, i.e. before the workers report their times.
    print('main done')
    # Wait for the workers so that ``kk`` is final when this function returns.
    for worker in workers:
        worker.join()
    return kk


if __name__ == "__main__":
    Main()
它们的输出顺序是:
38
12.148679018
#####
main done
t4
26.4695241451
t2
27.4959039688
t3
27.5113890171
t1
27.5896160603
我解决了这个问题。我发现线程受到 GIL(全局解释器锁)的限制:同一时刻只有一个线程能执行 Python 字节码,因此多线程无法同时利用多个处理器核心。不过改用 multiprocessing(多进程)模块就可以了。这是我所做的修改:
import numpy as np
import multiprocessing
import timeit
# Module-level timestamp taken before the data is loaded.
# NOTE(review): nothing in the visible code reads it -- hamming_dist() keeps
# its own local ``start``.
start = timeit.default_timer()
# Data set scanned by hamming_dist(), which indexes it one row at a time.
ph = np.load('newDataPhoto.npy')
# NOTE(review): loaded but never referenced in the visible code.
pc = np.load('newDataPopCount.npy')
# Query vector: 64 float entries, each written as 0. or 1.
myPhoto1 = np.array([ 1. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 0. , 1. , 1. , 1. , 0. , 0.
, 0. , 1. , 1. , 0. , 1. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 1. , 1. , 0. , 0. , 1. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 1. , 0. , 0. , 1.
, 1. , 0. , 1. , 0. , 1. , 0. , 0. , 1. , 1. , 0. ])
# In-process tally of matching rows.  Every child process works on its own
# copy of module globals, so this is only meaningful for in-process calls;
# Main() collects the cross-process total through a shared counter instead.
kk = 0


def hamming_dist(left, right, name, counter=None):
    """Count rows of ``ph`` in ``[left, right]`` that are close to ``myPhoto1``.

    A row matches when it differs from ``myPhoto1`` in at most 14 positions.
    The elapsed time of the scan is printed after ``name``.

    Args:
        left: Index of the first row to examine (inclusive).
        right: Index of the last row to examine (inclusive).
        name: Label printed before the elapsed time.
        counter: Optional ``multiprocessing.Value`` shared with the parent
            process; the match count is added to it under its lock.  When
            omitted, the count is added to the module-level ``kk`` instead.

    Returns:
        The number of matching rows found in the range.
    """
    global kk
    start = timeit.default_timer()
    matches = 0
    while left <= right:
        if np.count_nonzero(ph[left] != myPhoto1) <= 14:
            matches += 1
        left += 1
    stop = timeit.default_timer()
    if counter is None:
        kk += matches
    else:
        # A plain global would be invisible to the parent process, so the
        # result is published through the shared counter.
        with counter.get_lock():
            counter.value += matches
    # print() with a single argument behaves the same on Python 2 and 3.
    print(name)
    print(stop - start)
    return matches


def Main():
    """Scan the whole data set with four worker processes.

    Starts one ``hamming_dist`` process per quarter of the row range (bounds
    are inclusive on both ends), prints ``main done`` once all processes have
    been started, waits for them, and stores the combined match count in the
    module-level ``kk``.

    Returns:
        The total number of matching rows reported by the workers.
    """
    global kk
    kk = 0
    # Shared integer that the children add their partial counts to.
    total = multiprocessing.Value('l', 0)
    ranges = (
        (0, 2500000, 't1'),
        (2500001, 5000000, 't2'),
        (5000001, 7500000, 't3'),
        (7500001, 9999999, 't4'),
    )
    workers = [
        multiprocessing.Process(target=hamming_dist, args=(lo, hi, label, total))
        for lo, hi, label in ranges
    ]
    for worker in workers:
        worker.start()
    # Printed right after start-up, i.e. before the workers report their times.
    print('main done')
    for worker in workers:
        worker.join()
    kk = total.value
    return kk


if __name__ == "__main__":
    Main()