如何使这个循环并行且更快?
How to make this loop parallel and faster?
我有一组图像,想用滑动窗口(按给定步长移动)从中切出一组大小为 128*128 的子图像;原始图像的行数和列数都必须大于这个尺寸。为此我编写了以下函数:
def sliding_window(image, stride, imgSize):
    """Cut a 3-D image array into square patches with a sliding window.

    Window origins advance by ``stride`` along each axis. When the last
    window would not end exactly on the image border, its origin is moved
    back so that it does; the final row/column of patches may therefore
    overlap the one before it.

    Args:
        image: Array whose shape unpacks as ``(height, width, channels)``.
        stride: Step, in pixels, between consecutive window origins.
        imgSize: Side length, in pixels, of each square patch.

    Returns:
        A list of ``(imgSize, imgSize, channels)`` arrays, each a copy of
        the corresponding image region, ordered top-to-bottom and, within
        each row of windows, left-to-right.

    Raises:
        ValueError: If ``stride`` or ``imgSize`` is not positive, or if the
            image is smaller than ``imgSize`` along either axis.
    """
    height, width, _ = image.shape
    if stride <= 0 or imgSize <= 0:
        raise ValueError(
            "stride and imgSize must be positive, got stride={} imgSize={}".format(
                stride, imgSize
            )
        )
    # Without this check a too-small image would produce a negative origin
    # and silently return wrongly shaped patches.
    if height < imgSize or width < imgSize:
        raise ValueError(
            "image of size {}x{} is smaller than the {}x{} window".format(
                height, width, imgSize, imgSize
            )
        )

    def _origins(extent):
        """Return the window start offsets along an axis of length ``extent``."""
        origins = list(range(0, extent - imgSize + stride, stride))
        # Pull the last window back so it ends exactly on the border.
        if origins[-1] + imgSize != extent:
            origins[-1] = extent - imgSize
        return origins

    rows = _origins(height)
    cols = _origins(width)
    # np.array(...) copies the slice so patches do not alias the source image.
    return [
        np.array(image[y:y + imgSize, x:x + imgSize, :])
        for y in rows
        for x in cols
    ]
以及调用该函数的主要代码片段:
# Build the patch dataset sequentially: every image that is at least
# SIZE x SIZE is cut into patches, and each kept patch contributes one entry
# to image_data and one matching entry to image_label.
im_counter = 0
image_data = []
image_label = []
for cl in file_images:
    for img_file in data[cl]:
        path = img_path + cl + "/" + img_file
        im = image.load_img(path)
        im = image.img_to_array(im)
        im_counter += 1
        if im_counter % 500 == 0:
            print("{} images processed...".format(im_counter))
        # Skip images that cannot hold a single SIZE x SIZE window.
        if im.shape[0] < SIZE or im.shape[1] < SIZE:
            continue
        for patch in sliding_window(im, STRIDE, SIZE):
            # Only patches with at least three channels are kept.
            if patch.shape[2] >= 3:
                image_data.append(preprocess_input(patch))
                # Labels belong in image_label; calling .append on `image`
                # (the object providing load_img) was a bug.
                # NOTE(review): one label per kept patch is assumed here so
                # that image_data and image_label stay aligned - confirm.
                image_label.append(class_dictionary[cl])
        # No per-patch `del` / gc.collect(): a full garbage collection for
        # every patch is very expensive, and the patch stays referenced from
        # image_data anyway.
现在,上面的代码片段仅处理 3000 张图像就要运行非常久(只使用 1 个 CPU 核心时至少需要 25 小时)。我想让它更快:我可以使用一台服务器,它的 CPU 有很多核心,所以您能否给出一个并行版本,让它运行得更快?
注意:从原始图像返回的子图像的顺序非常重要,不允许任意顺序的图像。
下面是一个您可以尝试的大致思路。
def main(processes=30):
    """Process every image in a worker pool and collect patches in order.

    Builds one ``(path, class info)`` task per image file, runs
    ``handle_one_image`` on each task in a ``multiprocessing.Pool``, and
    appends the returned patches and their class info to ``image_data`` and
    ``image_label``. Neither name is assigned in this function, so both must
    already exist as module-level lists.

    Args:
        processes: Number of worker processes in the pool. Defaults to 30,
            the value that was previously hard-coded.

    NOTE: on platforms that start workers with "spawn", call this from under
    an ``if __name__ == "__main__":`` guard.
    """
    # One (file path, class_dictionary entry) tuple per image file.
    args = [
        (img_path + cl + "/" + img_file, class_dictionary[cl])
        for cl in file_images
        for img_file in data[cl]
    ]
    with multiprocessing.Pool(processes=processes) as pool:
        # starmap calls handle_one_image(path, info) for every task and
        # returns the results in the same order as `args`, so patch order
        # is preserved.
        for patches, info in pool.starmap(handle_one_image, args):
            # `patches` is the list of patches for one image; `info` is the
            # class_dictionary entry that was passed in with it.
            image_data.extend(patches)
            image_label.extend([info] * len(patches))
def handle_one_image(path, info):
    """Load one image and return its preprocessed patches along with ``info``.

    Args:
        path: Path passed to ``image.load_img``.
        info: Opaque value returned unchanged, so the caller can pair the
            patches with it (the caller passes the class info).

    Returns:
        A ``(patches, info)`` tuple. ``patches`` is a list of the results of
        ``preprocess_input`` for each SIZE x SIZE patch with at least three
        channels, in the order produced by ``sliding_window``. It is empty
        when the image is smaller than SIZE along either axis.
    """
    im = image.img_to_array(image.load_img(path))
    if im.shape[0] < SIZE or im.shape[1] < SIZE:
        # indicate that no images are available
        return [], info
    patches = [
        preprocess_input(patch)
        for patch in sliding_window(im, STRIDE, SIZE)
        if patch.shape[2] >= 3
    ]
    return patches, info
我有一组图像,想用滑动窗口(按给定步长移动)从中切出一组大小为 128*128 的子图像;原始图像的行数和列数都必须大于这个尺寸。为此我编写了以下函数:
def sliding_window(image, stride, imgSize):
    """Cut a 3-D image array into square patches with a sliding window.

    Window origins advance by ``stride`` along each axis. When the last
    window would not end exactly on the image border, its origin is moved
    back so that it does; the final row/column of patches may therefore
    overlap the one before it.

    Args:
        image: Array whose shape unpacks as ``(height, width, channels)``.
        stride: Step, in pixels, between consecutive window origins.
        imgSize: Side length, in pixels, of each square patch.

    Returns:
        A list of ``(imgSize, imgSize, channels)`` arrays, each a copy of
        the corresponding image region, ordered top-to-bottom and, within
        each row of windows, left-to-right.

    Raises:
        ValueError: If ``stride`` or ``imgSize`` is not positive, or if the
            image is smaller than ``imgSize`` along either axis.
    """
    height, width, _ = image.shape
    if stride <= 0 or imgSize <= 0:
        raise ValueError(
            "stride and imgSize must be positive, got stride={} imgSize={}".format(
                stride, imgSize
            )
        )
    # Without this check a too-small image would produce a negative origin
    # and silently return wrongly shaped patches.
    if height < imgSize or width < imgSize:
        raise ValueError(
            "image of size {}x{} is smaller than the {}x{} window".format(
                height, width, imgSize, imgSize
            )
        )

    def _origins(extent):
        """Return the window start offsets along an axis of length ``extent``."""
        origins = list(range(0, extent - imgSize + stride, stride))
        # Pull the last window back so it ends exactly on the border.
        if origins[-1] + imgSize != extent:
            origins[-1] = extent - imgSize
        return origins

    rows = _origins(height)
    cols = _origins(width)
    # np.array(...) copies the slice so patches do not alias the source image.
    return [
        np.array(image[y:y + imgSize, x:x + imgSize, :])
        for y in rows
        for x in cols
    ]
以及调用该函数的主要代码片段:
# Build the patch dataset sequentially: every image that is at least
# SIZE x SIZE is cut into patches, and each kept patch contributes one entry
# to image_data and one matching entry to image_label.
im_counter = 0
image_data = []
image_label = []
for cl in file_images:
    for img_file in data[cl]:
        path = img_path + cl + "/" + img_file
        im = image.load_img(path)
        im = image.img_to_array(im)
        im_counter += 1
        if im_counter % 500 == 0:
            print("{} images processed...".format(im_counter))
        # Skip images that cannot hold a single SIZE x SIZE window.
        if im.shape[0] < SIZE or im.shape[1] < SIZE:
            continue
        for patch in sliding_window(im, STRIDE, SIZE):
            # Only patches with at least three channels are kept.
            if patch.shape[2] >= 3:
                image_data.append(preprocess_input(patch))
                # Labels belong in image_label; calling .append on `image`
                # (the object providing load_img) was a bug.
                # NOTE(review): one label per kept patch is assumed here so
                # that image_data and image_label stay aligned - confirm.
                image_label.append(class_dictionary[cl])
        # No per-patch `del` / gc.collect(): a full garbage collection for
        # every patch is very expensive, and the patch stays referenced from
        # image_data anyway.
现在,上面的代码片段仅处理 3000 张图像就要运行非常久(只使用 1 个 CPU 核心时至少需要 25 小时)。我想让它更快:我可以使用一台服务器,它的 CPU 有很多核心,所以您能否给出一个并行版本,让它运行得更快?
注意:从原始图像返回的子图像的顺序非常重要,不允许任意顺序的图像。
下面是一个您可以尝试的大致思路。
def main(processes=30):
    """Process every image in a worker pool and collect patches in order.

    Builds one ``(path, class info)`` task per image file, runs
    ``handle_one_image`` on each task in a ``multiprocessing.Pool``, and
    appends the returned patches and their class info to ``image_data`` and
    ``image_label``. Neither name is assigned in this function, so both must
    already exist as module-level lists.

    Args:
        processes: Number of worker processes in the pool. Defaults to 30,
            the value that was previously hard-coded.

    NOTE: on platforms that start workers with "spawn", call this from under
    an ``if __name__ == "__main__":`` guard.
    """
    # One (file path, class_dictionary entry) tuple per image file.
    args = [
        (img_path + cl + "/" + img_file, class_dictionary[cl])
        for cl in file_images
        for img_file in data[cl]
    ]
    with multiprocessing.Pool(processes=processes) as pool:
        # starmap calls handle_one_image(path, info) for every task and
        # returns the results in the same order as `args`, so patch order
        # is preserved.
        for patches, info in pool.starmap(handle_one_image, args):
            # `patches` is the list of patches for one image; `info` is the
            # class_dictionary entry that was passed in with it.
            image_data.extend(patches)
            image_label.extend([info] * len(patches))
def handle_one_image(path, info):
    """Load one image and return its preprocessed patches along with ``info``.

    Args:
        path: Path passed to ``image.load_img``.
        info: Opaque value returned unchanged, so the caller can pair the
            patches with it (the caller passes the class info).

    Returns:
        A ``(patches, info)`` tuple. ``patches`` is a list of the results of
        ``preprocess_input`` for each SIZE x SIZE patch with at least three
        channels, in the order produced by ``sliding_window``. It is empty
        when the image is smaller than SIZE along either axis.
    """
    im = image.img_to_array(image.load_img(path))
    if im.shape[0] < SIZE or im.shape[1] < SIZE:
        # indicate that no images are available
        return [], info
    patches = [
        preprocess_input(patch)
        for patch in sliding_window(im, STRIDE, SIZE)
        if patch.shape[2] >= 3
    ]
    return patches, info