您如何使用 Pthreads 并行化图像翻转?
How do you parallelize the flipping of image with Pthreads?
我是 Pthreads
和 c++
的新手,正在尝试并行化图像翻转程序。显然它不起作用。有人告诉我我需要从图像 class 移植一些代码,但不太确定移植的含义。我刚刚复制并粘贴了代码,但我猜这是错误的。
我了解大致的步骤:分配工作负载、初始化线程、创建线程、加入线程,并定义回调函数。
我不太确定 cells_per_thread
应该是什么。我很确定它应该是图像 width * height / threads
。这看起来正确吗?
我在使用 cmake 编译时遇到多个错误。
错误信息称 m_thread_number
、getWidth
、getHeight
、getPixel
、temp
未在范围内定义。我认为那是因为图像 class 代码未移植?
PthreadImage.cxx
// Forward declaration of the thread entry point used by the horizontal flip.
void* H_flip_callback_function(void* aThreadData);
// Returns a horizontally flipped copy of this image.
// With 0 or 1 threads the work is delegated to Image::flipHorizontally();
// otherwise an index range is split across m_thread_number pthreads, all of
// which write into the same output image `temp`.
PthreadImage PthreadImage::flipHorizontally() const
{
if (m_thread_number == 0 || m_thread_number == 1)
{
return PthreadImage(Image::flipHorizontally(), m_thread_number);
}
else
{
// Output image shared by every worker thread.
PthreadImage temp(getWidth(), getHeight(), m_thread_number);
// Workload allocation
// One ThreadData record per thread (ThreadData is declared outside this snippet).
vector<ThreadData> p_thread_data(m_thread_number);
// Index of the last element assigned so far; -1 means nothing has been assigned yet.
int last_element = -1;
// Elements per thread: the total pixel count (height * width) divided by the thread count.
// NOTE(review): the ranges are therefore counted in pixels; confirm that the
// thread callback interprets start_id/end_id in the same unit.
unsigned int cells_per_thread = getHeight() * getWidth() / m_thread_number;
// Elements left over after the integer division; handed out one per thread below.
unsigned int remainder = getHeight() * getWidth() % m_thread_number;
// Print the default number of cells per thread to the console.
cout << "Default number for cells per thread: " << cells_per_thread << endl;
// Initialise the per-thread records: each thread gets a contiguous [start_id, end_id] range.
for (int i = 0; i < m_thread_number; i++)
{
// Placeholder value (the loop index); pthread_create() below receives the
// address of this same field.
p_thread_data[i].thread_id = i;
// The range starts right after the previous thread's last element (-1 + 1 = 0 for the first thread).
p_thread_data[i].start_id = ++last_element;
p_thread_data[i].end_id = last_element + cells_per_thread - 1;
p_thread_data[i].input = this;
p_thread_data[i].output = &temp;
// While leftover elements remain, extend this thread's range by one and consume one leftover.
if (remainder > 0)
{
p_thread_data[i].end_id++;
--remainder;
}
// Remember where this thread's range ends so the next one starts after it.
last_element = p_thread_data[i].end_id;
// Print the range assigned to this thread.
cout << "Thread[" << i << "] starts with " << p_thread_data[i].start_id << " and stops on " << p_thread_data[i].end_id << endl;
}
// Create the threads; each one receives a pointer to its own ThreadData record.
// NOTE(review): the return value of pthread_create() is not checked.
for (int i = 0; i < m_thread_number; i++)
{
pthread_create(&p_thread_data[i].thread_id, NULL, H_flip_callback_function, &p_thread_data[i]);
}
// Wait for each thread to complete before returning the output image.
for (int i = 0; i < m_thread_number; i++)
{
pthread_join(p_thread_data[i].thread_id, NULL);
}
return temp;
}
}
回调函数
// Thread entry point for the horizontal flip (version shown in the question).
// aThreadData is cast to ThreadData*, whose start_id/end_id bound the outer loop.
// NOTE(review): this is a free function, not a member of the image class, so
// the unqualified names temp, getHeight, getWidth, getPixel, m_height and
// m_width used below are not in scope here.
// NOTE(review): the function is declared to return void* but has no return statement.
void* H_flip_callback_function(void* aThreadData)
{
// Convert the untyped pthread argument back to ThreadData.
ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);
int tempHeight = temp(getHeight());
int tempWidth = temp(getWidth());
// Iterates over this thread's assigned range.
// NOTE(review): the loop body never uses this index (the innermost loop
// declares its own `i`), so every iteration repeats the same full-image pass.
for (int i = p_thread_data->start_id; i <= p_thread_data->end_id; i++)
{
// Process every row of the image
for (unsigned int j = 0; j < m_height; ++j)
{
// Process every column of the image (left half; each pixel is swapped with its mirror).
// NOTE(review): this `i` shadows the outer loop's `i`.
for (unsigned int i = 0; i < m_width / 2; ++i)
{
(*(p_thread_data->output))( i, j) = getPixel(m_width - i - 1, j);
(*(p_thread_data->output))(m_width - i - 1, j) = getPixel( i, j);
}
}
}
}
图片class
#include <sstream> // Header file for stringstream
#include <fstream> // Header file for filestream
#include <algorithm> // Header file for min/max/fill
#include <numeric> // Header file for accumulate
#include <cmath> // Header file for abs and pow
#include <vector>
#include "Image.h"
//-----------------
// Default constructor: the image starts with a width and a height of zero.
Image::Image()
    : m_width(0)
    , m_height(0)
{
}
//----------------------------------
Image::Image(const Image& anImage):
//----------------------------------
m_width(anImage.m_width),
m_height(anImage.m_height),
m_p_image(anImage.m_p_image)
//--------------------------------
图片class待移植代码
//-----------------------------------
// Returns a new image that is this image mirrored left-to-right.
// The original image is not modified. Column i of the result takes the pixel
// from column (width - 1 - i) of the same row, so every column is written,
// including the middle one when the width is odd.
Image Image::flipHorizontally() const
//-----------------------------------
{
    const unsigned int width = getWidth();
    const unsigned int height = getHeight();

    // Create an image of the right size
    Image temp(width, height);

    // Process every row of the image
    for (unsigned int j = 0; j < height; ++j)
    {
        // Process every column of the image
        for (unsigned int i = 0; i < width; ++i)
        {
            temp(i, j) = getPixel(width - i - 1, j);
        }
    }

    // Return the flipped copy (not a literal 0).
    return temp;
}
我觉得很接近。非常感谢任何帮助!
编辑
好的,所以对于那些在这上面浪费时间的人来说,这是正确的代码。
显然有一些地方不对。
我不知道当时为什么写了 3 个 for 循环。应该只有 2 个:一个遍历行,一个遍历列。
cells_per_thread
应该是 pixels_per_thread,
并且应按 rows / threads 计算(正如 @Larry B 所建议的),而不是让每个线程处理全部像素。
您可以使用 -> 访问指针所指对象的成员,例如 setPixel()、
getPixel() 等。谁知道呢!?
还有一个对各位很重要的数据结构,我之前忘了贴出来。
struct ThreadData
{
    pthread_t thread_id;
    unsigned int start_id;
    unsigned int end_id;
    const Image* input;
    Image* output;
};
正确的回调函数
// Thread entry point for the horizontal flip.
// aThreadData must point to a ThreadData record; its start_id and end_id give
// the inclusive range of rows this thread mirrors from `input` into `output`.
// Always returns 0; no result is handed back through the thread's exit value.
void* H_flip_callback_function(void* aThreadData)
{
    // Convert the untyped pthread argument back to ThreadData.
    ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);

    // Unsigned, so the comparison with the unsigned column index does not mix signedness.
    const unsigned int width = p_thread_data->input->getWidth();

    // Process every row assigned to this thread
    for (unsigned int j = p_thread_data->start_id; j <= p_thread_data->end_id; ++j)
    {
        // Process every column of the row, including the middle one when the
        // width is odd: output column i takes the pixel from the mirrored column.
        for (unsigned int i = 0; i < width; ++i)
        {
            p_thread_data->output->setPixel(i, j, p_thread_data->input->getPixel(width - i - 1, j));
        }
    }
    return 0;
}
所以现在这段代码可以编译和翻转。
谢谢!
将单线程代码移植到多线程版本的一般策略,实质上是重写现有代码,把工作划分为可以移交给线程执行的自包含工作单元。
考虑到这一点,我不认同您对 H_flip_callback_function
的实现:
// Thread entry point for the horizontal flip (the implementation under review).
// aThreadData is cast to ThreadData*, whose start_id/end_id bound the outer loop.
// NOTE(review): this is a free function, so the unqualified getWidth, getHeight,
// getPixel and m_thread_number used below are not in scope here.
// NOTE(review): the function is declared to return void* but has no return statement.
void* H_flip_callback_function(void* aThreadData)
{
// Convert the untyped pthread argument back to ThreadData.
ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);
// Create an image of the right size
// NOTE(review): `temp` is local to this call and p_thread_data->output is never
// written, so whatever is stored in `temp` is dropped when the function ends.
PthreadImage temp(getWidth(), getHeight(), m_thread_number);
int tempHeight = temp(getHeight());
int tempWidth = temp(getWidth());
// Iterates over this thread's assigned range.
// NOTE(review): the loop body never uses this index, so each iteration
// repeats the same pass over every row and column.
for (int i = p_thread_data->start_id; i <= p_thread_data->end_id; i++)
{
// Process every row of the image
for (unsigned int j = 0; j < tempHeight; ++j)
{
// Process every column of the image (left half; each pixel is swapped with its mirror).
// NOTE(review): this `i` shadows the outer loop's `i`.
for (unsigned int i = 0; i < tempWidth / 2; ++i)
{
temp(i, j) = getPixel(tempWidth - i - 1, j);
temp(tempWidth - i - 1, j) = getPixel(i, j);
}
}
}
}
从表面上看,您的所有线程似乎都将在整个图像上运行。如果是这种情况,单线程和多线程版本之间没有真正的区别,因为您只是在多线程版本中多次执行相同的工作。
我认为最小的独立工作单元是水平翻转图像的一行。但是,如果您的线程数少于行数,那么您可以为每个线程分配 (Num rows / Num threads) 行。然后每个线程翻转分配给它的那些行,主线程再收集结果并组装最终图像。
关于您的构建警告和错误,您必须提供完整的源代码、构建设置、环境等。
我是 Pthreads
和 c++
的新手,正在尝试并行化图像翻转程序。显然它不起作用。有人告诉我我需要从图像 class 移植一些代码,但不太确定移植的含义。我刚刚复制并粘贴了代码,但我猜这是错误的。
我了解大致的步骤:分配工作负载、初始化线程、创建线程、加入线程,并定义回调函数。
我不太确定 cells_per_thread
应该是什么。我很确定它应该是图像 width * height / threads
。这看起来正确吗?
我在使用 cmake 编译时遇到多个错误。
错误信息称 m_thread_number
、getWidth
、getHeight
、getPixel
、temp
未在范围内定义。我认为那是因为图像 class 代码未移植?
PthreadImage.cxx
// Forward declaration of the thread entry point used by the horizontal flip.
void* H_flip_callback_function(void* aThreadData);
// Returns a horizontally flipped copy of this image.
// With 0 or 1 threads the work is delegated to Image::flipHorizontally();
// otherwise an index range is split across m_thread_number pthreads, all of
// which write into the same output image `temp`.
PthreadImage PthreadImage::flipHorizontally() const
{
if (m_thread_number == 0 || m_thread_number == 1)
{
return PthreadImage(Image::flipHorizontally(), m_thread_number);
}
else
{
// Output image shared by every worker thread.
PthreadImage temp(getWidth(), getHeight(), m_thread_number);
// Workload allocation
// One ThreadData record per thread (ThreadData is declared outside this snippet).
vector<ThreadData> p_thread_data(m_thread_number);
// Index of the last element assigned so far; -1 means nothing has been assigned yet.
int last_element = -1;
// Elements per thread: the total pixel count (height * width) divided by the thread count.
// NOTE(review): the ranges are therefore counted in pixels; confirm that the
// thread callback interprets start_id/end_id in the same unit.
unsigned int cells_per_thread = getHeight() * getWidth() / m_thread_number;
// Elements left over after the integer division; handed out one per thread below.
unsigned int remainder = getHeight() * getWidth() % m_thread_number;
// Print the default number of cells per thread to the console.
cout << "Default number for cells per thread: " << cells_per_thread << endl;
// Initialise the per-thread records: each thread gets a contiguous [start_id, end_id] range.
for (int i = 0; i < m_thread_number; i++)
{
// Placeholder value (the loop index); pthread_create() below receives the
// address of this same field.
p_thread_data[i].thread_id = i;
// The range starts right after the previous thread's last element (-1 + 1 = 0 for the first thread).
p_thread_data[i].start_id = ++last_element;
p_thread_data[i].end_id = last_element + cells_per_thread - 1;
p_thread_data[i].input = this;
p_thread_data[i].output = &temp;
// While leftover elements remain, extend this thread's range by one and consume one leftover.
if (remainder > 0)
{
p_thread_data[i].end_id++;
--remainder;
}
// Remember where this thread's range ends so the next one starts after it.
last_element = p_thread_data[i].end_id;
// Print the range assigned to this thread.
cout << "Thread[" << i << "] starts with " << p_thread_data[i].start_id << " and stops on " << p_thread_data[i].end_id << endl;
}
// Create the threads; each one receives a pointer to its own ThreadData record.
// NOTE(review): the return value of pthread_create() is not checked.
for (int i = 0; i < m_thread_number; i++)
{
pthread_create(&p_thread_data[i].thread_id, NULL, H_flip_callback_function, &p_thread_data[i]);
}
// Wait for each thread to complete before returning the output image.
for (int i = 0; i < m_thread_number; i++)
{
pthread_join(p_thread_data[i].thread_id, NULL);
}
return temp;
}
}
回调函数
// Thread entry point for the horizontal flip (version shown in the question).
// aThreadData is cast to ThreadData*, whose start_id/end_id bound the outer loop.
// NOTE(review): this is a free function, not a member of the image class, so
// the unqualified names temp, getHeight, getWidth, getPixel, m_height and
// m_width used below are not in scope here.
// NOTE(review): the function is declared to return void* but has no return statement.
void* H_flip_callback_function(void* aThreadData)
{
// Convert the untyped pthread argument back to ThreadData.
ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);
int tempHeight = temp(getHeight());
int tempWidth = temp(getWidth());
// Iterates over this thread's assigned range.
// NOTE(review): the loop body never uses this index (the innermost loop
// declares its own `i`), so every iteration repeats the same full-image pass.
for (int i = p_thread_data->start_id; i <= p_thread_data->end_id; i++)
{
// Process every row of the image
for (unsigned int j = 0; j < m_height; ++j)
{
// Process every column of the image (left half; each pixel is swapped with its mirror).
// NOTE(review): this `i` shadows the outer loop's `i`.
for (unsigned int i = 0; i < m_width / 2; ++i)
{
(*(p_thread_data->output))( i, j) = getPixel(m_width - i - 1, j);
(*(p_thread_data->output))(m_width - i - 1, j) = getPixel( i, j);
}
}
}
}
图片class
#include <sstream> // Header file for stringstream
#include <fstream> // Header file for filestream
#include <algorithm> // Header file for min/max/fill
#include <numeric> // Header file for accumulate
#include <cmath> // Header file for abs and pow
#include <vector>
#include "Image.h"
//-----------------
// Default constructor: the image starts with a width and a height of zero.
Image::Image()
    : m_width(0)
    , m_height(0)
{
}
//----------------------------------
Image::Image(const Image& anImage):
//----------------------------------
m_width(anImage.m_width),
m_height(anImage.m_height),
m_p_image(anImage.m_p_image)
//--------------------------------
图片class待移植代码
//-----------------------------------
// Returns a new image that is this image mirrored left-to-right.
// The original image is not modified. Column i of the result takes the pixel
// from column (width - 1 - i) of the same row, so every column is written,
// including the middle one when the width is odd.
Image Image::flipHorizontally() const
//-----------------------------------
{
    const unsigned int width = getWidth();
    const unsigned int height = getHeight();

    // Create an image of the right size
    Image temp(width, height);

    // Process every row of the image
    for (unsigned int j = 0; j < height; ++j)
    {
        // Process every column of the image
        for (unsigned int i = 0; i < width; ++i)
        {
            temp(i, j) = getPixel(width - i - 1, j);
        }
    }

    // Return the flipped copy (not a literal 0).
    return temp;
}
我觉得很接近。非常感谢任何帮助!
编辑
好的,所以对于那些在这上面浪费时间的人来说,这是正确的代码。
显然有一些地方不对。
我不知道当时为什么写了 3 个 for 循环。应该只有 2 个:一个遍历行,一个遍历列。
cells_per_thread
应该是 pixels_per_thread,
并且应按 rows / threads 计算(正如 @Larry B 所建议的),而不是让每个线程处理全部像素。您可以使用 -> 访问指针所指对象的成员,例如
setPixel()、
getPixel() 等。谁知道呢!?还有一个对各位很重要的数据结构,我之前忘了贴出来。
struct ThreadData
{
    pthread_t thread_id;
    unsigned int start_id;
    unsigned int end_id;
    const Image* input;
    Image* output;
};
正确的回调函数
// Thread entry point for the horizontal flip.
// aThreadData must point to a ThreadData record; its start_id and end_id give
// the inclusive range of rows this thread mirrors from `input` into `output`.
// Always returns 0; no result is handed back through the thread's exit value.
void* H_flip_callback_function(void* aThreadData)
{
    // Convert the untyped pthread argument back to ThreadData.
    ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);

    // Unsigned, so the comparison with the unsigned column index does not mix signedness.
    const unsigned int width = p_thread_data->input->getWidth();

    // Process every row assigned to this thread
    for (unsigned int j = p_thread_data->start_id; j <= p_thread_data->end_id; ++j)
    {
        // Process every column of the row, including the middle one when the
        // width is odd: output column i takes the pixel from the mirrored column.
        for (unsigned int i = 0; i < width; ++i)
        {
            p_thread_data->output->setPixel(i, j, p_thread_data->input->getPixel(width - i - 1, j));
        }
    }
    return 0;
}
所以现在这段代码可以编译和翻转。 谢谢!
将单线程代码移植到多线程版本的一般策略,实质上是重写现有代码,把工作划分为可以移交给线程执行的自包含工作单元。
考虑到这一点,我不认同您对 H_flip_callback_function
的实现:
// Thread entry point for the horizontal flip (the implementation under review).
// aThreadData is cast to ThreadData*, whose start_id/end_id bound the outer loop.
// NOTE(review): this is a free function, so the unqualified getWidth, getHeight,
// getPixel and m_thread_number used below are not in scope here.
// NOTE(review): the function is declared to return void* but has no return statement.
void* H_flip_callback_function(void* aThreadData)
{
// Convert the untyped pthread argument back to ThreadData.
ThreadData* p_thread_data = static_cast<ThreadData*>(aThreadData);
// Create an image of the right size
// NOTE(review): `temp` is local to this call and p_thread_data->output is never
// written, so whatever is stored in `temp` is dropped when the function ends.
PthreadImage temp(getWidth(), getHeight(), m_thread_number);
int tempHeight = temp(getHeight());
int tempWidth = temp(getWidth());
// Iterates over this thread's assigned range.
// NOTE(review): the loop body never uses this index, so each iteration
// repeats the same pass over every row and column.
for (int i = p_thread_data->start_id; i <= p_thread_data->end_id; i++)
{
// Process every row of the image
for (unsigned int j = 0; j < tempHeight; ++j)
{
// Process every column of the image (left half; each pixel is swapped with its mirror).
// NOTE(review): this `i` shadows the outer loop's `i`.
for (unsigned int i = 0; i < tempWidth / 2; ++i)
{
temp(i, j) = getPixel(tempWidth - i - 1, j);
temp(tempWidth - i - 1, j) = getPixel(i, j);
}
}
}
}
从表面上看,您的所有线程似乎都将在整个图像上运行。如果是这种情况,单线程和多线程版本之间没有真正的区别,因为您只是在多线程版本中多次执行相同的工作。
我认为最小的独立工作单元是水平翻转图像的一行。但是,如果您的线程数少于行数,那么您可以为每个线程分配 (Num rows / Num threads) 行。然后每个线程翻转分配给它的那些行,主线程再收集结果并组装最终图像。
关于您的构建警告和错误,您必须提供完整的源代码、构建设置、环境等。