如何创建多个线程来填充数组优化处理时间
How to create multiple threads to populate array optimising processing time
大家好,我在创建线程来填充数组时遇到了问题。
我的目标是使用 pthreads 以尽可能快的速度填充数组,以此展示并行化的效果。
我正在尝试将下面这段代码并行化,以缩短处理时间。
for (int i = 0; i < size; i++)
{
v3[i] = v1[i] + v2[i];
}
我已经尝试写出并行函数所需的基本骨架,但不确定接下来该怎么做。
我认为需要再写一个函数,让它只填充数组的一部分,然后并行调用它,这样每个线程各自处理一段,从而加快整个过程。但我是 C++ 新手,不确定具体该怎么实现。
如有任何建议或参考资料,我将不胜感激。
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <chrono>
#include <pthread.h>
using namespace std::chrono;
using namespace std;
// Number of worker threads main() creates; also the length of its pthread_t array.
#define THREAD 8
// Argument bundle handed to a worker thread through pthread's void* parameter.
struct arg_struct
{
    int *v1;            // first input array
    int *v2;            // second input array
    int *v3;            // output array, receives v1[i] + v2[i]
    unsigned long size; // number of elements to process
};
// Global part counter, presumably meant to give each worker thread its own slice index.
// NOTE(review): this is a plain int, so modifying it from several threads is unsynchronized.
int part = 0;
/**
 * Fills the first `size` slots of `vector` with pseudo-random values in [0, 99].
 *
 * Values come from rand(), so the sequence depends on the current srand() seed.
 * Nothing is written when `size` is zero or negative.
 *
 * @param vector destination buffer with room for at least `size` ints
 * @param size   number of elements to fill
 */
void randomVector(int vector[], int size)
{
    int idx = 0;
    while (idx < size)
    {
        // one rand() call per element, taken modulo 100
        vector[idx] = rand() % 100;
        ++idx;
    }
}
void *parallel(void *arguments)
{
struct arg_struct *args = (struct arg_struct *)arguments;
unsigned long size = args->size;
int *v1 = args->v1;
int *v2 = args->v2;
int *v3 = args->v3;
int thread_part = part++;
for (int i = 0; i < size; i++)
{
v3[i] = v1[i] + v2[i];
}
}
// Empty placeholder: accepts an opaque pointer and does nothing with it.
void somefunction(void *arguments)
{
    (void)arguments; // intentionally unused
}
int main()
{
struct arg_struct args;
args.size = 100000000;
srand(time(0));
pthread_t threads[THREAD];
//int *v1, *v2, *v3;
//sets the clock to start by getting the time now
auto start = high_resolution_clock::now();
//memory alloc is used to dynamically allocate memory with the specified size.
args.v1 = (int *) malloc(args.size * sizeof(int *));
args.v2 = (int *) malloc(args.size * sizeof(int *));
args.v3 = (int *) malloc(args.size * sizeof(int *));
randomVector(args.v1, args.size);
randomVector(args.v2, args.size);
//ToDo: goes in a for loop that will append v3 ith value with v1 + v2
auto stop = high_resolution_clock::now();
for (int i = 0; i < THREAD; i++)
{
pthread_create(&threads[i], NULL, ¶llel, NULL);
}
pthread_join(threads[THREAD], NULL);
//gets the start and stop time and subtracts it to get the amount of time spent processing.
auto duration = duration_cast<microseconds>(stop - start);
cout << "Time taken by function: " << duration.count() << " microseconds" << endl;
return 0;
}
给定以下结构:
// Per-thread record: the pthread handle together with the slice that thread works on.
struct thread_context {
    pthread_t thread;   // handle filled in by pthread_create
    int *v1;            // start of this thread's slice of the first input
    int *v2;            // start of this thread's slice of the second input
    int *v3;            // start of this thread's slice of the output
    unsigned long size; // number of elements in the slice
};
您可以执行以下操作:
thread_context threads[THREAD];
for (int i = 0; i < THREAD; i++) {
int part_size = size / THREAD;
int part_offset = part_size * i;
threads[i] = { 0, &v1[part_offset], &v2[part_offset], &v3[part_offset], part_size };
pthread_create(&threads[i].thread, NULL, ¶llel, &threads[i]);
}
使用 std::thread API 可以做得更好(假设 parallel 接受一些额外的参数):
std::vector<std::thread> threads;
for (int i = 0; i < THREAD; i++) {
threads.emplace_back(¶llel, &v1[part_offset], &v2[part_offset], &v3[part_offset], part_size);
}
大家好,我在创建线程来填充数组时遇到了问题。我的目标是使用 pthreads 以尽可能快的速度填充数组,以此展示并行化的效果。
我正在尝试将下面这段代码并行化,以缩短处理时间。
for (int i = 0; i < size; i++)
{
v3[i] = v1[i] + v2[i];
}
我已经尝试写出并行函数所需的基本骨架,但不确定接下来该怎么做。
我认为需要再写一个函数,让它只填充数组的一部分,然后并行调用它,这样每个线程各自处理一段,从而加快整个过程。但我是 C++ 新手,不确定具体该怎么实现。
如有任何建议或参考资料,我将不胜感激。
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <chrono>
#include <pthread.h>
using namespace std::chrono;
using namespace std;
// Number of worker threads main() creates; also the length of its pthread_t array.
#define THREAD 8
// Argument bundle handed to a worker thread through pthread's void* parameter.
struct arg_struct
{
    int *v1;            // first input array
    int *v2;            // second input array
    int *v3;            // output array, receives v1[i] + v2[i]
    unsigned long size; // number of elements to process
};
// Global part counter, presumably meant to give each worker thread its own slice index.
// NOTE(review): this is a plain int, so modifying it from several threads is unsynchronized.
int part = 0;
/**
 * Fills the first `size` slots of `vector` with pseudo-random values in [0, 99].
 *
 * Values come from rand(), so the sequence depends on the current srand() seed.
 * Nothing is written when `size` is zero or negative.
 *
 * @param vector destination buffer with room for at least `size` ints
 * @param size   number of elements to fill
 */
void randomVector(int vector[], int size)
{
    int idx = 0;
    while (idx < size)
    {
        // one rand() call per element, taken modulo 100
        vector[idx] = rand() % 100;
        ++idx;
    }
}
void *parallel(void *arguments)
{
struct arg_struct *args = (struct arg_struct *)arguments;
unsigned long size = args->size;
int *v1 = args->v1;
int *v2 = args->v2;
int *v3 = args->v3;
int thread_part = part++;
for (int i = 0; i < size; i++)
{
v3[i] = v1[i] + v2[i];
}
}
// Empty placeholder: accepts an opaque pointer and does nothing with it.
void somefunction(void *arguments)
{
    (void)arguments; // intentionally unused
}
int main()
{
struct arg_struct args;
args.size = 100000000;
srand(time(0));
pthread_t threads[THREAD];
//int *v1, *v2, *v3;
//sets the clock to start by getting the time now
auto start = high_resolution_clock::now();
//memory alloc is used to dynamically allocate memory with the specified size.
args.v1 = (int *) malloc(args.size * sizeof(int *));
args.v2 = (int *) malloc(args.size * sizeof(int *));
args.v3 = (int *) malloc(args.size * sizeof(int *));
randomVector(args.v1, args.size);
randomVector(args.v2, args.size);
//ToDo: goes in a for loop that will append v3 ith value with v1 + v2
auto stop = high_resolution_clock::now();
for (int i = 0; i < THREAD; i++)
{
pthread_create(&threads[i], NULL, ¶llel, NULL);
}
pthread_join(threads[THREAD], NULL);
//gets the start and stop time and subtracts it to get the amount of time spent processing.
auto duration = duration_cast<microseconds>(stop - start);
cout << "Time taken by function: " << duration.count() << " microseconds" << endl;
return 0;
}
给定以下结构:
// Per-thread record: the pthread handle together with the slice that thread works on.
struct thread_context {
    pthread_t thread;   // handle filled in by pthread_create
    int *v1;            // start of this thread's slice of the first input
    int *v2;            // start of this thread's slice of the second input
    int *v3;            // start of this thread's slice of the output
    unsigned long size; // number of elements in the slice
};
您可以执行以下操作:
thread_context threads[THREAD];
for (int i = 0; i < THREAD; i++) {
int part_size = size / THREAD;
int part_offset = part_size * i;
threads[i] = { 0, &v1[part_offset], &v2[part_offset], &v3[part_offset], part_size };
pthread_create(&threads[i].thread, NULL, ¶llel, &threads[i]);
}
使用 std::thread API 可以做得更好(假设 parallel 接受一些额外的参数):
std::vector<std::thread> threads;
for (int i = 0; i < THREAD; i++) {
threads.emplace_back(¶llel, &v1[part_offset], &v2[part_offset], &v3[part_offset], part_size);
}