MPI - mpirun noticed that process... exited on signal 6
int proc_cnt, rank;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &proc_cnt);
if (rank == 0) {
    std::vector<int> segment_ids = read_segment_ids(argv[kParDataIx]);
    std::map<int, ParameterSet> computed_par_sets;
    int buf_send[kBufMsToSlSize];
    double buf_recv[kBufSlToMsSize];
    MPI_Status status;
    int curr_segment_ix = 0;
    int recv_par_sets = 0;
    //inits workers
    for (int i = 1; i < proc_cnt; i++) {
        buf_send[0] = segment_ids[curr_segment_ix++];
        MPI_Send(
            buf_send, kBufMsToSlSize * sizeof (int), MPI_INT,
            i, 0, MPI_COMM_WORLD);
    }
    //sends slaves what to do and receives answers
    while (recv_par_sets < segment_ids.size()) {
        //receives answer
        MPI_Recv(
            buf_recv, kBufSlToMsSize * sizeof (double), MPI_DOUBLE,
            MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        recv_par_sets++;
        if (curr_segment_ix < segment_ids.size()) {
            //there are still segments to process
            buf_send[0] = segment_ids[curr_segment_ix++];
        } else {
            //there is no segment to process, sends to slave termination char
            buf_send[0] = -1;
        }
        //sends back to source which segment to process as next
        MPI_Send(
            buf_send, kBufMsToSlSize * sizeof (int), MPI_INT,
            status.MPI_SOURCE, 0, MPI_COMM_WORLD);
        std::pair<int, ParameterSet> computed_seg_par_set =
            convert_array_to_seg_par_set(buf_recv);
        computed_par_sets.insert(computed_seg_par_set);
    }
    print_parameter_sets(computed_par_sets);
    std::cout << "[Master] was terminated" << std::endl;
} else {
    int bufToSl[kBufMsToSlSize];
    double bufToMs[kBufSlToMsSize];
    Bounds bounds = read_bounds_file(argv[kParBoundsIx]);
    Config config = read_config_file(kConfigFileName);
    while (true) {
        MPI_Recv(
            bufToSl, kBufMsToSlSize * sizeof (int), MPI_INT,
            0, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
        int segment_id = bufToSl[0];
        if (segment_id == -1) {
            //termination character was found
            break;
        }
        Segment segment = read_segment(argv[kParDataIx], segment_id);
        std::map<int, Segment> segment_map;
        segment_map.insert(std::pair<int, Segment>(segment.GetId(), segment));
        SimplexComputer simplex_computer(segment_map, bounds, config);
        ParameterSet par_set = simplex_computer.ComputeSegment(&segment);
        convert_seg_par_set_to_array(segment_id, par_set, bufToMs);
        MPI_Send(
            bufToMs, kBufSlToMsSize * sizeof (double), MPI_DOUBLE,
            0, 0, MPI_COMM_WORLD);
    }
    std::cout << "[SLAVE] " << rank << " was terminated" << std::endl;
}
MPI_Finalize();
I just don't get it. When I run this with mpirun and the process count set to 5, all the processes finish, and the console output shows that the master and the slaves were terminated, but at the end there is this statement:

mpirun noticed that process rank 0 with PID 1534 on node Jan-MacBook exited on signal 6 (Abort trap: 6).

What am I doing wrong? Thanks in advance.
According to the definitions of MPI_Send and MPI_Recv, the second parameter, count, is the number of elements you want to send or receive, not the number of bytes. The datatype of those elements is then specified as the third parameter of both calls:
int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
The same holds for MPI_Recv:
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status)
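In your code, this means dropping the sizeof multiplications: the master-to-slave messages are kBufMsToSlSize ints, so the count is simply kBufMsToSlSize, and likewise kBufSlToMsSize for the double buffers. Here is a minimal, self-contained sketch of the same pattern using element counts; this is not your program, and the names kIntCount and kDblCount and the payload values are invented for illustration:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int kIntCount = 4; //element counts, not byte counts
    const int kDblCount = 3;

    if (rank == 0) {
        int out[kIntCount] = {1, 2, 3, 4};
        double in[kDblCount];
        //count is the number of ints, no sizeof (int) factor
        MPI_Send(out, kIntCount, MPI_INT, 1, 0, MPI_COMM_WORLD);
        MPI_Recv(in, kDblCount, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        std::printf("[Master] got %g %g %g\n", in[0], in[1], in[2]);
    } else if (rank == 1) {
        int in[kIntCount];
        double out[kDblCount] = {0.5, 1.5, 2.5};
        MPI_Recv(in, kIntCount, MPI_INT, 0, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        //count is the number of doubles, no sizeof (double) factor
        MPI_Send(out, kDblCount, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}

Built with mpic++ and launched with mpirun -np 2, this completes without the abort. With the byte-multiplied counts from your code, MPI_Recv writes well past the end of each fixed-size stack buffer, and that kind of stack corruption is a very plausible source of the signal 6 (abort trap) you are seeing.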
Hope that helps.