使用共享变量的线程
Threading with shared variables
我试过用多线程插入一个boost::bimap
。我在线程之间有一些共享变量,我需要通过引用传递它们,其中一些在每个线程执行时都会被修改。但是,我收到错误:
Segmentation fault (core dumped)
我有以下代码。我试图通过使用 std::lock_guard<std::mutex> lock(mtx)
来避免对变量的并发访问,但无法使其工作。
parallel_index.cpp
#include <iostream>
#include <string>
#include <algorithm>
#include <thread>
#include <mutex>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include "parallel_index.h"
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string> > bimap_reference;
typedef bimap_reference::value_type position;
bimap_reference reference_index_vector;
size_t total_threads = std::thread::hardware_concurrency();
std::string sequence_content = "ABCDDBACDDDCBBAAACBDAADCBDAAADCBDADADACBDDCBBBCDCBCDAADCBBCDAAADCBDA";
uint64_t sequence_length = sequence_content.length();
int split = 5;
uint64_t erase_length = 0;
unsigned int seq_itr = 0;
std::mutex mtx; // to protect against concurent access
int main(){
thread_test::create_index index;
std::thread threads[total_threads];
std::cout << total_threads << " threads lanched" << std::endl;
for(unsigned int i = 0; i < total_threads; i++){
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(sequence_length), std::ref(split), std::ref(sequence_content), std::ref(erase_length));
}
for(unsigned int i = 0; i < total_threads; i++){
threads[i].join();
}
}
/*
* Creating index
*/
void thread_test::create_index::reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, uint64_t &erase_length ){
for (; seq_itr < sequence_length; ++seq_itr ){
std::lock_guard<std::mutex> lock(mtx);
std::string splitstr = sequence_content.substr(erase_length, split);
reference_index_vector.insert(position(seq_itr, splitstr));
seq_itr += split-1;
erase_length += split;
if(erase_length > 10000){
sequence_content.erase(0,erase_length);
erase_length = 0;
}
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}
parallel_index.h
#ifndef PARALLEL_INDEX_H_
#define PARALLEL_INDEX_H_
#include<iostream>
#include <algorithm>
#include <utility>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
//typedef boost::unordered_map<int, std::pair<int, unsigned long int>& > reference_map;
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string > > bimap_reference;
typedef bimap_reference::value_type position;
extern bimap_reference reference_index_vector;
namespace thread_test{
class create_index{
public:
void reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, uint64_t &erase_length);
};
}
#endif /* PARALLEL_INDEX_H_ */
----------------------------编辑-------- ------------------------
我尝试将字符串内容划分为与线程数相等的分区,以使每个线程在本地可用。但似乎没有任何效果。有时它会完成第一个线程并在 Segmentation fault (core dumped)
.
之后停止
parallel_index.cpp
#include <iostream>
#include <string>
#include <algorithm>
#include <thread>
#include <mutex>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include "parallel_index.h"
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string> > bimap_reference;
typedef bimap_reference::value_type position;
bimap_reference reference_index_vector;
//create threads
size_t total_threads = std::thread::hardware_concurrency();
std::string sequence_content = "ABCDDBACDDDCBBAAACBDAADCBDAAADCBDADADACBDDCBBBCDCBCDAADCBBCDAAADCBDADDCCCAAABBBAAACDCA";
uint64_t sequence_length = sequence_content.length();
int split = 5;
// split the sequence_content equal to the number of threads, and assign each partition to each thread.
uint64_t each_partition_len = sequence_content.length()/total_threads- (sequence_content.length()/total_threads)%split ;
uint64_t last_partition_len = sequence_content.length()/total_threads +
(((sequence_content.length()/total_threads)%split)*(total_threads-1)) + sequence_content.length()%total_threads;
std::mutex mtx; // to protect against concurent access
int main(){
thread_test::create_index index;
std::thread threads[total_threads];
std::cout << total_threads << " threads lanched" << std::endl;
for(unsigned int i = 0; i < total_threads; i++){
if(i < total_threads-1)
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(each_partition_len), std::ref(split), std::ref(sequence_content), i);
else
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(last_partition_len), std::ref(split), std::ref(sequence_content), i);
//std::lock_guard<std::mutex> lck(mtx);
std::cout << "launched thread " << i << "with id " << threads[i].get_id() << std::endl;
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
for( unsigned int i = 0; i < total_threads; ++i){
if(threads[i].joinable()){
std::cout << "trying to join thread " << i << std:: endl;
threads[i].join();
std::cout << "thread joined " << i << std:: endl;
}
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}
/*
* Creating index
*/
void thread_test::create_index::reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, int i ){
uint64_t seq_strt = 0;
// set seq_strt
if(i == 0)
seq_strt = sequence_length * i;
else
seq_strt = sequence_length * i + 1;
for (uint64_t seq_itr = seq_strt; seq_itr <= sequence_length; ++seq_itr ){
std::string splitstr = sequence_content.substr(seq_itr, split);
mtx.lock();
//std::lock_guard<std::mutex> lock(mtx);
reference_index_vector.insert(position(seq_itr, splitstr));
mtx.unlock();
seq_itr += split-1;
}
}
parallel_index.h
#ifndef PARALLEL_INDEX_H_
#define PARALLEL_INDEX_H_
#include<iostream>
#include <algorithm>
#include <utility>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string > > bimap_reference;
typedef bimap_reference::value_type position;
extern bimap_reference reference_index_vector;
namespace thread_test{
class create_index{
public:
void reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, int i);
};
}
#endif /* PARALLEL_INDEX_H_ */
所有线程都会尝试获取临界区的锁,为了保持位图完整,需要一个条件变量,让线程有序执行。这是合理的,因为您在 reference_index_hash() 中使用 seq_itr 作为局部变量,并且需要按正确的顺序递增。
您原始代码中的一个问题是 unsigned int seq_itr
是在没有从多个线程同步的情况下访问的。除了产生无效结果外,它还可能导致 seq_itr
递增到大于实际序列长度的值,并且以下访问可能会导致崩溃。
新代码通过传递索引解决了这个问题,只要这些索引是 non-overlapping 并且计算正确就可以了。我无法完全遵循逻辑,但如果您的 seq_strt
计算失败,程序也可能因索引无效而崩溃。应该很容易在调试器中或使用一些索引断言进行验证。
然而,在线程启动后直接打印地图的第二个代码示例中存在问题
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
这不会产生正确的结果,因为所有工作线程同时访问地图。 join()
s 后访问是安全的。
我觉得 segmentation fault
的罪魁祸首不过是 static
链接库。它不是通过将 seq_itr
递增到大于实际序列长度的值,因为如果 seq_itr
大于实际序列长度,您的 for 循环将永远不允许进入。您尝试删除 -static
标志,它应该可以通过不给出分段错误来工作,但是它不能保证其他代码的正确性。有关 thread
分段错误的更多详细信息,请参见 here
我试过用多线程插入一个boost::bimap
。我在线程之间有一些共享变量,我需要通过引用传递它们,其中一些在每个线程执行时都会被修改。但是,我收到错误:
Segmentation fault (core dumped)
我有以下代码。我试图通过使用 std::lock_guard<std::mutex> lock(mtx)
来避免对变量的并发访问,但无法使其工作。
parallel_index.cpp
#include <iostream>
#include <string>
#include <algorithm>
#include <thread>
#include <mutex>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include "parallel_index.h"
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string> > bimap_reference;
typedef bimap_reference::value_type position;
bimap_reference reference_index_vector;
size_t total_threads = std::thread::hardware_concurrency();
std::string sequence_content = "ABCDDBACDDDCBBAAACBDAADCBDAAADCBDADADACBDDCBBBCDCBCDAADCBBCDAAADCBDA";
uint64_t sequence_length = sequence_content.length();
int split = 5;
uint64_t erase_length = 0;
unsigned int seq_itr = 0;
std::mutex mtx; // to protect against concurent access
int main(){
thread_test::create_index index;
std::thread threads[total_threads];
std::cout << total_threads << " threads lanched" << std::endl;
for(unsigned int i = 0; i < total_threads; i++){
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(sequence_length), std::ref(split), std::ref(sequence_content), std::ref(erase_length));
}
for(unsigned int i = 0; i < total_threads; i++){
threads[i].join();
}
}
/*
* Creating index
*/
void thread_test::create_index::reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, uint64_t &erase_length ){
for (; seq_itr < sequence_length; ++seq_itr ){
std::lock_guard<std::mutex> lock(mtx);
std::string splitstr = sequence_content.substr(erase_length, split);
reference_index_vector.insert(position(seq_itr, splitstr));
seq_itr += split-1;
erase_length += split;
if(erase_length > 10000){
sequence_content.erase(0,erase_length);
erase_length = 0;
}
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}
parallel_index.h
#ifndef PARALLEL_INDEX_H_
#define PARALLEL_INDEX_H_
#include<iostream>
#include <algorithm>
#include <utility>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
//typedef boost::unordered_map<int, std::pair<int, unsigned long int>& > reference_map;
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string > > bimap_reference;
typedef bimap_reference::value_type position;
extern bimap_reference reference_index_vector;
namespace thread_test{
class create_index{
public:
void reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, uint64_t &erase_length);
};
}
#endif /* PARALLEL_INDEX_H_ */
----------------------------编辑-------- ------------------------
我尝试将字符串内容划分为与线程数相等的分区,以使每个线程在本地可用。但似乎没有任何效果。有时它会完成第一个线程并在 Segmentation fault (core dumped)
.
parallel_index.cpp
#include <iostream>
#include <string>
#include <algorithm>
#include <thread>
#include <mutex>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include "parallel_index.h"
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string> > bimap_reference;
typedef bimap_reference::value_type position;
bimap_reference reference_index_vector;
//create threads
size_t total_threads = std::thread::hardware_concurrency();
std::string sequence_content = "ABCDDBACDDDCBBAAACBDAADCBDAAADCBDADADACBDDCBBBCDCBCDAADCBBCDAAADCBDADDCCCAAABBBAAACDCA";
uint64_t sequence_length = sequence_content.length();
int split = 5;
// split the sequence_content equal to the number of threads, and assign each partition to each thread.
uint64_t each_partition_len = sequence_content.length()/total_threads- (sequence_content.length()/total_threads)%split ;
uint64_t last_partition_len = sequence_content.length()/total_threads +
(((sequence_content.length()/total_threads)%split)*(total_threads-1)) + sequence_content.length()%total_threads;
std::mutex mtx; // to protect against concurent access
int main(){
thread_test::create_index index;
std::thread threads[total_threads];
std::cout << total_threads << " threads lanched" << std::endl;
for(unsigned int i = 0; i < total_threads; i++){
if(i < total_threads-1)
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(each_partition_len), std::ref(split), std::ref(sequence_content), i);
else
threads[i] = std::thread(&thread_test::create_index::reference_index_hash, index,
std::ref(last_partition_len), std::ref(split), std::ref(sequence_content), i);
//std::lock_guard<std::mutex> lck(mtx);
std::cout << "launched thread " << i << "with id " << threads[i].get_id() << std::endl;
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
for( unsigned int i = 0; i < total_threads; ++i){
if(threads[i].joinable()){
std::cout << "trying to join thread " << i << std:: endl;
threads[i].join();
std::cout << "thread joined " << i << std:: endl;
}
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}
/*
* Creating index
*/
void thread_test::create_index::reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, int i ){
uint64_t seq_strt = 0;
// set seq_strt
if(i == 0)
seq_strt = sequence_length * i;
else
seq_strt = sequence_length * i + 1;
for (uint64_t seq_itr = seq_strt; seq_itr <= sequence_length; ++seq_itr ){
std::string splitstr = sequence_content.substr(seq_itr, split);
mtx.lock();
//std::lock_guard<std::mutex> lock(mtx);
reference_index_vector.insert(position(seq_itr, splitstr));
mtx.unlock();
seq_itr += split-1;
}
}
parallel_index.h
#ifndef PARALLEL_INDEX_H_
#define PARALLEL_INDEX_H_
#include<iostream>
#include <algorithm>
#include <utility>
#include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
namespace bimaps = boost::bimaps;
typedef boost::bimap<bimaps::unordered_set_of<uint64_t>,
bimaps::unordered_multiset_of<std::string > > bimap_reference;
typedef bimap_reference::value_type position;
extern bimap_reference reference_index_vector;
namespace thread_test{
class create_index{
public:
void reference_index_hash(uint64_t &sequence_length, int &split,
std::string &sequence_content, int i);
};
}
#endif /* PARALLEL_INDEX_H_ */
所有线程都会尝试获取临界区的锁,为了保持位图完整,需要一个条件变量,让线程有序执行。这是合理的,因为您在 reference_index_hash() 中使用 seq_itr 作为局部变量,并且需要按正确的顺序递增。
您原始代码中的一个问题是 unsigned int seq_itr
是在没有从多个线程同步的情况下访问的。除了产生无效结果外,它还可能导致 seq_itr
递增到大于实际序列长度的值,并且以下访问可能会导致崩溃。
新代码通过传递索引解决了这个问题,只要这些索引是 non-overlapping 并且计算正确就可以了。我无法完全遵循逻辑,但如果您的 seq_strt
计算失败,程序也可能因索引无效而崩溃。应该很容易在调试器中或使用一些索引断言进行验证。
然而,在线程启动后直接打印地图的第二个代码示例中存在问题
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
这不会产生正确的结果,因为所有工作线程同时访问地图。 join()
s 后访问是安全的。
我觉得 segmentation fault
的罪魁祸首不过是 static
链接库。它不是通过将 seq_itr
递增到大于实际序列长度的值,因为如果 seq_itr
大于实际序列长度,您的 for 循环将永远不允许进入。您尝试删除 -static
标志,它应该可以通过不给出分段错误来工作,但是它不能保证其他代码的正确性。有关 thread
分段错误的更多详细信息,请参见 here