程序间歇性卡住,并且 main 报告的线程 id 与线程自身报告的不一致
program intermittently stuck with main reporting a different thread id as opposed to the thread itself
我正在尝试弄清楚多线程是如何工作的,这是我的代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <pthread.h>
// Condition variable that a worker signals after marking itself terminated;
// main waits on it while no terminated thread is pending a join.
static pthread_cond_t threadDied = PTHREAD_COND_INITIALIZER;

// Mutex held around updates to numUnjoined and to the per-thread state field.
static pthread_mutex_t threadMutex = PTHREAD_MUTEX_INITIALIZER;

// Number of threads created by main.
static int totThreads = 0;

// Threads that main has not joined yet (still running, or terminated and
// waiting to be joined).
static int numLive = 0;

// Threads that have terminated but have not been joined yet.
static int numUnjoined = 0;
// Lifecycle of a worker thread as recorded in the thread table.
enum tstate {
    TS_ALIVE      = 0,  // thread is still running
    TS_TERMINATED = 1,  // thread has finished but has not been joined yet
    TS_JOINED     = 2   // thread has finished and has been joined
};
// Per-thread bookkeeping record. `thread` points to an array of these records
// (one per worker) that main allocates with calloc.
static struct {
    pthread_t tid;        // thread ID filled in by pthread_create
    enum tstate state;    // current lifecycle state (see enum tstate)
    int sleepTime;        // seconds the thread sleeps before terminating
} *thread;
// Start routine shared by every worker thread.
//
// arg points to an int holding this thread's index into the `thread` table.
// The int is read once, at thread start, so the pointed-to value must still
// hold the intended index at that moment.
//
// The thread sleeps for its configured time, marks itself TS_TERMINATED and
// bumps numUnjoined while holding threadMutex, signals threadDied, prints a
// farewell line and returns NULL. If a pthread call fails, a message is
// printed and the whole process exits.
static void *threadFunc (void *arg) {
    const int slot = *(int *)arg;   // copy the index out of the caller's storage
    int rc;                         // result of the pthread calls

    // Stand-in for real work.
    sleep(thread[slot].sleepTime);

    rc = pthread_mutex_lock(&threadMutex);
    if (rc != 0) {
        fputs("whoops, couldn't acquire mutex\n", stdout);
        fflush(stdout);
        exit(-1);
    }

    // Shared bookkeeping is only modified while the mutex is held.
    thread[slot].state = TS_TERMINATED;
    numUnjoined += 1;

    rc = pthread_mutex_unlock(&threadMutex);
    if (rc != 0) {
        fputs("whoops, couldn't release mutex\n", stdout);
        fflush(stdout);
        exit(-2);
    }

    // Wake a waiter on threadDied so the terminated thread can be joined.
    rc = pthread_cond_signal(&threadDied);
    if (rc != 0) {
        fputs("whoops, couldn't signal the main thread to reap\n", stdout);
        fflush(stdout);
        exit(-3);
    }

    printf("Thread %d has worked hard and is now terminating\n", slot);
    fflush(stdout);
    return NULL;
}
int main(int argc, char *argv[]) {
int s, idx ;
if (argc < 2 || strcmp(argv[1], "--help") == 0) {
printf("Usage : %s nsecs...\n", argv[0]);
fflush(stdout);
exit(-4) ;
}
thread = calloc(argc -1, sizeof(*thread) );
if (thread == NULL) {
printf("whoops, couldn't allocate memory of size %lu\n", (argc -1) * sizeof(*thread) );
fflush(stdout);
exit(-5);
}
// Let's create all the threads now !!
for (idx =0 ; idx < argc -1 ; idx++ ) {
thread[idx].sleepTime = atoi(argv[idx + 1 ]) ; // thread sleeps for the duration entered in the cmd line
thread[idx].state = TS_ALIVE ;
s = pthread_create(&thread[idx].tid, NULL, threadFunc, &idx);
printf("Main created thread %d with tid : %lu \n", ( * (int *)&idx ), (unsigned long)thread[idx].tid);
fflush(stdout);
if (s != 0 ){
printf("whoops couldn't create thread %lu\n",(unsigned long) (&thread[idx].tid) );
fflush(stdout);
exit(-6) ;
}
//sleep(1); // << -- if I don't add this sleep, then it just deadlocks
}
totThreads = argc -1 ;
numLive = totThreads ;
// Join terminated threads
while (numLive > 0 ) {
s = pthread_mutex_lock(&threadMutex) ;
if (s!=0){
printf("whoops, couldn't lock mutex for joining\n") ;
fflush(stdout);
exit(-7) ;
}
while (numUnjoined == 0) {
s = pthread_cond_wait(&threadDied, &threadMutex) ;
if (s!=0) {
printf("whoops, couldn't wait for thread join\n") ;
fflush(stdout);
exit(-8) ;
}
}
for (idx = 0 ; idx < totThreads ; idx++ ) {
if (thread[idx].state == TS_TERMINATED) {
s = pthread_join(thread[idx].tid, NULL) ;
if (s!=0) {
printf("Failed thread join\n");
fflush(stdout);
exit(-9) ;
}
thread[idx].state = TS_JOINED ;
numLive-- ;
numUnjoined-- ;
printf("Reaped thread %d (numLive=%d)\n", idx, numLive);
fflush(stdout);
}
}
s = pthread_mutex_unlock(&threadMutex) ;
if (s!=0){
printf("whopps, couldn't unlock mutex after joining\n");
fflush(stdout);
exit(-10) ;
}
}
exit(EXIT_SUCCESS);
}
对于线程数为 1 的情况,此代码有时有效,有时它只是挂起:(
正常运行时:
#./thread_multijoin 1
Main created thread 0 with tid : 139835063281408
Thread 0 has worked hard and is now terminating
Reaped thread 0 (numLive=0)
挂起时:
#./thread_multijoin 1
Main created thread 0 with tid : 140301613573888
Thread 1 has worked hard and is now terminating
^C
注意这里 Main 说 "Thread 0 was created" ;而线程本身说 "Thread 1" ... 为什么不匹配 ??
当我有多个线程时肯定会卡住:
#./thread_multijoin 1 2 2 1
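Main created thread 0 with tid : 140259455936256
Main created thread 1 with tid : 140259447543552
Main created thread 2 with tid : 140259439150848
Main created thread 3 with tid : 140259430758144
Thread 4 has worked hard and is now terminating
Thread 0 has worked hard and is now terminating
Reaped thread 0 (numLive=3)
Reaped thread 3 (numLive=2)
Thread 3 has worked hard and is now terminating
Reaped thread 2 (numLive=1)
Thread 2 has worked hard and is now terminating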
^C
我唯一能看出来的是 main 报告的线程编号与线程自己报告的编号不一致,所以我猜测是并行调度导致线程计数器出了问题……你们能帮我缩小问题范围吗?
提前致谢。
========================================
感谢@mevets 和@user3386109 的回答:)
我尝试按照@mevets 的建议进行操作:i,e
pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);
和
int idx = (int)arg ;
但是在编译的时候出现了这个错误:
thread_multijoin.c: In function ‘threadFunc’:
thread_multijoin.c:32:15: error: cast from pointer to integer of different
size [-Werror=pointer-to-int-cast]
int idx = (int)arg ; // since arg is of type void , we typecast it to * of type int and deref it
thread_multijoin.c: In function ‘main’:
thread_multijoin.c:90:64: error: cast to pointer from integer of different
size [-Werror=int-to-pointer-cast]
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx );
经过进一步研究,发现了这个帖子:
cast to pointer from integer of different size, pthread code
建议使用 intptr_t :
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)(intptr_t)idx );
和
int idx = (intptr_t)arg
一切正常,没有错误。再次感谢您的宝贵时间,非常感谢:)
PS:intptr_t 由标准头文件 <stdint.h> 定义,使用前需要包含它(并不需要 #define _GNU_SOURCE):
#include <stdint.h>
[线程 ID]:
您把 idx 的地址传给了每个线程,线程再解引用它来索引线程表。因此每个线程得到的是同一个指针参数,而 main 中的循环还在不断修改它所指向的 idx,线程读到的值取决于它被调度的时机。
您可能想要:
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);
和
int idx = (int)arg ; // 因为 arg 是 void 类型,所以我们将其类型转换为 * int 类型并取消引用它
即;不要取消引用它,只需将它传递到“void *”容器中即可。
我正在尝试弄清楚多线程是如何工作的,这是我的代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <pthread.h>
// Condition variable that a worker signals after marking itself terminated;
// main waits on it while no terminated thread is pending a join.
static pthread_cond_t threadDied = PTHREAD_COND_INITIALIZER;

// Mutex held around updates to numUnjoined and to the per-thread state field.
static pthread_mutex_t threadMutex = PTHREAD_MUTEX_INITIALIZER;

// Number of threads created by main.
static int totThreads = 0;

// Threads that main has not joined yet (still running, or terminated and
// waiting to be joined).
static int numLive = 0;

// Threads that have terminated but have not been joined yet.
static int numUnjoined = 0;
// Lifecycle of a worker thread as recorded in the thread table.
enum tstate {
    TS_ALIVE      = 0,  // thread is still running
    TS_TERMINATED = 1,  // thread has finished but has not been joined yet
    TS_JOINED     = 2   // thread has finished and has been joined
};
// Per-thread bookkeeping record. `thread` points to an array of these records
// (one per worker) that main allocates with calloc.
static struct {
    pthread_t tid;        // thread ID filled in by pthread_create
    enum tstate state;    // current lifecycle state (see enum tstate)
    int sleepTime;        // seconds the thread sleeps before terminating
} *thread;
// Start routine shared by every worker thread.
//
// arg points to an int holding this thread's index into the `thread` table.
// The int is read once, at thread start, so the pointed-to value must still
// hold the intended index at that moment.
//
// The thread sleeps for its configured time, marks itself TS_TERMINATED and
// bumps numUnjoined while holding threadMutex, signals threadDied, prints a
// farewell line and returns NULL. If a pthread call fails, a message is
// printed and the whole process exits.
static void *threadFunc (void *arg) {
    const int slot = *(int *)arg;   // copy the index out of the caller's storage
    int rc;                         // result of the pthread calls

    // Stand-in for real work.
    sleep(thread[slot].sleepTime);

    rc = pthread_mutex_lock(&threadMutex);
    if (rc != 0) {
        fputs("whoops, couldn't acquire mutex\n", stdout);
        fflush(stdout);
        exit(-1);
    }

    // Shared bookkeeping is only modified while the mutex is held.
    thread[slot].state = TS_TERMINATED;
    numUnjoined += 1;

    rc = pthread_mutex_unlock(&threadMutex);
    if (rc != 0) {
        fputs("whoops, couldn't release mutex\n", stdout);
        fflush(stdout);
        exit(-2);
    }

    // Wake a waiter on threadDied so the terminated thread can be joined.
    rc = pthread_cond_signal(&threadDied);
    if (rc != 0) {
        fputs("whoops, couldn't signal the main thread to reap\n", stdout);
        fflush(stdout);
        exit(-3);
    }

    printf("Thread %d has worked hard and is now terminating\n", slot);
    fflush(stdout);
    return NULL;
}
int main(int argc, char *argv[]) {
int s, idx ;
if (argc < 2 || strcmp(argv[1], "--help") == 0) {
printf("Usage : %s nsecs...\n", argv[0]);
fflush(stdout);
exit(-4) ;
}
thread = calloc(argc -1, sizeof(*thread) );
if (thread == NULL) {
printf("whoops, couldn't allocate memory of size %lu\n", (argc -1) * sizeof(*thread) );
fflush(stdout);
exit(-5);
}
// Let's create all the threads now !!
for (idx =0 ; idx < argc -1 ; idx++ ) {
thread[idx].sleepTime = atoi(argv[idx + 1 ]) ; // thread sleeps for the duration entered in the cmd line
thread[idx].state = TS_ALIVE ;
s = pthread_create(&thread[idx].tid, NULL, threadFunc, &idx);
printf("Main created thread %d with tid : %lu \n", ( * (int *)&idx ), (unsigned long)thread[idx].tid);
fflush(stdout);
if (s != 0 ){
printf("whoops couldn't create thread %lu\n",(unsigned long) (&thread[idx].tid) );
fflush(stdout);
exit(-6) ;
}
//sleep(1); // << -- if I don't add this sleep, then it just deadlocks
}
totThreads = argc -1 ;
numLive = totThreads ;
// Join terminated threads
while (numLive > 0 ) {
s = pthread_mutex_lock(&threadMutex) ;
if (s!=0){
printf("whoops, couldn't lock mutex for joining\n") ;
fflush(stdout);
exit(-7) ;
}
while (numUnjoined == 0) {
s = pthread_cond_wait(&threadDied, &threadMutex) ;
if (s!=0) {
printf("whoops, couldn't wait for thread join\n") ;
fflush(stdout);
exit(-8) ;
}
}
for (idx = 0 ; idx < totThreads ; idx++ ) {
if (thread[idx].state == TS_TERMINATED) {
s = pthread_join(thread[idx].tid, NULL) ;
if (s!=0) {
printf("Failed thread join\n");
fflush(stdout);
exit(-9) ;
}
thread[idx].state = TS_JOINED ;
numLive-- ;
numUnjoined-- ;
printf("Reaped thread %d (numLive=%d)\n", idx, numLive);
fflush(stdout);
}
}
s = pthread_mutex_unlock(&threadMutex) ;
if (s!=0){
printf("whopps, couldn't unlock mutex after joining\n");
fflush(stdout);
exit(-10) ;
}
}
exit(EXIT_SUCCESS);
}
对于线程数为 1 的情况,此代码有时有效,有时它只是挂起:(
正常运行时:
#./thread_multijoin 1
Main created thread 0 with tid : 139835063281408
Thread 0 has worked hard and is now terminating
Reaped thread 0 (numLive=0)
挂起时:
#./thread_multijoin 1
Main created thread 0 with tid : 140301613573888
Thread 1 has worked hard and is now terminating
^C
注意这里 Main 说 "Thread 0 was created" ;而线程本身说 "Thread 1" ... 为什么不匹配 ??
当我有多个线程时肯定会卡住:
#./thread_multijoin 1 2 2 1
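Main created thread 0 with tid : 140259455936256
Main created thread 1 with tid : 140259447543552
Main created thread 2 with tid : 140259439150848
Main created thread 3 with tid : 140259430758144
Thread 4 has worked hard and is now terminating
Thread 0 has worked hard and is now terminating
Reaped thread 0 (numLive=3)
Reaped thread 3 (numLive=2)
Thread 3 has worked hard and is now terminating
Reaped thread 2 (numLive=1)
Thread 2 has worked hard and is now terminating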
^C
我唯一能看出来的是 main 报告的线程编号与线程自己报告的编号不一致,所以我猜测是并行调度导致线程计数器出了问题……你们能帮我缩小问题范围吗?
提前致谢。
========================================
感谢@mevets 和@user3386109 的回答:)
我尝试按照@mevets 的建议进行操作:i,e
pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);
和
int idx = (int)arg ;
但是在编译的时候出现了这个错误:
thread_multijoin.c: In function ‘threadFunc’:
thread_multijoin.c:32:15: error: cast from pointer to integer of different
size [-Werror=pointer-to-int-cast]
int idx = (int)arg ; // since arg is of type void , we typecast it to * of type int and deref it
thread_multijoin.c: In function ‘main’:
thread_multijoin.c:90:64: error: cast to pointer from integer of different
size [-Werror=int-to-pointer-cast]
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx );
经过进一步研究,发现了这个帖子: cast to pointer from integer of different size, pthread code
建议使用 intptr_t :
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)(intptr_t)idx );
和
int idx = (intptr_t)arg
一切正常,没有错误。再次感谢您的宝贵时间,非常感谢:)
PS:intptr_t 由标准头文件 <stdint.h> 定义,使用前需要包含它(并不需要 #define _GNU_SOURCE):
#include <stdint.h>
[线程 ID]: 您把 idx 的地址传给了每个线程,线程再解引用它来索引线程表。因此每个线程得到的是同一个指针参数,而 main 中的循环还在不断修改它所指向的 idx,线程读到的值取决于它被调度的时机。 您可能想要:
s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);
和 int idx = (int)arg ; // 因为 arg 是 void 类型,所以我们将其类型转换为 * int 类型并取消引用它
即;不要取消引用它,只需将它传递到“void *”容器中即可。