程序间歇性卡住:main 报告的线程编号与线程自身报告的编号不一致

program intermittently stuck with main reporting a different thread id as opposed to the thread itself

我正在尝试弄清楚多线程是如何工作的,这是我的代码:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <pthread.h>

/* Condition variable a worker signals after marking itself terminated;
 * main waits on it before scanning for threads to join. */
static pthread_cond_t threadDied = PTHREAD_COND_INITIALIZER;

/* Mutex held while the per-thread state and the counters below are updated. */
static pthread_mutex_t threadMutex = PTHREAD_MUTEX_INITIALIZER;

/* Number of threads created. */
static int totThreads = 0;

/* Number of threads not yet joined by main (running, or finished but unreaped). */
static int numLive = 0;

/* Number of threads that have announced termination but are not yet joined. */
static int numUnjoined = 0;

/* Life-cycle stages of a worker thread as tracked by main. */
enum tstate {
    TS_ALIVE      = 0,  /* still running */
    TS_TERMINATED = 1,  /* finished its work, waiting to be joined */
    TS_JOINED     = 2   /* finished and already joined */
};

/*
 * Bookkeeping record for one worker thread. `thread` is a pointer to the
 * first element of a heap-allocated array of these records (one per worker),
 * indexed by the worker's number.
 */
static struct {
    pthread_t   tid;        /* thread ID */
    enum tstate state;      /* current life-cycle stage (see enum tstate) */
    int         sleepTime;  /* seconds the worker sleeps before terminating */
} *thread;

/*
 * Start routine shared by every worker thread.
 *
 * arg points to an int holding this worker's index into the `thread` table.
 * The index is read exactly once, on entry, so the pointed-to int has to
 * hold the right value at that moment.
 *
 * The worker sleeps for its configured time, then (under threadMutex) bumps
 * numUnjoined and marks itself TS_TERMINATED, signals threadDied, prints a
 * farewell line and returns NULL. Any pthread failure prints a message and
 * terminates the whole process with a distinct negative exit status.
 */
static void *threadFunc(void *arg)
{
    const int self = *(const int *)arg;  /* this worker's slot in the table */

    /* Simulated work. */
    sleep(thread[self].sleepTime);

    if (pthread_mutex_lock(&threadMutex) != 0) {
        printf("whoops, couldn't acquire mutex\n");
        fflush(stdout);
        exit(-1);
    }

    /* Both updates happen under the mutex so main sees them together. */
    thread[self].state = TS_TERMINATED;
    numUnjoined++;

    if (pthread_mutex_unlock(&threadMutex) != 0) {
        printf("whoops, couldn't release mutex\n");
        fflush(stdout);
        exit(-2);
    }

    /* Wake whoever is waiting for a terminated thread to reap. */
    if (pthread_cond_signal(&threadDied) != 0) {
        printf("whoops, couldn't signal the main thread to reap\n");
        fflush(stdout);
        exit(-3);
    }

    printf("Thread %d has worked hard and is now terminating\n", self);
    fflush(stdout);

    return NULL;
}

int main(int argc, char *argv[]) {
    int s, idx ;

    if (argc < 2 || strcmp(argv[1], "--help") == 0) {
        printf("Usage : %s nsecs...\n", argv[0]);
        fflush(stdout);
        exit(-4) ;
    }
    thread = calloc(argc -1, sizeof(*thread) );
    if (thread == NULL) {
        printf("whoops, couldn't allocate memory of size %lu\n", (argc -1) * sizeof(*thread) );
        fflush(stdout);
        exit(-5);
    }

    // Let's create all the threads now !!

    for (idx =0 ; idx < argc -1 ; idx++ ) {
        thread[idx].sleepTime = atoi(argv[idx + 1 ]) ; // thread sleeps for the duration entered in the cmd line
        thread[idx].state = TS_ALIVE ;
        s = pthread_create(&thread[idx].tid, NULL, threadFunc, &idx);
        printf("Main created thread %d with tid : %lu \n", ( * (int *)&idx ), (unsigned long)thread[idx].tid);
        fflush(stdout);
        if (s != 0 ){
            printf("whoops couldn't create thread %lu\n",(unsigned long) (&thread[idx].tid) );
            fflush(stdout);
            exit(-6) ;
        }
        //sleep(1); // << -- if I don't add this sleep, then it just deadlocks
    }

    totThreads = argc -1 ;
    numLive = totThreads ;

    // Join terminated threads

    while (numLive > 0 ) {
        s = pthread_mutex_lock(&threadMutex) ;
        if (s!=0){
            printf("whoops, couldn't lock mutex for joining\n") ;
            fflush(stdout);
            exit(-7) ;
        }
        while (numUnjoined == 0) {
            s = pthread_cond_wait(&threadDied, &threadMutex) ;
            if (s!=0) {
                printf("whoops, couldn't wait for thread join\n") ;
                fflush(stdout);
                exit(-8) ;
            }
        }

        for (idx = 0 ; idx < totThreads ; idx++ ) {
            if (thread[idx].state == TS_TERMINATED) {
                s = pthread_join(thread[idx].tid, NULL) ;
                if (s!=0) {
                    printf("Failed thread join\n");
                    fflush(stdout);
                    exit(-9) ;
                }

                thread[idx].state = TS_JOINED ;
                numLive-- ;
                numUnjoined-- ;
                printf("Reaped thread %d (numLive=%d)\n", idx, numLive);
                fflush(stdout);
            }
        }

        s = pthread_mutex_unlock(&threadMutex) ;
        if (s!=0){
            printf("whopps, couldn't unlock mutex after joining\n");
            fflush(stdout);
            exit(-10) ;
        }
    }
    exit(EXIT_SUCCESS);

}

对于线程数为 1 的情况,此代码有时有效,有时它只是挂起:(

正在工作:

#./thread_multijoin 1

Main created thread 0 with tid : 139835063281408

Thread 0 has worked hard and is now terminating

Reaped thread 0 (numLive=0)

挂起:

#./thread_multijoin 1

Main created thread 0 with tid : 140301613573888

Thread 1 has worked hard and is now terminating

^C

注意这里 Main 说 "Thread 0 was created" ;而线程本身说 "Thread 1" ... 为什么不匹配 ??

当我有多个线程时肯定会卡住:

#./thread_multijoin 1 2 2 1

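Main created thread 0 with tid : 140259455936256

Main created thread 1 with tid : 140259447543552

Main created thread 2 with tid : 140259439150848

Main created thread 3 with tid : 140259430758144

Thread 4 has worked hard and is now terminating

Thread 0 has worked hard and is now terminating

Reaped thread 0 (numLive=3)

Reaped thread 3 (numLive=2)

Thread 3 has worked hard and is now terminating

Reaped thread 2 (numLive=1)

Thread 2 has worked hard and is now terminating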

^C

我唯一能看出来的是:main 报告的线程编号和线程自己报告的编号不一致,所以我猜测是并行调度导致线程计数器(idx)出了问题……大家能帮我缩小一下排查范围吗?

提前致谢。

========================================

感谢@mevets 和@user3386109 的回答:)

我尝试按照 @mevets 的建议进行修改,即:

pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);

int idx = (int)arg ;

但是在编译的时候出现了这个错误:

thread_multijoin.c: In function ‘threadFunc’:

thread_multijoin.c:32:15: error: cast from pointer to integer of different 
size [-Werror=pointer-to-int-cast]

int idx = (int)arg  ; // since arg is of type void , we typecast it to * of type int and deref it


thread_multijoin.c: In function ‘main’:

thread_multijoin.c:90:64: error: cast to pointer from integer of different 
size [-Werror=int-to-pointer-cast]

s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx );

经过进一步研究,发现了这个帖子: cast to pointer from integer of different size, pthread code

建议使用 intptr_t :

s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)(intptr_t)idx );

int idx = (intptr_t)arg

一切正常,没有错误。再次感谢您的宝贵时间,非常感谢:)

PS:我这里是通过定义 _GNU_SOURCE 才能使用 intptr_t 的;更可移植的做法是直接包含 <stdint.h>(intptr_t 在该头文件中声明):

#define _GNU_SOURCE

[线程 ID]:您把 idx 的地址传给了每个线程,线程再对它解引用,用得到的值去索引线程表。因此所有线程拿到的是同一个指针参数,而 main 还在循环中不断修改它所指向的 idx。您可能想要的是:

        s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);

以及 int idx = (int)arg ; // arg carries the index value itself, so cast it back instead of dereferencing

也就是说:不要对它解引用,而是直接把索引值本身放进 “void *” 参数里传过去。