setjmp longjmp 与堆栈

setjmp longjmp with Stack

我必须使用 setjmp/longjmp 实现用户级线程库作为作业。这是我写的代码:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/select.h>
#include <time.h>
#include <errno.h>
#include <signal.h>
#include <setjmp.h>



#define STACKSIZE   (128*1024)    /* size in bytes of each thread's private stack; parenthesized so the macro expands safely inside larger expressions */

/* Entry point of a user-level thread; the library calls it without arguments and ignores any result. */
typedef void (*ult_func)();
struct tcb_str; // forward declaration so that pointers to the control block can be declared before its definition

/* Thread control block: the per-thread bookkeeping kept by the library. */
typedef struct tcb_str {
    // one instance is allocated per thread in ult_spawn
    stack_t stack;  // descriptor of the thread's private stack (ss_sp is malloc'ed with STACKSIZE bytes in ult_spawn)
    jmp_buf buf;    // execution context saved with setjmp; ult_init resumes the thread by longjmp-ing to it
    int id;         // thread id, taken from the global counter in ult_spawn
    void (*threadfunc)();   // function the thread runs; called from signalHandlerSpawn
    int waitingfor; // id of the thread / file descriptor the thread is currently waiting for (-1 right after spawn)
    int waitingtype; // 0 = thread, 1 = fd (-1 right after spawn)
    int exitnumber; // meant to be set on exit; no code shown here writes it
} tcb;

typedef struct queue queue;

// Runnable threads; ult_yield and ult_read treat the head node as the thread that is currently executing.
struct queue *runqueue;
// Threads parked by ult_read until their file descriptor becomes readable.
struct queue *blockedqueue;
// Drained and freed by schedulerThread at shutdown.
struct queue *zombiequeue;

// Context of the scheduler loop in ult_init; threads longjmp here to give up the CPU.
jmp_buf backtonormal;

// Last thread id handed out; ult_spawn pre-increments it, so ids start at 1.
int counter = 0;

// Singly linked FIFO of threads; initqueue sets both pointers to NULL for an empty queue.
struct queue {
    struct queue_node *start;   // head: the node pop() detaches
    struct queue_node *end;     // tail: the node push() appends after
};
// One list cell; it refers to a thread control block instead of embedding it.
struct queue_node {
    struct tcb_str *element;    // thread carried by this node
    struct queue_node *next;    // following node; push() and pop() set it to NULL on the node they handle
};

/*
 * Detach the head node of a queue and hand it to the caller.
 *
 * qu: queue to take from; may be NULL.
 * Returns the former head with its next pointer cleared, or NULL when qu is
 * NULL or contains no nodes. The node is not freed.
 */
struct queue_node* pop(struct queue *qu) {
  struct queue_node *head = (qu != NULL) ? qu->start : NULL;

  if (head == NULL) {
    return NULL;
  }

  qu->start = head->next;
  if (head->next == NULL) {
    /* that was the last node, so the tail pointer must be cleared as well */
    qu->end = NULL;
  }

  head->next = NULL;
  return head;
}


/*
 * Append a node to the tail of a queue.
 *
 * qu:   queue to append to.
 * node: node to append; its next pointer is overwritten with NULL.
 * Returns 1 on success, -1 when qu or node is NULL (nothing is modified then).
 */
int push(struct queue *qu, struct queue_node *node) {

  /* A NULL node used to be dereferenced below; reject it like a NULL queue. */
  if (qu == NULL || node == NULL) {
    return -1;
  }
  node->next = NULL;
  if (qu->end == NULL) {
    /* empty queue: the node becomes both head and tail */
    qu->start = qu->end = node;
  } else {
    qu->end->next = node;
    qu->end = node;
  }
  return 1;
}


/*
 * Unlink the first node whose thread id equals tid.
 *
 * qu:  queue to search; may be NULL or empty.
 * tid: thread id to look for.
 * Returns the unlinked node with its next pointer cleared, or NULL when no
 * node matches. The node is not freed.
 */
struct queue_node* removeByTid(struct queue *qu, int tid) {
    if (qu == NULL) {
        return NULL;
    }

    struct queue_node *previous = NULL;
    for (struct queue_node *cur = qu->start; cur != NULL; previous = cur, cur = cur->next) {
        if (cur->element->id != tid) {
            continue;
        }

        /* bypass the node: either the head moves on or the predecessor skips it */
        if (previous == NULL) {
            qu->start = cur->next;
        } else {
            previous->next = cur->next;
        }

        /* removing the tail must move the tail pointer back (NULL once the queue is empty) */
        if (qu->end == cur) {
            qu->end = previous;
        }

        cur->next = NULL;
        return cur;
    }
    return NULL;
}

/*
 * Allocate a queue that holds no nodes.
 *
 * Returns the new queue with start and end set to NULL, or NULL when the
 * allocation fails. The caller releases it with free().
 */
struct queue *initqueue() {
      queue *fresh = malloc(sizeof(*fresh));

      if (fresh != NULL) {
        fresh->end = NULL;
        fresh->start = NULL;
      }
      return fresh;
}
/*
 * Poll a file descriptor for readable data without blocking.
 *
 * fd: descriptor to test.
 * Returns the result of select(): > 0 when fd is readable, 0 when it is not,
 * -1 on error. A descriptor outside [0, FD_SETSIZE) cannot be stored in an
 * fd_set, so it is rejected up front with -1 and errno set to EBADF.
 */
int checkfordata(int fd) {
    if (fd < 0 || fd >= FD_SETSIZE) {
        errno = EBADF;
        return -1;
    }

    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(fd, &fds);       /* the set watched by select() holds just this fd */

    /* a zero timeout makes select() return immediately */
    struct timeval tv_str;
    tv_str.tv_sec = 0;
    tv_str.tv_usec = 0;

    return select(fd + 1, &fds, NULL, NULL, &tv_str);
}

/*
 * Pick what runs next; called from the scheduler loop in ult_init.
 *
 * The function returns as soon as the run queue has a head to resume:
 *  - a blocked thread whose descriptor became readable is moved to the front
 *    of the run queue, or
 *  - the run queue already holds a thread.
 * When nothing is runnable but threads are still blocked, it sleeps in
 * select() for up to 800 ms on their descriptors and then looks again.
 * When no thread is left at all, it frees the zombie and blocked queues and
 * returns with the run queue empty.
 */
void schedulerThread() {
    while(1) {
        struct queue_node* tmp;
        fd_set fds;
        FD_ZERO(&fds);

        /* Go through the blocked threads and wake the first one that has data. */
        for (tmp = blockedqueue->start; tmp != NULL; tmp = tmp->next) {
            int fd = tmp->element->waitingfor;

            if (checkfordata(fd) > 0) {
                removeByTid(blockedqueue, tmp->element->id);
                /* put it at the front of the run queue so it is resumed next */
                tmp->next = runqueue->start;
                runqueue->start = tmp;
                if (runqueue->end == NULL) {
                    /* the run queue was empty: the woken node is also its tail */
                    runqueue->end = tmp;
                }
                return;
            }

            /* only descriptors that fit into an fd_set may be passed to FD_SET */
            if (fd >= 0 && fd < FD_SETSIZE) {
                FD_SET(fd, &fds);
            }
        }

        if (runqueue->start != NULL) {
            return;
        }

        if (blockedqueue->start == NULL) {
            /* No thread left anywhere: release what the library allocated. */
            struct queue_node* qu;
            while((qu = pop(zombiequeue)) != NULL) {
                free(qu->element->stack.ss_sp);
                free(qu->element);      /* the control block itself, not only its stack */
                free(qu);
            }
            free(zombiequeue);
            zombiequeue = NULL;
            free(blockedqueue);
            blockedqueue = NULL;
            /* runqueue stays allocated: ult_init reads runqueue->start right
               after this function returns, so freeing it here would be a
               use-after-free. */
            return;
        }

        /* Everything is blocked: wait for one of the descriptors instead of
           spinning. fds is rebuilt on every pass, so select() may modify it. */
        struct timeval tv_str;
        tv_str.tv_sec = 0;
        tv_str.tv_usec = 800 * 1000;
        select(FD_SETSIZE, &fds, NULL, NULL, &tv_str);
    }
}
/*
 This function only exists to tell the process to use an empty stack for the thread
 */
void signalHandlerSpawn( int arg ) {

    if ( setjmp( runqueue->start->element->buf ) ) {
        runqueue->start->element->threadfunc();
        longjmp(backtonormal, 1);
    }
    return;
}

/*
 * Create a new thread that will run f and append it to the run queue.
 *
 * The thread gets its own malloc'ed stack. To capture a context on that stack
 * it is installed as the alternate signal stack, SIGUSR1 is raised, and
 * signalHandlerSpawn saves the context with setjmp. Afterwards the previous
 * alternate stack and SIGUSR1 disposition are restored.
 *
 * f: function the thread executes.
 * Returns the id of the new thread. Allocation or system-call failures are
 * fatal: a message is printed and the process exits with status 1.
 */
int ult_spawn(ult_func f) {
    struct tcb_str* tcb = malloc(sizeof(struct tcb_str));
    struct queue_node* node = malloc(sizeof(struct queue_node));
    if ( tcb == NULL || node == NULL ) {
        perror( "Could not allocate thread." );
        exit( 1 );
    }

    tcb->threadfunc = f;
    tcb->waitingfor = -1;
    tcb->waitingtype = -1;
    tcb->exitnumber = 0;
    tcb->id = ++counter;
    tcb->stack.ss_flags = 0;
    tcb->stack.ss_size = STACKSIZE;
    tcb->stack.ss_sp = malloc(STACKSIZE);
    if ( tcb->stack.ss_sp == 0 ) {
        perror( "Could not allocate stack." );
        exit( 1 );
    }

    /* Save the previous alternate stack while installing the new one. The old
       descriptor used to be left uninitialized and was then handed back to
       sigaltstack() below. */
    stack_t oldStack;
    if ( sigaltstack( &(tcb->stack), &oldStack ) == -1 ) {
        perror( "sigaltstack" );
        exit( 1 );
    }

    struct sigaction sa;
    struct sigaction oldHandler;
    sa.sa_handler = &signalHandlerSpawn;
    sa.sa_flags = SA_ONSTACK;
    sigemptyset( &sa.sa_mask );
    if ( sigaction( SIGUSR1, &sa, &oldHandler ) == -1 ) {
        perror( "sigaction" );
        exit( 1 );
    }

    node->element = tcb;
    push(runqueue, node);

    /* signalHandlerSpawn saves into runqueue->start, so make the new node the
       head for the duration of the signal and put the real head back after. */
    struct queue_node* q = runqueue->start;
    runqueue->start = runqueue->end;
    raise( SIGUSR1 );
    runqueue->start = q;

    /* Restore the original stack and handler. If the thread's stack stayed
       registered, a later spawn from that thread could not switch stacks. */
    if ( sigaltstack( &oldStack, 0 ) == -1 ) {
        perror( "sigaltstack" );
        exit( 1 );
    }
    sigaction( SIGUSR1, &oldHandler, 0 );

    return tcb->id;
}

void ult_yield() {
    if(runqueue->start == NULL) {
        exit(1);
        //TODO clean up
    }
    //We're the only one, so no need to schedule
    if(runqueue->start == runqueue->end && blockedqueue->start == NULL && runqueue->start != NULL) {
        return;
    }
    else {
        if (setjmp(runqueue->start->element->buf))
                 return;
        else {
            struct queue_node* tmp = pop(runqueue);
            push(runqueue, tmp);
            longjmp(backtonormal, 1);
        }
    }
}

/*
 * Read from a descriptor without stalling the other threads.
 *
 * The calling thread (head of the run queue) is moved to the blocked queue
 * and control goes to the scheduler. When the scheduler resumes the thread,
 * the actual read() is performed.
 *
 * fd:    descriptor to read from; must lie in [0, FD_SETSIZE) because the
 *        scheduler stores it in an fd_set.
 * buf:   destination buffer.
 * count: maximum number of bytes to read; must not be negative.
 * Returns the result of read(), or -1 with errno set to EBADF (bad fd) or
 * EINVAL (negative count) without blocking the thread.
 */
int ult_read(int fd, void *buf, int count) {
        if (fd < 0 || fd >= FD_SETSIZE) {
            errno = EBADF;
            return -1;
        }
        if (count < 0) {
            /* would otherwise turn into a huge size_t in read() */
            errno = EINVAL;
            return -1;
        }

        if (setjmp(runqueue->start->element->buf)) {
            /* resumed by the scheduler */
            return (int)read(fd, buf, (size_t)count);
        }
        else {
            struct queue_node* tmp = pop(runqueue);
            tmp->element->waitingfor = fd;
            tmp->element->waitingtype = 1;
            push(blockedqueue, tmp);
            longjmp(backtonormal, 1);
        }
    return -1; /* not reached: longjmp does not return */
}
/*
 * Start the thread library and run until no thread is left.
 *
 * Creates the three queues, spawns the initial thread f and then loops:
 * schedulerThread() selects the next thread, and the loop longjmps into its
 * saved context. Threads come back here by longjmp-ing to backtonormal.
 *
 * f: function executed by the initial thread.
 * Returns when the run queue is empty after scheduling. If a queue cannot be
 * allocated, a message is printed and the process exits with status 1.
 */
void ult_init(ult_func f) {
    runqueue = initqueue();
    blockedqueue = initqueue();
    zombiequeue = initqueue();
    /* every later function dereferences these without checking */
    if (runqueue == NULL || blockedqueue == NULL || zombiequeue == NULL) {
        perror( "Could not allocate queues." );
        exit( 1 );
    }

    ult_spawn(f);
    while(1) {
        if(setjmp(backtonormal))
            continue;   /* a thread handed control back: schedule again */
        else {
            schedulerThread();
            if(runqueue->start == NULL)
                return; //TODO clean up
            longjmp(runqueue->start->element->buf, 1);
        }
    }
}


/*
 * Demo thread: keeps reading 8-byte chunks from /dev/random through ult_read.
 * If the device cannot be opened, a message is printed and the process exits
 * with status 1, because ult_read cannot wait on an invalid descriptor.
 */
void threadA()
{
    int fd;
    char *inpt = "/dev/random";
    char buf[8];

    /* O_NONBLOCK belongs in the flags; as a third argument it would be taken as the file mode */
    fd = open(inpt, O_RDONLY | O_NONBLOCK);

    if (fd == -1)
    {
        perror("open()");
        exit(1);
    }
    while(1)
    {
        memset(buf, 0, sizeof(buf));
        ult_read(fd, buf, sizeof(buf));
    }
}

/*
 * Demo thread: reads lines from standard input through ult_read and reacts
 * to the command "stats".
 */
void threadB()
{
    char input[512] = {0};
    while(1)
    {
        memset(input, 0, sizeof(input));
        /* leave the last byte untouched so the buffer is always
           NUL-terminated before strcmp() looks at it */
        ult_read(STDIN_FILENO, input, sizeof(input) - 1);
        if(strcmp(input, "stats\n") == 0)
        {
           //print stats
            continue;
        }
    }
}

/*
 * Initial thread: spawns the two demo threads and then does nothing but
 * yield, so the other threads get to run.
 */
void myInit()
{
    ult_spawn(threadA);
    ult_spawn(threadB);

    for (;;) {
        ult_yield();
    }
}
/* Program entry: runs the thread library with myInit as the first thread. */
int main(void)
{
    ult_init(myInit);

    return 0;
}

问题出现在 ult_read。一旦这个函数被调用,它会先跳回调度器。调度器检查数据是否可用(以防止整个进程阻塞),一旦有数据可读,就跳回该函数。然而,当该函数返回时会发生段错误。Valgrind 告诉我:

Jump to the invalid address stated on the next line
==20408==    at 0x0: ???
==20408==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Ult_yield 虽然使用相同的技术,但效果很好。我检查了这个问题 SetJmp/LongJmp: Why is this throwing a segfault?,但我认为这是一个不同的问题,因为我正在为每个 "thread" 创建一个单独的堆栈。 谁能给我解释一下,问题是什么以及如何解决?

setjmp 只被调用一次,却会返回两次。问题在于你把它放在了信号处理程序中:第二次返回(在 longjmp 之后)时,已经没有地方可以返回了。

请修改您的代码,使 setjmp 在适合 longjmp 跳回的环境中被调用。

请注意,setjmp 在信号处理程序中保存的环境并不是正常的运行时环境,在其中不能调用许多常用函数。我不确定 longjmp 之后得到的环境会是什么样。

可以尝试的一件事是在您的主例程中调用 setjmp 并且只在您的信号处理程序中设置一个标志。检查标志并设置时调用 longjmp

我看不出你的代码有什么明显的错误,但它不是 MVCE,所以很难说——你的调度程序或你的 push 和 pop 函数可能有问题.

看起来有问题的一处是 ult_yield 和 ult_read 中的检查:

if(runqueue->start == NULL && blockedqueue->start == NULL) ...

这两个应该是:

if (runqueue->start == NULL) {
    printf("Scheduler queue corrupted");
    abort(); }

因为调用这些函数时,runqueue->start 必须指向当前线程的 tcb/queue 节点。

您的 valgrind 错误看起来像是在尝试 longjmp 通过无效的 jmp_buf,因此请尝试回溯以查看它来自何处以及如何进入该状态。

您可能还应该在 ult_spawn 末尾取消设置信号处理程序(或使用 SA_RESETHAND),以免来自某处的虚假 SIGUSR1 导致事物损坏。