由 std::unique_ptr.release() 引起的间歇性 SIGSEGV?

Intermittent SIGSEGV caused by std::unique_ptr.release()?

我在用 C++ 编写的游戏中遇到了一个问题,在这个游戏中,我遇到了一个段错误,这种错误在罕见且看似随机的时间间隔内发生。下面是我的代码的摘录,是演示问题所需的最低限度。

它是使用以下命令行编译的:

g++ -g3 -std=c++17 -Wall -Wextra -Wpedantic -Weffc++ -o ex ex.cc -lncurses -ltinfo

在 Ubuntu 20.04 下在 WSL 上在 Windows 11 使用 g++ 10.3.0。 (您还需要 ncurses 库)

#include <chrono>
#include <memory>
#include <csignal>
#include <cstdlib>
#include <map>
#include <ncurses.h>

struct Command;

class View {
public:
    explicit View();
    ~View();
    void drawStats(int);
    Command* handleEvents();
private:
    std::map<int, std::unique_ptr<Command>>  keymap_;
};

class Game {
public:
    Game();
    void delta(const int);
    static void end(int);
    void render(View&);
    void run(View&);
    void update();
private:
    int delta_;
    std::unique_ptr<Command> nextCommand_;
};

struct Command {
    virtual ~Command() {}
    virtual void execute(Game&)=0;
};

struct MoveCommand : public Command {
    explicit MoveCommand(int);
    void execute(Game&) override;
private:
    int delta_;
};

View::View() : keymap_{
} {
    keymap_['['] = std::make_unique<MoveCommand>(-1);;
    keymap_[' '] = std::make_unique<MoveCommand>(0);
    keymap_[']'] = std::make_unique<MoveCommand>(1);

    initscr();
    cbreak();
    noecho();
    nonl();
    nodelay(stdscr, TRUE);
    intrflush(stdscr, FALSE);
    keypad(stdscr, TRUE);
    scrollok(stdscr, TRUE);
    curs_set(0);
    clear();
}

View::~View() {
    curs_set(1);
    endwin();
}

void View::drawStats(int delta) {
    mvprintw(0, 0, "          ");;
    mvprintw(0, 0, "Delta = %d", delta);
}

Command* View::handleEvents() {
    int c;

    if ((c = getch()) != ERR) {
        auto command = keymap_.find(c);
        if (command != keymap_.end()) {
            return command->second.get();
        }
    }

    return nullptr;
}

constexpr static double TICK = 72000000;

volatile bool endflag = false;

Game::Game() : delta_{0}, nextCommand_{} {
    struct sigaction act;
    act.sa_handler = Game::end;
    sigemptyset (&act.sa_mask);
    act.sa_flags = 0;
    sigaction(SIGHUP, &act, NULL);
    sigaction(SIGINT, &act, NULL);
    sigaction(SIGTERM, &act, NULL);
}

void Game::delta(const int delta) {
    delta_ = delta;
}

void Game::end(int sig) {
    switch (sig) {
        case SIGINT:
        case SIGTERM:
            endflag = true;
            break;
        case SIGHUP:
            exit(EXIT_FAILURE);
            break;
        default:
            break;
    }
}

void Game::render(View& view) {
    view.drawStats(delta_);
}

void Game::run(View& view) {
    std::chrono::steady_clock clock;
    auto previous = clock.now();
    double lag = 0.0;

    while (!endflag) {
        auto current = clock.now();
        auto elapsed = current - previous;
        previous = current;
        lag += elapsed.count();

        auto command = view.handleEvents();
        if (command) {
            nextCommand_.reset(command);
        }

        while (lag >= TICK) {
            lag -= TICK;
            update();
        }

        render(view);
    }
}

void Game::update() {
    if (nextCommand_) {
        nextCommand_->execute(*this);
        nextCommand_.release();
    }
}

MoveCommand::MoveCommand(int delta) : delta_{delta} {
}

void MoveCommand::execute(Game& game) {
    game.delta(delta_);
}

int main() {
    Game game;
    View view;

    game.run(view);

    return EXIT_SUCCESS;
}

当你运行这个程序时,随机按键([]SPACE)。这可能需要一两分钟,但最终你会遇到段错误。当我 运行 这个 gdb 下的程序,我在段错误后得到以下内容:

 Program received signal SIGSEGV, Segmentation fault.
                                                              
0x0000000000000000 in ?? ()
(gdb) where
#0  0x0000000000000000 in ?? ()
#1  0x0000555555556ae0 in Game::update (this=0x7fffffffd170) at ex.cc:149
#2  0x0000555555556a65 in Game::run (this=0x7fffffffd170, view=...)
    at ex.cc:140
#3  0x0000555555556ba1 in main () at ex.cc:165

第 149 行是 nextCommand_.release();。我认为正在发生的是 在 nextCommand_ 中的 Command* 仍在发布过程中,下一次更新正在发生某种竞争条件。所以 if(nextCommand_) 行成功,但当我们到达 nextCommand_->execute(*this); 时它已经消失,因此在空指针上调用 execute() 会导致段错误。是这样吗?如果是这样,我应该怎么做才能使执行命令并释放它的动作成为原子操作?如果不是,可能是什么问题?

        auto command = view.handleEvents();
        if (command) {
            nextCommand_.reset(command);
        }

这个从 unique_ptr 获取原始指针并将其分配给其他 unique_ptr,但将原始 unique_ptr 保留在映射中,尽管它是空的。因此,您最终会得到 use-after-free,Address Sanitizer 会准确地告诉您:

==25993==ERROR: AddressSanitizer: heap-use-after-free on address 0x602000000010 at pc 0x56026e8d74fe bp 0x7fff48410330 sp 0
x7fff48410328
READ of size 8 at 0x602000000010 thread T0
    #0 0x56026e8d74fd in Game::update() /home/alagner/ncu/file.cc:149
    #1 0x56026e8d73f1 in Game::run(View&) /home/alagner/ncu/file.cc:140
    #2 0x56026e8d76ee in main /home/alagner/ncu/file.cc:165
    #3 0x7f32fa53ad09 in __libc_start_main ../csu/libc-start.c:308
    #4 0x56026e8d6329 in _start (/home/alagner/ncu/a.out+0x2329)

0x602000000010 is located 0 bytes inside of 16-byte region [0x602000000010,0x602000000020)
freed by thread T0 here:
    #0 0x7f32fa9c5467 in operator delete(void*, unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:172
    #1 0x56026e8d90aa in MoveCommand::~MoveCommand() /home/alagner/ncu/file.cc:38
    #2 0x56026e8d95d0 in std::default_delete<Command>::operator()(Command*) const /usr/include/c++/10/bits/unique_ptr.h:85
    #3 0x56026e8d966f in std::__uniq_ptr_impl<Command, std::default_delete<Command> >::reset(Command*) /usr/include/c++/10/bits/unique_ptr.h:182
    #4 0x56026e8d88e0 in std::unique_ptr<Command, std::default_delete<Command> >::reset(Command*) /usr/include/c++/10/bits/unique_ptr.h:456
    #5 0x56026e8d73b4 in Game::run(View&) /home/alagner/ncu/file.cc:135
    #6 0x56026e8d76ee in main /home/alagner/ncu/file.cc:165

previously allocated by thread T0 here:
    #0 0x7f32fa9c4647 in operator new(unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:99
    #1 0x56026e8d83ba in std::_MakeUniq<MoveCommand>::__single_object std::make_unique<MoveCommand, int>(int&&) /usr/include/c++/10/bits/unique_ptr.h:962
    #2 0x56026e8d65c5 in View::View() /home/alagner/ncu/file.cc:47
    #3 0x56026e8d76db in main /home/alagner/ncu/file.cc:163
    #4 0x7f32fa53ad09 in __libc_start_main ../csu/libc-start.c:308
```