通过 move_pages() 查询失败

Failed to query via move_pages()

#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Reproduction program: maps a 16 MiB window over the POSIX shared-memory
// object "test_shm" and asks move_pages() (nodes == nullptr, i.e. query only)
// which NUMA node each of four addresses lives on, then prints the statuses.
int main(int argc, char** argv) {
    constexpr uint64_t kRegionBytes = 16lu * 1024 * 1024;
    constexpr uint64_t kStrideBytes = 4lu * 1024 * 1024;
    constexpr uint32_t kSlots = kRegionBytes / kStrideBytes;

    // Pre-load every status slot with a sentinel so untouched entries stand out.
    int32_t nodeOf[kSlots];
    for (int32_t& slot : nodeOf) {
        slot = std::numeric_limits<int32_t>::min();
    }
    void* addrs[kSlots];

    const int shmFd = shm_open("test_shm", O_RDWR | O_CREAT, 0666);
    void* const base = mmap(nullptr, kRegionBytes, PROT_READ | PROT_WRITE, MAP_SHARED, shmFd, 0);

    if (base == MAP_FAILED) {
        if (shmFd > 0) {
            close(shmFd);
        }
        throw "failed to map hugepages";
    }

    // Every slot receives the same address (base + 4 MiB); the slot index
    // does not take part in the computation.
    for (void*& addr : addrs) {
        addr = static_cast<char*>(base) + 4 * 1024 * 1024;
    }

    if (move_pages(0, kSlots, addrs, nullptr, nodeOf, 0) == 0) {
        for (uint32_t slot = 0; slot < kSlots; ++slot) {
            std::cout << "page # " << slot << " locates at numa node " << nodeOf[slot] << std::endl;
        }
    }
    else {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }

    munmap(base, kRegionBytes);
    close(shmFd);
}

并打印:

page # 0 locates at numa node -2
page # 1 locates at numa node -2
page # 2 locates at numa node -2
page # 3 locates at numa node -2

根据 manpage,它指出:

nodes is an array of integers that specify the desired location for each page.
Each element in the array is a node number. nodes can also be NULL, in which 
case move_pages() does not move any pages but instead will return the node where 
each page currently resides, in the status array. Obtaining the status of each 
page may be necessary to determine pages that need to be moved.

为什么查询返回成功,却打印出负值?我的机器只有 2 个 NUMA 节点——0 和 1。

内核版本:3.10.0-862.2.3.el7.x86_64

这里是大页面的版本:

#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Hugepage experiment for move_pages().
//
// Usage: <program> <dst_node>
//
// Steps:
//   1. map 4 MiB with MAP_HUGETLB and touch it (memset) so it is backed,
//   2. query the node of each 2 MiB page,
//   3. move every 2 MiB page to <dst_node> with MPOL_MF_MOVE_ALL,
//   4. query again, this time at 4 KiB granularity.
//
// Returns 0 on success, 1 on bad usage or mmap failure, and -1 when the
// move_pages() call that performs the migration does not return 0.
int main(int argc, char** argv) {
        // argv[1] is mandatory; without this check a missing argument would
        // hand a null pointer to strtoul().
        if (argc < 2) {
                std::cerr << "usage: " << (argc > 0 ? argv[0] : "move_pages_demo")
                          << " <dst_node>" << std::endl;
                return 1;
        }
        const int32_t dst_node = strtoul(argv[1], nullptr, 10);

        constexpr uint64_t size = 4lu * 1024 * 1024;
        constexpr uint64_t pageSize = 2lu * 1024 * 1024;
        constexpr uint32_t nPages = size / pageSize;

        // Sentinel-fill so that entries the kernel never wrote are recognisable.
        int32_t status[nPages];
        for (int32_t& s : status) {
                s = std::numeric_limits<int32_t>::min();
        }
        void* pages[nPages];
        int32_t dst_nodes[nPages];

        void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                         MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB, -1, 0);
        if (ptr == MAP_FAILED) {
                // Report the reason instead of throwing an uncaught string literal.
                std::cerr << "failed to map hugepages because " << strerror(errno) << std::endl;
                return 1;
        }

        // Touch the whole mapping before querying it.
        memset(ptr, 0x41, size);
        for (uint32_t i = 0; i < nPages; i++) {
                pages[i] = static_cast<char*>(ptr) + i * pageSize;
                dst_nodes[i] = dst_node;
        }

        std::cout << "Before moving" << std::endl;

        if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
                std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        }
        else {
                for (uint32_t i = 0; i < nPages; i++) {
                        std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
                }
        }

        // real move
        const long moveResult = move_pages(0, nPages, pages, dst_nodes, status, MPOL_MF_MOVE_ALL);
        if (moveResult != 0) {
                if (moveResult < 0) {
                        std::cout << "failed to move pages because " << strerror(errno) << std::endl;
                }
                else {
                        // A positive return is a count of non-migrated pages; errno is
                        // not meaningful in that case.
                        std::cout << "failed to move pages because " << moveResult
                                  << " page(s) could not be migrated" << std::endl;
                }
                munmap(ptr, size);
                return -1;
        }
        // A zero return still allows individual pages to carry a negative errno
        // in status[], so surface those explicitly.
        for (uint32_t i = 0; i < nPages; i++) {
                if (status[i] < 0) {
                        std::cout << "page # " << i << " was not moved because "
                                  << strerror(-status[i]) << std::endl;
                }
        }

        constexpr uint64_t smallPageSize = 4lu * 1024;
        constexpr uint32_t nSmallPages = size / smallPageSize;
        void* smallPages[nSmallPages];
        int32_t smallStatus[nSmallPages];
        // Fill every entry: an `= {sentinel}` initializer would set only element 0
        // and zero the rest, which is indistinguishable from "node 0".
        for (int32_t& s : smallStatus) {
                s = std::numeric_limits<int32_t>::min();
        }
        for (uint32_t i = 0; i < nSmallPages; i++) {
                smallPages[i] = static_cast<char*>(ptr) + i * smallPageSize;
        }

        std::cout << "after moving" << std::endl;
        if (0 != move_pages(0, nSmallPages, smallPages, nullptr, smallStatus, 0)) {
                std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        }
        else {
                for (uint32_t i = 0; i < nSmallPages; i++) {
                        std::cout << "page # " << i << " locates at numa node " << smallStatus[i] << std::endl;
                }
        }

        munmap(ptr, size);
        return 0;
}

有趣的是,move_pages() 似乎能够识别大页面:在移动大页面之后,我按小页面大小进行查询,结果中填入的正是预期的 NUMA ID。

您对 shm_open 和 mmap 的使用可能无法获得您想要的大页面。

move_pages 系统调用(和 libnuma 包装器)在 x86_64 的 4096 字节标准页面上工作。

并且您以错误的方式使用了 move_pages:第三个参数 "pages" 不正确。它不应该是指向内存本身的指针,而应该是指向一个数组的指针,该数组包含 nPages 个指针:

http://man7.org/linux/man-pages/man2/move_pages.2.html

  long move_pages(int pid, unsigned long count, void **pages,
                   const int *nodes, int *status, int flags);

   pages is an array of pointers to the pages that should be moved.
   These are pointers that should be aligned to page boundaries.
   Addresses are specified as seen by the process specified by pid.

如果 "pages" 中的指针不正确,您将得到 -14;根据 errno 14 命令(来自 moreutils 包)的输出,这是 EFAULT。

//
//g++ 54546367.move_pages.cc -o 54546367.move_pages -lnuma -lrt
#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Minimal move_pages() query: maps 256 KiB of private anonymous memory,
// touches all of it, then asks (nodes == nullptr, so nothing is moved) which
// NUMA node each 4 KiB page lives on and prints one line per page.
//
// Returns 0 normally and 1 if the mapping cannot be created.
int main(int argc, char** argv) {
    constexpr uint64_t pageSize = 4lu * 1024;
    constexpr uint64_t size = 256lu * 1024;// * 1024;
    constexpr uint32_t nPages = size / pageSize;
    void* pages[nPages];
    int32_t status[nPages];
    // Sentinel-fill so entries the kernel never wrote are recognisable.
    for (int32_t& s : status) {
        s = std::numeric_limits<int32_t>::min();
    }

    // The shared-memory variant is left here for comparison:
//  auto fd = shm_open("test_shm", O_RDWR|O_CREAT, 0666);
//  void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    // For MAP_ANONYMOUS the fd argument is ignored on Linux, but -1 is the
    // portable value, so pass -1 rather than 0.
    void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        // Check before printing the pointer, and report the actual reason;
        // this is an ordinary anonymous mapping, not a hugepage one.
        std::cerr << "failed to map anonymous memory because " << strerror(errno) << std::endl;
        return 1;
    }
    std::cout << "Ptr is " << ptr << std::endl;

    // Touch the whole mapping before querying it.
    memset(ptr, 0x41, size);
    for (uint32_t i = 0; i < nPages; i++) {
        pages[i] = static_cast<char*>(ptr) + i * pageSize;
    }

    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }
    else {
        for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }
    munmap(ptr, size);
    return 0;
}

在 NUMA 机器上,以 taskset -c 7 ./54546367.move_pages 启动时,所有页面输出同一个节点;以 numactl -i all ./54546367.move_pages 启动时,输出交错的节点 (0 1 0 1)。