当我仅从静态构建程序使用共享内存时出现分段错误

Segmentation fault appears when I use shared memory only from statically build program

当我使用 -static 选项构建程序并调用 shm_open() 函数时,会出现段错误。不使用 -static 选项时,一切运行正常。

有人知道为什么吗?

下面我引用了一个大项目的调试信息和部分截断的源代码。

你可以注释掉或取消注释 Makefile 中的下面这一行

#STATIC = -static

来重现该错误。

$ gdb --args ./debug/example sample017

GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.5) 7.11.1
Reading symbols from ./debug/example...done.
(gdb) run
Starting program: ./example sample017

Program received signal SIGSEGV, Segmentation fault.
0x0000000000000000 in ?? ()
(gdb) backtrace
#0  0x0000000000000000 in ?? ()
#1  0x000000000049a2e3 in __shm_directory (len=0x7fffffffdca8) at ../sysdeps/unix/sysv/linux/shm-directory.c:124
#2  0x0000000000499ff3 in shm_open ()
#3  0x0000000000499d55 in read_shm (memory=0x6d1be0, share_name=0x6d1d20 "sample017") at main.c:51
#4  0x0000000000499efe in read_memory (memory=0x6d1be0, argc=0x7fffffffde0c, argv=0x7fffffffdf68) at main.c:98
#5  0x0000000000499f70 in main (argc=2, argv=0x7fffffffdf68) at main.c:112

静态链接时的 strace 输出:

$strace ./debug/example sample017

execve("./debug/example", ["./debug/example", "sample017"], [/* 64 vars */]) = 0
uname({sysname="Linux", nodename="Lubuntu", ...}) = 0
brk(NULL)                               = 0xc5e000
brk(0xc5f1c0)                           = 0xc5f1c0
arch_prctl(ARCH_SET_FS, 0xc5e880)       = 0
readlink("/proc/self/exe", "/home/52034/111/debug/example", 4096) = 29
brk(0xc801c0)                           = 0xc801c0
brk(0xc81000)                           = 0xc81000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0} ---
+++ killed by SIGSEGV +++
Segmentation fault

动态链接时的 strace 输出:

$ strace ./debug/example sample017

execve("./debug/example", ["./debug/example", "sample017"], [/* 64 vars */]) = 0
brk(NULL)                               = 0x740000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f78fa50e000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=135773, ...}) = 0
mmap(NULL, 135773, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f78fa4ec000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "7ELF[=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=]>[=13=][=13=][=13=][=13=]P\t[=13=][=13=][=13=][=13=][=13=]"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1868984, ...}) = 0
mmap(NULL, 3971488, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f78f9f21000
mprotect(0x7f78fa0e1000, 2097152, PROT_NONE) = 0
mmap(0x7f78fa2e1000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c0000) = 0x7f78fa2e1000
mmap(0x7f78fa2e7000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f78fa2e7000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "7ELF[=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=]>[=13=][=13=][=13=][=13=][=13=]![=13=][=13=][=13=][=13=][=13=][=13=]"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=31712, ...}) = 0
mmap(NULL, 2128832, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f78f9d19000
mprotect(0x7f78f9d20000, 2093056, PROT_NONE) = 0
mmap(0x7f78f9f1f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f78f9f1f000
close(3)                                = 0
... end etc...

编译器:

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.5' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.5) 

Linux:

$ cat /etc/issue
Ubuntu 16.04.3 LTS \n \l

$ cat main.c

#define _GNU_SOURCE

#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>

// String library
#include <string.h>

// Shared memory
#include <sys/mman.h>
#include <sys/stat.h> /* For mode constants */
#include <fcntl.h>    /* For O_* constants */
#include <unistd.h>   /* For close() */

/* Size in bytes of the Memory.name buffer, terminator included. */
#define MAX_NAME_LENGTH 255

/*
 * Bookkeeping for one shared-memory object that is viewed as an array of
 * floats.
 */
typedef struct {
    /* Number of float elements reachable through `array`. */
    size_t length;

    /* Descriptor of the shared-memory object. */
    int fd;

    /* Start of the mapped float data. */
    float *array;

    /* NUL-terminated name of the shared-memory object. */
    char name[MAX_NAME_LENGTH];
} Memory;

/*
 * Allocate a Memory record on the heap with every byte set to zero.
 *
 * The process is terminated with EXIT_FAILURE when the allocation fails,
 * so the returned pointer is never NULL.
 */
static Memory *new_memory(void)
{
    Memory *fresh = calloc(1, sizeof *fresh);

    if (!fresh) {
        exit(EXIT_FAILURE);
    }

    return fresh;
}

/*
 * Release everything owned by a Memory record and free the record itself.
 *
 * memory: record obtained from new_memory(); NULL is accepted and ignored.
 *
 * The mapping is removed and the shared-memory descriptor is closed only
 * when `array` is set. A record that was never mapped still has the
 * zero-initialised fd value, and closing that would close descriptor 0.
 */
static void detach_memory(Memory *memory)
{
    if (memory == NULL) {
        return;
    }

    if (memory->array != NULL) {
        munmap(memory->array, memory->length * sizeof(float));
        /* The descriptor was left open after mapping; release it here. */
        close(memory->fd);
    }

    free(memory);
}

/*
 * Open an existing shared-memory object read-only and map it as floats.
 *
 * memory:     record that receives the descriptor, name, element count and
 *             mapping address.
 * share_name: name passed to shm_open(); must not be NULL.
 *
 * Returns true on success. Returns false when the name does not fit into
 * memory->name, when the object cannot be opened, or when fstat() fails.
 * Terminates the process with EXIT_FAILURE when mmap() fails or when the
 * object holds fewer than two floats.
 */
static bool read_shm(
    Memory *memory,
    char *share_name
)
{
    /* Refuse names that would overflow the fixed-size name buffer. */
    const size_t name_size = strlen(share_name) + 1;
    if (name_size > sizeof memory->name) {
        return false;
    }

    /*
     * O_EXCL is only defined in combination with O_CREAT, so an existing
     * object is opened with O_RDONLY alone. The mode argument is only
     * consulted when an object is created.
     */
    memory->fd = shm_open(share_name, O_RDONLY, 0400);
    if (memory->fd == -1) {
        return false;
    }

    memcpy(memory->name, share_name, name_size);

    struct stat info;

    /*
     * Only the return value signals failure; errno may still hold a stale
     * value from an earlier call when fstat() succeeds.
     */
    if (fstat(memory->fd, &info) == -1) {
        close(memory->fd);
        memory->fd = -1;
        return false;
    }

    memory->length = (size_t)info.st_size / sizeof(float);

    float *array = mmap(NULL, (size_t)info.st_size, PROT_READ, MAP_SHARED, memory->fd, 0);
    if (array == MAP_FAILED) {
        exit(EXIT_FAILURE);
    }

    if (memory->length < 2) {
        exit(EXIT_FAILURE);
    }

    memory->array = array;

    return true;
}

/*
 * Derive a shared-memory name from the first command-line argument and
 * attach to that object through read_shm().
 *
 * memory: record to fill in.
 * argc:   pointer to the argument count; fewer than two arguments is fatal.
 * argv:   argument vector; argv[1] is a path or bare name.
 *
 * The name is the last path component of argv[1], cut at its first '.'
 * (so "dir/sample017.bin" becomes "sample017").
 * Any failure terminates the process with EXIT_FAILURE.
 */
void read_memory(
    Memory *memory,
    int *argc,
    char **argv
)
{
    if (*argc < 2) {
        exit(EXIT_FAILURE);
    }

    char *path = strdup(argv[1]);
    if (path == NULL) {
        exit(EXIT_FAILURE);
    }

    /*
     * Presumably the GNU basename() from <string.h>: the file defines
     * _GNU_SOURCE and does not include <libgen.h>.
     */
    const char * const file = basename(path);

    char *share_name = strdup(file);
    if (share_name == NULL) {
        free(path);
        exit(EXIT_FAILURE);
    }

    /* Drop the extension by terminating the string at the first dot. */
    char *ext = strchr(share_name, '.');
    if (ext != NULL) {
        *ext = '\0';
    }

    const bool attached = read_shm(memory, share_name);

    free(path);
    free(share_name);

    if (!attached) {
        exit(EXIT_FAILURE);
    }
}

/*
 * Attach to the shared-memory object named by the first argument, print
 * every float it contains on its own line, then release the mapping.
 */
int main(int argc, char **argv)
{
    Memory *shm = new_memory();
    size_t idx = 0;

    read_memory(shm, &argc, argv);

    while (idx < shm->length) {
        printf("memory: %.5f\n", shm->array[idx]);
        idx++;
    }

    detach_memory(shm);

    return EXIT_SUCCESS;
}

$ cat Makefile

# C compiler; "?=" only assigns when CC has no value yet.
CC ?= cc

# Base compiler flags shared by compile and link steps.
CFLAGS += -pipe -std=c11
CFLAGS += -fbuiltin
# Libraries for the link step; the link rule places $(LDFLAGS) after the objects.
LDFLAGS += -lm -lrt -lpthread
# Name of the produced executable.
EXE = example
# NOTE(review): ARGUMENTS and STRIP are not referenced by the rules visible
# in this Makefile; presumably used by targets that are not shown.
ARGUMENTS ?=
# Uncomment the next line to pass -static to the link step.
#STATIC = -static
STRIP = -s
# Warning flags, passed to both the compile and the link commands.
WFLAGS += -Wall -Wextra -Wpedantic -Wshadow
WFLAGS += -Wconversion -Wsign-conversion -Winit-self -Wunreachable-code -Wformat-y2k
WFLAGS += -Wformat-nonliteral -Wformat-security -Wmissing-include-dirs
WFLAGS += -Wswitch-default -Wtrigraphs -Wstrict-overflow=5
WFLAGS += -Wfloat-equal -Wundef -Wshadow
WFLAGS += -Wbad-function-cast -Wcast-qual -Wcast-align
WFLAGS += -Wwrite-strings
WFLAGS += -Winline
# The flags below are added only when CC is not "clang".
ifneq ($(CC), clang)
WFLAGS += -Wlogical-op
CFLAGS += -finline-functions
CFLAGS += -flto
endif

# Suppress make's "Entering/Leaving directory" messages.
MAKEFLAGS += --no-print-directory
# NOTE(review): CONFIG is not used anywhere in the visible Makefile.
CONFIG += ordered

#
# Project files
#
# Every .c file in the current directory is a source.
SRCS = $(wildcard *.c)
# One object file per source file.
# NOTE(review): this line was labelled "Exclude a file", but no filtering is
# applied here; confirm whether an exclusion was intended.
OBJS = $(SRCS:.c=.o)

#
# Debug build settings
#
# Output directory, executable path and object paths of the debug build.
DBGDIR = debug
DBGEXE = $(DBGDIR)/$(EXE)
DBGOBJS = $(addprefix $(DBGDIR)/, $(OBJS))
# Library search path handed to the link step with -L.
DBGLIBPATH = ../../logger/lib/release
# NOTE(review): several -ggdb levels are listed; presumably only the last
# one (-ggdb3) takes effect; confirm against the compiler documentation.
DBGCFLAGS += -g -ggdb -ggdb1 -ggdb2 -ggdb3 -O0 -DDEBUG
# Libraries placed before the objects on the link command line.
DBGLDFLAGS += -lm -lc

# Targets that do not correspond to files.
# NOTE(review): rules for prep, release, remake, clang, openmp, one and
# liblogger are not present in the visible part of this Makefile.
.PHONY: all clean debug prep release remake clang openmp one liblogger

# Default build
all: debug

#
# Debug rules
#
debug: liblogger $(DBGEXE)

# Link the debug executable from all debug objects. $(STATIC) is empty
# unless the STATIC assignment is enabled; $(LDFLAGS) follows the objects.
$(DBGEXE): $(DBGOBJS)
    $(CC) $(CFLAGS) $(DBGCFLAGS) -L$(DBGLIBPATH) $(WFLAGS) $(STATIC) $(DBGLDFLAGS) -o $(DBGEXE) $^ $(LDFLAGS)
    @echo "$(DBGEXE) linked."

# Include dependency files without failing when they are missing.
# NOTE(review): DBGDEP is not defined in the visible Makefile, so this
# expands to nothing here; confirm.
-include $(DBGDEP)

# Compile one source into the debug directory. Before compiling, a .d file
# is written next to the object from the -MM output, with the literal text
# "$(DBGDIR)/" prefixed to its first line.
$(DBGDIR)/%.o: %.c
    @mkdir -p $(DBGDIR)
    @$(CC) -MM $(CFLAGS) $(DBGCFLAGS) $(WFLAGS) $< | sed '1s/^/$$\(DBGDIR\)\//' > $(@D)/$(*F).d
    $(CC) -c $(CFLAGS) $(DBGCFLAGS) $(WFLAGS) -o $@ $<
    @echo $<" compiled."

# Remove build products; the trailing "|| true" keeps make from failing
# when the debug directory does not exist or cannot be removed.
clean:
    @rm -rf *.out.* doc $(DBGDEP) $(DBGEXE) $(DBGOBJS) $(DBGDIR)/*.d
    @test -d $(DBGDIR) && rm -d $(DBGDIR) || true

顺便说一句,它在旧平台上运行良好:

Linux:

$cat /etc/issue
Ubuntu 13.10 \n \l

编译器:

$gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.8/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro 4.8.1-10ubuntu9' --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.8 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.8.1 (Ubuntu/Linaro 4.8.1-10ubuntu9) 

您发现了静态库 librt.a 与 libpthread.a 交互方式中的一个错误。(librt.a 中的 shm_open() 会调用 __shm_directory(),而该函数定义在 libpthread.a 中。它们的动态版本链接起来没有问题。)

具体来说,libpthread.a 确实实现了 const char *__shm_directory(size_t *len);,但不知出于什么原因,它被错误地链接了,从而导致段错误。我怀疑是某种弱符号遮蔽(shadowing)问题,但还没有进一步调查。

修复方法是自己实现这个函数。这是一个简单的函数,它返回用于创建共享内存文件的目录路径(包含末尾的斜杠)。在 Linux 系统上,这应该始终是挂载在 /dev/shm/ 的 tmpfs。

我建议创建一个新的 C 源文件,也许 shm_open_fix.c:

#include <stdlib.h>
#include <string.h>

/*
 * Local definition of __shm_directory(), supplied so that statically
 * linked code calling shm_open() does not crash: in a static build the
 * call made from librt.a does not reach the implementation in
 * libpthread.a, and providing the function here works around that.
 *
 * SHM_MOUNT may be predefined at compile time to select another
 * directory; it must end with a slash.
 */
#ifndef  SHM_MOUNT
#define  SHM_MOUNT "/dev/shm/"
#endif

/*
 * Return the directory, trailing slash included, that holds shared-memory
 * files. When len is not NULL, the length of that string is stored in it.
 */
const char *__shm_directory(size_t *len)
{
    static const char directory[] = SHM_MOUNT;

    if (len != NULL) {
        *len = strlen(directory);
    }

    return directory;
}

编译该文件并将其链接进最终的二进制文件。这应该可以解决段错误。此外,生成的二进制文件不再依赖于 libpthread.a,除非您在代码的其他地方使用了 pthread 函数。

我在 Ubuntu 16.04 和 GCC 5.4 上遇到了同样的问题。

我静态链接了一个可执行文件,它在同一位置(__shm_directory 中)发生了段错误。

我已经调查过了,链接后的可执行文件中似乎缺少被定义为弱符号的函数 __pthread_once。因此,程序调用了地址 0x0。

一位同事用 Ubuntu 14.04 和 GCC 4.8.4 编译了它,并且运行良好。

这看起来肯定像是 GCC 或 Glibc 中的错误

编辑:

在编译器命令行中添加 -Wl,--whole-archive -lpthread -Wl,--no-whole-archive 就可以解决问题。它强制链接器包含 libpthread 中的所有符号。否则,链接器会留下未解析的弱引用;这对动态链接来说是预期行为(符号在运行时解析),但对静态库不起作用。无论如何,尽管有这个解决方法,我仍然认为这种行为是一个错误。