ffmpeg avcodec lib 在 ff_hevc_sao_edge_filter_32_8_avx2() 上崩溃

ffmpeg avcodec lib crashed on ff_hevc_sao_edge_filter_32_8_avx2()

我正在 Linux 上使用 avcodec 解码一些 HEVC 视频片段,avcodec 库是使用以下命令从 ffmpeg-4.3.1 源码包构建的:

configure --prefix=/mnt/projects/ffmpeg-4.3.1/build --disable-static --enable-shared --disable-stripping && make

我的应用程序初始化编解码器和编解码器上下文如下:

// Look up the decoder registered for AV_CODEC_ID_H265.
AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H265); 
// Allocate a codec context for that decoder.
AVCodecContext* avContext = avcodec_alloc_context3(codec);
// Open the codec with no options dictionary (NULL).
// NOTE(review): this passes _private->_avContext, not the avContext local
// declared above — presumably they refer to the same context; confirm.
avcodec_open2(_private->_avContext, codec, NULL);

我的开发环境:

oap-dev@oap-dev:ffmpeg-4.3.1$ lsb_release -d
Description:        Ubuntu 18.04.5 LTS
oap-dev@oap-dev:ffmpeg-4.3.1$ uname -a
Linux oap-dev 4.15.0-163-generic #171-Ubuntu SMP Fri Nov 5 11:55:11 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux

从相同源代码构建的 ffplay 可以无错误地播放这些剪辑,但我的应用程序却遇到了以下段错误:

#0  ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
337        HEVC_SAO_EDGE_FILTER 32, 1, a
[Current thread is 1 (Thread 0x7f26e2814700 (LWP 99189))]
(gdb) bt
#0  ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
#1  0x00007f26f9425853 in sao_filter_CTB (s=s@entry=0x7f26dc0069c0, x=x@entry=0, y=y@entry=0) at libavcodec/hevc_filter.c:436
#2  0x00007f26f9427e04 in ff_hevc_hls_filter (s=0x7f26dc0069c0, x=x@entry=64, y=y@entry=64, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:861
#3  0x00007f26f9428fa5 in ff_hevc_hls_filters (s=s@entry=0x7f26dc0069c0, x_ctb=x_ctb@entry=128, y_ctb=y_ctb@entry=128, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:883
#4  0x00007f26f94405ae in hls_decode_entry (avctxt=<optimized out>, isFilterThread=<optimized out>) at libavcodec/hevcdec.c:2462
#5  0x00007f26f970dbf5 in avcodec_default_execute (c=0x7f26dc006180, func=0x7f26f9440300 <hls_decode_entry>, arg=<optimized out>, ret=<optimized out>, count=<optimized out>, size=4) at libavcodec/utils.c:446
#6  0x00007f26f9444c18 in hls_slice_data (s=0x7f26dc0069c0) at libavcodec/hevcdec.c:2480
#7  decode_nal_unit (nal=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3015
#8  decode_nal_units (length=<optimized out>, buf=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3088
#9  hevc_decode_frame (avctx=<optimized out>, data=<optimized out>, got_output=<optimized out>, avpkt=<optimized out>) at libavcodec/hevcdec.c:3226
#10 0x00007f26f96148a3 in frame_worker_thread (arg=0x7f26dc005dc0) at libavcodec/pthread_frame.c:201
#11 0x00007f26fbbd26db in start_thread (arg=0x7f26e2814700) at pthread_create.c:463
#12 0x00007f26f7d5971f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

更多信息: 如果我添加选项“--disable-avx2”并重建 avcodec 库,我的应用程序可以成功播放剪辑。

为什么启用 avx2 会出现段错误问题?感谢任何帮助。

==========================================
根据 Peter Cordes 的回复更新:

(gdb) bt
#0  0x00007f174d91ce3e in ff_hevc_add_residual_32_8_avx2 () from /mnt/projects/ffmpeg-4.3.1/build/lib/libavcodec.so.58
#1  0x00007f174d46c401 in hls_transform_tree (s=0x7f17300069c0, x0=0, y0=960, xBase=2880, yBase=0, cb_xBase=0, cb_yBase=240, log2_cb_size=914341248, log2_trafo_size=1340287338, trafo_depth=51, blk_idx=3, base_cbf_cb=0x5f15, base_cbf_cr=0x0) at libavcodec/hevcdec.c:1122
#2  0x00007f174d46f993 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2246
#3  0x00007f174d46f578 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2312
#4  0x00007f174d47157f in hls_decode_entry (avctxt=0x7f17481a0010, isFilterThread=0x7f1730009a00) at libavcodec/hevcdec.c:2453
#5  0x00007f174d73ebf5 in avcodec_default_execute (c=0x7f17481a0010, func=0x7f1730009a00, arg=0x3c0, ret=0xb40, count=8, size=0) at libavcodec/utils.c:446
#6  0x00007f174d475c18 in hevc_decode_frame (avctx=0x7f17481a0010, data=0x7f1730009a00, got_output=0x3c0, avpkt=0x7f1730009880) at libavcodec/hevcdec.c:2480
#7  0x00007f174d6458a3 in frame_worker_thread (arg=0x7f1730005dc0) at libavcodec/pthread_frame.c:201
#8  0x00007f174fc036db in start_thread (arg=0x7f17367fc700) at pthread_create.c:463
#9  0x00007f174bd8a71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disas
Dump of assembler code for function ff_hevc_add_residual_32_8_avx2:
   0x00007f174d91ce30 <+0>: vpxor  %ymm0,%ymm0,%ymm0
   0x00007f174d91ce34 <+4>: lea    (%rdx,%rdx,2),%rcx
   0x00007f174d91ce38 <+8>: mov    $0x8,%r8d
=> 0x00007f174d91ce3e <+14>:    vmovdqa (%rdi),%ymm1
   0x00007f174d91ce42 <+18>:    vmovdqa %ymm1,%ymm2
   0x00007f174d91ce46 <+22>:    vpunpcklbw %ymm0,%ymm1,%ymm1
   0x00007f174d91ce4a <+26>:    vpunpckhbw %ymm0,%ymm2,%ymm2
   0x00007f174d91ce4e <+30>:    vmovdqa (%rsi),%xmm5
   0x00007f174d91ce52 <+34>:    vmovdqa 0x10(%rsi),%xmm6
   0x00007f174d91ce57 <+39>:    vinserti128 $0x1,0x20(%rsi),%ymm5,%ymm5
   0x00007f174d91ce5e <+46>:    vinserti128 $0x1,0x30(%rsi),%ymm6,%ymm6
   0x00007f174d91ce65 <+53>:    vpaddsw %ymm5,%ymm1,%ymm1
   0x00007f174d91ce69 <+57>:    vpaddsw %ymm6,%ymm2,%ymm2
   0x00007f174d91ce6d <+61>:    vmovdqa (%rdi,%rdx,1),%ymm3
   0x00007f174d91ce72 <+66>:    vmovdqa %ymm3,%ymm4
   0x00007f174d91ce76 <+70>:    vpunpcklbw %ymm0,%ymm3,%ymm3
   0x00007f174d91ce7a <+74>:    vpunpckhbw %ymm0,%ymm4,%ymm4
   0x00007f174d91ce7e <+78>:    vmovdqa 0x40(%rsi),%xmm5
   0x00007f174d91ce83 <+83>:    vmovdqa 0x50(%rsi),%xmm6
   0x00007f174d91ce88 <+88>:    vinserti128 $0x1,0x60(%rsi),%ymm5,%ymm5
   0x00007f174d91ce8f <+95>:    vinserti128 $0x1,0x70(%rsi),%ymm6,%ymm6
   0x00007f174d91ce96 <+102>:   vpaddsw %ymm5,%ymm3,%ymm3
   0x00007f174d91ce9a <+106>:   vpaddsw %ymm6,%ymm4,%ymm4
   0x00007f174d91ce9e <+110>:   vpackuswb %ymm2,%ymm1,%ymm1
   0x00007f174d91cea2 <+114>:   vpackuswb %ymm4,%ymm3,%ymm3
   0x00007f174d91cea6 <+118>:   vmovdqa %ymm1,(%rdi)
   0x00007f174d91ceaa <+122>:   vmovdqa %ymm3,(%rdi,%rdx,1)
   0x00007f174d91ceaf <+127>:   vmovdqa (%rdi,%rdx,2),%ymm1
   0x00007f174d91ceb4 <+132>:   vmovdqa %ymm1,%ymm2
   0x00007f174d91ceb8 <+136>:   vpunpcklbw %ymm0,%ymm1,%ymm1
   0x00007f174d91cebc <+140>:   vpunpckhbw %ymm0,%ymm2,%ymm2
   0x00007f174d91cec0 <+144>:   vmovdqa 0x80(%rsi),%xmm5
   0x00007f174d91cec8 <+152>:   vmovdqa 0x90(%rsi),%xmm6
   0x00007f174d91ced0 <+160>:   vinserti128 $0x1,0xa0(%rsi),%ymm5,%ymm5
   0x00007f174d91ceda <+170>:   vinserti128 $0x1,0xb0(%rsi),%ymm6,%ymm6
   0x00007f174d91cee4 <+180>:   vpaddsw %ymm5,%ymm1,%ymm1
   0x00007f174d91cee8 <+184>:   vpaddsw %ymm6,%ymm2,%ymm2
   0x00007f174d91ceec <+188>:   vmovdqa (%rdi,%rcx,1),%ymm3
   0x00007f174d91cef1 <+193>:   vmovdqa %ymm3,%ymm4
   0x00007f174d91cef5 <+197>:   vpunpcklbw %ymm0,%ymm3,%ymm3
   0x00007f174d91cef9 <+201>:   vpunpckhbw %ymm0,%ymm4,%ymm4
---Type <return> to continue, or q <return> to quit---
   0x00007f174d91cefd <+205>:   vmovdqa 0xc0(%rsi),%xmm5
   0x00007f174d91cf05 <+213>:   vmovdqa 0xd0(%rsi),%xmm6
   0x00007f174d91cf0d <+221>:   vinserti128 $0x1,0xe0(%rsi),%ymm5,%ymm5
   0x00007f174d91cf17 <+231>:   vinserti128 $0x1,0xf0(%rsi),%ymm6,%ymm6
   0x00007f174d91cf21 <+241>:   vpaddsw %ymm5,%ymm3,%ymm3
   0x00007f174d91cf25 <+245>:   vpaddsw %ymm6,%ymm4,%ymm4
   0x00007f174d91cf29 <+249>:   vpackuswb %ymm2,%ymm1,%ymm1
   0x00007f174d91cf2d <+253>:   vpackuswb %ymm4,%ymm3,%ymm3
   0x00007f174d91cf31 <+257>:   vmovdqa %ymm1,(%rdi,%rdx,2)
   0x00007f174d91cf36 <+262>:   vmovdqa %ymm3,(%rdi,%rcx,1)
   0x00007f174d91cf3b <+267>:   add    $0x100,%rsi
   0x00007f174d91cf42 <+274>:   lea    (%rdi,%rdx,4),%rdi
   0x00007f174d91cf46 <+278>:   dec    %r8d
   0x00007f174d91cf49 <+281>:   jg     0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
   0x00007f174d91cf4f <+287>:   vzeroupper 
   0x00007f174d91cf52 <+290>:   retq   
End of assembler dump.

(gdb) info reg
rax            0x7f174d91ce30   139738062376496
rbx            0x7f17300069c0   139737566308800
rcx            0xb40    2880
rdx            0x3c0    960
rsi            0x7f1730009a00   139737566321152
rdi            0x7f17481a0010   139737970638864
rbp            0x7f1730009880   0x7f1730009880
rsp            0x7f17367fb9b8   0x7f17367fb9b8
r8             0x8  8
r9             0x0  0
r10            0xa  10
r11            0xa  10
r12            0x0  0
r13            0x2  2
r14            0x0  0
r15            0x0  0
rip            0x7f174d91ce3e   0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
eflags         0x10206  [ PF IF RF ]
cs             0x33 51
ss             0x2b 43
ds             0x0  0
es             0x0  0
fs             0x0  0
gs             0x0  0

我解码如下:

// Decode loop (sketch): alternate between submitting a packet and fetching
// decoded output until `exit` is set.
while (!exit) {
    // Submit one compressed packet to the decoder.
    avcodec_send_packet(AVCodecContext * ctx, AVPacket* pkt);
    // check return value ...

    // Retrieve a decoded frame from the decoder.
    avcodec_receive_frame(AVCodecContext * ctx, AVFrame* frame);
    //check return value ...
}

AVCodecContext 的公共成员 get_buffer2 是一个回调函数,在调用 avcodec_receive_frame() 时会被调用。解码前,我将以下函数赋给了 get_buffer2:

/*
 * get_buffer2 callback: gives the decoder a 32-byte-aligned buffer for
 * plane 0 of `frame`.
 *
 * The AVX2 routine in the disassembly above accesses the destination with
 * aligned 32-byte moves (vmovdqa on ymm registers), so a pointer that is not
 * 32-byte aligned faults. Previously the buffer came from operator new,
 * which triggered the core dump when decoding HEVC.
 *
 * Returns AVERROR(ENOMEM) if the allocation fails; the success path
 * continues in the elided remainder of the function.
 */
int get_frame_buffer(struct AVCodecContext *c, AVFrame *frame, int flags) {
    void *plane = NULL;

    /* posix_memalign() stores the block through its first argument and
     * returns an error code (0 on success); it does not return the pointer. */
    if (posix_memalign(&plane, 32, frmaeSize) != 0) {
        return AVERROR(ENOMEM);
    }
    frame->data[0] = (uint8_t *)plane;

    // ...
}