C/C++ 封装 H.264 的 FLV 多路复用器未按预期工作

Question

我正在尝试制作 FLV 混合器。我开始测试从我的相机 (c920) 捕获的 h264 流。 .flv文件编码后无法正常播放

首先，我尝试在 h264 中查找 NAL，搜索模式 0x00 0x00 0x00 0x01...但我在互联网上发现有两种模式可以找到 NAL...所以我实现了对 0x00 0x00 0x01 的搜索和另一个...

在第一次测试中，我发现很少有 NAL 以四个字节开头，但是在更改代码以搜索 3 字节模式后，我发现了很多 NAL...

我在网上找到的参考资料显示使用很少的NAL类型来封装FLV文件的代码，但是更改为检测3个字节后，程序发现了很多NAL，我不知道如何流式传输它们...

一些代码:-)

我正在用 C++ 实现，所以我有 classes.

FLV 作者

class FLVWritter{
public:
    std::vector<uint8_t> buffer;

    void flushToFD(int fd) {
        if (buffer.size() <= 0)
            return;
        ::write(fd,&buffer[0],buffer.size());
        buffer.clear();
    }

    void reset(){
        buffer.clear();
    }

    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x05,
            0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x00
        };

        if (haveAudio && haveVideo) {
            flv_header[4] = 0x05;
        } else if (haveAudio && !haveVideo) {
            flv_header[4] = 0x04;
        } else if (!haveAudio && haveVideo) {
            flv_header[4] = 0x01;
        } else {
            flv_header[4] = 0x00;
        }

        for(int i=0;i<13;i++)
            buffer.push_back(flv_header[i]);
    }

    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    void writeUInt16(uint32_t v){
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt24(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt32(uint32_t v){
        buffer.push_back((uint8_t)(v >> 24));
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
        buffer.push_back((uint8_t)(v >> 24));
    }

};

h264 解析器：

class ParserH264 {

    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1
    };
    State nalState; // nal = network abstraction layer
    State writingState;

    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;
            
            if (writingState == None){
                writingState = Data;
                return;
            } else if (writingState == Data){
                
                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00
                
                //in the case using the format 00 00 00 01, remove the last element detected
                if (buffer[buffer.size()-1] == 0x00 &&
                    buffer[buffer.size()-2] != 0x03 )//keep value, if emulation prevention is present
                    buffer.pop_back();
                
                chunkDetectedH264(&buffer[0],buffer.size());
                buffer.clear();
                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    virtual void chunkDetectedH264(const uint8_t* ibuffer, int size){
        
    }

    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(&buffer[0],buffer.size());
        buffer.clear();
        writingState = None;
        nalState = None;
    }

    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }
};

最后是主要的 class:

class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> lastSPS;

public:
    FLVWritter mFLVWritter;
    int fd;
    bool firstAudioWrite;
    uint32_t audioTimestamp_ms;
    uint32_t videoTimestamp_ms;

    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //have audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        firstAudioWrite = true;
        audioTimestamp_ms = 0;
        videoTimestamp_ms = 0;
    }

    virtual ~FLVFileWriter(){
        if (fd >= 0){
            mFLVWritter.flushToFD(fd);
            close(fd);
        }
    }

    void chunkDetectedH264(const uint8_t* ibuffer, int size) {
        printf("[debug] Detected NAL chunk size: %i\n",size);
        
        if (size <= 0){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = ibuffer[0];
        uint8_t nal_type = (nal_bit & 0x1f);
        
        //0x67
        //if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_SPS) ) {
        if ( nal_type == (NAL_TYPE_SPS) ) {

            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            //store information to use when arrise PPS nal_bit, probably the next NAL detection
            lastSPS.clear();
            for(int i=0;i<size;i++)
                lastSPS.push_back(ibuffer[i]);
        }
        //else if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_PPS) ) {
        else if ( nal_type == (NAL_TYPE_PPS) ) {

            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            //must be called just after the SPS detection
            int32_t bodyLength = lastSPS.size() + size + 16;

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = 16 bytes + SPS bytes + PPS bytes
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
            mFLVWritter.writeUInt8(0x00);//avc sequence header
            mFLVWritter.writeUInt24( 0x00 );//composit time ??????????

            //flv VideoTagBody --AVCDecoderCOnfigurationRecord
            mFLVWritter.writeUInt8(0x01);//configurationversion
            mFLVWritter.writeUInt8(lastSPS[1]);//avcprofileindication
            mFLVWritter.writeUInt8(lastSPS[2]);//profilecompatibilty
            mFLVWritter.writeUInt8(lastSPS[3]);//avclevelindication
            mFLVWritter.writeUInt8(0xFC | 0x03); //reserved + lengthsizeminusone
            mFLVWritter.writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

            mFLVWritter.writeUInt16( lastSPS.size() ); //sequence parameter set length
            //H264 sequence parameter set raw data
            for(int i=0;i<lastSPS.size();i++)
                mFLVWritter.writeUInt8(lastSPS[i]);

            mFLVWritter.writeUInt8(0x01); // number of PPS

            //sanity check with the packet size...
            if ( size-4 > 0xffff ){
                fprintf(stderr, "PPS Greater than 64k. This muxer does not support it.\n");
                exit(-1);
            }

            mFLVWritter.writeUInt16(size); //picture parameter set length
            //H264 picture parameter set raw data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_PPS\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);


            videoTimestamp_ms += 1000/30;
        }
        //0x65
        //else if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_CSIDRP) ) {
        else if ( nal_type == (NAL_TYPE_CSIDRP) ) {
        

            fprintf(stdout, " processing: 0x%x (0x65)\n",nal_bit);

            uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader +  NALU length

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
            mFLVWritter.writeUInt8(0x01);//avc NALU unit
            mFLVWritter.writeUInt24(0x00);//composit time ??????????
            mFLVWritter.writeUInt32(size);//nal length

            //nal raw data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_CSIDRP\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);

            videoTimestamp_ms += 1000/30;
        }
        //0x61
        //else if (nal_bit == (NAL_IDC_FRAME | NAL_TYPE_CSNIDRP) ) {
        else if ( nal_type == (NAL_TYPE_CSNIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x61)\n",nal_bit);

            uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader +  NALU length

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x27);//key frame, AVC 1:keyframe 2:inner frame 7:H264
            mFLVWritter.writeUInt8(0x01);//avc NALU unit
            mFLVWritter.writeUInt24(0x00);//composit time ??????????
            mFLVWritter.writeUInt32(size);//nal length

            // raw nal data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_CSNIDRP\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);

            videoTimestamp_ms += 1000/30;

        }

        else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            
        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }
    }

};

要在 main 中使用这些 classes，我们可以这样写：

volatile bool exit_requested = false;
void signal_handler(int signal) {
    exit_requested = true;
}

int main(int argc, char* argv[]) {
    int fd_stdin = fileno(stdin);

    signal(SIGINT,  signal_handler);
    signal(SIGTERM, signal_handler);
    signal(SIGQUIT, signal_handler);

    unsigned char buffer[65536];
    
    FLVFileWriter flv("out.flv");

    while (!exit_requested) {

        int readedSize = read(fd_stdin,buffer,65536);
        if (readedSize==0)
            break;
        
        flv.parseH264(buffer,readedSize);
    }
    
    flv.endOfStreamH264();

    signal(SIGINT,  SIG_DFL);
    signal(SIGTERM, SIG_DFL);
    signal(SIGQUIT, SIG_DFL);

    return 0;
}

编译上面的程序我们可以执行如下：

cat test.h264 | ./flv

然后我明白了：

我从命令行使用 ffmpeg 检查我的源 h264 流文件是否损坏。所以我运行以下内容：

cat test.h264 | ffmpeg -i - -c:v copy out2.flv

使用 ffmpeg 结果正常：

我把我测试的视频和完整的源代码放在一起here。

The h264 file I used for the test.

Answer 1

WIP（进行中）

但是我发现了很多新信息:-)。

首先：FLV 容器无法从 h264 逐一打包 NAL 缓冲区。

其二：只有在有新帧可用时才需要增加时间戳。

我遇到的绿屏问题与时间戳问题的 NAL 包碎片有关。

困难的部分是检测流何时获得新帧。 here.

有几种方法

但是 h264 流在其结构中具有不同位大小的数据成员。

读取位

ue(v) 数据类型。要读取此结构，您需要计算前导 0 位直到到达位 1，并在我们找到的 1 位之后读取相同数量的 0 位。

h264 的 headers 中该字段的内容可能会有所不同...一个常见的值称为 golomb，它是使用以下公式计算的：(1 << leadingZeros) - 1 + readbits (leadingZeros).

RBSP - 原始字节序列负载

我们不能直接读取流值，因为它可能包含防仿真字节。我们需要在读取 headers.

之前删除仿真预防字节

代码

所以为了开始缓解这个问题，我重写了 H264 解析器如下：

struct sps_info{
    uint8_t profile_idc;
    uint8_t constraints;
    uint8_t level_idc;
    uint8_t log2_max_frame_num;

    bool set;
};

class ParserH264 {
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1,

        EMULATION0,
        EMULATION1,
        EMULATION_FORCE_SKIP
    };
    State nalState; // nal = network abstraction layer
    State writingState;

    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;

            if (writingState == None){
                writingState = Data;
                return;
            } else if (writingState == Data){

                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00

                //in the case using the format 00 00 00 01, remove the last element detected
                if (buffer.size()-2 >=0 &&
                    buffer[buffer.size()-1] == 0x00 &&
                    buffer[buffer.size()-2] != 0x03 )//keep value, if emulation prevention is present
                    buffer.pop_back();

                chunkDetectedH264(buffer);

                buffer.clear();

                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // rawBuffer might has emulation bytes
    // Raw Byte Sequence Payload (RBSP)
    virtual void chunkDetectedH264(const std::vector<uint8_t> &nal){

    }

    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(buffer);

        buffer.clear();
        writingState = None;
        nalState = None;
    }

    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }


    static inline uint32_t readbit(int bitPos,  const uint8_t* data, int size){
        int dataPosition = bitPos / 8;
        int bitPosition = 7 - bitPos % 8;
        if (dataPosition >= size){
            fprintf(stderr,"error to access bit...\n");
            exit(-1);
        }
        return (data[dataPosition] >> bitPosition) & 0x01;
    }

    // leading 0`s count equals the number of next bits after bit 1
    //
    //  Example: 01x  001xx 0001xxx 00001xxxx
    //
    //  The max number of bits is equal 32 in this sintax
    //
    static inline int bitsToSkip_ue( int start,  const uint8_t* data, int size){
        int bitPos = start;
        int dataPosition = start / 8;
        int bitPosition;
        while (dataPosition < size){
            dataPosition = bitPos / 8;
            bitPosition = 7 - bitPos % 8;
            int bit = (data[dataPosition] >> bitPosition) & 0x01;
            if (bit == 1)
                break;
            bitPos++;
        }
        int leadingZeros = bitPos - start;
        int totalBits = leadingZeros + 1 + leadingZeros;
        if (totalBits > 32){
            fprintf(stderr,"bitsToSkip_ue length greated than 32\n");
            exit(-1);
        }
        return totalBits;
    }

    static inline uint32_t read_golomb_ue(int start,  const uint8_t* data, int size){
        int bitPos = start;
        int dataPosition = start / 8;
        int bitPosition;
        while (dataPosition < size){
            dataPosition = bitPos / 8;
            bitPosition = 7 - bitPos % 8;
            int bit = (data[dataPosition] >> bitPosition) & 0x01;
            if (bit == 1)
                break;
            bitPos++;
        }
        uint32_t leadingZeros = (uint32_t)(bitPos - start);
        uint32_t num = readbits(bitPos+1, leadingZeros, data, size);
        num += (1 << leadingZeros) - 1;

        return num;
    }

    static inline uint32_t readbits(int bitPos, int length, const uint8_t* data, int size){
        if (length > 32){
            fprintf(stderr,"readbits length greated than 32\n");
            exit(-1);
        }
        uint32_t result = 0;
        for(int i=0;i<length;i++){
            result <<= 1;
            result = result | readbit(bitPos+i, data, size);
        }
        return result;
    }

    static inline sps_info parseSPS(const std::vector<uint8_t> sps_rbsp) {
        const uint8_t *data = &sps_rbsp[0];
        uint32_t size = sps_rbsp.size();

        sps_info result;

        result.profile_idc = sps_rbsp[1];
        result.constraints = sps_rbsp[2];
        result.level_idc = sps_rbsp[3];

        uint32_t startIndex = 8+24;//NAL bit + profile_idc (8bits) + constraints (8bits) + level_idc (8bits)
        startIndex += bitsToSkip_ue(startIndex, data, size);//seq_parameter_set_id (ue)

        uint32_t log2_max_frame_num_minus4 = read_golomb_ue(startIndex, data, size);

        if (log2_max_frame_num_minus4 < 0 ||
            log2_max_frame_num_minus4 > 12){
            fprintf(stderr,"parseSPS_log2_max_frame_num_minus4 value not in range [0-12] \n");
            exit(-1);
        }

        result.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
        result.set = true;
        return result;
    }

    // Raw Byte Sequence Payload (RBSP) -- without the emulation prevention bytes
    // maxSize is used to parse just the beggining of the nal structure...
    // avoid process all buffer size on NAL new frame check
    static inline void nal2RBSP(const std::vector<uint8_t> &buffer,
                                std::vector<uint8_t> *rbsp,
                                int maxSize = -1){
        if (maxSize <= 0)
            maxSize = buffer.size();
        rbsp->resize(maxSize);

        State emulationState = None;
        int count = 0;

        for(int i=0; i < maxSize ;i++){
            uint8_t byte = buffer[i];

            if (byte == 0x00 && emulationState == None)
                emulationState = EMULATION0;
            else if (byte == 0x00 && (emulationState == EMULATION0 || emulationState == EMULATION1) )
                emulationState = EMULATION1;
            else if (byte == 0x03 && emulationState == EMULATION1)
            {
                emulationState = EMULATION_FORCE_SKIP;
                continue;
            }
            else if (emulationState == EMULATION_FORCE_SKIP) { //skip 00 01 02 or 03
                if ( byte != 0x00 && byte != 0x01 && byte != 0x02 && byte != 0x03 ){
                    fprintf(stdout, "H264 NAL emulation prevention pattern detected error (%u)\n", byte);
                    exit(-1);
                }
                emulationState = None;
            } else
                emulationState = None;

            (*rbsp)[count] = byte;
            count++;
        }
        if (count != rbsp->size())
            rbsp->resize(count);
    }

};

我更新了 FLVWriter 以写入整个帧（具有 1 个或多个 NAL）并写入视频序列 header。

代码：

class FLVWritter{
public:
    std::vector<uint8_t> buffer;

    void flushToFD(int fd) {
        if (buffer.size() <= 0)
            return;
        ::write(fd,&buffer[0],buffer.size());
        buffer.clear();
    }

    void reset(){
        buffer.clear();
    }

    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x05,
            0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x00
        };

        if (haveAudio && haveVideo) {
            flv_header[4] = 0x05;
        } else if (haveAudio && !haveVideo) {
            flv_header[4] = 0x04;
        } else if (!haveAudio && haveVideo) {
            flv_header[4] = 0x01;
        } else {
            flv_header[4] = 0x00;
        }

        for(int i=0;i<13;i++)
            buffer.push_back(flv_header[i]);
    }

    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    void writeUInt16(uint32_t v){
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt24(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt32(uint32_t v){
        buffer.push_back((v >> 24) & 0xff);
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt32(uint32_t v, std::vector<uint8_t> *data){
        data->push_back((v >> 24) & 0xff);
        data->push_back((v >> 16) & 0xff);
        data->push_back((v >> 8) & 0xff);
        data->push_back((v) & 0xff);
    }

    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
        buffer.push_back((v >> 24) & 0xff);
    }

    void writeVideoSequenceHeader(const std::vector<uint8_t> &sps, const std::vector<uint8_t> &pps, const sps_info &spsinfo) {

        //
        // flv tag header = 11 bytes
        //
        writeUInt8(0x09);//tagtype video
        writeUInt24( sps.size() + pps.size() + 16 );//data len
        writeUInt32Timestamp( 0 );//timestamp
        writeUInt24( 0 );//stream id 0

        //
        // Message Body = 16 bytes + SPS bytes + PPS bytes
        //
        //flv VideoTagHeader
        writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
        writeUInt8(0x00);//avc sequence header
        writeUInt24( 0x00 );//composition time

        //flv VideoTagBody --AVCDecoderCOnfigurationRecord
        writeUInt8(0x01);//configurationversion
        writeUInt8(spsinfo.profile_idc);//avcprofileindication
        writeUInt8(spsinfo.constraints);//profilecompatibilty
        writeUInt8(spsinfo.level_idc);//avclevelindication
        writeUInt8(0xFC | 0x03); //reserved (6 bits), NULA length size - 1 (2 bits)
        writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

        writeUInt16( sps.size() ); //sequence parameter set length
        //H264 sequence parameter set raw data
        for(int i=0;i<sps.size();i++)
            writeUInt8(sps[i]);

        writeUInt8(0x01); // number of PPS

        writeUInt16(pps.size()); //picture parameter set length
        //H264 picture parameter set raw data
        for(int i=0;i<pps.size();i++)
            writeUInt8(pps[i]);

        if (buffer.size() != sps.size() + pps.size() + 16 + 11 ){
            fprintf(stderr, "error writeVideoSequenceHeader\n");
            exit(-1);
        }

        // previous tag size
        writeUInt32(buffer.size());
    }

    void writeVideoFrame(const std::vector<uint8_t> &data, bool keyframe, uint32_t timestamp_ms, int streamID){

        writeUInt8(0x09);//tagtype video
        writeUInt24( data.size() + 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id 0)

        if (keyframe)
            writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        else
            writeUInt8(0x27);//key frame, AVC 1:keyframe 2:inner frame 7:H264

        writeUInt8(0x01);//avc NALU unit
        writeUInt24(0x00);//composit time ??????????

        for(int i=0;i<data.size();i++)
            writeUInt8(data[i]);

        if (buffer.size() != data.size() + 5 + 11 ){
            fprintf(stderr, "error writeVideoFrame\n");
            exit(-1);
        }

        // previous size
        writeUInt32(buffer.size());

    }

    void writeVideoEndOfStream(uint32_t timestamp_ms, int streamID){
        writeUInt8(0x09);//tagtype video
        writeUInt24( 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id 0)

        writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        writeUInt8(0x02);//avc EOS
        writeUInt24(0x00);//composit time ??????????

        if (buffer.size() != 5 + 11 ){
            fprintf(stderr, "error writeVideoEOS\n");
            exit(-1);
        }

        // previous size
        writeUInt32(buffer.size());
    }

};

接下来我们需要检测流中的新帧。

我已经实现了几种方法和 frame_num 方法。

要读取 frame_num，我们需要在读取 frame_num 位之前跳过 3 个 ue(v) 结构。

现在h264新帧检测器class:

class H264NewFrameDetection {
    std::vector<uint8_t> aux;

    bool newFrameOnNextIDR;
    uint32_t old_frame_num;
    bool spsinfo_set;

    bool firstFrame;
public:
    uint32_t currentFrame;
    bool newFrameFound;

    H264NewFrameDetection() {
        currentFrame = 0;
        newFrameOnNextIDR = false;
        old_frame_num = 0;
        spsinfo_set = false;
        firstFrame = true;
    }

    void analyseBufferForNewFrame(const std::vector<uint8_t> &nal, const sps_info &spsinfo){

        uint8_t nal_bit = nal[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        if ( nal_type == (NAL_TYPE_SPS) ||
            nal_type == (NAL_TYPE_PPS) ||
            nal_type == (NAL_TYPE_AUD) ||
            nal_type == (NAL_TYPE_SEI) ||
            (nal_type >= 14 && nal_type <= 18)
            ) {
            newFrameOnNextIDR = true;
        }

        if ((nal_type == NAL_TYPE_CSIDRP ||
             nal_type == NAL_TYPE_CSNIDRP)
            && spsinfo.set ){

            aux.clear();
            //(8 + 3*(32*2+1) + 16) = max header per NALU slice bits = 27.375 bytes
            // 32 bytes + 8 (Possibility of Emulation in 32 bytes)
            int RBSPMaxBytes = 32 + 8;
            if (nal.size() < (32 + 8))
                RBSPMaxBytes = nal.size();
            ParserH264::nal2RBSP(nal, &aux, RBSPMaxBytes );
            uint8_t * rbspBuffer = &aux[0];
            uint32_t rbspBufferSize = aux.size();

            uint32_t frame_num_index = 8;//start counting after the nal_bit
            //first_mb_in_slice (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //slice_type (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //pic_parameter_set_id (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //now can read frame_num
            uint32_t frame_num = ParserH264::readbits(frame_num_index,
                                                      spsinfo.log2_max_frame_num,
                                                      rbspBuffer, rbspBufferSize);

            if (!spsinfo_set){
                old_frame_num = frame_num;
                spsinfo_set = true;//spsinfo.set
            }

            if (old_frame_num != frame_num){
                newFrameOnNextIDR = true;
                old_frame_num = frame_num;
            }
        }


        if (newFrameOnNextIDR &&(nal_type == NAL_TYPE_CSIDRP ||
                                 nal_type == NAL_TYPE_CSNIDRP)) {
            newFrameOnNextIDR = false;
            if (firstFrame){//skip the first frame
                firstFrame = false;
            } else {
                newFrameFound = true;
                currentFrame++;
            }
        }
    }

    void reset(){
        newFrameFound = false;
    }

};

最后写手class.

在这个实现中，我存储 NAL，直到检测到新帧。当它找到一个新帧时，我用 NAL 集写入 FLV 标签（所有 NAL 都存储在列表中）。

代码：

class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> sps;
    std::vector<uint8_t> pps;

    sps_info spsInfo;

    H264NewFrameDetection mH264NewFrameDetection;

public:
    FLVWritter mFLVWritter;
    int fd;
    uint32_t videoTimestamp_ms;

    //contains the several NALs until complete a frame... after that can write to FLV
    std::vector<uint8_t> nalBuffer;

    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //have audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms = 0;        
        spsInfo.set = false;
    }

    virtual ~FLVFileWriter(){
        if (fd >= 0){

            //force write the last frame
            if (nalBuffer.size() > 0){
                uint8_t firstNALtype = nalBuffer[4] & 0x1f;

                bool iskeyframe = (firstNALtype == NAL_TYPE_CSIDRP);
                mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
                mFLVWritter.flushToFD(fd);

                nalBuffer.clear();

                mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
                mFLVWritter.flushToFD(fd);
            } else {
                if (mH264NewFrameDetection.currentFrame > 0)
                    videoTimestamp_ms = ((mH264NewFrameDetection.currentFrame-1) * 1000)/30;
                mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
                mFLVWritter.flushToFD(fd);
            }
            close(fd);
        }
    }


    //decoding time stamp (DTS) and presentation time stamp (PTS)
    void chunkDetectedH264(const std::vector<uint8_t> &data) {

        if (data.size() <= 0){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = data[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        mH264NewFrameDetection.analyseBufferForNewFrame(data, spsInfo);

        if (mH264NewFrameDetection.newFrameFound){
            mH264NewFrameDetection.reset();

            uint8_t firstNALtype = nalBuffer[4] & 0x1f;

            bool iskeyframe = (firstNALtype == NAL_TYPE_CSIDRP);
            mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
            mFLVWritter.flushToFD(fd);

            nalBuffer.clear();

            videoTimestamp_ms = (mH264NewFrameDetection.currentFrame * 1000)/30;
        }

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {
            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            sps.clear();
            //max 26 bytes on sps header (read until log2_max_frame_num_minus4)
            nal2RBSP(data, &sps, 26 );
            spsInfo = parseSPS(sps);

            sps.clear();
            for(int i=0;i<data.size();i++)
                sps.push_back(data[i]);

        }
        //0x68
        else if ( nal_type == (NAL_TYPE_PPS) ) {
            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            pps.clear();
            for(int i=0;i<data.size();i++)
                pps.push_back(data[i]);

            mFLVWritter.writeVideoSequenceHeader(sps, pps, spsInfo);
            mFLVWritter.flushToFD(fd);

        }
        //0x65, 0x61, 0x41
        else if ( nal_type == NAL_TYPE_CSIDRP ||
                  nal_type == NAL_TYPE_CSNIDRP ) {
            //convert annexb to AVCC (length before the NAL structure)
            mFLVWritter.writeUInt32( data.size(), &nalBuffer );
            for(int i=0;i<data.size();i++)
                nalBuffer.push_back(data[i]);

        } else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }

    }
};

结论

经过测试，该视频可以在这个新实现中正常运行。

我现在唯一的问题是与 PTS（演示时间戳）和 DTS（解码器时间戳）有关。我不知道如何从流中提取它们以及如何使用它们来计算帧时间戳。

此当前实现使用硬编码比率 1000/30（例如 30fps）作为增加时间戳的基础。

时间戳以毫秒为单位。

C/C++ 封装 H.264 的 FLV 多路复用器未按预期工作

C/C++ FLV muxer encapsulating H.264 not working as expected

c++

flv

video-streaming

h.264

muxer