C/C++ 封装 H.264 的 FLV 多路复用器未按预期工作

C/C++ FLV muxer encapsulating H.264 not working as expected

我正在尝试制作 FLV 混合器。我开始测试从我的相机 (c920) 捕获的 h264 流。 .flv文件编码后无法正常播放

首先,我尝试在 h264 中查找 NAL,搜索模式 0x00 0x00 0x00 0x01...但我在互联网上发现有两种模式可以找到 NAL...所以我实现了对 0x00 0x00 0x01 的搜索和另一个...

在第一次测试中,我发现很少有 NAL 以四个字节开头,但是在更改代码以搜索 3 字节模式后,我发现了很多 NAL...

我在网上找到的参考资料显示使用很少的NAL类型来封装FLV文件的代码,但是更改为检测3个字节后,程序发现了很多NAL,我不知道如何流式传输它们...

一些代码:-)

我正在用 C++ 实现,所以我有 classes.

FLV 作者

class FLVWritter{
public:
    std::vector<uint8_t> buffer;

    void flushToFD(int fd) {
        if (buffer.size() <= 0)
            return;
        ::write(fd,&buffer[0],buffer.size());
        buffer.clear();
    }

    void reset(){
        buffer.clear();
    }

    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x05,
            0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x00
        };

        if (haveAudio && haveVideo) {
            flv_header[4] = 0x05;
        } else if (haveAudio && !haveVideo) {
            flv_header[4] = 0x04;
        } else if (!haveAudio && haveVideo) {
            flv_header[4] = 0x01;
        } else {
            flv_header[4] = 0x00;
        }

        for(int i=0;i<13;i++)
            buffer.push_back(flv_header[i]);
    }

    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    void writeUInt16(uint32_t v){
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt24(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt32(uint32_t v){
        buffer.push_back((uint8_t)(v >> 24));
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
        buffer.push_back((uint8_t)(v >> 24));
    }

};

h264 解析器:

class ParserH264 {

    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1
    };
    State nalState; // nal = network abstraction layer
    State writingState;

    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;
            
            if (writingState == None){
                writingState = Data;
                return;
            } else if (writingState == Data){
                
                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00
                
                //in the case using the format 00 00 00 01, remove the last element detected
                if (buffer[buffer.size()-1] == 0x00 &&
                    buffer[buffer.size()-2] != 0x03 )//keep value, if emulation prevention is present
                    buffer.pop_back();
                
                chunkDetectedH264(&buffer[0],buffer.size());
                buffer.clear();
                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    virtual void chunkDetectedH264(const uint8_t* ibuffer, int size){
        
    }

    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(&buffer[0],buffer.size());
        buffer.clear();
        writingState = None;
        nalState = None;
    }

    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }
};

最后是主要的 class:

class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> lastSPS;

public:
    FLVWritter mFLVWritter;
    int fd;
    bool firstAudioWrite;
    uint32_t audioTimestamp_ms;
    uint32_t videoTimestamp_ms;

    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //have audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        firstAudioWrite = true;
        audioTimestamp_ms = 0;
        videoTimestamp_ms = 0;
    }

    virtual ~FLVFileWriter(){
        if (fd >= 0){
            mFLVWritter.flushToFD(fd);
            close(fd);
        }
    }

    void chunkDetectedH264(const uint8_t* ibuffer, int size) {
        printf("[debug] Detected NAL chunk size: %i\n",size);
        
        if (size <= 0){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = ibuffer[0];
        uint8_t nal_type = (nal_bit & 0x1f);
        
        //0x67
        //if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_SPS) ) {
        if ( nal_type == (NAL_TYPE_SPS) ) {

            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            //store information to use when arrise PPS nal_bit, probably the next NAL detection
            lastSPS.clear();
            for(int i=0;i<size;i++)
                lastSPS.push_back(ibuffer[i]);
        }
        //else if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_PPS) ) {
        else if ( nal_type == (NAL_TYPE_PPS) ) {

            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            //must be called just after the SPS detection
            int32_t bodyLength = lastSPS.size() + size + 16;

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = 16 bytes + SPS bytes + PPS bytes
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
            mFLVWritter.writeUInt8(0x00);//avc sequence header
            mFLVWritter.writeUInt24( 0x00 );//composit time ??????????

            //flv VideoTagBody --AVCDecoderCOnfigurationRecord
            mFLVWritter.writeUInt8(0x01);//configurationversion
            mFLVWritter.writeUInt8(lastSPS[1]);//avcprofileindication
            mFLVWritter.writeUInt8(lastSPS[2]);//profilecompatibilty
            mFLVWritter.writeUInt8(lastSPS[3]);//avclevelindication
            mFLVWritter.writeUInt8(0xFC | 0x03); //reserved + lengthsizeminusone
            mFLVWritter.writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

            mFLVWritter.writeUInt16( lastSPS.size() ); //sequence parameter set length
            //H264 sequence parameter set raw data
            for(int i=0;i<lastSPS.size();i++)
                mFLVWritter.writeUInt8(lastSPS[i]);

            mFLVWritter.writeUInt8(0x01); // number of PPS

            //sanity check with the packet size...
            if ( size-4 > 0xffff ){
                fprintf(stderr, "PPS Greater than 64k. This muxer does not support it.\n");
                exit(-1);
            }

            mFLVWritter.writeUInt16(size); //picture parameter set length
            //H264 picture parameter set raw data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_PPS\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);


            videoTimestamp_ms += 1000/30;
        }
        //0x65
        //else if (nal_bit == (NAL_IDC_PICTURE | NAL_TYPE_CSIDRP) ) {
        else if ( nal_type == (NAL_TYPE_CSIDRP) ) {
        

            fprintf(stdout, " processing: 0x%x (0x65)\n",nal_bit);

            uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader +  NALU length

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
            mFLVWritter.writeUInt8(0x01);//avc NALU unit
            mFLVWritter.writeUInt24(0x00);//composit time ??????????
            mFLVWritter.writeUInt32(size);//nal length

            //nal raw data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_CSIDRP\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);

            videoTimestamp_ms += 1000/30;
        }
        //0x61
        //else if (nal_bit == (NAL_IDC_FRAME | NAL_TYPE_CSNIDRP) ) {
        else if ( nal_type == (NAL_TYPE_CSNIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x61)\n",nal_bit);

            uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader +  NALU length

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x27);//key frame, AVC 1:keyframe 2:inner frame 7:H264
            mFLVWritter.writeUInt8(0x01);//avc NALU unit
            mFLVWritter.writeUInt24(0x00);//composit time ??????????
            mFLVWritter.writeUInt32(size);//nal length

            // raw nal data
            for(int i=0;i<size;i++)
                mFLVWritter.writeUInt8(ibuffer[i]);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_CSNIDRP\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);

            videoTimestamp_ms += 1000/30;

        }

        else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            
        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }
    }

};

要在 main 中使用这些 classes,我们可以这样写:

volatile bool exit_requested = false;
void signal_handler(int signal) {
    exit_requested = true;
}

int main(int argc, char* argv[]) {
    int fd_stdin = fileno(stdin);

    signal(SIGINT,  signal_handler);
    signal(SIGTERM, signal_handler);
    signal(SIGQUIT, signal_handler);

    unsigned char buffer[65536];
    
    FLVFileWriter flv("out.flv");

    while (!exit_requested) {

        int readedSize = read(fd_stdin,buffer,65536);
        if (readedSize==0)
            break;
        
        flv.parseH264(buffer,readedSize);
    }
    
    flv.endOfStreamH264();

    signal(SIGINT,  SIG_DFL);
    signal(SIGTERM, SIG_DFL);
    signal(SIGQUIT, SIG_DFL);

    return 0;
}

编译上面的程序我们可以执行如下:

cat test.h264 | ./flv

然后我明白了:

我从命令行使用 ffmpeg 检查我的源 h264 流文件是否损坏。所以我 运行 以下内容:

cat test.h264 | ffmpeg -i - -c:v copy out2.flv

使用 ffmpeg 结果正常:

我把我测试的视频和完整的源代码放在一起here

The h264 file I used for the test.

WIP(进行中)

但是我发现了很多新信息:-)。

首先:FLV 容器无法从 h264 逐一打包 NAL 缓冲区。

其二:只有在有新帧可用时才需要增加时间戳。

我遇到的绿屏问题与时间戳问题的 NAL 包碎片有关。

困难的部分是检测流何时获得新帧。 here.

有几种方法

但是 h264 流在其结构中具有不同位大小的数据成员。

读取位

ue(v) 数据类型。要读取此结构,您需要计算前导 0 位直到到达位 1,并在我们找到的 1 位之后读取相同数量的 0 位。

h264 的 headers 中该字段的内容可能会有所不同...一个常见的值称为 golomb,它是使用以下公式计算的:(1 << leadingZeros) - 1 + readbits (leadingZeros).

RBSP - 原始字节序列负载

我们不能直接读取流值,因为它可能包含防仿真字节。我们需要在读取 headers.

之前删除仿真预防字节

代码

所以为了开始缓解这个问题,我重写了 H264 解析器如下:

struct sps_info{
    uint8_t profile_idc;
    uint8_t constraints;
    uint8_t level_idc;
    uint8_t log2_max_frame_num;

    bool set;
};

class ParserH264 {
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1,

        EMULATION0,
        EMULATION1,
        EMULATION_FORCE_SKIP
    };
    State nalState; // nal = network abstraction layer
    State writingState;

    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;

            if (writingState == None){
                writingState = Data;
                return;
            } else if (writingState == Data){

                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00

                //in the case using the format 00 00 00 01, remove the last element detected
                if (buffer.size()-2 >=0 &&
                    buffer[buffer.size()-1] == 0x00 &&
                    buffer[buffer.size()-2] != 0x03 )//keep value, if emulation prevention is present
                    buffer.pop_back();

                chunkDetectedH264(buffer);

                buffer.clear();

                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // rawBuffer might has emulation bytes
    // Raw Byte Sequence Payload (RBSP)
    virtual void chunkDetectedH264(const std::vector<uint8_t> &nal){

    }

    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(buffer);

        buffer.clear();
        writingState = None;
        nalState = None;
    }

    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }


    static inline uint32_t readbit(int bitPos,  const uint8_t* data, int size){
        int dataPosition = bitPos / 8;
        int bitPosition = 7 - bitPos % 8;
        if (dataPosition >= size){
            fprintf(stderr,"error to access bit...\n");
            exit(-1);
        }
        return (data[dataPosition] >> bitPosition) & 0x01;
    }

    // leading 0`s count equals the number of next bits after bit 1
    //
    //  Example: 01x  001xx 0001xxx 00001xxxx
    //
    //  The max number of bits is equal 32 in this sintax
    //
    static inline int bitsToSkip_ue( int start,  const uint8_t* data, int size){
        int bitPos = start;
        int dataPosition = start / 8;
        int bitPosition;
        while (dataPosition < size){
            dataPosition = bitPos / 8;
            bitPosition = 7 - bitPos % 8;
            int bit = (data[dataPosition] >> bitPosition) & 0x01;
            if (bit == 1)
                break;
            bitPos++;
        }
        int leadingZeros = bitPos - start;
        int totalBits = leadingZeros + 1 + leadingZeros;
        if (totalBits > 32){
            fprintf(stderr,"bitsToSkip_ue length greated than 32\n");
            exit(-1);
        }
        return totalBits;
    }

    static inline uint32_t read_golomb_ue(int start,  const uint8_t* data, int size){
        int bitPos = start;
        int dataPosition = start / 8;
        int bitPosition;
        while (dataPosition < size){
            dataPosition = bitPos / 8;
            bitPosition = 7 - bitPos % 8;
            int bit = (data[dataPosition] >> bitPosition) & 0x01;
            if (bit == 1)
                break;
            bitPos++;
        }
        uint32_t leadingZeros = (uint32_t)(bitPos - start);
        uint32_t num = readbits(bitPos+1, leadingZeros, data, size);
        num += (1 << leadingZeros) - 1;

        return num;
    }

    static inline uint32_t readbits(int bitPos, int length, const uint8_t* data, int size){
        if (length > 32){
            fprintf(stderr,"readbits length greated than 32\n");
            exit(-1);
        }
        uint32_t result = 0;
        for(int i=0;i<length;i++){
            result <<= 1;
            result = result | readbit(bitPos+i, data, size);
        }
        return result;
    }

    static inline sps_info parseSPS(const std::vector<uint8_t> sps_rbsp) {
        const uint8_t *data = &sps_rbsp[0];
        uint32_t size = sps_rbsp.size();

        sps_info result;

        result.profile_idc = sps_rbsp[1];
        result.constraints = sps_rbsp[2];
        result.level_idc = sps_rbsp[3];

        uint32_t startIndex = 8+24;//NAL bit + profile_idc (8bits) + constraints (8bits) + level_idc (8bits)
        startIndex += bitsToSkip_ue(startIndex, data, size);//seq_parameter_set_id (ue)

        uint32_t log2_max_frame_num_minus4 = read_golomb_ue(startIndex, data, size);

        if (log2_max_frame_num_minus4 < 0 ||
            log2_max_frame_num_minus4 > 12){
            fprintf(stderr,"parseSPS_log2_max_frame_num_minus4 value not in range [0-12] \n");
            exit(-1);
        }

        result.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
        result.set = true;
        return result;
    }

    // Raw Byte Sequence Payload (RBSP) -- without the emulation prevention bytes
    // maxSize is used to parse just the beggining of the nal structure...
    // avoid process all buffer size on NAL new frame check
    static inline void nal2RBSP(const std::vector<uint8_t> &buffer,
                                std::vector<uint8_t> *rbsp,
                                int maxSize = -1){
        if (maxSize <= 0)
            maxSize = buffer.size();
        rbsp->resize(maxSize);

        State emulationState = None;
        int count = 0;

        for(int i=0; i < maxSize ;i++){
            uint8_t byte = buffer[i];

            if (byte == 0x00 && emulationState == None)
                emulationState = EMULATION0;
            else if (byte == 0x00 && (emulationState == EMULATION0 || emulationState == EMULATION1) )
                emulationState = EMULATION1;
            else if (byte == 0x03 && emulationState == EMULATION1)
            {
                emulationState = EMULATION_FORCE_SKIP;
                continue;
            }
            else if (emulationState == EMULATION_FORCE_SKIP) { //skip 00 01 02 or 03
                if ( byte != 0x00 && byte != 0x01 && byte != 0x02 && byte != 0x03 ){
                    fprintf(stdout, "H264 NAL emulation prevention pattern detected error (%u)\n", byte);
                    exit(-1);
                }
                emulationState = None;
            } else
                emulationState = None;

            (*rbsp)[count] = byte;
            count++;
        }
        if (count != rbsp->size())
            rbsp->resize(count);
    }

};

我更新了 FLVWriter 以写入整个帧(具有 1 个或多个 NAL)并写入视频序列 header。

代码:

class FLVWritter{
public:
    std::vector<uint8_t> buffer;

    void flushToFD(int fd) {
        if (buffer.size() <= 0)
            return;
        ::write(fd,&buffer[0],buffer.size());
        buffer.clear();
    }

    void reset(){
        buffer.clear();
    }

    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x05,
            0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x00
        };

        if (haveAudio && haveVideo) {
            flv_header[4] = 0x05;
        } else if (haveAudio && !haveVideo) {
            flv_header[4] = 0x04;
        } else if (!haveAudio && haveVideo) {
            flv_header[4] = 0x01;
        } else {
            flv_header[4] = 0x00;
        }

        for(int i=0;i<13;i++)
            buffer.push_back(flv_header[i]);
    }

    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    void writeUInt16(uint32_t v){
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt24(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt32(uint32_t v){
        buffer.push_back((v >> 24) & 0xff);
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    void writeUInt32(uint32_t v, std::vector<uint8_t> *data){
        data->push_back((v >> 24) & 0xff);
        data->push_back((v >> 16) & 0xff);
        data->push_back((v >> 8) & 0xff);
        data->push_back((v) & 0xff);
    }

    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
        buffer.push_back((v >> 24) & 0xff);
    }

    void writeVideoSequenceHeader(const std::vector<uint8_t> &sps, const std::vector<uint8_t> &pps, const sps_info &spsinfo) {

        //
        // flv tag header = 11 bytes
        //
        writeUInt8(0x09);//tagtype video
        writeUInt24( sps.size() + pps.size() + 16 );//data len
        writeUInt32Timestamp( 0 );//timestamp
        writeUInt24( 0 );//stream id 0

        //
        // Message Body = 16 bytes + SPS bytes + PPS bytes
        //
        //flv VideoTagHeader
        writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
        writeUInt8(0x00);//avc sequence header
        writeUInt24( 0x00 );//composition time

        //flv VideoTagBody --AVCDecoderCOnfigurationRecord
        writeUInt8(0x01);//configurationversion
        writeUInt8(spsinfo.profile_idc);//avcprofileindication
        writeUInt8(spsinfo.constraints);//profilecompatibilty
        writeUInt8(spsinfo.level_idc);//avclevelindication
        writeUInt8(0xFC | 0x03); //reserved (6 bits), NULA length size - 1 (2 bits)
        writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

        writeUInt16( sps.size() ); //sequence parameter set length
        //H264 sequence parameter set raw data
        for(int i=0;i<sps.size();i++)
            writeUInt8(sps[i]);

        writeUInt8(0x01); // number of PPS

        writeUInt16(pps.size()); //picture parameter set length
        //H264 picture parameter set raw data
        for(int i=0;i<pps.size();i++)
            writeUInt8(pps[i]);

        if (buffer.size() != sps.size() + pps.size() + 16 + 11 ){
            fprintf(stderr, "error writeVideoSequenceHeader\n");
            exit(-1);
        }

        // previous tag size
        writeUInt32(buffer.size());
    }

    void writeVideoFrame(const std::vector<uint8_t> &data, bool keyframe, uint32_t timestamp_ms, int streamID){

        writeUInt8(0x09);//tagtype video
        writeUInt24( data.size() + 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id 0)

        if (keyframe)
            writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        else
            writeUInt8(0x27);//key frame, AVC 1:keyframe 2:inner frame 7:H264

        writeUInt8(0x01);//avc NALU unit
        writeUInt24(0x00);//composit time ??????????

        for(int i=0;i<data.size();i++)
            writeUInt8(data[i]);

        if (buffer.size() != data.size() + 5 + 11 ){
            fprintf(stderr, "error writeVideoFrame\n");
            exit(-1);
        }

        // previous size
        writeUInt32(buffer.size());

    }

    void writeVideoEndOfStream(uint32_t timestamp_ms, int streamID){
        writeUInt8(0x09);//tagtype video
        writeUInt24( 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id 0)

        writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        writeUInt8(0x02);//avc EOS
        writeUInt24(0x00);//composit time ??????????

        if (buffer.size() != 5 + 11 ){
            fprintf(stderr, "error writeVideoEOS\n");
            exit(-1);
        }

        // previous size
        writeUInt32(buffer.size());
    }

};

接下来我们需要检测流中的新帧。

我已经实现了几种方法和 frame_num 方法。

要读取 frame_num,我们需要在读取 frame_num 位之前跳过 3 个 ue(v) 结构。

现在h264新帧检测器class:

class H264NewFrameDetection {
    std::vector<uint8_t> aux;

    bool newFrameOnNextIDR;
    uint32_t old_frame_num;
    bool spsinfo_set;

    bool firstFrame;
public:
    uint32_t currentFrame;
    bool newFrameFound;

    H264NewFrameDetection() {
        currentFrame = 0;
        newFrameOnNextIDR = false;
        old_frame_num = 0;
        spsinfo_set = false;
        firstFrame = true;
    }

    void analyseBufferForNewFrame(const std::vector<uint8_t> &nal, const sps_info &spsinfo){

        uint8_t nal_bit = nal[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        if ( nal_type == (NAL_TYPE_SPS) ||
            nal_type == (NAL_TYPE_PPS) ||
            nal_type == (NAL_TYPE_AUD) ||
            nal_type == (NAL_TYPE_SEI) ||
            (nal_type >= 14 && nal_type <= 18)
            ) {
            newFrameOnNextIDR = true;
        }

        if ((nal_type == NAL_TYPE_CSIDRP ||
             nal_type == NAL_TYPE_CSNIDRP)
            && spsinfo.set ){

            aux.clear();
            //(8 + 3*(32*2+1) + 16) = max header per NALU slice bits = 27.375 bytes
            // 32 bytes + 8 (Possibility of Emulation in 32 bytes)
            int RBSPMaxBytes = 32 + 8;
            if (nal.size() < (32 + 8))
                RBSPMaxBytes = nal.size();
            ParserH264::nal2RBSP(nal, &aux, RBSPMaxBytes );
            uint8_t * rbspBuffer = &aux[0];
            uint32_t rbspBufferSize = aux.size();

            uint32_t frame_num_index = 8;//start counting after the nal_bit
            //first_mb_in_slice (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //slice_type (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //pic_parameter_set_id (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //now can read frame_num
            uint32_t frame_num = ParserH264::readbits(frame_num_index,
                                                      spsinfo.log2_max_frame_num,
                                                      rbspBuffer, rbspBufferSize);

            if (!spsinfo_set){
                old_frame_num = frame_num;
                spsinfo_set = true;//spsinfo.set
            }

            if (old_frame_num != frame_num){
                newFrameOnNextIDR = true;
                old_frame_num = frame_num;
            }
        }


        if (newFrameOnNextIDR &&(nal_type == NAL_TYPE_CSIDRP ||
                                 nal_type == NAL_TYPE_CSNIDRP)) {
            newFrameOnNextIDR = false;
            if (firstFrame){//skip the first frame
                firstFrame = false;
            } else {
                newFrameFound = true;
                currentFrame++;
            }
        }
    }

    void reset(){
        newFrameFound = false;
    }

};

最后写手class.

在这个实现中,我存储 NAL,直到检测到新帧。当它找到一个新帧时,我用 NAL 集写入 FLV 标签(所有 NAL 都存储在列表中)。

代码:

class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> sps;
    std::vector<uint8_t> pps;

    sps_info spsInfo;

    H264NewFrameDetection mH264NewFrameDetection;

public:
    FLVWritter mFLVWritter;
    int fd;
    uint32_t videoTimestamp_ms;

    //contains the several NALs until complete a frame... after that can write to FLV
    std::vector<uint8_t> nalBuffer;

    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //have audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms = 0;        
        spsInfo.set = false;
    }

    virtual ~FLVFileWriter(){
        if (fd >= 0){

            //force write the last frame
            if (nalBuffer.size() > 0){
                uint8_t firstNALtype = nalBuffer[4] & 0x1f;

                bool iskeyframe = (firstNALtype == NAL_TYPE_CSIDRP);
                mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
                mFLVWritter.flushToFD(fd);

                nalBuffer.clear();

                mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
                mFLVWritter.flushToFD(fd);
            } else {
                if (mH264NewFrameDetection.currentFrame > 0)
                    videoTimestamp_ms = ((mH264NewFrameDetection.currentFrame-1) * 1000)/30;
                mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
                mFLVWritter.flushToFD(fd);
            }
            close(fd);
        }
    }


    //decoding time stamp (DTS) and presentation time stamp (PTS)
    void chunkDetectedH264(const std::vector<uint8_t> &data) {

        if (data.size() <= 0){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = data[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        mH264NewFrameDetection.analyseBufferForNewFrame(data, spsInfo);

        if (mH264NewFrameDetection.newFrameFound){
            mH264NewFrameDetection.reset();

            uint8_t firstNALtype = nalBuffer[4] & 0x1f;

            bool iskeyframe = (firstNALtype == NAL_TYPE_CSIDRP);
            mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
            mFLVWritter.flushToFD(fd);

            nalBuffer.clear();

            videoTimestamp_ms = (mH264NewFrameDetection.currentFrame * 1000)/30;
        }

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {
            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            sps.clear();
            //max 26 bytes on sps header (read until log2_max_frame_num_minus4)
            nal2RBSP(data, &sps, 26 );
            spsInfo = parseSPS(sps);

            sps.clear();
            for(int i=0;i<data.size();i++)
                sps.push_back(data[i]);

        }
        //0x68
        else if ( nal_type == (NAL_TYPE_PPS) ) {
            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            pps.clear();
            for(int i=0;i<data.size();i++)
                pps.push_back(data[i]);

            mFLVWritter.writeVideoSequenceHeader(sps, pps, spsInfo);
            mFLVWritter.flushToFD(fd);

        }
        //0x65, 0x61, 0x41
        else if ( nal_type == NAL_TYPE_CSIDRP ||
                  nal_type == NAL_TYPE_CSNIDRP ) {
            //convert annexb to AVCC (length before the NAL structure)
            mFLVWritter.writeUInt32( data.size(), &nalBuffer );
            for(int i=0;i<data.size();i++)
                nalBuffer.push_back(data[i]);

        } else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }

    }
};

结论

经过测试,该视频可以在这个新实现中正常运行。

我现在唯一的问题是与 PTS(演示时间戳)和 DTS(解码器时间戳)有关。我不知道如何从流中提取它们以及如何使用它们来计算帧时间戳。

此当前实现使用硬编码比率 1000/30(例如 30fps)作为增加时间戳的基础。

时间戳以毫秒为单位。