Muxing h264 into mp4 using libmp4v2 and OpenH264
I'm using OpenH264 as the encoder and I want to mux its output into a playable mp4 using libmp4v2.
The resulting .mp4 only partially works: it plays in VLC and MPC-HC, but not in Windows Media Player or the Windows 10 "Movie and Tv" app.
My goal is a file that works in all of these players.
Both Windows players tell me that they do not know the codec and therefore cannot play the file. That cannot be true, because I can play a manually muxed file containing the same h264 bitstream, created with FFmpeg from the CLI:
ffmpeg -i "testenc.h264" -c:v copy -f mp4 "output.mp4"
Based on this, I assume my encoding process works fine and the problem lies in the muxing.
Edit:
Thanks to Rudolfs Bundulis' answer pointing out that SPS/PPS data was missing, I was able to restructure my code. It now tries to include the missing data by analyzing the encoder's bitstream and calling MP4AddH264SequenceParameterSet or MP4AddH264PictureParameterSet where necessary. But still no success.
My complete code:
#include "stdafx.h"
#include <iostream>
#include <stdio.h>
#include <chrono>
#include "mp4v2/mp4v2.h"
#include "codec_api.h"
#define WIDTH 1280
#define HEIGHT 960
#define DURATION MP4_INVALID_DURATION
#define NAL_SPS 1
#define NAL_PPS 2
#define NAL_I 3
#define NAL_P 4
using namespace std;
using namespace chrono;
/* Just some dummy data to see artifacts ect */
void prepareFrame(int i, SSourcePicture* pic) {
for (int y = 0; y<HEIGHT; y++) {
for (int x = 0; x<WIDTH; x++) {
pic->pData[0][y * WIDTH + x] = x + y + i * 3;
}
}
for (int y = 0; y<HEIGHT / 2; y++) {
for (int x = 0; x<WIDTH / 2; x++) {
pic->pData[1][y * (WIDTH / 2) + x] = 128 + y + i * 2;
pic->pData[2][y * (WIDTH / 2) + x] = 64 + x + i * 5;
}
}
pic->uiTimeStamp = (i + 1) * 1000 / 75;
}
void printHex(const unsigned char* arr, int len) {
for (int i = 0; i < len; i++) {
if (arr[i] < 16) {
cout << "0";
}
cout << hex << (int)arr[i] << " ";
}
cout << endl;
}
void mp4Encode(MP4FileHandle mp4Handle, MP4TrackId track, uint8_t * bitstream, int length) {
int index = -1;
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x67) {
index = NAL_SPS;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x68) {
index = NAL_PPS;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x65) {
index = NAL_I;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x61) {
index = NAL_P;
}
switch (index) {
case NAL_SPS:
cout << "Detected SPS" << endl;
MP4AddH264SequenceParameterSet(mp4Handle, track, bitstream + 4, length - 4);
break;
case NAL_PPS:
cout << "Detected PPS" << endl;
MP4AddH264PictureParameterSet(mp4Handle, track, bitstream + 4, length - 4);
break;
case NAL_I:
{
cout << "Detected I" << endl;
uint8_t * IFrameData = (uint8_t *) malloc(length + 1);
IFrameData[0] = (length - 3) >> 24;
IFrameData[1] = (length - 3) >> 16;
IFrameData[2] = (length - 3) >> 8;
IFrameData[3] = (length - 3) & 0xff;
memcpy(IFrameData + 4, bitstream + 3, length - 3);
if (!MP4WriteSample(mp4Handle, track, IFrameData, length + 1, DURATION, 0, 1)) {
cout << "Error when writing sample" << endl;
system("pause");
exit(1);
}
free(IFrameData);
break;
}
case NAL_P:
{
cout << "Detected P" << endl;
bitstream[0] = (length - 4) >> 24;
bitstream[1] = (length - 4) >> 16;
bitstream[2] = (length - 4) >> 8;
bitstream[3] = (length - 4) & 0xff;
if (!MP4WriteSample(mp4Handle, track, bitstream, length, DURATION, 0, 1)) {
cout << "Error when writing sample" << endl;
system("pause");
exit(1);
}
break;
}
}
if (index == -1) {
cout << "Could not detect nal type" << endl;
system("pause");
exit(1);
}
}
int main()
{
//just to measure performance
high_resolution_clock::time_point time = high_resolution_clock::now();
//Create MP4
MP4FileHandle mp4Handle = MP4Create("test.mp4", 0);
MP4SetTimeScale(mp4Handle, 90000);
//Create filestream for binary h264 output for testing
FILE* targetFile;
targetFile = fopen("testenc.h264", "wb");
if (!targetFile) {
cout << "failed to create file" << endl;
system("pause");
return 1;
}
ISVCEncoder *encoder;
int rv = WelsCreateSVCEncoder(&encoder);
//Encoder params
SEncParamExt param;
encoder->GetDefaultParams(¶m);
param.iUsageType = CAMERA_VIDEO_REAL_TIME;
param.fMaxFrameRate = 75.f;
param.iLtrMarkPeriod = 75;
param.iPicWidth = WIDTH;
param.iPicHeight = HEIGHT;
param.iTargetBitrate = 40000000;
param.bEnableDenoise = false;
param.iSpatialLayerNum = 1;
param.bUseLoadBalancing = false;
param.bEnableSceneChangeDetect = false;
param.bEnableBackgroundDetection = false;
param.bEnableAdaptiveQuant = false;
param.bEnableFrameSkip = false;
param.iMultipleThreadIdc = 16;
//param.uiIntraPeriod = 10;
for (int i = 0; i < param.iSpatialLayerNum; i++) {
param.sSpatialLayers[i].iVideoWidth = WIDTH >> (param.iSpatialLayerNum - 1 - i);
param.sSpatialLayers[i].iVideoHeight = HEIGHT >> (param.iSpatialLayerNum - 1 - i);
param.sSpatialLayers[i].fFrameRate = 75.f;
param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate;
param.sSpatialLayers[i].uiProfileIdc = PRO_BASELINE;
param.sSpatialLayers[i].uiLevelIdc = LEVEL_4_2;
param.sSpatialLayers[i].iDLayerQp = 42;
SSliceArgument sliceArg;
sliceArg.uiSliceMode = SM_FIXEDSLCNUM_SLICE;
sliceArg.uiSliceNum = 16;
param.sSpatialLayers[i].sSliceArgument = sliceArg;
}
param.uiMaxNalSize = 1500;
param.iTargetBitrate *= param.iSpatialLayerNum;
encoder->InitializeExt(¶m);
int videoFormat = videoFormatI420;
encoder->SetOption(ENCODER_OPTION_DATAFORMAT, &videoFormat);
MP4TrackId track = MP4AddH264VideoTrack(mp4Handle, 90000, 90000/25, WIDTH, HEIGHT, 66, 192, 42, 3);
MP4SetVideoProfileLevel(mp4Handle, 0x7f);
SFrameBSInfo info;
memset(&info, 0, sizeof(SFrameBSInfo));
SSourcePicture pic;
memset(&pic, 0, sizeof(SSourcePicture));
pic.iPicWidth = WIDTH;
pic.iPicHeight = HEIGHT;
pic.iColorFormat = videoFormatI420;
pic.iStride[0] = pic.iPicWidth;
pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
int frameSize = WIDTH * HEIGHT * 3 / 2;
pic.pData[0] = new unsigned char[frameSize];
pic.pData[1] = pic.pData[0] + WIDTH * HEIGHT;
pic.pData[2] = pic.pData[1] + (WIDTH * HEIGHT >> 2);
for (int num = 0; num<75; num++) {
cout << "-------FRAME " << dec << num << "-------" << endl;
prepareFrame(num, &pic);
rv = encoder->EncodeFrame(&pic, &info);
if (!rv == cmResultSuccess) {
cout << "encode failed" << endl;
continue;
}
if (info.eFrameType != videoFrameTypeSkip) {
for (int i = 0; i < info.iLayerNum; ++i) {
int len = 0;
const SLayerBSInfo& layerInfo = info.sLayerInfo[i];
for (int j = 0; j < layerInfo.iNalCount; ++j) {
cout << "Layer: " << dec << i << "| Nal: " << j << endl << "Hex: ";
printHex(info.sLayerInfo[i].pBsBuf + len, 20);
mp4Encode(mp4Handle, track, info.sLayerInfo[i].pBsBuf + len, layerInfo.pNalLengthInByte[j]);
len += layerInfo.pNalLengthInByte[j];
}
//mp4Encode(mp4Handle, track, info.sLayerInfo[i].pBsBuf, len);
}
//fwrite(info.sLayerInfo[0].pBsBuf, 1, len, targetFile);
}
}
int res = 0;
encoder->GetOption(ENCODER_OPTION_PROFILE, &res);
cout << res << endl;
fflush(targetFile);
fclose(targetFile);
encoder->Uninitialize();
WelsDestroySVCEncoder(encoder);
//Close MP4
MP4Close(mp4Handle);
cout << "done in: ";
cout << duration_cast<milliseconds>(high_resolution_clock::now() - time).count() << endl;
system("pause");
return 0;
}
You can use MP4Box from GPAC to analyze the MP4 box layout of both files (for example, MP4Box -info test.mp4 shows the track and codec details).
As can be seen there, the bad file is missing the SPS/PPS data in the avcC box. The same NAL units are most likely also stored among the samples, but the specification requires them to be present in the avcC box as well (some players cope with SPS/PPS inlined in the stream, but it is bad practice since it breaks seeking: you cannot tell which samples reference which parameter sets).
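To make the difference concrete, the two layouts compare roughly like this (an illustrative sketch, not a dump of the files in question):

    Annex B stream:  00 00 00 01 <SPS> 00 00 00 01 <PPS> 00 00 00 01 <IDR slice> ...
    MP4 container:   SPS and PPS live in the avcC box under stsd; each sample in
                     mdat is <4-byte big-endian length><NAL>, with no start codes.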
A quick google search for libmp4v2 gave me this example, which shows how MP4AddH264SequenceParameterSet/MP4AddH264PictureParameterSet are actually meant to be called to provide the SPS/PPS, while you only call MP4WriteSample; that is probably the issue.
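For reference, here is a minimal sketch of that call order. The helper name createH264Track is hypothetical, and sps/pps are assumed to already point at the bare parameter set NAL units (start codes stripped, NAL header byte included); the 90 kHz timescale and 75 fps duration mirror this question, not a requirement:

#include <cstdint>
#include "mp4v2/mp4v2.h"

// Sketch: create the track and populate the avcC box before writing samples.
MP4TrackId createH264Track(MP4FileHandle file,
                           const uint8_t* sps, uint16_t spsLen,
                           const uint8_t* pps, uint16_t ppsLen,
                           uint16_t width, uint16_t height)
{
    // The profile/level bytes come straight from the SPS payload:
    // sps[0] is the NAL header (0x67), sps[1] = profile_idc,
    // sps[2] = constraint flags, sps[3] = level_idc.
    MP4TrackId track = MP4AddH264VideoTrack(
        file, 90000, 90000 / 75,        // 90 kHz timescale, 75 fps
        width, height,
        sps[1], sps[2], sps[3],
        3);                             // samples carry a 4-byte NAL length field

    // These two calls are what fill the avcC box; without them the Windows
    // players report an unknown codec.
    MP4AddH264SequenceParameterSet(file, track, sps, spsLen);
    MP4AddH264PictureParameterSet(file, track, pps, ppsLen);

    // From here on, every MP4WriteSample call must receive NAL units prefixed
    // with a 4-byte big-endian length instead of the Annex B start code.
    return track;
}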
My subjective opinion: I have never used libmp4v2, but if you don't know how to use it either, use ffmpeg instead, since there are more examples and the community is bigger. Muxing H.264 into mp4 is really simple, and there are plenty of examples online.
Summary
- MP4 requires the SPS/PPS information to be in the avcC box. Some players may still decode the stream if these units are stored with the samples, but to be spec compliant the avcC box should always be present, otherwise a player may refuse to play the stream.
- Depending on the library used there can be different techniques for signaling the SPS/PPS to the muxer, but as seen here with libmp4v2, MP4AddH264SequenceParameterSet/MP4AddH264PictureParameterSet must be used. To obtain the SPS/PPS data the bitstream has to be parsed; how depends on the bitstream format (Annex B with start codes, or avcc with interleaved lengths, see this for more information). Once extracted, the SPS/PPS should be handed to the muxing library (see the parsing sketch after this list).
- Be careful with SPS/PPS changes. The spec actually allows multiple stsd sample description boxes that can then be referenced, but as far as I remember Windows Media Player handles that badly, so stick to a single SPS/PPS set if possible. One should be able to configure the encoder not to emit a repeated SPS/PPS entry on every keyframe.
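Since the parsing step is where this usually goes wrong, here is a rough Annex B splitter sketch. It scans for 3- and 4-byte start codes, classifies each NAL by nal_unit_type (the low 5 bits of the first NAL byte, which is sturdier than matching whole header bytes like 0x67/0x61, because the nal_ref_idc bits can differ between encoders), and routes SPS/PPS into the avcC box while length-prefixing everything else. The helpers findStartCode, writeLengthPrefixed, and consumeAnnexB are hypothetical names; treat this as an illustration, not drop-in code:

#include <cstdint>
#include <cstring>
#include <vector>
#include "mp4v2/mp4v2.h"

// Find the next Annex B start code (00 00 01 or 00 00 00 01) at or after
// `from`; returns `size` if none is left, and reports the start code length.
static size_t findStartCode(const uint8_t* p, size_t size, size_t from, size_t& scLen) {
    for (size_t i = from; i + 3 <= size; ++i) {
        if (p[i] == 0 && p[i + 1] == 0 && p[i + 2] == 1) { scLen = 3; return i; }
        if (i + 4 <= size && p[i] == 0 && p[i + 1] == 0 && p[i + 2] == 0 && p[i + 3] == 1) { scLen = 4; return i; }
    }
    scLen = 0;
    return size;
}

// Prefix one NAL with a 4-byte big-endian length and write it as a sample,
// matching sampleLenFieldSizeMinusOne = 3 from the track setup.
static void writeLengthPrefixed(MP4FileHandle file, MP4TrackId track,
                                const uint8_t* nal, size_t nalSize, bool sync) {
    std::vector<uint8_t> sample(4 + nalSize);
    sample[0] = uint8_t(nalSize >> 24);
    sample[1] = uint8_t(nalSize >> 16);
    sample[2] = uint8_t(nalSize >> 8);
    sample[3] = uint8_t(nalSize);
    std::memcpy(sample.data() + 4, nal, nalSize);
    MP4WriteSample(file, track, sample.data(), uint32_t(sample.size()),
                   MP4_INVALID_DURATION, 0, sync);
}

// Walk an Annex B buffer NAL by NAL and dispatch on nal_unit_type.
void consumeAnnexB(MP4FileHandle file, MP4TrackId track,
                   const uint8_t* buf, size_t size) {
    size_t scLen;
    size_t pos = findStartCode(buf, size, 0, scLen);
    while (pos < size) {
        size_t nalStart = pos + scLen;
        size_t nextLen;
        size_t next = findStartCode(buf, size, nalStart, nextLen);
        size_t nalSize = next - nalStart;
        if (nalSize == 0) break;            // trailing start code, nothing to emit
        switch (buf[nalStart] & 0x1F) {     // low 5 bits = nal_unit_type
        case 7: MP4AddH264SequenceParameterSet(file, track, buf + nalStart, uint16_t(nalSize)); break;
        case 8: MP4AddH264PictureParameterSet(file, track, buf + nalStart, uint16_t(nalSize)); break;
        case 5: writeLengthPrefixed(file, track, buf + nalStart, nalSize, true); break;   // IDR = sync sample
        default: writeLengthPrefixed(file, track, buf + nalStart, nalSize, false); break; // non-IDR slices etc.
        }
        pos = next;
        scLen = nextLen;
    }
}

Two caveats on the sketch: a real muxer should concatenate all slice NALs belonging to one frame into a single length-prefixed sample instead of writing one sample per NAL, and with OpenH264 the knob for the last bullet should be the eSpsPpsIdStrategy field of SEncParamExt (CONSTANT_ID keeps one stable SPS/PPS pair), though the field name has changed between versions, so verify against your codec_api.h.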