DX11VideoRenderer 中的 ProcessFrameUsingD3D11() 与 ProcessFrameUsingXVP() 对比?
ProcessFrameUsingD3D11() vs ProcessFrameUsingXVP() in DX11VideoRenderer?
我正在尝试使用 Microsoft 示例 DX11VideoRenderer 渲染视频,该示例位于:https://github.com/Microsoft/Windows-classic-samples/tree/master/Samples/DX11VideoRenderer
从我的广泛研究来看,使用带有硬件加速的 DirectX 11 似乎是最新的方法(最不可能被弃用)并提供最佳性能解决方案。
Presenter.cpp 中有 2 个类似的函数处理帧,但我无法弄清楚它们之间的区别。 ProcessFrameUsingD3D11()
使用 VideoProcessorBlt()
实际进行渲染。神秘之处在于 ProcessFrameUsingXVP()
不使用此功能,那么它实际上是如何进行渲染的呢?还是它完全在做其他事情?此外,我的实现似乎使用 ProcessFrameUsingXVP()
基于变量 m_useXVP
的值,该值默认设置为“1”。这是代码示例:
if (m_useXVP)
{
BOOL bInputFrameUsed = FALSE;
hr = ProcessFrameUsingXVP( pCurrentType, pSample, pTexture2D, rcDest, ppOutputSample, &bInputFrameUsed );
if (SUCCEEDED(hr) && !bInputFrameUsed)
{
*pbProcessAgain = TRUE;
}
}
else
{
hr = ProcessFrameUsingD3D11( pTexture2D, pEVTexture2D, dwViewIndex, dwEVViewIndex, rcDest, *punInterlaceMode, ppOutputSample );
LONGLONG hnsDuration = 0;
LONGLONG hnsTime = 0;
DWORD dwSampleFlags = 0;
if (ppOutputSample != NULL && *ppOutputSample != NULL)
{
if (SUCCEEDED(pSample->GetSampleDuration(&hnsDuration)))
{
(*ppOutputSample)->SetSampleDuration(hnsDuration);
}
if (SUCCEEDED(pSample->GetSampleTime(&hnsTime)))
{
(*ppOutputSample)->SetSampleTime(hnsTime);
}
if (SUCCEEDED(pSample->GetSampleFlags(&dwSampleFlags)))
{
(*ppOutputSample)->SetSampleFlags(dwSampleFlags);
}
}
}
设置m_useXVP
的原因对我来说也是一个谜,我在研究中找不到答案。它使用的注册表项在我的特定 Windows10 PC 上不存在,因此该值未被修改。
const TCHAR* lpcszInVP = TEXT("XVP");
const TCHAR* lpcszREGKEY = TEXT("SOFTWARE\Microsoft\Scrunch\CodecPack\MSDVD");
if(0 == RegOpenKeyEx(HKEY_CURRENT_USER, lpcszREGKEY, 0, KEY_READ, &hk))
{
dwData = 0;
cbData = sizeof(DWORD);
if (0 == RegQueryValueEx(hk, lpcszInVP, 0, &cbType, (LPBYTE)&dwData, &cbData))
{
m_useXVP = dwData;
}
}
因此,由于我的电脑没有此密钥,代码默认使用 ProcessFrameUsingXVP()
。这是定义:
HRESULT DX11VideoRenderer::CPresenter::ProcessFrameUsingXVP(IMFMediaType* pCurrentType, IMFSample* pVideoFrame, ID3D11Texture2D* pTexture2D, RECT rcDest, IMFSample** ppVideoOutFrame, BOOL* pbInputFrameUsed)
{
HRESULT hr = S_OK;
ID3D11VideoContext* pVideoContext = NULL;
ID3D11Texture2D* pDXGIBackBuffer = NULL;
IMFSample* pRTSample = NULL;
IMFMediaBuffer* pBuffer = NULL;
IMFAttributes* pAttributes = NULL;
D3D11_VIDEO_PROCESSOR_CAPS vpCaps = { 0 };
do
{
if (!m_pDX11VideoDevice)
{
hr = m_pD3D11Device->QueryInterface(__uuidof(ID3D11VideoDevice), (void**)&m_pDX11VideoDevice);
if (FAILED(hr))
{
break;
}
}
hr = m_pD3DImmediateContext->QueryInterface(__uuidof(ID3D11VideoContext), (void**)&pVideoContext);
if (FAILED(hr))
{
break;
}
// remember the original rectangles
RECT TRectOld = m_rcDstApp;
RECT SRectOld = m_rcSrcApp;
UpdateRectangles(&TRectOld, &SRectOld);
//Update destination rect with current client rect
m_rcDstApp = rcDest;
D3D11_TEXTURE2D_DESC surfaceDesc;
pTexture2D->GetDesc(&surfaceDesc);
BOOL fTypeChanged = FALSE;
if (!m_pVideoProcessorEnum || !m_pSwapChain1 || m_imageWidthInPixels != surfaceDesc.Width || m_imageHeightInPixels != surfaceDesc.Height)
{
SafeRelease(m_pVideoProcessorEnum);
SafeRelease(m_pSwapChain1);
m_imageWidthInPixels = surfaceDesc.Width;
m_imageHeightInPixels = surfaceDesc.Height;
fTypeChanged = TRUE;
D3D11_VIDEO_PROCESSOR_CONTENT_DESC ContentDesc;
ZeroMemory(&ContentDesc, sizeof(ContentDesc));
ContentDesc.InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
ContentDesc.InputWidth = surfaceDesc.Width;
ContentDesc.InputHeight = surfaceDesc.Height;
ContentDesc.OutputWidth = surfaceDesc.Width;
ContentDesc.OutputHeight = surfaceDesc.Height;
ContentDesc.Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL;
hr = m_pDX11VideoDevice->CreateVideoProcessorEnumerator(&ContentDesc, &m_pVideoProcessorEnum);
if (FAILED(hr))
{
break;
}
m_rcSrcApp.left = 0;
m_rcSrcApp.top = 0;
m_rcSrcApp.right = m_uiRealDisplayWidth;
m_rcSrcApp.bottom = m_uiRealDisplayHeight;
if (m_b3DVideo)
{
hr = m_pVideoProcessorEnum->GetVideoProcessorCaps(&vpCaps);
if (FAILED(hr))
{
break;
}
if (vpCaps.FeatureCaps & D3D11_VIDEO_PROCESSOR_FEATURE_CAPS_STEREO)
{
m_bStereoEnabled = TRUE;
}
DXGI_MODE_DESC1 modeFilter = { 0 };
modeFilter.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
modeFilter.Width = surfaceDesc.Width;
modeFilter.Height = surfaceDesc.Height;
modeFilter.Stereo = m_bStereoEnabled;
DXGI_MODE_DESC1 matchedMode;
if (m_bFullScreenState)
{
hr = m_pDXGIOutput1->FindClosestMatchingMode1(&modeFilter, &matchedMode, m_pD3D11Device);
if (FAILED(hr))
{
break;
}
}
hr = m_pXVP->GetAttributes(&pAttributes);
if (FAILED(hr))
{
break;
}
hr = pAttributes->SetUINT32(MF_ENABLE_3DVIDEO_OUTPUT, (0 != m_vp3DOutput) ? MF3DVideoOutputType_Stereo : MF3DVideoOutputType_BaseView);
if (FAILED(hr))
{
break;
}
}
}
// now create the input and output media types - these need to reflect
// the src and destination rectangles that we have been given.
RECT TRect = m_rcDstApp;
RECT SRect = m_rcSrcApp;
UpdateRectangles(&TRect, &SRect);
const BOOL fDestRectChanged = !EqualRect(&TRect, &TRectOld);
const BOOL fSrcRectChanged = !EqualRect(&SRect, &SRectOld);
if (!m_pSwapChain1 || fDestRectChanged)
{
hr = UpdateDXGISwapChain();
if (FAILED(hr))
{
break;
}
}
if (fTypeChanged || fSrcRectChanged || fDestRectChanged)
{
// stop streaming to avoid multiple start\stop calls internally in XVP
hr = m_pXVP->ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, 0);
if (FAILED(hr))
{
break;
}
if (fTypeChanged)
{
hr = SetXVPOutputMediaType(pCurrentType, DXGI_FORMAT_B8G8R8A8_UNORM);
if (FAILED(hr))
{
break;
}
}
if (fDestRectChanged)
{
hr = m_pXVPControl->SetDestinationRectangle(&m_rcDstApp);
if (FAILED(hr))
{
break;
}
}
if (fSrcRectChanged)
{
hr = m_pXVPControl->SetSourceRectangle(&SRect);
if (FAILED(hr))
{
break;
}
}
hr = m_pXVP->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
if (FAILED(hr))
{
break;
}
}
m_bCanProcessNextSample = FALSE;
// Get Backbuffer
hr = m_pSwapChain1->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&pDXGIBackBuffer);
if (FAILED(hr))
{
break;
}
// create the output media sample
hr = MFCreateSample(&pRTSample);
if (FAILED(hr))
{
break;
}
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 0, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo && 0 != m_vp3DOutput)
{
SafeRelease(pBuffer);
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 1, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
}
DWORD dwStatus = 0;
MFT_OUTPUT_DATA_BUFFER outputDataBuffer = {};
outputDataBuffer.pSample = pRTSample;
hr = m_pXVP->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
{
//call process input on the MFT to deliver the YUV video sample
// and the call process output to extract of newly processed frame
hr = m_pXVP->ProcessInput(0, pVideoFrame, 0);
if (FAILED(hr))
{
break;
}
*pbInputFrameUsed = TRUE;
hr = m_pXVP->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
if (FAILED(hr))
{
break;
}
}
else
{
*pbInputFrameUsed = FALSE;
}
if (ppVideoOutFrame != NULL)
{
*ppVideoOutFrame = pRTSample;
(*ppVideoOutFrame)->AddRef();
}
} while (FALSE);
SafeRelease(pAttributes);
SafeRelease(pBuffer);
SafeRelease(pRTSample);
SafeRelease(pDXGIBackBuffer);
SafeRelease(pVideoContext);
return hr;
}
这里是 ProcessFrameUsingD3D11()
的定义:
HRESULT DX11VideoRenderer::CPresenter::ProcessFrameUsingD3D11( ID3D11Texture2D* pLeftTexture2D, ID3D11Texture2D* pRightTexture2D, UINT dwLeftViewIndex, UINT dwRightViewIndex,
RECT rcDest, UINT32 unInterlaceMode, IMFSample** ppVideoOutFrame )
{
HRESULT hr = S_OK;
ID3D11VideoContext* pVideoContext = NULL;
ID3D11VideoProcessorInputView* pLeftInputView = NULL;
ID3D11VideoProcessorInputView* pRightInputView = NULL;
ID3D11VideoProcessorOutputView* pOutputView = NULL;
ID3D11Texture2D* pDXGIBackBuffer = NULL;
ID3D11RenderTargetView* pRTView = NULL;
IMFSample* pRTSample = NULL;
IMFMediaBuffer* pBuffer = NULL;
D3D11_VIDEO_PROCESSOR_CAPS vpCaps = {0};
LARGE_INTEGER lpcStart,lpcEnd;
do
{
if (!m_pDX11VideoDevice)
{
hr = m_pD3D11Device->QueryInterface(__uuidof(ID3D11VideoDevice), (void**)&m_pDX11VideoDevice);
if (FAILED(hr))
{
break;
}
}
hr = m_pD3DImmediateContext->QueryInterface(__uuidof( ID3D11VideoContext ), (void**)&pVideoContext);
if (FAILED(hr))
{
break;
}
// remember the original rectangles
RECT TRectOld = m_rcDstApp;
RECT SRectOld = m_rcSrcApp;
UpdateRectangles(&TRectOld, &SRectOld);
//Update destination rect with current client rect
m_rcDstApp = rcDest;
D3D11_TEXTURE2D_DESC surfaceDesc;
pLeftTexture2D->GetDesc(&surfaceDesc);
if (!m_pVideoProcessorEnum || !m_pVideoProcessor || m_imageWidthInPixels != surfaceDesc.Width || m_imageHeightInPixels != surfaceDesc.Height)
{
SafeRelease(m_pVideoProcessorEnum);
SafeRelease(m_pVideoProcessor);
m_imageWidthInPixels = surfaceDesc.Width;
m_imageHeightInPixels = surfaceDesc.Height;
D3D11_VIDEO_PROCESSOR_CONTENT_DESC ContentDesc;
ZeroMemory( &ContentDesc, sizeof( ContentDesc ) );
ContentDesc.InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
ContentDesc.InputWidth = surfaceDesc.Width;
ContentDesc.InputHeight = surfaceDesc.Height;
ContentDesc.OutputWidth = surfaceDesc.Width;
ContentDesc.OutputHeight = surfaceDesc.Height;
ContentDesc.Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL;
hr = m_pDX11VideoDevice->CreateVideoProcessorEnumerator(&ContentDesc, &m_pVideoProcessorEnum);
if (FAILED(hr))
{
break;
}
UINT uiFlags;
DXGI_FORMAT VP_Output_Format = DXGI_FORMAT_B8G8R8A8_UNORM;
hr = m_pVideoProcessorEnum->CheckVideoProcessorFormat(VP_Output_Format, &uiFlags);
if (FAILED(hr) || 0 == (uiFlags & D3D11_VIDEO_PROCESSOR_FORMAT_SUPPORT_OUTPUT))
{
hr = MF_E_UNSUPPORTED_D3D_TYPE;
break;
}
m_rcSrcApp.left = 0;
m_rcSrcApp.top = 0;
m_rcSrcApp.right = m_uiRealDisplayWidth;
m_rcSrcApp.bottom = m_uiRealDisplayHeight;
DWORD index;
hr = FindBOBProcessorIndex(&index); // GG This may not be needed. BOB is something to do with deinterlacing.
if (FAILED(hr))
{
break;
}
hr = m_pDX11VideoDevice->CreateVideoProcessor(m_pVideoProcessorEnum, index, &m_pVideoProcessor);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo)
{
hr = m_pVideoProcessorEnum->GetVideoProcessorCaps(&vpCaps);
if (FAILED(hr))
{
break;
}
if (vpCaps.FeatureCaps & D3D11_VIDEO_PROCESSOR_FEATURE_CAPS_STEREO)
{
m_bStereoEnabled = TRUE;
}
DXGI_MODE_DESC1 modeFilter = { 0 };
modeFilter.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
modeFilter.Width = surfaceDesc.Width;
modeFilter.Height = surfaceDesc.Height;
modeFilter.Stereo = m_bStereoEnabled;
DXGI_MODE_DESC1 matchedMode;
if (m_bFullScreenState)
{
hr = m_pDXGIOutput1->FindClosestMatchingMode1(&modeFilter, &matchedMode, m_pD3D11Device);
if (FAILED(hr))
{
break;
}
}
}
}
// now create the input and output media types - these need to reflect
// the src and destination rectangles that we have been given.
RECT TRect = m_rcDstApp;
RECT SRect = m_rcSrcApp;
UpdateRectangles(&TRect, &SRect);
const BOOL fDestRectChanged = !EqualRect(&TRect, &TRectOld);
if (!m_pSwapChain1 || fDestRectChanged)
{
hr = UpdateDXGISwapChain();
if (FAILED(hr))
{
break;
}
}
m_bCanProcessNextSample = FALSE;
// Get Backbuffer
hr = m_pSwapChain1->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&pDXGIBackBuffer);
if (FAILED(hr))
{
break;
}
// create the output media sample
hr = MFCreateSample(&pRTSample);
if (FAILED(hr))
{
break;
}
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 0, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
// GG For 3D - don't need.
if (m_b3DVideo && 0 != m_vp3DOutput)
{
SafeRelease(pBuffer);
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 1, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
}
QueryPerformanceCounter(&lpcStart);
QueryPerformanceCounter(&lpcEnd);
//
// Create Output View of Output Surfaces.
//
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC OutputViewDesc;
ZeroMemory( &OutputViewDesc, sizeof( OutputViewDesc ) );
if (m_b3DVideo && m_bStereoEnabled)
{
OutputViewDesc.ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2DARRAY;
}
else
{
OutputViewDesc.ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D;
}
OutputViewDesc.Texture2D.MipSlice = 0;
OutputViewDesc.Texture2DArray.MipSlice = 0;
OutputViewDesc.Texture2DArray.FirstArraySlice = 0;
if (m_b3DVideo && 0 != m_vp3DOutput)
{
OutputViewDesc.Texture2DArray.ArraySize = 2; // STEREO
}
QueryPerformanceCounter(&lpcStart);
hr = m_pDX11VideoDevice->CreateVideoProcessorOutputView(pDXGIBackBuffer, m_pVideoProcessorEnum, &OutputViewDesc, &pOutputView);
if (FAILED(hr))
{
break;
}
D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC InputLeftViewDesc;
ZeroMemory( &InputLeftViewDesc, sizeof( InputLeftViewDesc ) );
InputLeftViewDesc.FourCC = 0;
InputLeftViewDesc.ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D;
InputLeftViewDesc.Texture2D.MipSlice = 0;
InputLeftViewDesc.Texture2D.ArraySlice = dwLeftViewIndex;
hr = m_pDX11VideoDevice->CreateVideoProcessorInputView(pLeftTexture2D, m_pVideoProcessorEnum, &InputLeftViewDesc, &pLeftInputView);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo && MFVideo3DSampleFormat_MultiView == m_vp3DOutput && pRightTexture2D)
{
D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC InputRightViewDesc;
ZeroMemory( &InputRightViewDesc, sizeof( InputRightViewDesc ) );
InputRightViewDesc.FourCC = 0;
InputRightViewDesc.ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D;
InputRightViewDesc.Texture2D.MipSlice = 0;
InputRightViewDesc.Texture2D.ArraySlice = dwRightViewIndex;
hr = m_pDX11VideoDevice->CreateVideoProcessorInputView(pRightTexture2D, m_pVideoProcessorEnum, &InputRightViewDesc, &pRightInputView);
if (FAILED(hr))
{
break;
}
}
QueryPerformanceCounter(&lpcEnd);
QueryPerformanceCounter(&lpcStart);
SetVideoContextParameters(pVideoContext, &SRect, &TRect, unInterlaceMode);
// Enable/Disable Stereo
if (m_b3DVideo)
{
pVideoContext->VideoProcessorSetOutputStereoMode(m_pVideoProcessor, m_bStereoEnabled);
D3D11_VIDEO_PROCESSOR_STEREO_FORMAT vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_SEPARATE;
if (MFVideo3DSampleFormat_Packed_LeftRight == m_vp3DOutput)
{
vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_HORIZONTAL;
}
else if (MFVideo3DSampleFormat_Packed_TopBottom == m_vp3DOutput)
{
vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_VERTICAL;
}
pVideoContext->VideoProcessorSetStreamStereoFormat(m_pVideoProcessor,
0, m_bStereoEnabled, vpStereoFormat, TRUE, TRUE, D3D11_VIDEO_PROCESSOR_STEREO_FLIP_NONE, 0);
}
QueryPerformanceCounter(&lpcEnd);
QueryPerformanceCounter(&lpcStart);
D3D11_VIDEO_PROCESSOR_STREAM StreamData;
ZeroMemory( &StreamData, sizeof( StreamData ) );
StreamData.Enable = TRUE;
StreamData.OutputIndex = 0;
StreamData.InputFrameOrField = 0;
StreamData.PastFrames = 0;
StreamData.FutureFrames = 0;
StreamData.ppPastSurfaces = NULL;
StreamData.ppFutureSurfaces = NULL;
StreamData.pInputSurface = pLeftInputView;
StreamData.ppPastSurfacesRight = NULL;
StreamData.ppFutureSurfacesRight = NULL;
if (m_b3DVideo && MFVideo3DSampleFormat_MultiView == m_vp3DOutput && pRightTexture2D)
{
StreamData.pInputSurfaceRight = pRightInputView;
}
hr = pVideoContext->VideoProcessorBlt(m_pVideoProcessor, pOutputView, 0, 1, &StreamData );
if (FAILED(hr))
{
break;
}
QueryPerformanceCounter(&lpcEnd);
if (ppVideoOutFrame != NULL)
{
*ppVideoOutFrame = pRTSample;
(*ppVideoOutFrame)->AddRef();
}
}
while (FALSE);
SafeRelease(pBuffer);
SafeRelease(pRTSample);
SafeRelease(pDXGIBackBuffer);
SafeRelease(pOutputView);
SafeRelease(pLeftInputView);
SafeRelease(pRightInputView);
SafeRelease(pVideoContext);
return hr;
}
最后一点,文档指出:
Specifically, this sample shows how to:
- Decode the video using the Media Foundation APIs
- Render the decoded video using the DirectX 11 APIs
- Output the video stream to multi-monitor displays
除非通过一些我还没有偶然发现的 MF 魔法吟唱短语,否则我找不到任何可以解码的东西。但这不是一个障碍,因为我可以在前面安装一个 H.264 解码器 MFT 没问题。我只想澄清文档。
如有任何帮助,我们将不胜感激。谢谢!
There are 2 similar functions within Presenter.cpp that process frames but I cannot figure out what the difference is between them. ProcessFrameUsingD3D11()uses VideoProcessorBlt() to actually do the render.
这些函数不是渲染 - 它们是缩放视频帧的两种方法。缩放可以使用由渲染器的呈现器内部管理的随时可用的媒体基础转换来完成,或者缩放可以在 Direct3D 11 处理器的帮助下完成。实际上两者都是使用Direct3D 11,所以这两种方法彼此接近,只是渲染过程中的一个步骤。
I cannot find anything that does decoding unless by some MF magic chant phrase that I haven't stumbled across yet.
没有解码,StreamSink.cpp 中的接收器视频格式列表建议只列出未压缩的视频格式。渲染器呈现由 Direct3D 11 纹理携带的帧,这反过来又假定解码,尤其是。基于 DXVA2 的硬件解码器已经在渲染器输入上提供解码纹理。
我正在尝试使用 Microsoft 示例 DX11VideoRenderer 渲染视频,该示例位于:https://github.com/Microsoft/Windows-classic-samples/tree/master/Samples/DX11VideoRenderer 从我的广泛研究来看,使用带有硬件加速的 DirectX 11 似乎是最新的方法(最不可能被弃用)并提供最佳性能解决方案。
Presenter.cpp 中有 2 个类似的函数处理帧,但我无法弄清楚它们之间的区别。 ProcessFrameUsingD3D11()
使用 VideoProcessorBlt()
实际进行渲染。神秘之处在于 ProcessFrameUsingXVP()
不使用此功能,那么它实际上是如何进行渲染的呢?还是它完全在做其他事情?此外,我的实现似乎使用 ProcessFrameUsingXVP()
基于变量 m_useXVP
的值,该值默认设置为“1”。这是代码示例:
if (m_useXVP)
{
BOOL bInputFrameUsed = FALSE;
hr = ProcessFrameUsingXVP( pCurrentType, pSample, pTexture2D, rcDest, ppOutputSample, &bInputFrameUsed );
if (SUCCEEDED(hr) && !bInputFrameUsed)
{
*pbProcessAgain = TRUE;
}
}
else
{
hr = ProcessFrameUsingD3D11( pTexture2D, pEVTexture2D, dwViewIndex, dwEVViewIndex, rcDest, *punInterlaceMode, ppOutputSample );
LONGLONG hnsDuration = 0;
LONGLONG hnsTime = 0;
DWORD dwSampleFlags = 0;
if (ppOutputSample != NULL && *ppOutputSample != NULL)
{
if (SUCCEEDED(pSample->GetSampleDuration(&hnsDuration)))
{
(*ppOutputSample)->SetSampleDuration(hnsDuration);
}
if (SUCCEEDED(pSample->GetSampleTime(&hnsTime)))
{
(*ppOutputSample)->SetSampleTime(hnsTime);
}
if (SUCCEEDED(pSample->GetSampleFlags(&dwSampleFlags)))
{
(*ppOutputSample)->SetSampleFlags(dwSampleFlags);
}
}
}
设置m_useXVP
的原因对我来说也是一个谜,我在研究中找不到答案。它使用的注册表项在我的特定 Windows10 PC 上不存在,因此该值未被修改。
const TCHAR* lpcszInVP = TEXT("XVP");
const TCHAR* lpcszREGKEY = TEXT("SOFTWARE\Microsoft\Scrunch\CodecPack\MSDVD");
if(0 == RegOpenKeyEx(HKEY_CURRENT_USER, lpcszREGKEY, 0, KEY_READ, &hk))
{
dwData = 0;
cbData = sizeof(DWORD);
if (0 == RegQueryValueEx(hk, lpcszInVP, 0, &cbType, (LPBYTE)&dwData, &cbData))
{
m_useXVP = dwData;
}
}
因此,由于我的电脑没有此密钥,代码默认使用 ProcessFrameUsingXVP()
。这是定义:
HRESULT DX11VideoRenderer::CPresenter::ProcessFrameUsingXVP(IMFMediaType* pCurrentType, IMFSample* pVideoFrame, ID3D11Texture2D* pTexture2D, RECT rcDest, IMFSample** ppVideoOutFrame, BOOL* pbInputFrameUsed)
{
HRESULT hr = S_OK;
ID3D11VideoContext* pVideoContext = NULL;
ID3D11Texture2D* pDXGIBackBuffer = NULL;
IMFSample* pRTSample = NULL;
IMFMediaBuffer* pBuffer = NULL;
IMFAttributes* pAttributes = NULL;
D3D11_VIDEO_PROCESSOR_CAPS vpCaps = { 0 };
do
{
if (!m_pDX11VideoDevice)
{
hr = m_pD3D11Device->QueryInterface(__uuidof(ID3D11VideoDevice), (void**)&m_pDX11VideoDevice);
if (FAILED(hr))
{
break;
}
}
hr = m_pD3DImmediateContext->QueryInterface(__uuidof(ID3D11VideoContext), (void**)&pVideoContext);
if (FAILED(hr))
{
break;
}
// remember the original rectangles
RECT TRectOld = m_rcDstApp;
RECT SRectOld = m_rcSrcApp;
UpdateRectangles(&TRectOld, &SRectOld);
//Update destination rect with current client rect
m_rcDstApp = rcDest;
D3D11_TEXTURE2D_DESC surfaceDesc;
pTexture2D->GetDesc(&surfaceDesc);
BOOL fTypeChanged = FALSE;
if (!m_pVideoProcessorEnum || !m_pSwapChain1 || m_imageWidthInPixels != surfaceDesc.Width || m_imageHeightInPixels != surfaceDesc.Height)
{
SafeRelease(m_pVideoProcessorEnum);
SafeRelease(m_pSwapChain1);
m_imageWidthInPixels = surfaceDesc.Width;
m_imageHeightInPixels = surfaceDesc.Height;
fTypeChanged = TRUE;
D3D11_VIDEO_PROCESSOR_CONTENT_DESC ContentDesc;
ZeroMemory(&ContentDesc, sizeof(ContentDesc));
ContentDesc.InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
ContentDesc.InputWidth = surfaceDesc.Width;
ContentDesc.InputHeight = surfaceDesc.Height;
ContentDesc.OutputWidth = surfaceDesc.Width;
ContentDesc.OutputHeight = surfaceDesc.Height;
ContentDesc.Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL;
hr = m_pDX11VideoDevice->CreateVideoProcessorEnumerator(&ContentDesc, &m_pVideoProcessorEnum);
if (FAILED(hr))
{
break;
}
m_rcSrcApp.left = 0;
m_rcSrcApp.top = 0;
m_rcSrcApp.right = m_uiRealDisplayWidth;
m_rcSrcApp.bottom = m_uiRealDisplayHeight;
if (m_b3DVideo)
{
hr = m_pVideoProcessorEnum->GetVideoProcessorCaps(&vpCaps);
if (FAILED(hr))
{
break;
}
if (vpCaps.FeatureCaps & D3D11_VIDEO_PROCESSOR_FEATURE_CAPS_STEREO)
{
m_bStereoEnabled = TRUE;
}
DXGI_MODE_DESC1 modeFilter = { 0 };
modeFilter.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
modeFilter.Width = surfaceDesc.Width;
modeFilter.Height = surfaceDesc.Height;
modeFilter.Stereo = m_bStereoEnabled;
DXGI_MODE_DESC1 matchedMode;
if (m_bFullScreenState)
{
hr = m_pDXGIOutput1->FindClosestMatchingMode1(&modeFilter, &matchedMode, m_pD3D11Device);
if (FAILED(hr))
{
break;
}
}
hr = m_pXVP->GetAttributes(&pAttributes);
if (FAILED(hr))
{
break;
}
hr = pAttributes->SetUINT32(MF_ENABLE_3DVIDEO_OUTPUT, (0 != m_vp3DOutput) ? MF3DVideoOutputType_Stereo : MF3DVideoOutputType_BaseView);
if (FAILED(hr))
{
break;
}
}
}
// now create the input and output media types - these need to reflect
// the src and destination rectangles that we have been given.
RECT TRect = m_rcDstApp;
RECT SRect = m_rcSrcApp;
UpdateRectangles(&TRect, &SRect);
const BOOL fDestRectChanged = !EqualRect(&TRect, &TRectOld);
const BOOL fSrcRectChanged = !EqualRect(&SRect, &SRectOld);
if (!m_pSwapChain1 || fDestRectChanged)
{
hr = UpdateDXGISwapChain();
if (FAILED(hr))
{
break;
}
}
if (fTypeChanged || fSrcRectChanged || fDestRectChanged)
{
// stop streaming to avoid multiple start\stop calls internally in XVP
hr = m_pXVP->ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, 0);
if (FAILED(hr))
{
break;
}
if (fTypeChanged)
{
hr = SetXVPOutputMediaType(pCurrentType, DXGI_FORMAT_B8G8R8A8_UNORM);
if (FAILED(hr))
{
break;
}
}
if (fDestRectChanged)
{
hr = m_pXVPControl->SetDestinationRectangle(&m_rcDstApp);
if (FAILED(hr))
{
break;
}
}
if (fSrcRectChanged)
{
hr = m_pXVPControl->SetSourceRectangle(&SRect);
if (FAILED(hr))
{
break;
}
}
hr = m_pXVP->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
if (FAILED(hr))
{
break;
}
}
m_bCanProcessNextSample = FALSE;
// Get Backbuffer
hr = m_pSwapChain1->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&pDXGIBackBuffer);
if (FAILED(hr))
{
break;
}
// create the output media sample
hr = MFCreateSample(&pRTSample);
if (FAILED(hr))
{
break;
}
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 0, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo && 0 != m_vp3DOutput)
{
SafeRelease(pBuffer);
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 1, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
}
DWORD dwStatus = 0;
MFT_OUTPUT_DATA_BUFFER outputDataBuffer = {};
outputDataBuffer.pSample = pRTSample;
hr = m_pXVP->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
{
//call process input on the MFT to deliver the YUV video sample
// and the call process output to extract of newly processed frame
hr = m_pXVP->ProcessInput(0, pVideoFrame, 0);
if (FAILED(hr))
{
break;
}
*pbInputFrameUsed = TRUE;
hr = m_pXVP->ProcessOutput(0, 1, &outputDataBuffer, &dwStatus);
if (FAILED(hr))
{
break;
}
}
else
{
*pbInputFrameUsed = FALSE;
}
if (ppVideoOutFrame != NULL)
{
*ppVideoOutFrame = pRTSample;
(*ppVideoOutFrame)->AddRef();
}
} while (FALSE);
SafeRelease(pAttributes);
SafeRelease(pBuffer);
SafeRelease(pRTSample);
SafeRelease(pDXGIBackBuffer);
SafeRelease(pVideoContext);
return hr;
}
这里是 ProcessFrameUsingD3D11()
的定义:
HRESULT DX11VideoRenderer::CPresenter::ProcessFrameUsingD3D11( ID3D11Texture2D* pLeftTexture2D, ID3D11Texture2D* pRightTexture2D, UINT dwLeftViewIndex, UINT dwRightViewIndex,
RECT rcDest, UINT32 unInterlaceMode, IMFSample** ppVideoOutFrame )
{
HRESULT hr = S_OK;
ID3D11VideoContext* pVideoContext = NULL;
ID3D11VideoProcessorInputView* pLeftInputView = NULL;
ID3D11VideoProcessorInputView* pRightInputView = NULL;
ID3D11VideoProcessorOutputView* pOutputView = NULL;
ID3D11Texture2D* pDXGIBackBuffer = NULL;
ID3D11RenderTargetView* pRTView = NULL;
IMFSample* pRTSample = NULL;
IMFMediaBuffer* pBuffer = NULL;
D3D11_VIDEO_PROCESSOR_CAPS vpCaps = {0};
LARGE_INTEGER lpcStart,lpcEnd;
do
{
if (!m_pDX11VideoDevice)
{
hr = m_pD3D11Device->QueryInterface(__uuidof(ID3D11VideoDevice), (void**)&m_pDX11VideoDevice);
if (FAILED(hr))
{
break;
}
}
hr = m_pD3DImmediateContext->QueryInterface(__uuidof( ID3D11VideoContext ), (void**)&pVideoContext);
if (FAILED(hr))
{
break;
}
// remember the original rectangles
RECT TRectOld = m_rcDstApp;
RECT SRectOld = m_rcSrcApp;
UpdateRectangles(&TRectOld, &SRectOld);
//Update destination rect with current client rect
m_rcDstApp = rcDest;
D3D11_TEXTURE2D_DESC surfaceDesc;
pLeftTexture2D->GetDesc(&surfaceDesc);
if (!m_pVideoProcessorEnum || !m_pVideoProcessor || m_imageWidthInPixels != surfaceDesc.Width || m_imageHeightInPixels != surfaceDesc.Height)
{
SafeRelease(m_pVideoProcessorEnum);
SafeRelease(m_pVideoProcessor);
m_imageWidthInPixels = surfaceDesc.Width;
m_imageHeightInPixels = surfaceDesc.Height;
D3D11_VIDEO_PROCESSOR_CONTENT_DESC ContentDesc;
ZeroMemory( &ContentDesc, sizeof( ContentDesc ) );
ContentDesc.InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
ContentDesc.InputWidth = surfaceDesc.Width;
ContentDesc.InputHeight = surfaceDesc.Height;
ContentDesc.OutputWidth = surfaceDesc.Width;
ContentDesc.OutputHeight = surfaceDesc.Height;
ContentDesc.Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL;
hr = m_pDX11VideoDevice->CreateVideoProcessorEnumerator(&ContentDesc, &m_pVideoProcessorEnum);
if (FAILED(hr))
{
break;
}
UINT uiFlags;
DXGI_FORMAT VP_Output_Format = DXGI_FORMAT_B8G8R8A8_UNORM;
hr = m_pVideoProcessorEnum->CheckVideoProcessorFormat(VP_Output_Format, &uiFlags);
if (FAILED(hr) || 0 == (uiFlags & D3D11_VIDEO_PROCESSOR_FORMAT_SUPPORT_OUTPUT))
{
hr = MF_E_UNSUPPORTED_D3D_TYPE;
break;
}
m_rcSrcApp.left = 0;
m_rcSrcApp.top = 0;
m_rcSrcApp.right = m_uiRealDisplayWidth;
m_rcSrcApp.bottom = m_uiRealDisplayHeight;
DWORD index;
hr = FindBOBProcessorIndex(&index); // GG This may not be needed. BOB is something to do with deinterlacing.
if (FAILED(hr))
{
break;
}
hr = m_pDX11VideoDevice->CreateVideoProcessor(m_pVideoProcessorEnum, index, &m_pVideoProcessor);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo)
{
hr = m_pVideoProcessorEnum->GetVideoProcessorCaps(&vpCaps);
if (FAILED(hr))
{
break;
}
if (vpCaps.FeatureCaps & D3D11_VIDEO_PROCESSOR_FEATURE_CAPS_STEREO)
{
m_bStereoEnabled = TRUE;
}
DXGI_MODE_DESC1 modeFilter = { 0 };
modeFilter.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
modeFilter.Width = surfaceDesc.Width;
modeFilter.Height = surfaceDesc.Height;
modeFilter.Stereo = m_bStereoEnabled;
DXGI_MODE_DESC1 matchedMode;
if (m_bFullScreenState)
{
hr = m_pDXGIOutput1->FindClosestMatchingMode1(&modeFilter, &matchedMode, m_pD3D11Device);
if (FAILED(hr))
{
break;
}
}
}
}
// now create the input and output media types - these need to reflect
// the src and destination rectangles that we have been given.
RECT TRect = m_rcDstApp;
RECT SRect = m_rcSrcApp;
UpdateRectangles(&TRect, &SRect);
const BOOL fDestRectChanged = !EqualRect(&TRect, &TRectOld);
if (!m_pSwapChain1 || fDestRectChanged)
{
hr = UpdateDXGISwapChain();
if (FAILED(hr))
{
break;
}
}
m_bCanProcessNextSample = FALSE;
// Get Backbuffer
hr = m_pSwapChain1->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&pDXGIBackBuffer);
if (FAILED(hr))
{
break;
}
// create the output media sample
hr = MFCreateSample(&pRTSample);
if (FAILED(hr))
{
break;
}
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 0, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
// GG For 3D - don't need.
if (m_b3DVideo && 0 != m_vp3DOutput)
{
SafeRelease(pBuffer);
hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pDXGIBackBuffer, 1, FALSE, &pBuffer);
if (FAILED(hr))
{
break;
}
hr = pRTSample->AddBuffer(pBuffer);
if (FAILED(hr))
{
break;
}
}
QueryPerformanceCounter(&lpcStart);
QueryPerformanceCounter(&lpcEnd);
//
// Create Output View of Output Surfaces.
//
D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC OutputViewDesc;
ZeroMemory( &OutputViewDesc, sizeof( OutputViewDesc ) );
if (m_b3DVideo && m_bStereoEnabled)
{
OutputViewDesc.ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2DARRAY;
}
else
{
OutputViewDesc.ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D;
}
OutputViewDesc.Texture2D.MipSlice = 0;
OutputViewDesc.Texture2DArray.MipSlice = 0;
OutputViewDesc.Texture2DArray.FirstArraySlice = 0;
if (m_b3DVideo && 0 != m_vp3DOutput)
{
OutputViewDesc.Texture2DArray.ArraySize = 2; // STEREO
}
QueryPerformanceCounter(&lpcStart);
hr = m_pDX11VideoDevice->CreateVideoProcessorOutputView(pDXGIBackBuffer, m_pVideoProcessorEnum, &OutputViewDesc, &pOutputView);
if (FAILED(hr))
{
break;
}
D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC InputLeftViewDesc;
ZeroMemory( &InputLeftViewDesc, sizeof( InputLeftViewDesc ) );
InputLeftViewDesc.FourCC = 0;
InputLeftViewDesc.ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D;
InputLeftViewDesc.Texture2D.MipSlice = 0;
InputLeftViewDesc.Texture2D.ArraySlice = dwLeftViewIndex;
hr = m_pDX11VideoDevice->CreateVideoProcessorInputView(pLeftTexture2D, m_pVideoProcessorEnum, &InputLeftViewDesc, &pLeftInputView);
if (FAILED(hr))
{
break;
}
if (m_b3DVideo && MFVideo3DSampleFormat_MultiView == m_vp3DOutput && pRightTexture2D)
{
D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC InputRightViewDesc;
ZeroMemory( &InputRightViewDesc, sizeof( InputRightViewDesc ) );
InputRightViewDesc.FourCC = 0;
InputRightViewDesc.ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D;
InputRightViewDesc.Texture2D.MipSlice = 0;
InputRightViewDesc.Texture2D.ArraySlice = dwRightViewIndex;
hr = m_pDX11VideoDevice->CreateVideoProcessorInputView(pRightTexture2D, m_pVideoProcessorEnum, &InputRightViewDesc, &pRightInputView);
if (FAILED(hr))
{
break;
}
}
QueryPerformanceCounter(&lpcEnd);
QueryPerformanceCounter(&lpcStart);
SetVideoContextParameters(pVideoContext, &SRect, &TRect, unInterlaceMode);
// Enable/Disable Stereo
if (m_b3DVideo)
{
pVideoContext->VideoProcessorSetOutputStereoMode(m_pVideoProcessor, m_bStereoEnabled);
D3D11_VIDEO_PROCESSOR_STEREO_FORMAT vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_SEPARATE;
if (MFVideo3DSampleFormat_Packed_LeftRight == m_vp3DOutput)
{
vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_HORIZONTAL;
}
else if (MFVideo3DSampleFormat_Packed_TopBottom == m_vp3DOutput)
{
vpStereoFormat = D3D11_VIDEO_PROCESSOR_STEREO_FORMAT_VERTICAL;
}
pVideoContext->VideoProcessorSetStreamStereoFormat(m_pVideoProcessor,
0, m_bStereoEnabled, vpStereoFormat, TRUE, TRUE, D3D11_VIDEO_PROCESSOR_STEREO_FLIP_NONE, 0);
}
QueryPerformanceCounter(&lpcEnd);
QueryPerformanceCounter(&lpcStart);
D3D11_VIDEO_PROCESSOR_STREAM StreamData;
ZeroMemory( &StreamData, sizeof( StreamData ) );
StreamData.Enable = TRUE;
StreamData.OutputIndex = 0;
StreamData.InputFrameOrField = 0;
StreamData.PastFrames = 0;
StreamData.FutureFrames = 0;
StreamData.ppPastSurfaces = NULL;
StreamData.ppFutureSurfaces = NULL;
StreamData.pInputSurface = pLeftInputView;
StreamData.ppPastSurfacesRight = NULL;
StreamData.ppFutureSurfacesRight = NULL;
if (m_b3DVideo && MFVideo3DSampleFormat_MultiView == m_vp3DOutput && pRightTexture2D)
{
StreamData.pInputSurfaceRight = pRightInputView;
}
hr = pVideoContext->VideoProcessorBlt(m_pVideoProcessor, pOutputView, 0, 1, &StreamData );
if (FAILED(hr))
{
break;
}
QueryPerformanceCounter(&lpcEnd);
if (ppVideoOutFrame != NULL)
{
*ppVideoOutFrame = pRTSample;
(*ppVideoOutFrame)->AddRef();
}
}
while (FALSE);
SafeRelease(pBuffer);
SafeRelease(pRTSample);
SafeRelease(pDXGIBackBuffer);
SafeRelease(pOutputView);
SafeRelease(pLeftInputView);
SafeRelease(pRightInputView);
SafeRelease(pVideoContext);
return hr;
}
最后一点,文档指出:
Specifically, this sample shows how to:
- Decode the video using the Media Foundation APIs
- Render the decoded video using the DirectX 11 APIs
- Output the video stream to multi-monitor displays
除非通过一些我还没有偶然发现的 MF 魔法吟唱短语,否则我找不到任何可以解码的东西。但这不是一个障碍,因为我可以在前面安装一个 H.264 解码器 MFT 没问题。我只想澄清文档。
如有任何帮助,我们将不胜感激。谢谢!
There are 2 similar functions within Presenter.cpp that process frames but I cannot figure out what the difference is between them. ProcessFrameUsingD3D11()uses VideoProcessorBlt() to actually do the render.
这些函数不是渲染 - 它们是缩放视频帧的两种方法。缩放可以使用由渲染器的呈现器内部管理的随时可用的媒体基础转换来完成,或者缩放可以在 Direct3D 11 处理器的帮助下完成。实际上两者都是使用Direct3D 11,所以这两种方法彼此接近,只是渲染过程中的一个步骤。
I cannot find anything that does decoding unless by some MF magic chant phrase that I haven't stumbled across yet.
没有解码,StreamSink.cpp 中的接收器视频格式列表建议只列出未压缩的视频格式。渲染器呈现由 Direct3D 11 纹理携带的帧,这反过来又假定解码,尤其是。基于 DXVA2 的硬件解码器已经在渲染器输入上提供解码纹理。