IMFTransform::SetInputType() 'MF_E_TRANSFORM_TYPE_NOT_SET '

IMFTransform::SetInputType() 'MF_E_TRANSFORM_TYPE_NOT_SET '

我正在尝试设置编码器 IMFTransform。在这种情况下,它恰好是 NVENC H264 Encoder。但是,IMFTransform::SetInputType() 不适用于 ConfigureEncoder(),并返回 MF_E_TRANSFORM_TYPE_NOT_SET。尽管代码几乎相同,但 ConfigureColorConversion() 函数中不会出现此行为。我什么都试过了,一点用都没有。

Full project link

// libs
#pragma comment(lib, "D3D11.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "evr.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "Winmm.lib")
// std
#include <iostream>
#include <string>
// Windows
#include <windows.h>
#include <atlbase.h>
// DirectX
#include <d3d11.h>
// Media Foundation
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <Codecapi.h>
// Custom
#include "DDAImpl.h"
#include "Defs.h"

// Constants
constexpr UINT ENCODE_WIDTH = 1920;
constexpr UINT ENCODE_HEIGHT = 1080;

CComPtr<IMFVideoSampleAllocatorEx> allocator;

CComPtr<IMFAttributes> transformAttrs;
CComQIPtr<IMFMediaEventGenerator> eventGen;

HRESULT InitMF()
{
    HRESULT hr = S_OK;
    if (FAILED(hr = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED)))
        return hr;
    if (FAILED(hr = MFStartup(MF_VERSION)))
        return hr;

    std::cout << "- Initialized Media Foundation" << std::endl;

    return hr;
}

HRESULT InitDXGI(CComPtr<ID3D11Device>& outDevice, CComPtr<ID3D11DeviceContext>& inContext)
{
    HRESULT hr = S_OK;

    if (FAILED(hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT |
        D3D11_CREATE_DEVICE_DEBUG, nullptr, 0, D3D11_SDK_VERSION, &outDevice, nullptr, &inContext)))
        return hr;

    // Probably not necessary in this application, but maybe the MFT requires it?
    CComQIPtr<ID3D10Multithread> mt(outDevice);
    mt->SetMultithreadProtected(TRUE);

    std::cout << "- Initialized DXGI" << std::endl;

    return hr;
}

HRESULT GetEncoder(const CComPtr<ID3D11Device>& inDevice, CComPtr<IMFTransform>& outTransform, CComPtr<IMFActivate>& outActivate)
{
    HRESULT hr = S_OK;
    // Find the encoder
    CComHeapPtr<IMFActivate*> activateRaw;
    UINT32 activateCount = 0;

    // Input & output types
    MFT_REGISTER_TYPE_INFO inInfo = { MFMediaType_Video, MFVideoFormat_NV12 };
    MFT_REGISTER_TYPE_INFO outInfo = { MFMediaType_Video, MFVideoFormat_H264 };

    // Query for the adapter LUID to get a matching encoder for the device.
    CComQIPtr<IDXGIDevice> dxgiDevice(inDevice);

    CComPtr<IDXGIAdapter> adapter;
    if (FAILED(hr = dxgiDevice->GetAdapter(&adapter)))
        return hr;

    DXGI_ADAPTER_DESC adapterDesc;
    if (FAILED(hr = adapter->GetDesc(&adapterDesc)))
        return hr;

    CComPtr<IMFAttributes> enumAttrs;
    if (FAILED(hr = MFCreateAttributes(&enumAttrs, 1)))
        return hr;

    if (FAILED(hr = enumAttrs->SetBlob(MFT_ENUM_ADAPTER_LUID, (BYTE*)&adapterDesc.AdapterLuid, sizeof(LUID))))
        return hr;

    if (FAILED(hr = MFTEnum2(MFT_CATEGORY_VIDEO_ENCODER, MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER, &inInfo, &outInfo,
        enumAttrs, &activateRaw, &activateCount)))
        return hr;

    // Choose the first returned encoder
    outActivate = activateRaw[0];

    // Memory management - this is wrong
    /*for (UINT32 i = 0; i < activateCount; i++)
        activateRaw[i]->Release();*/

    // Activate
    if (FAILED(hr = outActivate->ActivateObject(IID_PPV_ARGS(&outTransform))))
        return hr;

    // Get attributes
    if (FAILED(hr = outTransform->GetAttributes(&transformAttrs)))
        return hr;

    std::cout << "- GetEncoder() Found " << activateCount << " encoders" << std::endl;

    return hr;
}

HRESULT ConfigureEncoder(CComPtr<IMFTransform>& inTransform, CComPtr<IMFDXGIDeviceManager>& inDeviceManager, DWORD inInputStreamID,
    DWORD outputStreamID
)
{
    HRESULT hr = S_OK;

    // Sets or clears the Direct3D Device Manager for DirectX Video Accereration (DXVA).
    if (FAILED(hr = inTransform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(inDeviceManager.p))))
        return hr;

    // Create output type
    CComPtr<IMFMediaType> outputType;
    if (FAILED(hr = MFCreateMediaType(&outputType)))
        return hr;

    // Set output type
    if (FAILED(hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
        return hr;
    if (FAILED(hr = outputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264)))
        return hr;
    if (FAILED(hr = outputType->SetUINT32(MF_MT_AVG_BITRATE, 30000000)))
        return hr;
    if (FAILED(hr = MFSetAttributeSize(outputType, MF_MT_FRAME_SIZE, ENCODE_WIDTH, ENCODE_HEIGHT)))
        return hr;
    if (FAILED(hr = MFSetAttributeRatio(outputType, MF_MT_FRAME_RATE, 60, 1)))
        return hr;
    if (FAILED(hr = outputType->SetUINT32(MF_MT_INTERLACE_MODE, 2)))
        return hr;
    if (FAILED(hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, true)))
        return hr;

    // Set output type
    if (FAILED(hr = inTransform->SetOutputType(outputStreamID, outputType, 0)))
        return hr;

    // Input type, I have no idea how to do this
    CComPtr<IMFMediaType> inputType;
    /*if (FAILED(hr = MFCreateMediaType(&inputType)))
        return hr;*/

    if (FAILED(hr = inTransform->GetInputAvailableType(inInputStreamID, 0, &inputType)))
        return hr;

    // Input type settings
    if (FAILED(hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
        return hr;
    if (FAILED(hr = inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12)))
        return hr;
    if (FAILED(hr = MFSetAttributeSize(inputType, MF_MT_FRAME_SIZE, ENCODE_WIDTH, ENCODE_HEIGHT)))
        return hr;
    if (FAILED(hr = MFSetAttributeRatio(inputType, MF_MT_FRAME_RATE, 60, 1)))
        return hr;

    // Set input type
    if (FAILED(hr = inTransform->SetInputType(inInputStreamID, inputType, 0)))
        return hr;

    std::cout << "- Set encoder configuration" << std::endl;

    DWORD flags;
    if (FAILED(hr = inTransform->GetInputStatus(0, &flags)))
        return hr;
}

HRESULT ConfigureColorConversion(IMFTransform* m_pXVP)
{
    HRESULT hr = S_OK;

    CComPtr<IMFMediaType> inputType;
    if (FAILED(hr = MFCreateMediaType(&inputType)))
        return hr;
    if (FAILED(hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
        return hr;
    if (FAILED(hr = inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32)))
        return hr;
    if (FAILED(hr = inputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 1)))
        return hr;
    if (FAILED(hr = MFSetAttributeSize(inputType, MF_MT_FRAME_SIZE, 3840, 2160)))
        return hr;
    if (FAILED(hr = MFSetAttributeRatio(inputType, MF_MT_FRAME_RATE, 1, 1)))
        return hr;

    if (FAILED(hr = m_pXVP->SetInputType(0, inputType, 0)))
        return hr;

    CComPtr<IMFMediaType> outputType;
    if (FAILED(hr = MFCreateMediaType(&outputType)))
        return hr;
    if (FAILED(hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
        return hr;
    if (FAILED(hr = outputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12)))
        return hr;
    if (FAILED(hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 1)))
        return hr;
    if (FAILED(hr = MFSetAttributeSize(outputType, MF_MT_FRAME_SIZE, 3840, 2160)))
        return hr;

    if (FAILED(hr = m_pXVP->SetOutputType(0, outputType, 0)))
        return hr;

    return hr;
}

HRESULT ColorConvert(IMFTransform* inTransform, ID3D11Texture2D* inTexture, IMFSample** pSampleOut)
{
    HRESULT hr = S_OK;

    CD3D11_TEXTURE2D_DESC Desc;
    inTexture->GetDesc(&Desc);
    CComPtr<ID3D11Device> Device;
    inTexture->GetDevice(&Device);
    CD3D11_TEXTURE2D_DESC CopyDesc(Desc.Format, Desc.Width, Desc.Height, 1, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET);
    CComPtr<ID3D11Texture2D> CopyTexture;
    Device->CreateTexture2D(&CopyDesc, nullptr, &CopyTexture);
    CComPtr<ID3D11DeviceContext> DeviceContext;
    Device->GetImmediateContext(&DeviceContext);
    DeviceContext->CopyResource(CopyTexture, inTexture);
    inTexture = CopyTexture;

    // Create buffer
    CComPtr<IMFMediaBuffer> inputBuffer;
    if (FAILED(hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), inTexture, 0, false, &inputBuffer)))
        return hr;

    // Create sample
    CComPtr<IMFSample> inputSample;
    if (FAILED(hr = MFCreateSample(&inputSample)))
        return hr;
    if (FAILED(hr = inputSample->AddBuffer(inputBuffer)))
        return hr;

    // Set input sample times
    if (FAILED(hr = inputSample->SetSampleTime(0)))
        return hr;
    if (FAILED(hr = inputSample->SetSampleDuration(1)))
        return hr;

    // Process input
    if (FAILED(hr = inTransform->ProcessInput(0, inputSample, 0)))
        return hr;

    // Process output
    DWORD status;
    MFT_OUTPUT_DATA_BUFFER outputBuffer;
    outputBuffer.pSample = nullptr;
    outputBuffer.pEvents = nullptr;
    outputBuffer.dwStreamID = 0;
    outputBuffer.dwStatus = 0;

    MFT_OUTPUT_STREAM_INFO mftStreamInfo;
    ZeroMemory(&mftStreamInfo, sizeof(MFT_OUTPUT_STREAM_INFO));

    if (FAILED(hr = inTransform->GetOutputStreamInfo(0, &mftStreamInfo)))
        return hr;

    ATLASSERT(mftStreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES);

    if (FAILED(hr = inTransform->ProcessOutput(0, 1, &outputBuffer, &status)))
        return hr;

    *pSampleOut = outputBuffer.pSample;

    // Test output to file
    IMFMediaBuffer* buffer;
    if (FAILED(hr = outputBuffer.pSample->ConvertToContiguousBuffer(&buffer)))
        return hr;

    unsigned char* data;
    DWORD length;
    if (FAILED(hr = buffer->GetCurrentLength(&length)))
        return hr;

    if (FAILED(hr = buffer->Lock(&data, nullptr, &length)))
        return hr;

    std::ofstream fout;
    fout.open("raw_pixels", std::ios::binary | std::ios::out);
    fout.write((char*)data, length);
    fout.close();

    return hr;
}

HRESULT Encode(IMFTransform* inTransform, IMFSample* inpSample)
{
    HRESULT hr = S_OK;

    // Process input
    if (FAILED(hr = inTransform->ProcessInput(0, inpSample, 0)))
        return hr;

    // Process output
    DWORD status;
    MFT_OUTPUT_DATA_BUFFER outputBuffer;
    outputBuffer.pSample = nullptr;
    outputBuffer.pEvents = nullptr;
    outputBuffer.dwStreamID = 0;
    outputBuffer.dwStatus = 0;

    MFT_OUTPUT_STREAM_INFO mftStreamInfo;
    ZeroMemory(&mftStreamInfo, sizeof(MFT_OUTPUT_STREAM_INFO));

    if (FAILED(hr = inTransform->GetOutputStreamInfo(0, &mftStreamInfo)))
        return hr;

    ATLASSERT(mftStreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES);

    if (FAILED(hr = inTransform->ProcessOutput(0, 1, &outputBuffer, &status)))
        return hr;
}

int main()
{
    HRESULT hr;

    if (FAILED(hr = InitMF()))
        return hr;

    // Initialize DXGI
    CComPtr<ID3D11Device> device;
    CComPtr<ID3D11DeviceContext> context;
    if (FAILED(hr = InitDXGI(device, context)))
        return hr;

    // Create device manager
    CComPtr<IMFDXGIDeviceManager> deviceManager;
    UINT resetToken;
    if (FAILED(hr = MFCreateDXGIDeviceManager(&resetToken, &deviceManager)))
        return hr;

    // https://docs.microsoft.com/en-us/windows/win32/api/dxva2api/nf-dxva2api-idirect3ddevicemanager9-resetdevice
    // When you first create the Direct3D device manager, call this method with a pointer to the Direct3D device.
    if (FAILED(hr = deviceManager->ResetDevice(device, resetToken)))
        return hr;

    // Get encoder
    CComPtr<IMFTransform> transform;
    CComPtr<IMFActivate> activate;
    if (FAILED(hr = GetEncoder(device, transform, activate)))
        return hr;

    // Get the name of the encoder
    CComHeapPtr<WCHAR> friendlyName;
    UINT32 friendlyNameLength;
    if (FAILED(hr = activate->GetAllocatedString(MFT_FRIENDLY_NAME_Attribute, &friendlyName, &friendlyNameLength)))
        return hr;
    std::wcout << "- Selected encoder: " << static_cast<WCHAR const*>(friendlyName) << std::endl;

    // Unlock the transform for async use and get event generator
    if (FAILED(hr = transformAttrs->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, true)))
        return hr;

    // Get Stream IDs
    DWORD inputStreamID, outputStreamID;
    hr = transform->GetStreamIDs(1, &inputStreamID, 1, &outputStreamID);
    if (hr == E_NOTIMPL) // Doesn't mean failed, see remarks
    {                    // https://docs.microsoft.com/en-us/windows/win32/api/mftransform/nf-mftransform-imftransform-getstreamids
        inputStreamID = 0;
        outputStreamID = 0;
        hr = S_OK;
    }

    if (FAILED(hr))
        return hr;

    if (FAILED(hr = ConfigureEncoder(transform, deviceManager, inputStreamID, outputStreamID)))
        return hr;

    eventGen = transform;

    // Create DDAImpl Class
    DDAImpl d(device, context);
    if (FAILED(hr = d.Init()))
        return hr;

    // Init color conversion-related variables
    IMFTransform* m_pXVP;
    if (FAILED(hr = CoCreateInstance(CLSID_VideoProcessorMFT, nullptr, CLSCTX_INPROC_SERVER,
        IID_IMFTransform, (void**)&m_pXVP)))
        return hr;

    if (FAILED(hr = m_pXVP->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(deviceManager.p))))
        return hr;

    if (FAILED(hr = ConfigureColorConversion(m_pXVP)))
        return hr;

    if (FAILED(hr = transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL)))
        return hr;
    if (FAILED(hr = transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL)))
        return hr;
    if (FAILED(hr = transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL)))
        return hr;

    // Capture loop
    const int nFrames = 60;
    const int WAIT_BASE = 17;
    int capturedFrames = 0;
    LARGE_INTEGER start = { 0 };
    LARGE_INTEGER end = { 0 };
    LARGE_INTEGER interval = { 0 };
    LARGE_INTEGER freq = { 0 };
    int wait = WAIT_BASE;

    // Failure count from Capture API
    UINT failCount = 0;

    ID3D11Texture2D* pDupTex2D;

    QueryPerformanceFrequency(&freq);

    // Reset waiting time for the next screen capture attempt
#define RESET_WAIT_TIME(start, end, interval, freq)         \
    QueryPerformanceCounter(&end);                          \
    interval.QuadPart = end.QuadPart - start.QuadPart;      \
    MICROSEC_TIME(interval, freq);                          \
    wait = (int)(WAIT_BASE - (interval.QuadPart * 1000));

    // Run capture loop
    do
    {
        // get start timestamp. 
        // use this to adjust the waiting period in each capture attempt to approximately attempt 60 captures in a second
        QueryPerformanceCounter(&start);
        // Get a frame from DDA
        if (FAILED(hr = d.GetCapturedFrame(&pDupTex2D, wait))) // Release after preproc
            failCount++;

        if (hr == DXGI_ERROR_WAIT_TIMEOUT)
        {
            // retry if there was no new update to the screen during our specific timeout interval
            // reset our waiting time
            RESET_WAIT_TIME(start, end, interval, freq);
            continue;
        }
        else
        {
            if (FAILED(hr))
            {
                // Re-try with a new DDA object
                printf("Capture failed with error 0x%08x. Re-create DDA and try again.\n", hr);
                __debugbreak();
                /*Demo.Cleanup();
                hr = Demo.Init();*/
                if (FAILED(hr))
                {
                    // Could not initialize DDA, bail out/
                    printf("Failed to Init DDDemo. return error 0x%08x\n", hr);
                    return -1;
                }
                RESET_WAIT_TIME(start, end, interval, freq);
                QueryPerformanceCounter(&start);
                // Get a frame from DDA
                //Demo.Capture(wait);
            }
            RESET_WAIT_TIME(start, end, interval, freq);

            // Color conversion for encoding
            // [Insert preprocessing code here]
            IMFSample* nv12sample = nullptr;
            if (FAILED(hr = ColorConvert(m_pXVP, pDupTex2D, &nv12sample)))
                return hr;

            // Encode
            // [Insert encoding code here]
            
            CComPtr<IMFMediaEvent> event;
            if (FAILED(hr = eventGen->GetEvent(0, &event)))
                return hr;

            MediaEventType eventType;
            if (FAILED(hr = event->GetType(&eventType)))
                return hr;
            switch (eventType)
            {
                case METransformNeedInput:
                {
                    Encode(transform, nv12sample);
                }
            }

            capturedFrames++;
        }
    } while (capturedFrames <= nFrames);

    // Shutdown
    if (FAILED(hr = MFShutdown()))
        return hr;

    return 0;
}

MF_E_TRANSFORM_TYPE_NOT_SET You must set the output types before setting the input types.

这是设计使然。您通常会沿着数据流构建管道,但在编码器的情况下,由于我在这里省略的原因,它通常是相反的。如记录的那样,错误代码提示您需要先 SetOutputType 然后才能执行 SetInputType.

虽然Video Processor MFT也是一种MFT,但类型设置更为宽松。