如何从 C++ 程序中检索 DOM 元素?

How can DOM elements be retrieved from a C++ program?

我需要从 C++ 程序中获取在 IE 中显示的网页的所有元素。 我试着用 spy++ 查看,但只有 IEFrame。

所以我正在考虑使用开发工具(IE 中的 F12),我听说有一种方法可以使它自动化,是个好主意吗?

谢谢

你可以从 IE 的窗口句柄获取 IHTMLDocument2 引用,即使是在进程外(out-of-process)也可以。相关文档见 https://support.microsoft.com/en-us/help/249232/how-to-get-ihtmldocument2-from-a-hwnd ,但微软并不真正支持这种做法。不过它目前看起来仍然有效:我已经在一台 Windows 10 机器上测试过,而且 IE 现在是一个已冻结的应用程序,所以短期内不会改变。

一旦您拥有了 Internet Explorer 的正确 HWND,就可以使用如下代码获取 DOM。请确保 IE 和您的程序运行在同一安全级别。这里的 DOM 与编写 IE 进程内组件(宿主、ActiveX 等)时使用的 DOM 相同,但出于安全原因,有些操作可能无法正常工作:

// Obtains the IHTMLDocument2 exposed by an IE server window and prints the
// document's URL and the inner text of its body to stdout.
//
// hwnd: handle of the window that is sent the WM_HTML_GETOBJECT message
//       (callers in this file pass an "Internet Explorer_Server" window).
//
// Returns silently when the document cannot be obtained. Individual
// properties that cannot be read are skipped rather than printed.
void DoSomeDomOperations(HWND hwnd)
{
    UINT msg = RegisterWindowMessage(L"WM_HTML_GETOBJECT");
    if (!msg)
        return;

    // Give the target window at most 1000 ms to answer, and bail out at once
    // if its thread is hung.
    LRESULT result = 0;
    if (!SendMessageTimeout(hwnd, msg, 0, 0, SMTO_ABORTIFHUNG, 1000, (PDWORD_PTR)&result))
        return;
    if (!result)
        return;

    // get main document object
    IHTMLDocument2 *doc = NULL;
    HRESULT hr = ObjectFromLresult(result, IID_IHTMLDocument2, 0, (void**)&doc);
    if (FAILED(hr) || !doc)
        return;

    // get document's url
    // A NULL BSTR is a valid empty string, so it is never handed to wprintf
    // as-is. %ls is used because it means "wide string" on every CRT.
    BSTR url = NULL;
    if (SUCCEEDED(doc->get_URL(&url)))
        wprintf(L"url:%ls\n", url ? url : L"");
    SysFreeString(url); // SysFreeString accepts NULL

    // get body element
    IHTMLElement *element = NULL;
    if (SUCCEEDED(doc->get_body(&element)) && element)
    {
        BSTR text = NULL;
        if (SUCCEEDED(element->get_innerText(&text)))
            wprintf(L"text:%ls\n", text ? text : L"");
        SysFreeString(text);
        element->Release();
    }

    // etc.
    // etc.

    doc->Release();
}

下面是一个完整的示例控制台应用程序,它会扫描当前正在运行的所有 IE 进程:

// EnumChildWindows callback that looks for IE's COM server window.
//
// hwnd:   the child window currently being visited.
// lParam: pointer to an HWND that receives the matching window.
//
// Returns FALSE (stop enumerating) once a window whose class name is
// "Internet Explorer_Server" has been stored, TRUE (continue) otherwise.
BOOL CALLBACK GetIEServerWindowProc(HWND hwnd, LPARAM lParam)
{
    wchar_t className[100] = L"";

    // GetClassName returns 0 on failure and leaves the buffer unspecified;
    // skip such windows instead of comparing indeterminate data.
    if (!GetClassName(hwnd, className, sizeof(className) / sizeof(className[0])))
        return TRUE;

    if (!wcscmp(className, L"Internet Explorer_Server"))
    {
        *((HWND*)lParam) = hwnd;
        return FALSE;
    }
    return TRUE;
}

// Looks through the child windows of hwnd for IE's server window.
// Returns the window stored by GetIEServerWindowProc, or NULL when the
// callback never stored one.
HWND GetIEServerWindow(HWND hwnd)
{
    HWND found = NULL;
    LPARAM outParam = (LPARAM)&found; // the callback writes its match here
    EnumChildWindows(hwnd, GetIEServerWindowProc, outParam);
    return found;
}

// Search state passed through EnumWindows via LPARAM.
// Member order matters: GetIEProcessServerWindow initializes this struct
// positionally as { processId, NULL }.
struct IEServer
{
    DWORD processId;   // in: id of the process whose windows are wanted
    HWND serverHwnd;   // out: server window found for that process, or NULL
};

// EnumWindows callback that finds the IE server window of one process.
//
// hwnd:   the top-level window currently being visited.
// lParam: pointer to an IEServer; processId is read, serverHwnd is written.
//
// Returns FALSE (stop enumerating) once a server window has been stored,
// TRUE (continue) otherwise.
BOOL CALLBACK GetIEProcessServerWindowProc(HWND hwnd, LPARAM lParam)
{
    IEServer *search = (IEServer*)lParam;

    // The window can be destroyed while the enumeration is running; in that
    // case GetWindowThreadProcessId returns 0 and pid must not be trusted.
    DWORD pid = 0;
    if (!GetWindowThreadProcessId(hwnd, &pid))
        return TRUE;
    if (pid != search->processId)
        return TRUE;

    HWND serverHwnd = GetIEServerWindow(hwnd);
    if (!serverHwnd)
        return TRUE;

    search->serverHwnd = serverHwnd;
    return FALSE;
}

// Enumerates top-level windows looking for one that belongs to processId
// and contains an IE server window.
// Returns that server window, or NULL when the callback never stored one.
HWND GetIEProcessServerWindow(DWORD processId)
{
    IEServer search;
    search.processId = processId;
    search.serverHwnd = NULL;

    EnumWindows(GetIEProcessServerWindowProc, (LPARAM)&search);
    return search.serverHwnd;
}

void EnumerateIEProcesses()
{
    HANDLE h = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (h == INVALID_HANDLE_VALUE)
        return;

    PROCESSENTRY32 process;
    process.dwSize = sizeof(PROCESSENTRY32);
    if (Process32First(h, &process))
    {
        do
        {
            // we only consider IE processes
            if (!wcscmp(process.szExeFile, L"iexplore.exe"))
            {
                HWND serverHwnd = GetIEProcessServerWindow(process.th32ProcessID);
                if (serverHwnd)
                {
                    DoSomeDomOperations(serverHwnd);
                }
            }
        } while (Process32Next(h, &process));
    }
    CloseHandle(h);
}

int main()
{
    CoInitialize(NULL);

    EnumerateIEProcesses();
    CoUninitialize();
    return 0;
}