IE Explore 11 < c++ ATL COM Browser Helper Object (Add-on) 替换 DOM 中的文本
IE Explore 11 < c++ ATL COM Browser Helper Object (Add-on) to replace text in the DOM
我正在尝试使用 BHO 从 IE 11 的 Dom 中删除一行 Javascript。 (Internet Explorer 加载项)
相关文档非常糟糕,很难找到最佳的解决思路。
我已经设法用 C++ 编写了 BHO ATL/COM 并且它工作正常,但我无法找到从正文中实际删除/替换文本然后将更改注入回页面的最佳方法。
老实说,我没有时间阅读这本 1000 页的过时 COM 书 :-)。
这是我目前为 OnDocumentComplete 事件准备的内容:
// Handler for the DocumentComplete event.
//
// pDisp   - dispatch pointer of the browser/frame that raised the event.
// pvarURL - VARIANT carrying the URL of the completed document.
//
// Ignores about:blank and any event that does not come from the top-level
// browser cached in m_spWebBrowser, then fetches the outer HTML of <body>.
void STDMETHODCALLTYPE CMyFooBHO::OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL)
{
    // Only compare the URL when the VARIANT really carries a non-null BSTR;
    // otherwise bstrVal is not a valid string pointer.
    const bool fHasUrl = (pvarURL != NULL) && (pvarURL->vt == VT_BSTR) && (pvarURL->bstrVal != NULL);
    if (fHasUrl && _wcsicmp(pvarURL->bstrVal, ABOUT_BLANK) == 0)
    {
        return;
    }

    // Query for the IWebBrowser2 interface.
    CComQIPtr<IWebBrowser2> spTempWebBrowser = pDisp;

    // Is this event associated with the top-level browser?
    if (!spTempWebBrowser || !m_spWebBrowser || !m_spWebBrowser.IsEqualObject(spTempWebBrowser))
    {
        return;
    }

    // Get the current document object from the browser.
    CComPtr<IDispatch> spDispDoc;
    if (FAILED(m_spWebBrowser->get_Document(&spDispDoc)))
    {
        return;
    }

    // The document is only useful to us if it exposes IHTMLDocument2.
    CComQIPtr<IHTMLDocument2, &IID_IHTMLDocument2> spHTML = spDispDoc;
    if (!spHTML)
    {
        return;
    }

    // Get the BODY object; guard against a success code with a null element.
    CComPtr<IHTMLElement> spBody;
    if (FAILED(spHTML->get_body(&spBody)) || !spBody)
    {
        return;
    }

    // CComBSTR owns the returned string and frees it on scope exit
    // (a raw BSTR here would leak on every page load).
    CComBSTR bstrHTMLText;
    if (SUCCEEDED(spBody->get_outerHTML(&bstrHTMLText)))
    {
        // bstrHTMLText now contains the <body> ...whatever... </body> of the html page.
        // ******** HERE ********
        // What I want to do here is replace some text contained in bstrHTMLText
        // i.e. Replace "ABC" with "DEF" if it exists in bstrHTMLText.
        // Then replace the body of the original page with the edited bstrHTMLText.
        // My actual goal is to remove one line of javascript.
    }
}
欢迎对现有代码的任何改进发表评论。
这是另一种方法,使用 JavaScript 实现:
// Scan the <script> elements of the page and, for each one whose text contains
// the unwanted statement, append a new <script> to <head> holding the same text
// with that statement removed.
var oCollection = document.getElementsByTagName("script");
// The element count is captured once, before the loop appends any new scripts.
var nColCount = oCollection.length;
var nIndex;
for ( nIndex = 0; nIndex < nColCount; ++nIndex ) {
var oScript = oCollection[ nIndex ];
var strScriptText = oScript.innerHTML;
if ( strScriptText.indexOf( "alert(\"hello\");" ) != -1 ) {
// replace() with a string pattern removes only the first occurrence.
var strNewText = strScriptText.replace( "alert(\"hello\");", "" );
var oNewScript = document.createElement("script");
oNewScript.type = "text\/javascript";
oNewScript.text = strNewText;
document.getElementsByTagName("head")[0].appendChild(oNewScript);
console.log ("DONE!");
}
}
你可以用 IHTMLWindow2::execScript
执行这段 JS。
必须先将上述代码存储在 CComBSTR
中,并注意引号和反斜杠的转义。
示例:
// Each JS quote needs \" and each JS backslash needs \\ inside the C++ literal.
// The script text seen by the engine is: var strWithOneDoubleQuote = "\"";
CComBSTR bstrScript = L"var strWithOneDoubleQuote = \"\\\"\";"; // -:)
这不符合正常(应该做)的做法。
如果没有更好的答案,那么我认为这是最佳答案,我会标记为最佳答案。
我很乐意听到任何意见或更新,以改进或向我展示一个更好的工作示例。
这是针对 IE 11 的,使用 Visual Studio 2015 以 C++ ATL / COM 编译。
我已经尝试过遍历 DOM 并修改它,以及几乎所有其他文档同样糟糕的做法。
阅读 html 似乎从来没有问题,即 get_innerText get_innerHTML get_outerHTML
它的形式多种多样,但 put_*** 似乎从来没有在大多数情况下起作用。为什么?似乎没有人能说或给我
一个有效的例子。
我发现 get_body > get_innerHTML > put_innerHTML 确实有效。
所以找到这个我只是写了一个函数来在 CComBSTR 中搜索和替换。
这对我有用;如果您的需求不同,也可以拿到返回的 body innerHTML 之后,
在其上运行其他的 DOM 操作代码(不是内置的东西)。
这种做事方式的主要优点是不依赖那些没有文档记录的 c**p 代码,
这类代码似乎只在 MS 乐意的时候才以某种神秘的方式起作用。
这是测试用的 html 页面。我要删除的是其中的 alert("hello"); 这一行,
它会在页面加载完成时执行。
<!doctype html>
<html>
<head>
<!-- Test page for the add-on: the inline script in the body registers foo(),
     which opens a dialog, as a load handler. -->
<title>Site</title>
<meta http-equiv="cache-control" content="max-age=0" />
<meta http-equiv="cache-control" content="no-cache" />
<meta http-equiv="expires" content="0" />
<meta http-equiv="expires" content="Tue, 01 Jan 1980 1:00:00 GMT" />
<meta http-equiv="pragma" content="no-cache" />
</head>
<body>
<div>If a dialog with hello appears then the BHO failed</div>
<script type="text/javascript">
window.onload = function(){
window.document.body.onload = foo;
};
function foo()
{
alert("hello");
}
</script>
</body>
</html>
// FooBHO.h : Declaration of the CFooBHO
#pragma once
#include "resource.h" // main symbols
#include "FooIEAddOn_i.h"
#include <shlguid.h> // IID_IWebBrowser2, DIID_DWebBrowserEvents2, etc.
#include <exdispid.h> // DISPID_DOCUMENTCOMPLETE, etc.
#include <mshtml.h> // DOM interfaces
#include <string>
#if defined(_WIN32_WCE) && !defined(_CE_DCOM) && !defined(_CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA)
#error "Single-threaded COM objects are not properly supported on Windows CE platform, such as the Windows Mobile platforms that do not include full DCOM support. Define _CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA to force ATL to support creating single-thread COM object's and allow use of it's single-threaded COM object implementations. The threading model in your rgs file was set to 'Free' as that is the only threading model supported in non DCOM Windows CE platforms."
#endif
#define DISPID_DOCUMENTRELOAD 282
using namespace ATL;
using namespace std;
// CFooBHO
// ATL COM object that is sited by the browser (IObjectWithSite) and sinks
// DWebBrowserEvents2::DocumentComplete so it can edit the loaded page's body HTML.
class ATL_NO_VTABLE CFooBHO : public CComObjectRootEx<CComSingleThreadModel>,
    public CComCoClass<CFooBHO, &CLSID_FooBHO>,
    public IObjectWithSiteImpl<CFooBHO>,
    public IDispatchImpl<IFooBHO, &IID_IFooBHO, &LIBID_FooIEAddOnLib, /*wMajor =*/ 1, /*wMinor =*/ 0>,
    public IDispEventImpl<1, CFooBHO, &DIID_DWebBrowserEvents2, &LIBID_SHDocVw, 1, 1>
{
public:
    // m_fAdvised must start out FALSE: SetSite() reads it to decide whether an
    // event connection has to be torn down, and it is only ever assigned there.
    CFooBHO() : m_fAdvised(FALSE)
    {
    }

    // The STDMETHOD macro is an ATL convention that marks the method as virtual and ensures that it has the right calling convention for the public
    // COM interface. It helps to demarcate COM interfaces from other public methods that may exist on the class. The STDMETHODIMP macro is likewise used
    // when implementing the member method.
    STDMETHOD(SetSite)(IUnknown *pUnkSite);

    DECLARE_REGISTRY_RESOURCEID(IDR_FooBHO)
    DECLARE_NOT_AGGREGATABLE(CFooBHO)

    BEGIN_COM_MAP(CFooBHO)
        COM_INTERFACE_ENTRY(IFooBHO)
        COM_INTERFACE_ENTRY(IDispatch)
        COM_INTERFACE_ENTRY(IObjectWithSite)
    END_COM_MAP()

    DECLARE_PROTECT_FINAL_CONSTRUCT()

    // Routes DISPID_DOCUMENTCOMPLETE from the browser to OnDocumentComplete().
    BEGIN_SINK_MAP(CFooBHO)
        SINK_ENTRY_EX(1, DIID_DWebBrowserEvents2, DISPID_DOCUMENTCOMPLETE, OnDocumentComplete)
    END_SINK_MAP()

    // Event handler: called with the dispatch pointer of the browser/frame that
    // finished loading and a VARIANT holding its URL.
    void STDMETHODCALLTYPE OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL);

    HRESULT FinalConstruct()
    {
        return S_OK;
    }

    void FinalRelease()
    {
    }

private:
    // Top-level browser obtained from the site in SetSite().
    CComPtr<IWebBrowser2> m_spWebBrowser;
    // TRUE while the DWebBrowserEvents2 connection made in SetSite() is active.
    BOOL m_fAdvised;
    // URL that OnDocumentComplete() ignores.
    static const wchar_t* ABOUT_BLANK;

    // Replaces occurrences of strOld in strInput with strNew, updating strInput in place.
    // (Declared without the redundant CFooBHO:: qualifier, which standard C++ rejects
    // on an in-class member declaration.)
    void ReplaceInCComBSTR(CComBSTR &strInput, const std::wstring &strOld, const std::wstring &strNew);
};
OBJECT_ENTRY_AUTO(__uuidof(FooBHO), CFooBHO)
// FooBHO.cpp : Implementation of CFooBHO
#include "stdafx.h"
#include "FooBHO.h"
#include "Strsafe.h"
const wchar_t* CFooBHO::ABOUT_BLANK = L"about:blank";
// The SetSite() method is where the BHO is initialized and where you would perform all the tasks that happen only
// once. When you navigate to a URL with Internet Explorer, you should wait for a couple of events to make sure the
// required document has been completely downloaded and then initialized. Only at this point can you safely access
// its content through the exposed object model, if any.
//
// pUnkSite - the new site, or NULL when the BHO is being detached.
//
// Any connection made by an earlier call is always torn down first, so a second
// non-NULL call neither leaks the previous event connection nor overwrites a
// live interface pointer. Returns the result of the base-class SetSite().
STDMETHODIMP CFooBHO::SetSite(IUnknown* pUnkSite)
{
    // Unregister the event sink from the previously cached browser, if any.
    // The pointer check keeps this safe even before m_fAdvised was first assigned.
    if (m_spWebBrowser && m_fAdvised)
    {
        DispEventUnadvise(m_spWebBrowser);
    }
    m_fAdvised = FALSE;

    // Release cached pointers and other resources here.
    m_spWebBrowser.Release();

    if (pUnkSite != NULL)
    {
        // Cache the pointer to IWebBrowser2.
        HRESULT hr = pUnkSite->QueryInterface(IID_IWebBrowser2, reinterpret_cast<void **>(&m_spWebBrowser));
        if (SUCCEEDED(hr) && m_spWebBrowser)
        {
            // Register to sink events from DWebBrowserEvents2.
            hr = DispEventAdvise(m_spWebBrowser);
            m_fAdvised = SUCCEEDED(hr) ? TRUE : FALSE;
        }
    }

    // Call base class implementation.
    return IObjectWithSiteImpl<CFooBHO>::SetSite(pUnkSite);
}
// Handler for the DocumentComplete event.
//
// pDisp   - dispatch pointer of the browser/frame that raised the event.
// pvarURL - VARIANT carrying the URL of the completed document.
//
// For the top-level browser only (and never for about:blank), removes the text
// alert("hello"); from the body's inner HTML and writes the body back.
void STDMETHODCALLTYPE CFooBHO::OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL)
{
    // Test for any specific URL here.
    // Currently we are ignoring ABOUT:BLANK but allowing everything else.
    // The comparison is only made when the VARIANT really carries a non-null BSTR.
    const bool fHasUrl = (pvarURL != NULL) && (pvarURL->vt == VT_BSTR) && (pvarURL->bstrVal != NULL);
    if (fHasUrl && _wcsicmp(pvarURL->bstrVal, ABOUT_BLANK) == 0)
    {
        return;
    }

    // Query for the IWebBrowser2 interface.
    CComQIPtr<IWebBrowser2> spTempWebBrowser = pDisp;

    // Is this event associated with the top-level browser?
    if (!spTempWebBrowser || !m_spWebBrowser || !m_spWebBrowser.IsEqualObject(spTempWebBrowser))
    {
        return;
    }

    // Get the current document object from the browser.
    CComPtr<IDispatch> spDispDoc;
    if (FAILED(m_spWebBrowser->get_Document(&spDispDoc)))
    {
        return;
    }

    // The document is only useful to us if it exposes IHTMLDocument2.
    CComQIPtr<IHTMLDocument2, &IID_IHTMLDocument2> spHTMLDocument2 = spDispDoc;
    if (!spHTMLDocument2)
    {
        return;
    }

    // Get the BODY object; guard against a success code with a null element.
    CComPtr<IHTMLElement> spBody;
    if (FAILED(spHTMLDocument2->get_body(&spBody)) || !spBody)
    {
        return;
    }

    // Get the body HTML text.
    CComBSTR bstrBodyHTMLText;
    if (FAILED(spBody->get_innerHTML(&bstrBodyHTMLText)))
    {
        return;
    }

    // Keep a copy so the page is only rewritten when the text actually changed;
    // writing innerHTML back replaces the body's existing content.
    const CComBSTR bstrOriginal(bstrBodyHTMLText);
    ReplaceInCComBSTR(bstrBodyHTMLText, L"alert(\"hello\");", L"");
    if (bstrBodyHTMLText != bstrOriginal)
    {
        // Best effort: the handler returns void, so a failure cannot be reported.
        (void)spBody->put_innerHTML(bstrBodyHTMLText);
    }
}
// Replaces every occurrence of strOld in bstrInput with strNew, in place.
//
// bstrInput - string to edit; left untouched when it is NULL, when strOld is
//             empty, when nothing matches, or when the new BSTR cannot be allocated.
// strOld    - text to search for.
// strNew    - replacement text (may be empty to delete the matches).
void CFooBHO::ReplaceInCComBSTR(CComBSTR &bstrInput, const wstring &strOld, const wstring &strNew)
{
    // A NULL BSTR cannot be turned into a wstring, and an empty pattern matches
    // at every position, so neither case has anything sensible to replace.
    if (bstrInput.m_str == NULL || strOld.empty())
    {
        return;
    }

    // Copy by explicit length so embedded NUL characters are preserved.
    wstring strOutput(bstrInput.m_str, bstrInput.Length());

    bool fChanged = false;
    size_t iPos = 0;
    while ((iPos = strOutput.find(strOld, iPos)) != wstring::npos)
    {
        strOutput.replace(iPos, strOld.length(), strNew);
        // Resume after the inserted text; rescanning it would loop forever
        // whenever strNew itself contains strOld.
        iPos += strNew.length();
        fChanged = true;
    }

    if (!fChanged)
    {
        return;
    }

    // Find and replace is complete; now update the CComBSTR.
    // Attach() frees the old string and takes ownership of the new one.
    BSTR bstrResult = ::SysAllocStringLen(strOutput.data(), static_cast<UINT>(strOutput.length()));
    if (bstrResult != NULL)
    {
        bstrInput.Attach(bstrResult);
    }
}
我正在尝试使用 BHO 从 IE 11 的 Dom 中删除一行 Javascript。 (Internet Explorer 加载项)
相关文档非常糟糕,很难找到最佳的解决思路。
我已经设法用 C++ 编写了 BHO ATL/COM 并且它工作正常,但我无法找到从正文中实际删除/替换文本然后将更改注入回页面的最佳方法。
老实说,我没有时间阅读这本 1000 页的过时 COM 书 :-)。
这是我目前为 OnDocumentComplete 事件准备的内容:
// Handler for the DocumentComplete event.
//
// pDisp   - dispatch pointer of the browser/frame that raised the event.
// pvarURL - VARIANT carrying the URL of the completed document.
//
// Ignores about:blank and any event that does not come from the top-level
// browser cached in m_spWebBrowser, then fetches the outer HTML of <body>.
void STDMETHODCALLTYPE CMyFooBHO::OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL)
{
    // Only compare the URL when the VARIANT really carries a non-null BSTR;
    // otherwise bstrVal is not a valid string pointer.
    const bool fHasUrl = (pvarURL != NULL) && (pvarURL->vt == VT_BSTR) && (pvarURL->bstrVal != NULL);
    if (fHasUrl && _wcsicmp(pvarURL->bstrVal, ABOUT_BLANK) == 0)
    {
        return;
    }

    // Query for the IWebBrowser2 interface.
    CComQIPtr<IWebBrowser2> spTempWebBrowser = pDisp;

    // Is this event associated with the top-level browser?
    if (!spTempWebBrowser || !m_spWebBrowser || !m_spWebBrowser.IsEqualObject(spTempWebBrowser))
    {
        return;
    }

    // Get the current document object from the browser.
    CComPtr<IDispatch> spDispDoc;
    if (FAILED(m_spWebBrowser->get_Document(&spDispDoc)))
    {
        return;
    }

    // The document is only useful to us if it exposes IHTMLDocument2.
    CComQIPtr<IHTMLDocument2, &IID_IHTMLDocument2> spHTML = spDispDoc;
    if (!spHTML)
    {
        return;
    }

    // Get the BODY object; guard against a success code with a null element.
    CComPtr<IHTMLElement> spBody;
    if (FAILED(spHTML->get_body(&spBody)) || !spBody)
    {
        return;
    }

    // CComBSTR owns the returned string and frees it on scope exit
    // (a raw BSTR here would leak on every page load).
    CComBSTR bstrHTMLText;
    if (SUCCEEDED(spBody->get_outerHTML(&bstrHTMLText)))
    {
        // bstrHTMLText now contains the <body> ...whatever... </body> of the html page.
        // ******** HERE ********
        // What I want to do here is replace some text contained in bstrHTMLText
        // i.e. Replace "ABC" with "DEF" if it exists in bstrHTMLText.
        // Then replace the body of the original page with the edited bstrHTMLText.
        // My actual goal is to remove one line of javascript.
    }
}
欢迎对现有代码的任何改进发表评论。
这是另一种方法,使用 JavaScript 实现:
// Scan the <script> elements of the page and, for each one whose text contains
// the unwanted statement, append a new <script> to <head> holding the same text
// with that statement removed.
var oCollection = document.getElementsByTagName("script");
// The element count is captured once, before the loop appends any new scripts.
var nColCount = oCollection.length;
var nIndex;
for ( nIndex = 0; nIndex < nColCount; ++nIndex ) {
var oScript = oCollection[ nIndex ];
var strScriptText = oScript.innerHTML;
if ( strScriptText.indexOf( "alert(\"hello\");" ) != -1 ) {
// replace() with a string pattern removes only the first occurrence.
var strNewText = strScriptText.replace( "alert(\"hello\");", "" );
var oNewScript = document.createElement("script");
oNewScript.type = "text\/javascript";
oNewScript.text = strNewText;
document.getElementsByTagName("head")[0].appendChild(oNewScript);
console.log ("DONE!");
}
}
你可以用 IHTMLWindow2::execScript 执行这段 JS。
必须先将上述代码存储在 CComBSTR
中,并注意引号和反斜杠的转义。
示例:
// Each JS quote needs \" and each JS backslash needs \\ inside the C++ literal.
// The script text seen by the engine is: var strWithOneDoubleQuote = "\"";
CComBSTR bstrScript = L"var strWithOneDoubleQuote = \"\\\"\";"; // -:)
这不符合正常(应该做)的做法。
如果没有更好的答案,那么我认为这是最佳答案,我会标记为最佳答案。
我很乐意听到任何意见或更新,以改进或向我展示一个更好的工作示例。
这是针对 IE 11 的,使用 Visual Studio 2015 以 C++ ATL / COM 编译。
我已经尝试过遍历 DOM 并修改它,以及几乎所有其他文档同样糟糕的做法。
阅读 html 似乎从来没有问题,即 get_innerText get_innerHTML get_outerHTML 它的形式多种多样,但 put_*** 似乎从来没有在大多数情况下起作用。为什么?似乎没有人能说或给我 一个有效的例子。
我发现 get_body > get_innerHTML > put_innerHTML 确实有效。
所以找到这个我只是写了一个函数来在 CComBSTR 中搜索和替换。
这对我有用;如果您的需求不同,也可以拿到返回的 body innerHTML 之后,在其上运行其他的 DOM 操作代码(不是内置的东西)。
这种做事方式的主要优点是不依赖那些没有文档记录的 c**p 代码,这类代码似乎只在 MS 乐意的时候才以某种神秘的方式起作用。
这是测试用的 html 页面。我要删除的是其中的 alert("hello"); 这一行,它会在页面加载完成时执行。
<!doctype html>
<html>
<head>
<!-- Test page for the add-on: the inline script in the body registers foo(),
     which opens a dialog, as a load handler. -->
<title>Site</title>
<meta http-equiv="cache-control" content="max-age=0" />
<meta http-equiv="cache-control" content="no-cache" />
<meta http-equiv="expires" content="0" />
<meta http-equiv="expires" content="Tue, 01 Jan 1980 1:00:00 GMT" />
<meta http-equiv="pragma" content="no-cache" />
</head>
<body>
<div>If a dialog with hello appears then the BHO failed</div>
<script type="text/javascript">
window.onload = function(){
window.document.body.onload = foo;
};
function foo()
{
alert("hello");
}
</script>
</body>
</html>
// FooBHO.h : Declaration of the CFooBHO
#pragma once
#include "resource.h" // main symbols
#include "FooIEAddOn_i.h"
#include <shlguid.h> // IID_IWebBrowser2, DIID_DWebBrowserEvents2, etc.
#include <exdispid.h> // DISPID_DOCUMENTCOMPLETE, etc.
#include <mshtml.h> // DOM interfaces
#include <string>
#if defined(_WIN32_WCE) && !defined(_CE_DCOM) && !defined(_CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA)
#error "Single-threaded COM objects are not properly supported on Windows CE platform, such as the Windows Mobile platforms that do not include full DCOM support. Define _CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA to force ATL to support creating single-thread COM object's and allow use of it's single-threaded COM object implementations. The threading model in your rgs file was set to 'Free' as that is the only threading model supported in non DCOM Windows CE platforms."
#endif
#define DISPID_DOCUMENTRELOAD 282
using namespace ATL;
using namespace std;
// CFooBHO
// ATL COM object that is sited by the browser (IObjectWithSite) and sinks
// DWebBrowserEvents2::DocumentComplete so it can edit the loaded page's body HTML.
class ATL_NO_VTABLE CFooBHO : public CComObjectRootEx<CComSingleThreadModel>,
    public CComCoClass<CFooBHO, &CLSID_FooBHO>,
    public IObjectWithSiteImpl<CFooBHO>,
    public IDispatchImpl<IFooBHO, &IID_IFooBHO, &LIBID_FooIEAddOnLib, /*wMajor =*/ 1, /*wMinor =*/ 0>,
    public IDispEventImpl<1, CFooBHO, &DIID_DWebBrowserEvents2, &LIBID_SHDocVw, 1, 1>
{
public:
    // m_fAdvised must start out FALSE: SetSite() reads it to decide whether an
    // event connection has to be torn down, and it is only ever assigned there.
    CFooBHO() : m_fAdvised(FALSE)
    {
    }

    // The STDMETHOD macro is an ATL convention that marks the method as virtual and ensures that it has the right calling convention for the public
    // COM interface. It helps to demarcate COM interfaces from other public methods that may exist on the class. The STDMETHODIMP macro is likewise used
    // when implementing the member method.
    STDMETHOD(SetSite)(IUnknown *pUnkSite);

    DECLARE_REGISTRY_RESOURCEID(IDR_FooBHO)
    DECLARE_NOT_AGGREGATABLE(CFooBHO)

    BEGIN_COM_MAP(CFooBHO)
        COM_INTERFACE_ENTRY(IFooBHO)
        COM_INTERFACE_ENTRY(IDispatch)
        COM_INTERFACE_ENTRY(IObjectWithSite)
    END_COM_MAP()

    DECLARE_PROTECT_FINAL_CONSTRUCT()

    // Routes DISPID_DOCUMENTCOMPLETE from the browser to OnDocumentComplete().
    BEGIN_SINK_MAP(CFooBHO)
        SINK_ENTRY_EX(1, DIID_DWebBrowserEvents2, DISPID_DOCUMENTCOMPLETE, OnDocumentComplete)
    END_SINK_MAP()

    // Event handler: called with the dispatch pointer of the browser/frame that
    // finished loading and a VARIANT holding its URL.
    void STDMETHODCALLTYPE OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL);

    HRESULT FinalConstruct()
    {
        return S_OK;
    }

    void FinalRelease()
    {
    }

private:
    // Top-level browser obtained from the site in SetSite().
    CComPtr<IWebBrowser2> m_spWebBrowser;
    // TRUE while the DWebBrowserEvents2 connection made in SetSite() is active.
    BOOL m_fAdvised;
    // URL that OnDocumentComplete() ignores.
    static const wchar_t* ABOUT_BLANK;

    // Replaces occurrences of strOld in strInput with strNew, updating strInput in place.
    // (Declared without the redundant CFooBHO:: qualifier, which standard C++ rejects
    // on an in-class member declaration.)
    void ReplaceInCComBSTR(CComBSTR &strInput, const std::wstring &strOld, const std::wstring &strNew);
};
OBJECT_ENTRY_AUTO(__uuidof(FooBHO), CFooBHO)
// FooBHO.cpp : Implementation of CFooBHO
#include "stdafx.h"
#include "FooBHO.h"
#include "Strsafe.h"
const wchar_t* CFooBHO::ABOUT_BLANK = L"about:blank";
// The SetSite() method is where the BHO is initialized and where you would perform all the tasks that happen only
// once. When you navigate to a URL with Internet Explorer, you should wait for a couple of events to make sure the
// required document has been completely downloaded and then initialized. Only at this point can you safely access
// its content through the exposed object model, if any.
//
// pUnkSite - the new site, or NULL when the BHO is being detached.
//
// Any connection made by an earlier call is always torn down first, so a second
// non-NULL call neither leaks the previous event connection nor overwrites a
// live interface pointer. Returns the result of the base-class SetSite().
STDMETHODIMP CFooBHO::SetSite(IUnknown* pUnkSite)
{
    // Unregister the event sink from the previously cached browser, if any.
    // The pointer check keeps this safe even before m_fAdvised was first assigned.
    if (m_spWebBrowser && m_fAdvised)
    {
        DispEventUnadvise(m_spWebBrowser);
    }
    m_fAdvised = FALSE;

    // Release cached pointers and other resources here.
    m_spWebBrowser.Release();

    if (pUnkSite != NULL)
    {
        // Cache the pointer to IWebBrowser2.
        HRESULT hr = pUnkSite->QueryInterface(IID_IWebBrowser2, reinterpret_cast<void **>(&m_spWebBrowser));
        if (SUCCEEDED(hr) && m_spWebBrowser)
        {
            // Register to sink events from DWebBrowserEvents2.
            hr = DispEventAdvise(m_spWebBrowser);
            m_fAdvised = SUCCEEDED(hr) ? TRUE : FALSE;
        }
    }

    // Call base class implementation.
    return IObjectWithSiteImpl<CFooBHO>::SetSite(pUnkSite);
}
// Handler for the DocumentComplete event.
//
// pDisp   - dispatch pointer of the browser/frame that raised the event.
// pvarURL - VARIANT carrying the URL of the completed document.
//
// For the top-level browser only (and never for about:blank), removes the text
// alert("hello"); from the body's inner HTML and writes the body back.
void STDMETHODCALLTYPE CFooBHO::OnDocumentComplete(IDispatch *pDisp, VARIANT *pvarURL)
{
    // Test for any specific URL here.
    // Currently we are ignoring ABOUT:BLANK but allowing everything else.
    // The comparison is only made when the VARIANT really carries a non-null BSTR.
    const bool fHasUrl = (pvarURL != NULL) && (pvarURL->vt == VT_BSTR) && (pvarURL->bstrVal != NULL);
    if (fHasUrl && _wcsicmp(pvarURL->bstrVal, ABOUT_BLANK) == 0)
    {
        return;
    }

    // Query for the IWebBrowser2 interface.
    CComQIPtr<IWebBrowser2> spTempWebBrowser = pDisp;

    // Is this event associated with the top-level browser?
    if (!spTempWebBrowser || !m_spWebBrowser || !m_spWebBrowser.IsEqualObject(spTempWebBrowser))
    {
        return;
    }

    // Get the current document object from the browser.
    CComPtr<IDispatch> spDispDoc;
    if (FAILED(m_spWebBrowser->get_Document(&spDispDoc)))
    {
        return;
    }

    // The document is only useful to us if it exposes IHTMLDocument2.
    CComQIPtr<IHTMLDocument2, &IID_IHTMLDocument2> spHTMLDocument2 = spDispDoc;
    if (!spHTMLDocument2)
    {
        return;
    }

    // Get the BODY object; guard against a success code with a null element.
    CComPtr<IHTMLElement> spBody;
    if (FAILED(spHTMLDocument2->get_body(&spBody)) || !spBody)
    {
        return;
    }

    // Get the body HTML text.
    CComBSTR bstrBodyHTMLText;
    if (FAILED(spBody->get_innerHTML(&bstrBodyHTMLText)))
    {
        return;
    }

    // Keep a copy so the page is only rewritten when the text actually changed;
    // writing innerHTML back replaces the body's existing content.
    const CComBSTR bstrOriginal(bstrBodyHTMLText);
    ReplaceInCComBSTR(bstrBodyHTMLText, L"alert(\"hello\");", L"");
    if (bstrBodyHTMLText != bstrOriginal)
    {
        // Best effort: the handler returns void, so a failure cannot be reported.
        (void)spBody->put_innerHTML(bstrBodyHTMLText);
    }
}
// Replaces every occurrence of strOld in bstrInput with strNew, in place.
//
// bstrInput - string to edit; left untouched when it is NULL, when strOld is
//             empty, when nothing matches, or when the new BSTR cannot be allocated.
// strOld    - text to search for.
// strNew    - replacement text (may be empty to delete the matches).
void CFooBHO::ReplaceInCComBSTR(CComBSTR &bstrInput, const wstring &strOld, const wstring &strNew)
{
    // A NULL BSTR cannot be turned into a wstring, and an empty pattern matches
    // at every position, so neither case has anything sensible to replace.
    if (bstrInput.m_str == NULL || strOld.empty())
    {
        return;
    }

    // Copy by explicit length so embedded NUL characters are preserved.
    wstring strOutput(bstrInput.m_str, bstrInput.Length());

    bool fChanged = false;
    size_t iPos = 0;
    while ((iPos = strOutput.find(strOld, iPos)) != wstring::npos)
    {
        strOutput.replace(iPos, strOld.length(), strNew);
        // Resume after the inserted text; rescanning it would loop forever
        // whenever strNew itself contains strOld.
        iPos += strNew.length();
        fChanged = true;
    }

    if (!fChanged)
    {
        return;
    }

    // Find and replace is complete; now update the CComBSTR.
    // Attach() frees the old string and takes ownership of the new one.
    BSTR bstrResult = ::SysAllocStringLen(strOutput.data(), static_cast<UINT>(strOutput.length()));
    if (bstrResult != NULL)
    {
        bstrInput.Attach(bstrResult);
    }
}