宏使用 serverxmlhttp 请求获取部分响应
Macro gets partial response using serverxmlhttp requests
我正在尝试从某个 webpage 中提取 street address 和 builder name。当我使用 xmlhttp60 请求时,可以正常获取这些字段。然而,当我改用 serverxmlhttp60 请求时,大多数时候只能得到部分响应,结果脚本只打印出 street address。我使用了 json converter 来从该站点的 json 内容中解析 builder name。
这是概念证明:
' Downloads a property page with MSXML2.ServerXMLHTTP.6.0 and prints to the
' Immediate window:
'   * the street address, read from the parsed HTML, and
'   * the builder name, read from the JSON assigned to
'     reactServerState.InitialContext inside the page source.
'
' Requirements not defined in this listing:
'   * HTMLDocument is early-bound (presumably the "Microsoft HTML Object
'     Library" reference - confirm in Tools > References).
'   * JsonConverter (presumably the VBA-JSON module - confirm).
'
' When the server returns a partial page, the missing pieces are reported
' with a diagnostic line instead of raising an unhandled run-time error or
' silently printing nothing.
Sub GrabPropertyInfo()
    Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
    Const TIMEOUT_SECONDS As Long = 30

    Dim oPost As Object, oData As Object, Html As HTMLDocument
    Dim jsonObject As Object, jsonStr As Object, propertyMainRaw$
    Dim itemStr As Variant, sResp As String, oElem As Object
    Dim propertyContainer As Object, propertyMain As Object
    Dim addressNode As Object, httpStatus As Long, secondsWaited As Long

    Set Html = New HTMLDocument

    ' Alternative transport kept for comparison (WinINet-based XMLHTTP):
    ' With CreateObject("MSXML2.XMLHTTP")
    ' .Open "GET", siteLink, False
    ' .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
    ' .send
    ' sResp = .responseText
    ' Html.body.innerHTML = .responseText
    ' End With

    With CreateObject("MSXML2.ServerXMLHTTP.6.0")
        .Open "GET", siteLink, True
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
        .send

        ' Wait in one-second slices instead of spinning on readyState, so the
        ' CPU is not pegged and the wait is bounded. DoEvents between slices
        ' keeps the host application responsive.
        Do Until .waitForResponse(1)
            secondsWaited = secondsWaited + 1
            If secondsWaited >= TIMEOUT_SECONDS Then
                .abort
                Debug.Print "Request timed out after " & TIMEOUT_SECONDS & " seconds: " & siteLink
                Exit Sub
            End If
            DoEvents
        Loop

        httpStatus = .Status
        sResp = .responseText
        Html.body.innerHTML = sResp
    End With

    ' querySelector yields Nothing when the element is absent; check before
    ' dereferencing so a stripped-down page does not raise error 91.
    Set addressNode = Html.querySelector("h1.homeAddress > .street-address")
    If addressNode Is Nothing Then
        Debug.Print "Street address: (not found; HTTP status " & httpStatus & ")"
    Else
        Debug.Print "Street address: " & addressNode.innerText
    End If

    With CreateObject("VBScript.RegExp")
        .Global = True
        .Pattern = "reactServerState\.InitialContext = (.*);"
        .MultiLine = True
        Set jsonStr = .Execute(sResp)
    End With

    ' No match means the embedded state is missing from this response.
    If jsonStr.Count = 0 Then
        Debug.Print "Builder Name: (embedded page state not found; HTTP status " & httpStatus & ")"
        Exit Sub
    End If

    ' Any missing key along the JSON path lands in the handler below, which
    ' reports the incomplete payload explicitly.
    On Error GoTo IncompleteData
    itemStr = jsonStr(0).SubMatches(0)
    Set jsonObject = JsonConverter.ParseJson(itemStr)
    Set propertyMain = jsonObject("ReactServerAgent.cache")("dataCache")("/stingray/api/home/details/mainHouseInfoPanelInfo")("res")
    ' The inner payload is prefixed with "{}&&", which must be stripped
    ' before it can be parsed as JSON.
    propertyMainRaw = Replace(propertyMain("text"), "{}&&", "")
    Set propertyContainer = JsonConverter.ParseJson(propertyMainRaw)("payload")("mainHouseInfo")("amenitiesInfo")("superGroups")
    On Error GoTo 0

    If propertyContainer Is Nothing Then
        Debug.Print "Builder Name: (amenities data missing from response)"
        Exit Sub
    End If

    For Each oElem In propertyContainer
        For Each oPost In oElem("amenityGroups")
            If InStr(oPost("groupTitle"), "Building Information") > 0 Then
                For Each oData In oPost("amenityEntries")
                    If InStr(oData("amenityName"), "Builder Name") > 0 Then
                        ' Index 1 is the first entry of amenityValues.
                        Debug.Print "Builder Name: " & oData("amenityValues")(1)
                    End If
                Next oData
            End If
        Next oPost
    Next oElem
    Exit Sub

IncompleteData:
    Debug.Print "Builder Name: (not available; response incomplete - " & Err.Description & ")"
End Sub
使用 xmlhttp 请求,我总是得到:
Street address: 604 Amesbury Ln #2,
Builder Name: Zach Savage
使用 serverxmlhttp 请求,大多数时候我得到以下结果:
Street address: 604 Amesbury Ln #2,
How can I get complete response using serverxmlhttp requests?
编辑:
根据答案和评论,很明显,如果我先用 xmlhttp 请求从该站点抓取 browserid,然后在用 serverxmlhttp 发送请求时把该 browserid 的值作为 cookie,我就会得到想要的结果。但问题是,我用 xmlhttp 请求得到的 browserid 的值是 sz9u0xmCQKKV9Wu0jRa3Yg,而我在页面源代码中看到的值却是 v-J5D2IUSyqXizI7MG67fQ。我怎样才能得到后者的值?下面是我解析 browserid 的方式:
' Downloads the property page with MSXML2.XMLHTTP (synchronous GET) and
' prints to the Immediate window the value assigned to window.__rfBrowserId
' in the returned page source.
'
' If the marker is not present in the response, a diagnostic line with the
' HTTP status is printed instead of raising a run-time error on an empty
' match collection.
Sub FetchBrowserId()
    Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
    Dim Rxp As Object, browserId As Object, sRes$, cookie$
    Dim httpStatus As Long

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", siteLink, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36"
        .send
        httpStatus = .Status
        sRes = .responseText
    End With

    Set Rxp = CreateObject("VBScript.RegExp")
    With Rxp
        .Global = True
        ' Lazy capture: stop at the first closing quote followed by ";".
        .Pattern = "window.__rfBrowserId=""(.*?)"";"
        .MultiLine = True
        Set browserId = .Execute(sRes)
    End With

    ' Guard against an empty match collection before indexing into it.
    If browserId.Count = 0 Then
        Debug.Print "browserId not found in response (HTTP status " & httpStatus & ")"
        Exit Sub
    End If

    cookie = browserId(0).SubMatches(0)
    Debug.Print cookie
End Sub
所以我想真正的问题是为什么 MSXML2.XMLHTTP
和 MSXML2.ServerXMLHTTP
对相同的 URL 请求返回不同的响应。
MSXML2.XMLHTTP
使用 WinINet 堆栈,MSXML2.ServerXMLHTTP
使用 WinHTTP 堆栈。查看 WinINet vs. WinHTTP 文章了解更多详情。
WinINet 提供完整的 cookie 处理(顺便说一句,IE 也依赖它)。因此,您得到不同响应的第一个原因是:发送到服务器的 cookie 可能会影响服务器的处理流程。借助任何请求记录服务(例如 Webhook.site)都可以很容易地进行比较:当您使用 MSXML2.XMLHTTP 发出第二个请求时,该网络服务会记录下从第一个响应中接受的 cookie。
还要考虑 SSL 条件。通过 MSXML2.XMLHTTP
和 MSXML2.ServerXMLHTTP
向 How's My SSL? 发出请求,然后在浏览器(例如 Chrome)中打开同一链接并比较结果。
我正在尝试从某个 webpage 中提取 street address 和 builder name。当我使用 xmlhttp60 请求时,可以正常获取这些字段。然而,当我改用 serverxmlhttp60 请求时,大多数时候只能得到部分响应,结果脚本只打印出 street address。我使用了 json converter 来从该站点的 json 内容中解析 builder name。
这是概念证明:
' Downloads a property page with MSXML2.ServerXMLHTTP.6.0 and prints to the
' Immediate window:
'   * the street address, read from the parsed HTML, and
'   * the builder name, read from the JSON assigned to
'     reactServerState.InitialContext inside the page source.
'
' Requirements not defined in this listing:
'   * HTMLDocument is early-bound (presumably the "Microsoft HTML Object
'     Library" reference - confirm in Tools > References).
'   * JsonConverter (presumably the VBA-JSON module - confirm).
'
' When the server returns a partial page, the missing pieces are reported
' with a diagnostic line instead of raising an unhandled run-time error or
' silently printing nothing.
Sub GrabPropertyInfo()
    Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
    Const TIMEOUT_SECONDS As Long = 30

    Dim oPost As Object, oData As Object, Html As HTMLDocument
    Dim jsonObject As Object, jsonStr As Object, propertyMainRaw$
    Dim itemStr As Variant, sResp As String, oElem As Object
    Dim propertyContainer As Object, propertyMain As Object
    Dim addressNode As Object, httpStatus As Long, secondsWaited As Long

    Set Html = New HTMLDocument

    ' Alternative transport kept for comparison (WinINet-based XMLHTTP):
    ' With CreateObject("MSXML2.XMLHTTP")
    ' .Open "GET", siteLink, False
    ' .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
    ' .send
    ' sResp = .responseText
    ' Html.body.innerHTML = .responseText
    ' End With

    With CreateObject("MSXML2.ServerXMLHTTP.6.0")
        .Open "GET", siteLink, True
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
        .send

        ' Wait in one-second slices instead of spinning on readyState, so the
        ' CPU is not pegged and the wait is bounded. DoEvents between slices
        ' keeps the host application responsive.
        Do Until .waitForResponse(1)
            secondsWaited = secondsWaited + 1
            If secondsWaited >= TIMEOUT_SECONDS Then
                .abort
                Debug.Print "Request timed out after " & TIMEOUT_SECONDS & " seconds: " & siteLink
                Exit Sub
            End If
            DoEvents
        Loop

        httpStatus = .Status
        sResp = .responseText
        Html.body.innerHTML = sResp
    End With

    ' querySelector yields Nothing when the element is absent; check before
    ' dereferencing so a stripped-down page does not raise error 91.
    Set addressNode = Html.querySelector("h1.homeAddress > .street-address")
    If addressNode Is Nothing Then
        Debug.Print "Street address: (not found; HTTP status " & httpStatus & ")"
    Else
        Debug.Print "Street address: " & addressNode.innerText
    End If

    With CreateObject("VBScript.RegExp")
        .Global = True
        .Pattern = "reactServerState\.InitialContext = (.*);"
        .MultiLine = True
        Set jsonStr = .Execute(sResp)
    End With

    ' No match means the embedded state is missing from this response.
    If jsonStr.Count = 0 Then
        Debug.Print "Builder Name: (embedded page state not found; HTTP status " & httpStatus & ")"
        Exit Sub
    End If

    ' Any missing key along the JSON path lands in the handler below, which
    ' reports the incomplete payload explicitly.
    On Error GoTo IncompleteData
    itemStr = jsonStr(0).SubMatches(0)
    Set jsonObject = JsonConverter.ParseJson(itemStr)
    Set propertyMain = jsonObject("ReactServerAgent.cache")("dataCache")("/stingray/api/home/details/mainHouseInfoPanelInfo")("res")
    ' The inner payload is prefixed with "{}&&", which must be stripped
    ' before it can be parsed as JSON.
    propertyMainRaw = Replace(propertyMain("text"), "{}&&", "")
    Set propertyContainer = JsonConverter.ParseJson(propertyMainRaw)("payload")("mainHouseInfo")("amenitiesInfo")("superGroups")
    On Error GoTo 0

    If propertyContainer Is Nothing Then
        Debug.Print "Builder Name: (amenities data missing from response)"
        Exit Sub
    End If

    For Each oElem In propertyContainer
        For Each oPost In oElem("amenityGroups")
            If InStr(oPost("groupTitle"), "Building Information") > 0 Then
                For Each oData In oPost("amenityEntries")
                    If InStr(oData("amenityName"), "Builder Name") > 0 Then
                        ' Index 1 is the first entry of amenityValues.
                        Debug.Print "Builder Name: " & oData("amenityValues")(1)
                    End If
                Next oData
            End If
        Next oPost
    Next oElem
    Exit Sub

IncompleteData:
    Debug.Print "Builder Name: (not available; response incomplete - " & Err.Description & ")"
End Sub
使用 xmlhttp 请求,我总是得到:
Street address: 604 Amesbury Ln #2,
Builder Name: Zach Savage
使用 serverxmlhttp 请求,大多数时候我得到以下结果:
Street address: 604 Amesbury Ln #2,
How can I get complete response using serverxmlhttp requests?
编辑:
根据答案和评论,很明显,如果我先用 xmlhttp 请求从该站点抓取 browserid,然后在用 serverxmlhttp 发送请求时把该 browserid 的值作为 cookie,我就会得到想要的结果。但问题是,我用 xmlhttp 请求得到的 browserid 的值是 sz9u0xmCQKKV9Wu0jRa3Yg,而我在页面源代码中看到的值却是 v-J5D2IUSyqXizI7MG67fQ。我怎样才能得到后者的值?下面是我解析 browserid 的方式:
' Downloads the property page with MSXML2.XMLHTTP (synchronous GET) and
' prints to the Immediate window the value assigned to window.__rfBrowserId
' in the returned page source.
'
' If the marker is not present in the response, a diagnostic line with the
' HTTP status is printed instead of raising a run-time error on an empty
' match collection.
Sub FetchBrowserId()
    Const siteLink$ = "https://www.redfin.com/TX/Austin/604-Amesbury-Ln-78752/unit-2/home/171045975"
    Dim Rxp As Object, browserId As Object, sRes$, cookie$
    Dim httpStatus As Long

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", siteLink, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36"
        .send
        httpStatus = .Status
        sRes = .responseText
    End With

    Set Rxp = CreateObject("VBScript.RegExp")
    With Rxp
        .Global = True
        ' Lazy capture: stop at the first closing quote followed by ";".
        .Pattern = "window.__rfBrowserId=""(.*?)"";"
        .MultiLine = True
        Set browserId = .Execute(sRes)
    End With

    ' Guard against an empty match collection before indexing into it.
    If browserId.Count = 0 Then
        Debug.Print "browserId not found in response (HTTP status " & httpStatus & ")"
        Exit Sub
    End If

    cookie = browserId(0).SubMatches(0)
    Debug.Print cookie
End Sub
所以我想真正的问题是为什么 MSXML2.XMLHTTP
和 MSXML2.ServerXMLHTTP
对相同的 URL 请求返回不同的响应。
MSXML2.XMLHTTP
使用 WinINet 堆栈,MSXML2.ServerXMLHTTP
使用 WinHTTP 堆栈。查看 WinINet vs. WinHTTP 文章了解更多详情。
WinINet 提供完整的 cookie 处理(顺便说一句,IE 也依赖它)。因此,您得到不同响应的第一个原因是:发送到服务器的 cookie 可能会影响服务器的处理流程。借助任何请求记录服务(例如 Webhook.site)都可以很容易地进行比较:当您使用 MSXML2.XMLHTTP 发出第二个请求时,该网络服务会记录下从第一个响应中接受的 cookie。
还要考虑 SSL 条件。通过 MSXML2.XMLHTTP
和 MSXML2.ServerXMLHTTP
向 How's My SSL? 发出请求,然后在浏览器(例如 Chrome)中打开同一链接并比较结果。