VBA 网页抓取更新
VBA web scraping update
我有以下代码:
- 打开网页(本例中为亚马逊)
- 单击页面上显示的所有产品(并在新选项卡中打开每个产品)
- 浏览每个打开的选项卡(从第 2 步开始),复制 "product title" 并将其粘贴到 A 列
你能帮我更新代码以包含一个循环吗:
- 遍历每个打开的选项卡(从第 2 步开始)并复制价格元素并将其粘贴到对应于产品标题的 B 列中
HTML 元素是因为价格是“649”
Sub launch_product()
Dim IE As SHDocVw.InternetExplorer
Dim idoc As MSHTML.HTMLDocument
Dim doc_ele As MSHTML.IHTMLElement
Dim doc_eles As MSHTML.IHTMLElementCollection
Dim startoftitle As Integer, endoftitle As Integer, rownum As Long
Dim vouterHTML As String, ProductTitle As String
Set IE = New SHDocVw.InternetExplorer
IE.Visible = True
IE.Navigate "https://www.amazon.in/s?k=rudra+fashion&rh=p_n_size_two_browse-vebin%3A1975333031&dc&crid=2AKWK100N33Q9&qid=1574534623&rnid=1974754031&sprefix=rudra+fas%2Caps%2C287&ref=sr_nr_p_n_size_two_browse-vebin_8"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading"
Loop
Set idoc = IE.Document
Set doc_eles = idoc.getElementsByTagName("img")
rownum = 1
For Each doc_ele In doc_eles
If doc_ele.className = "s-image" Then
doc_ele.Click
vouterHTML = doc_ele.outerHTML
startoftitle = InStr(1, vouterHTML, "alt=") + 5
endoftitle = InStr(startoftitle, vouterHTML, """") - 1
ProductTitle = Mid(vouterHTML, startoftitle, endoftitle - startoftitle + 1)
ActiveSheet.Cells(rownum, 1).Value = ProductTitle
rownum = rownum + 1
End If
Next doc_ele
ActiveSheet.Columns(1).EntireColumn.AutoFit
IE.Quit
结束子
我会从与 class 和价格匹配的节点的 alt 属性中获取标题,假设你想要当前的,来自两个 class 名称匹配节点之一。您不需要浏览器,因为内容是响应速度更快的简单 xmlhttp 请求而呈现的。
由于所有价格节点都没有出现卢比符号,因此我将其删除。
Option Explicit
Public Sub WriteOutProductInfo()
'VBE>Tools>References> Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.amazon.in/s?k=kuki+fashion&rh=p_72%3A1318476031&dc&qid=1574617862&rnid=1318475031&ref=sr_nr_p_72_1", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
Dim headers(), titles As Object, prices As Object
headers = Array("Title", "Price", "Img url")
With html
Set titles = .querySelectorAll(".s-image")
Set prices = .querySelectorAll(".a-price-whole,.a-color-price")
End With
Dim results(), r As Long, priceInfo As String
ReDim results(1 To titles.Length, 1 To UBound(headers) + 1)
For r = 0 To titles.Length - 1
results(r + 1, 1) = titles.Item(r).alt
results(r + 1, 2) = Replace$(prices.Item(r).innerText, ChrW(8377), vbNullString)
results(r + 1, 3) = titles.Item(r).src
Next
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
我有以下代码:
- 打开网页(本例中为亚马逊)
- 单击页面上显示的所有产品(并在新选项卡中打开每个产品)
- 浏览每个打开的选项卡(从第 2 步开始),复制 "product title" 并将其粘贴到 A 列
你能帮我更新代码以包含一个循环吗:
- 遍历每个打开的选项卡(从第 2 步开始)并复制价格元素并将其粘贴到对应于产品标题的 B 列中
HTML 元素是因为价格是“649”
Sub launch_product()
Dim IE As SHDocVw.InternetExplorer
Dim idoc As MSHTML.HTMLDocument
Dim doc_ele As MSHTML.IHTMLElement
Dim doc_eles As MSHTML.IHTMLElementCollection
Dim startoftitle As Integer, endoftitle As Integer, rownum As Long
Dim vouterHTML As String, ProductTitle As String
Set IE = New SHDocVw.InternetExplorer
IE.Visible = True
IE.Navigate "https://www.amazon.in/s?k=rudra+fashion&rh=p_n_size_two_browse-vebin%3A1975333031&dc&crid=2AKWK100N33Q9&qid=1574534623&rnid=1974754031&sprefix=rudra+fas%2Caps%2C287&ref=sr_nr_p_n_size_two_browse-vebin_8"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading"
Loop
Set idoc = IE.Document
Set doc_eles = idoc.getElementsByTagName("img")
rownum = 1
For Each doc_ele In doc_eles
If doc_ele.className = "s-image" Then
doc_ele.Click
vouterHTML = doc_ele.outerHTML
startoftitle = InStr(1, vouterHTML, "alt=") + 5
endoftitle = InStr(startoftitle, vouterHTML, """") - 1
ProductTitle = Mid(vouterHTML, startoftitle, endoftitle - startoftitle + 1)
ActiveSheet.Cells(rownum, 1).Value = ProductTitle
rownum = rownum + 1
End If
Next doc_ele
ActiveSheet.Columns(1).EntireColumn.AutoFit
IE.Quit
结束子
我会从与 class 和价格匹配的节点的 alt 属性中获取标题,假设你想要当前的,来自两个 class 名称匹配节点之一。您不需要浏览器,因为内容是响应速度更快的简单 xmlhttp 请求而呈现的。
由于所有价格节点都没有出现卢比符号,因此我将其删除。
Option Explicit
Public Sub WriteOutProductInfo()
'VBE>Tools>References> Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.amazon.in/s?k=kuki+fashion&rh=p_72%3A1318476031&dc&qid=1574617862&rnid=1318475031&ref=sr_nr_p_72_1", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
Dim headers(), titles As Object, prices As Object
headers = Array("Title", "Price", "Img url")
With html
Set titles = .querySelectorAll(".s-image")
Set prices = .querySelectorAll(".a-price-whole,.a-color-price")
End With
Dim results(), r As Long, priceInfo As String
ReDim results(1 To titles.Length, 1 To UBound(headers) + 1)
For r = 0 To titles.Length - 1
results(r + 1, 1) = titles.Item(r).alt
results(r + 1, 2) = Replace$(prices.Item(r).innerText, ChrW(8377), vbNullString)
results(r + 1, 3) = titles.Item(r).src
Next
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub