VBA 网页抓取更新

VBA web scraping update

我有以下代码:

  1. 打开网页(本例中为亚马逊)
  2. 单击页面上显示的所有产品(并在新选项卡中打开每个产品)
  3. 浏览每个打开的选项卡(从第 2 步开始),复制 "product title" 并将其粘贴到 A 列

你能帮我更新代码以包含一个循环吗:

  1. 遍历每个打开的选项卡(从第 2 步开始)并复制价格元素并将其粘贴到对应于产品标题的 B 列中

HTML 元素是因为价格是“649”

Sub launch_product()
Dim IE As SHDocVw.InternetExplorer
Dim idoc As MSHTML.HTMLDocument
Dim doc_ele As MSHTML.IHTMLElement
Dim doc_eles As MSHTML.IHTMLElementCollection

Dim startoftitle As Integer, endoftitle As Integer, rownum As Long
Dim vouterHTML As String, ProductTitle As String

Set IE = New SHDocVw.InternetExplorer
IE.Visible = True
IE.Navigate "https://www.amazon.in/s?k=rudra+fashion&rh=p_n_size_two_browse-vebin%3A1975333031&dc&crid=2AKWK100N33Q9&qid=1574534623&rnid=1974754031&sprefix=rudra+fas%2Caps%2C287&ref=sr_nr_p_n_size_two_browse-vebin_8"

Do While IE.ReadyState <> READYSTATE_COMPLETE
   Application.StatusBar = "Loading"
Loop
Set idoc = IE.Document

Set doc_eles = idoc.getElementsByTagName("img")
rownum = 1

For Each doc_ele In doc_eles
    If doc_ele.className = "s-image" Then
       doc_ele.Click

       vouterHTML = doc_ele.outerHTML
       startoftitle = InStr(1, vouterHTML, "alt=") + 5
       endoftitle = InStr(startoftitle, vouterHTML, """") - 1
       ProductTitle = Mid(vouterHTML, startoftitle, endoftitle - startoftitle + 1)
       ActiveSheet.Cells(rownum, 1).Value = ProductTitle
       rownum = rownum + 1
    End If
Next doc_ele

ActiveSheet.Columns(1).EntireColumn.AutoFit
IE.Quit

结束子

我会从与 class 和价格匹配的节点的 alt 属性中获取标题,假设你想要当前的,来自两个 class 名称匹配节点之一。您不需要浏览器,因为内容是响应速度更快的简单 xmlhttp 请求而呈现的。

由于所有价格节点都没有出现卢比符号,因此我将其删除。

Option Explicit

Public Sub WriteOutProductInfo()
    'VBE>Tools>References> Microsoft HTML Object Library
    Dim html As MSHTML.HTMLDocument

    Set html = New MSHTML.HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.amazon.in/s?k=kuki+fashion&rh=p_72%3A1318476031&dc&qid=1574617862&rnid=1318475031&ref=sr_nr_p_72_1", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
    End With

    Dim headers(), titles As Object, prices As Object
    headers = Array("Title", "Price", "Img url")

    With html
        Set titles = .querySelectorAll(".s-image")
        Set prices = .querySelectorAll(".a-price-whole,.a-color-price")
    End With

    Dim results(), r As Long, priceInfo As String

    ReDim results(1 To titles.Length, 1 To UBound(headers) + 1)

    For r = 0 To titles.Length - 1
        results(r + 1, 1) = titles.Item(r).alt
        results(r + 1, 2) = Replace$(prices.Item(r).innerText, ChrW(8377), vbNullString)
        results(r + 1, 3) = titles.Item(r).src
    Next

    Dim ws As Worksheet

    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub