VBA - WebScraping:在多个元素类名相同的情况下按类名获取元素

VBA - WebScraping Get elements for classname for equal classNames

我想为每个玩家提取两个值(数字和位置),这两个值所在的元素具有相同的 class 名称 "text"。我目前无法为每个玩家选取到这两个正确的值。

我的问题是,目前我实际上只能在“HTMLnumbers”和“HTMLposition”中得到第一个和第二个值。否则,如果我选取 class 为 "text" 的所有元素,第一个玩家得到的是数字值,第二个玩家得到的却是位置值。这也不对。

Option Explicit

'Loads the page in a hidden Internet Explorer instance and prints, for the
'FIRST element with class "playerRow" only, the text of its first two
'descendants with class "text" (number, then position) to the Immediate window.
'Each value falls back to "no_value" when the corresponding element is missing.
'
'Uses early-bound SHDocVw and MSHTML types, i.e. the project needs references
'to "Microsoft Internet Controls" and "Microsoft HTML Object Library".
Sub erweiterteWerte()

    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLdoc As MSHTML.HTMLDocument
    Dim HTMLplayerRow As MSHTML.IHTMLElementCollection

    Dim HTMLnumbers As Object
    Dim HTMLposition As Object  'never assigned in this routine; both values are read from HTMLnumbers

    Dim numbers As String
    Dim position As String

    'Start from the fallbacks so that every "element missing" path prints a
    'defined value instead of an empty string or raising a runtime error.
    numbers = "no_value"
    position = "no_value"

    IE.Visible = False
    IE.Navigate "https://examplexyz.de"

    'Yield to the host while waiting; an empty loop body pins the CPU and
    'freezes the application until the page has finished loading.
    Do While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    'Extra fixed delay kept from the original so late-rendered content can appear.
    Application.Wait (Now + TimeValue("0:00:7"))

    Set HTMLdoc = IE.Document

    Set HTMLplayerRow = HTMLdoc.getElementsByClassName("playerRow")

    'getElementsByClassName returns an empty collection, not Nothing, when
    'there is no match, so emptiness has to be tested through .Length.
    If HTMLplayerRow.Length > 0 Then
        Set HTMLnumbers = HTMLplayerRow(0).getElementsByClassName("text")

        If HTMLnumbers.Length > 0 Then numbers = HTMLnumbers.Item(0).innerText
        If HTMLnumbers.Length > 1 Then position = HTMLnumbers.Item(1).innerText
    End If

    Debug.Print numbers
    Debug.Print position

    IE.Quit

End Sub

未经测试,但为了说明基本方法:

'Loads the transfer-market page in a hidden Internet Explorer instance and,
'for every element with class "playerRow", prints to the Immediate window:
'  - a separator line,
'  - the player's first and last name (via classText), and
'  - the text of each descendant element with class "badge".
'
'The browser is always closed, also when a runtime error occurs; the original
'error is then re-raised so the caller still sees it.
'
'Uses early-bound SHDocVw and MSHTML types, i.e. the project needs references
'to "Microsoft Internet Controls" and "Microsoft HTML Object Library".
Sub erweiterteWerte()

    Dim IE As SHDocVw.InternetExplorer
    Dim HTMLdoc As MSHTML.HTMLDocument
    Dim playerRows As MSHTML.IHTMLElementCollection
    Dim playerBadges As MSHTML.IHTMLElementCollection
    Dim player As Object, badge As Object
    Dim errNumber As Long, errSource As String, errDescription As String
    
    On Error GoTo CleanFail
    
    Set IE = New SHDocVw.InternetExplorer
    IE.Visible = False
    IE.Navigate "https://play.kickbase.com/transfermarkt/kaufen"
    
    'Yield to the host while waiting; an empty loop body pins the CPU and
    'freezes the application until the page has finished loading.
    Do While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
    'Extra fixed delay so late-rendered content can appear.
    Application.Wait (Now + TimeValue("0:00:07"))
    
    Set HTMLdoc = IE.Document
    Set playerRows = HTMLdoc.getElementsByClassName("playerRow")
    
    For Each player In playerRows
        
        Debug.Print "---------------"
        Debug.Print classText(player, "firstName") & " " & classText(player, "lastName")
        
        'Scope the lookup to this row so each player only gets its own badges.
        Set playerBadges = player.getElementsByClassName("badge")
        For Each badge In playerBadges
            Debug.Print badge.innerText
        Next badge
    
    Next player

CleanExit:
    'The window is hidden, so a skipped Quit would leave an orphaned browser
    'process behind. Failures of Quit itself are deliberately ignored.
    On Error Resume Next
    If Not IE Is Nothing Then IE.Quit
    On Error GoTo 0
    
    'On Error statements clear Err, hence the saved copy is used to re-raise.
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription
    Exit Sub

CleanFail:
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Resume CleanExit

End Sub

'Looks up the descendants of `obj` that carry the class `classname` and
'  returns the innerText of the first one.
'  Returns the placeholder "[not found]" when no descendant matches.
'  Only the first match is used; further matches are ignored.
Function classText(obj As Object, classname As String) As String
    Dim matches As Object

    'Assume failure up front; overwritten below once a match is confirmed.
    classText = "[not found]"

    Set matches = obj.getElementsByClassName(classname)

    'A collection length is never negative, so "= 0" is the exact complement
    'of the "> 0" success condition.
    If matches.Length = 0 Then Exit Function

    classText = matches(0).innerText
End Function