Ebay Scraper,第一行缺少日期,然后是每个循环

Ebay Scraper, missing date for first line and then evey loop

我的 eBAY Scraper 有问题,无法找出原因。虽然它可以很好地提取数据,但它会遗漏第一行的一些数据,然后是每个循环的第一行,因此数据不在正确的行中。

Q) 为什么在开始和之后的每个循环中都缺少数据?

我认为这可能与标题提取速度比其他项目慢有关,但是我无法解决这个问题,因为我对 vba 非常有限。我附上了一个演示,供您查看。

我不是在寻找完全重写的代码,只是指向正确的方向或对我的代码进行轻微更改。正如我所说,我在 vba 方面非常有限,我可以理解我的代码,任何更高级的东西都超出了我的理解范围。

演示下载 - Download Excel File

网站 - Ebay.co.uk

Ebay 产品页面 - Prodcts Shown may vary browser to browser

我对它进行了颜色编码,以便您看得更清楚

这就是它在做的事情

什么时候应该这样

出于某种原因,它错过了开始时第一个项目的 价格、状况、原价和折扣 以及 EVERY Loop。对于错过项目的每个循环,价格、条件、原价和折扣变得更加不合时宜

第一个循环 - 项目现在 2 行越界

第二个循环 - 项目现在 3 行越界

因为我搜索了 3 页(2 页 + 额外 1 页)并且它循环了 3 次,它错过了每个循环的第一行。我排在第三排。我认为这可能与项目的 Title 有关,因为它的提取速度比其余项目

慢一点

提取结束

这是我的代码

Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet

    Set wb = ThisWorkbook
    Set wsSheet = wb.Sheets("Sheet1")
    Set sht = ThisWorkbook.Worksheets("Sheet1")

'+++++ Internet Explorer ++++++
    Set objIE = New InternetExplorer
        objIE.Visible = False
        objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page

    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
    Set ie = CreateObject("InternetExplorer.Application")

'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################

    '  Application.ScreenUpdating = False
        Set HTML = objIE.document
        Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
        'FOR LOOP
        For Each element In elements
    
''' Element 1
        If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__link")(0).href
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
        End If
''' Element 2
        If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__link")(0).innerText  'src
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
        End If
''' Element 3
        If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
        End If
''' Element 4
        If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
        End If
''' Element 5
        If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
        End If
''' Element 6
        If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
        End If
   ' Application.ScreenUpdating = True
    Next element

    Do
    'Searches Number of Pages entered in
    If pageNumber >= Replace(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
        Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
    If nextPageElement Is Nothing Then Exit Do
        objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
        nextPageElement.Click 'next web page
    Do While objIE.Busy = True Or objIE.readyState <> 4
   
    Loop
        Set HTML = objIE.document
        pageNumber = pageNumber + 1

'##################################### Restart Loop ##################################
 GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################

     Loop

        objIE.Quit ' end and clear browser
            Set objIE = Nothing
            Set HTML = Nothing
            Set nextPageElement = Nothing
            Set HtmlText = Nothing
            Set element = Nothing

End Sub

一如既往地提前致谢。

确保跳过返回集合中的第一个元素。遵守您的代码。

Private Sub CommandButton1_Click()

    Dim HTML As HTMLDocument
    Dim objIE As Object
    Dim result As String
    Dim pageNumber As Long                       ' page no.
    Dim nextPageElement As Object                'page element
    Dim HtmlText As Variant
    Dim wsSheet As Worksheet
    Dim wb As Workbook
    Dim sht As Worksheet

    Set wb = ThisWorkbook
    Set wsSheet = wb.Sheets("Sheet1")
    Set sht = ThisWorkbook.Worksheets("Sheet1")

    '+++++ Internet Explorer ++++++
    Set objIE = New InternetExplorer
    objIE.Visible = True
    objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page

    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
    'Set ie = CreateObject("InternetExplorer.Application")

    '######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart:                            'Restart the code HERE, this is the key part
    '######################################## RESTART CODE FROM HERE ####################################

    '  Application.ScreenUpdating = False
    Set HTML = objIE.document
    Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
    'FOR LOOP
    Dim counter As Long
    counter = 0
    
    For Each element In elements
        If counter > 0 Then
        ''' Element 1
        If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__link")(0).href
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
        End If
        ''' Element 2
        If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
        End If
        ''' Element 3
        If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
        End If
        ''' Element 4
        If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
        End If
        ''' Element 5
        If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
        End If
        ''' Element 6
        If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
        Else
            HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
            wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
        End If
        ' Application.ScreenUpdating = True
        End If
        counter = counter + 1
    Next element
    
    Do
        'Searches Number of Pages entered in
        If pageNumber >= Replace$(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
        
        Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
        
        If nextPageElement Is Nothing Then Exit Do
        
        objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
        nextPageElement.Click                    'next web page
        
        Do While objIE.Busy = True Or objIE.readyState <> 4
        DoEvents
        Loop
        
        Set HTML = objIE.document
        
        pageNumber = pageNumber + 1
        counter = 0
        '##################################### Restart Loop ##################################
        GoTo StartForLoop_Restart                ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
        '##################################### Restart Loop ##################################

    Loop

    objIE.Quit                                   ' end and clear browser
    Set objIE = Nothing
    Set HTML = Nothing
    Set nextPageElement = Nothing
    Set HtmlText = Nothing
    Set element = Nothing

End Sub