Ebay Scraper,第一行缺少日期,然后是每个循环
Ebay Scraper, missing date for first line and then evey loop
我的 eBAY Scraper 有问题,无法找出原因。虽然它可以很好地提取数据,但它会遗漏第一行的一些数据,然后是每个循环的第一行,因此数据不在正确的行中。
Q) 为什么在开始和之后的每个循环中都缺少数据?
我认为这可能与标题提取速度比其他项目慢有关,但是我无法解决这个问题,因为我对 vba 非常有限。我附上了一个演示,供您查看。
我不是在寻找完全重写的代码,只是指向正确的方向或对我的代码进行轻微更改。正如我所说,我在 vba 方面非常有限,我可以理解我的代码,任何更高级的东西都超出了我的理解范围。
演示下载 - Download Excel File
网站 - Ebay.co.uk
Ebay 产品页面 - Prodcts Shown may vary browser to browser
我对它进行了颜色编码,以便您看得更清楚
这就是它在做的事情
什么时候应该这样
出于某种原因,它错过了开始时第一个项目的 价格、状况、原价和折扣 以及 EVERY Loop。对于错过项目的每个循环,价格、条件、原价和折扣变得更加不合时宜
第一个循环 - 项目现在 2 行越界
第二个循环 - 项目现在 3 行越界
因为我搜索了 3 页(2 页 + 额外 1 页)并且它循环了 3 次,它错过了每个循环的第一行。我排在第三排。我认为这可能与项目的 Title 有关,因为它的提取速度比其余项目
慢一点
提取结束
这是我的代码
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = False
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
For Each element In elements
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub
一如既往地提前致谢。
确保跳过返回集合中的第一个元素。遵守您的代码。
Private Sub CommandButton1_Click()
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = True
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
'Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
Dim counter As Long
counter = 0
For Each element In elements
If counter > 0 Then
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
End If
counter = counter + 1
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace$(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
DoEvents
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
counter = 0
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub
我的 eBAY Scraper 有问题,无法找出原因。虽然它可以很好地提取数据,但它会遗漏第一行的一些数据,然后是每个循环的第一行,因此数据不在正确的行中。
Q) 为什么在开始和之后的每个循环中都缺少数据?
我认为这可能与标题提取速度比其他项目慢有关,但是我无法解决这个问题,因为我对 vba 非常有限。我附上了一个演示,供您查看。
我不是在寻找完全重写的代码,只是指向正确的方向或对我的代码进行轻微更改。正如我所说,我在 vba 方面非常有限,我可以理解我的代码,任何更高级的东西都超出了我的理解范围。
演示下载 - Download Excel File
网站 - Ebay.co.uk
Ebay 产品页面 - Prodcts Shown may vary browser to browser
我对它进行了颜色编码,以便您看得更清楚
这就是它在做的事情
什么时候应该这样
出于某种原因,它错过了开始时第一个项目的 价格、状况、原价和折扣 以及 EVERY Loop。对于错过项目的每个循环,价格、条件、原价和折扣变得更加不合时宜
第一个循环 - 项目现在 2 行越界
第二个循环 - 项目现在 3 行越界
因为我搜索了 3 页(2 页 + 额外 1 页)并且它循环了 3 次,它错过了每个循环的第一行。我排在第三排。我认为这可能与项目的 Title 有关,因为它的提取速度比其余项目
慢一点提取结束
这是我的代码
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = False
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
For Each element In elements
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub
一如既往地提前致谢。
确保跳过返回集合中的第一个元素。遵守您的代码。
Private Sub CommandButton1_Click()
Dim HTML As HTMLDocument
Dim objIE As Object
Dim result As String
Dim pageNumber As Long ' page no.
Dim nextPageElement As Object 'page element
Dim HtmlText As Variant
Dim wsSheet As Worksheet
Dim wb As Workbook
Dim sht As Worksheet
Set wb = ThisWorkbook
Set wsSheet = wb.Sheets("Sheet1")
Set sht = ThisWorkbook.Worksheets("Sheet1")
'+++++ Internet Explorer ++++++
Set objIE = New InternetExplorer
objIE.Visible = True
objIE.navigate Sheets("Sheet1").Range("A2").Value & Replace(Worksheets("Sheet1").Range("B2") & Range("C2").Value, " ", "+") 'navigate IE to this web page
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop 'wait here a few seconds while the browser is busy
'Set ie = CreateObject("InternetExplorer.Application")
'######################################## RESTART CODE FROM HERE ####################################
StartForLoop_Restart: 'Restart the code HERE, this is the key part
'######################################## RESTART CODE FROM HERE ####################################
' Application.ScreenUpdating = False
Set HTML = objIE.document
Set elements = HTML.getElementsByClassName("s-item__wrapper clearfix") ' parent CLASS
'FOR LOOP
Dim counter As Long
counter = 0
For Each element In elements
If counter > 0 Then
''' Element 1
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).href
wsSheet.Cells(sht.Cells(sht.Rows.Count, "A").End(xlUp).Row + 1, "A").Value = HtmlText
End If
''' Element 2
If element.getElementsByClassName("s-item__link")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__link")(0).innerText 'src
wsSheet.Cells(sht.Cells(sht.Rows.Count, "B").End(xlUp).Row + 1, "B").Value = HtmlText
End If
''' Element 3
If element.getElementsByClassName("s-item__price")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__price")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "C").End(xlUp).Row + 1, "C").Value = HtmlText
End If
''' Element 4
If element.getElementsByClassName("SECONDARY_INFO")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = "-"
Else
HtmlText = element.getElementsByClassName("SECONDARY_INFO")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "D").End(xlUp).Row + 1, "D").Value = HtmlText
End If
''' Element 5
If element.getElementsByClassName("STRIKETHROUGH")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = "-"
Else
HtmlText = element.getElementsByClassName("STRIKETHROUGH")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "E").End(xlUp).Row + 1, "E").Value = HtmlText
End If
''' Element 6
If element.getElementsByClassName("s-item__discount s-item__discount")(0) Is Nothing Then
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = "-"
Else
HtmlText = element.getElementsByClassName("s-item__discount s-item__discount")(0).innerText
wsSheet.Cells(sht.Cells(sht.Rows.Count, "F").End(xlUp).Row + 1, "F").Value = HtmlText
End If
' Application.ScreenUpdating = True
End If
counter = counter + 1
Next element
Do
'Searches Number of Pages entered in
If pageNumber >= Replace$(Worksheets("Sheet1").Range("d2").Value, "", "+") Then Exit Do
Set nextPageElement = HTML.getElementsByClassName("pagination__next")(0) ' CLICK TO NEXT PAGE
If nextPageElement Is Nothing Then Exit Do
objIE.document.parentWindow.Scroll 0&, 99999 ' Scrolls Down the Browser
nextPageElement.Click 'next web page
Do While objIE.Busy = True Or objIE.readyState <> 4
DoEvents
Loop
Set HTML = objIE.document
pageNumber = pageNumber + 1
counter = 0
'##################################### Restart Loop ##################################
GoTo StartForLoop_Restart ' use GOTo command and label to reinitiate the CODE AS WOULD NOT LOOP TO NEXT PAGE
'##################################### Restart Loop ##################################
Loop
objIE.Quit ' end and clear browser
Set objIE = Nothing
Set HTML = Nothing
Set nextPageElement = Nothing
Set HtmlText = Nothing
Set element = Nothing
End Sub