完成后无法让我的 do 循环停止
Can't make my do loop stop when it is done
我用 VBA 编写了一个脚本,它可以非常顺利地从某个站点抓取数据。这次我想换一种不太常规的方式来做同样的事情,但脚本中的 Do 循环一直不会停止。我需要在这里加入一个判断逻辑,使得当页码 "y" 已经没有对应的有效页面时循环就停止。这里 y 的值是数字(页码)。我当然可以用 For 循环来完成,但我想用这种方式试一试:能否在事先不知道总共有多少页的情况下运行我的爬虫。提前致谢。
Sub Aoty_Data()
' Question version: fetches the ratings pages one after another and writes the
' two "-"-separated parts of each row's link text into columns 1 and 2 of the
' active sheet. As posted, the loop has no working stop condition (see the
' Loop Until line below); this listing is kept as the asker wrote it.
Dim http As New XMLHTTP60
Dim html As New HTMLDocument, topic As HTMLHtmlElement
' y is the page number appended to the URL; x (never initialised here) is the
' worksheet row and is incremented once per scraped row.
y = 1
Do
With http
' Third argument False makes the request synchronous, so .responseText is
' complete when .send returns.
.Open "GET", "http://www.albumoftheyear.org/ratings/6-highest-rated/2000/" & y, False
.send
html.body.innerHTML = .responseText
End With
For Each topic In html.getElementsByClassName("albumListRow")
x = x + 1
' Column 1: text before the first "-" in the row's first link.
With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
If .Length Then Cells(x, 1) = Split(.Item(0).innerText, "-")(0)
End With
' Column 2: text between the first and second "-". This repeats the same
' element lookup as the With block above.
With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
If .Length Then Cells(x, 2) = Split(.Item(0).innerText, "-")(1)
End With
Next topic
y = y + 1
' y is only ever assigned numbers in this Sub (1, then incremented), so it
' never holds an empty string; the asker reports that the loop never stops.
Loop Until y = "" 'I used y="" cause the editor did not let me leave it blank.
End Sub
你自己想出解决方案了吗?答案其实藏在另一个问题里:当 GET 请求的页码超过有效页数时,这个 "certain site" 会如何响应……?看起来它会返回第一页的结果。请注意,我几乎没有改动你现有的代码,只是稍微重构了一下,并添加了一个检测,用来判断第一个 artist/album 是否被第二次返回;一旦再次出现,就说明已经翻过了最后一页,循环即可退出。
Sub Aoty_Data()
    ' Scrapes the "highest rated 2000" listing page by page and writes the two
    ' "-"-separated parts of each row's link text into columns 1 and 2 of the
    ' active sheet, one worksheet row per "albumListRow" element.
    '
    ' The number of pages is not known in advance. The loop stops when either
    '   * the first entry of page 1 shows up again (the site appears to serve
    '     page 1 once the page number exceeds the last valid page), or
    '   * a page contains no "albumListRow" elements at all.
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument, topic As HTMLHtmlElement
    Dim x As Long, y As Long            ' x = worksheet row, y = page number
    Dim numberOneAlbumForYear As String ' columns 1 and 2 of the first row on page 1
    Dim parts() As String
    Dim rowsOnPage As Long

    y = 1
    numberOneAlbumForYear = ""
    Do
        ' False = synchronous request: responseText is complete after send.
        http.Open "GET", "http://www.albumoftheyear.org/ratings/6-highest-rated/2000/" & y, False
        http.send
        html.body.innerHTML = http.responseText

        rowsOnPage = 0
        For Each topic In html.getElementsByClassName("albumListRow")
            rowsOnPage = rowsOnPage + 1
            x = x + 1
            With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
                ' Touch .Item(0) only after confirming a link exists; printing
                ' before this check fails on a row without an <a> element.
                If .Length Then
                    Debug.Print .Item(0).innerText
                    parts = Split(.Item(0).innerText, "-")
                    ' Split returns an empty array for "" and a single element
                    ' when there is no "-", so check the bound before indexing.
                    If UBound(parts) >= 0 Then Cells(x, 1) = parts(0)
                    If UBound(parts) >= 1 Then Cells(x, 2) = parts(1)
                End If
            End With

            If y = 1 And numberOneAlbumForYear = "" Then
                ' Remember the very first entry as the wrap-around marker.
                numberOneAlbumForYear = Cells(x, 1) & Cells(x, 2)
            ElseIf (Cells(x, 1) & Cells(x, 2)) = numberOneAlbumForYear Then
                ' First entry seen again: drop the duplicate row and finish.
                Rows(x).ClearContents
                Exit Do
            End If
        Next topic

        ' An empty page never reaches the duplicate check above, so without
        ' this guard the loop would request pages forever.
        If rowsOnPage = 0 Then Exit Do

        y = y + 1
    Loop
End Sub
我用 VBA 编写了一个脚本,它可以非常顺利地从某个站点抓取数据。这次我想换一种不太常规的方式来做同样的事情,但脚本中的 Do 循环一直不会停止。我需要在这里加入一个判断逻辑,使得当页码 "y" 已经没有对应的有效页面时循环就停止。这里 y 的值是数字(页码)。我当然可以用 For 循环来完成,但我想用这种方式试一试:能否在事先不知道总共有多少页的情况下运行我的爬虫。提前致谢。
Sub Aoty_Data()
' Question version: fetches the ratings pages one after another and writes the
' two "-"-separated parts of each row's link text into columns 1 and 2 of the
' active sheet. As posted, the loop has no working stop condition (see the
' Loop Until line below); this listing is kept as the asker wrote it.
Dim http As New XMLHTTP60
Dim html As New HTMLDocument, topic As HTMLHtmlElement
' y is the page number appended to the URL; x (never initialised here) is the
' worksheet row and is incremented once per scraped row.
y = 1
Do
With http
' Third argument False makes the request synchronous, so .responseText is
' complete when .send returns.
.Open "GET", "http://www.albumoftheyear.org/ratings/6-highest-rated/2000/" & y, False
.send
html.body.innerHTML = .responseText
End With
For Each topic In html.getElementsByClassName("albumListRow")
x = x + 1
' Column 1: text before the first "-" in the row's first link.
With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
If .Length Then Cells(x, 1) = Split(.Item(0).innerText, "-")(0)
End With
' Column 2: text between the first and second "-". This repeats the same
' element lookup as the With block above.
With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
If .Length Then Cells(x, 2) = Split(.Item(0).innerText, "-")(1)
End With
Next topic
y = y + 1
' y is only ever assigned numbers in this Sub (1, then incremented), so it
' never holds an empty string; the asker reports that the loop never stops.
Loop Until y = "" 'I used y="" cause the editor did not let me leave it blank.
End Sub
你自己想出解决方案了吗?答案其实藏在另一个问题里:当 GET 请求的页码超过有效页数时,这个 "certain site" 会如何响应……?看起来它会返回第一页的结果。请注意,我几乎没有改动你现有的代码,只是稍微重构了一下,并添加了一个检测,用来判断第一个 artist/album 是否被第二次返回;一旦再次出现,就说明已经翻过了最后一页,循环即可退出。
Sub Aoty_Data()
    ' Scrapes the "highest rated 2000" listing page by page and writes the two
    ' "-"-separated parts of each row's link text into columns 1 and 2 of the
    ' active sheet, one worksheet row per "albumListRow" element.
    '
    ' The number of pages is not known in advance. The loop stops when either
    '   * the first entry of page 1 shows up again (the site appears to serve
    '     page 1 once the page number exceeds the last valid page), or
    '   * a page contains no "albumListRow" elements at all.
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument, topic As HTMLHtmlElement
    Dim x As Long, y As Long            ' x = worksheet row, y = page number
    Dim numberOneAlbumForYear As String ' columns 1 and 2 of the first row on page 1
    Dim parts() As String
    Dim rowsOnPage As Long

    y = 1
    numberOneAlbumForYear = ""
    Do
        ' False = synchronous request: responseText is complete after send.
        http.Open "GET", "http://www.albumoftheyear.org/ratings/6-highest-rated/2000/" & y, False
        http.send
        html.body.innerHTML = http.responseText

        rowsOnPage = 0
        For Each topic In html.getElementsByClassName("albumListRow")
            rowsOnPage = rowsOnPage + 1
            x = x + 1
            With topic.getElementsByClassName("listLargeTitle")(0).getElementsByTagName("a")
                ' Touch .Item(0) only after confirming a link exists; printing
                ' before this check fails on a row without an <a> element.
                If .Length Then
                    Debug.Print .Item(0).innerText
                    parts = Split(.Item(0).innerText, "-")
                    ' Split returns an empty array for "" and a single element
                    ' when there is no "-", so check the bound before indexing.
                    If UBound(parts) >= 0 Then Cells(x, 1) = parts(0)
                    If UBound(parts) >= 1 Then Cells(x, 2) = parts(1)
                End If
            End With

            If y = 1 And numberOneAlbumForYear = "" Then
                ' Remember the very first entry as the wrap-around marker.
                numberOneAlbumForYear = Cells(x, 1) & Cells(x, 2)
            ElseIf (Cells(x, 1) & Cells(x, 2)) = numberOneAlbumForYear Then
                ' First entry seen again: drop the duplicate row and finish.
                Rows(x).ClearContents
                Exit Do
            End If
        Next topic

        ' An empty page never reaches the duplicate check above, so without
        ' this guard the loop would request pages forever.
        If rowsOnPage = 0 Then Exit Do

        y = y + 1
    Loop
End Sub