如何正确地遍历 range,而不让之前的结果被重复输出
How do I loop over a range properly without repeating earlier results?
如何让这个 for 循环在继续使用 range 的同时,不再重复输出列表中之前已有的内容?现在循环每前进到下一个数字,都会把之前的结果重新输出一遍:不是一次性输出 0 到 20,而是依次输出 0-1、0-2、0-3、0-4……以此类推。我希望它只把 0 到 20 完整输出一次,而不是不断重复自己。
import time
from selenium import webdriver
import selenium
from selenium.webdriver.chrome import service
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
#class scraperdata():
# Start Chrome through the chromedriver binary at the given local path.
ser = Service("C:\Program Files (x86)\chromedriver.exe")
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(service=ser, options=options)

driver.get('https://soundcloud.com/jujubucks')
print(driver.title)

# Wait up to 30 seconds for the element with this id to become clickable,
# then click it.
wait = WebDriverWait(driver, 30)
wait.until(
    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
).click()

try:
    song_list = []
    i = 1  # position used in the XPath predicate below, starting at 1
    for _ in range(20):
        song_contents = driver.find_element(
            By.XPATH, f"//li[@class='soundList__item'][{i}]"
        )
        # Scroll the list item into view before reading its fields.
        driver.execute_script(
            "arguments[0].scrollIntoView(true);", song_contents
        )
        artist = song_contents.find_element(
            By.XPATH, ".//a[contains(@class,'soundTitle__username')]/span"
        ).text
        title = song_contents.find_element(
            By.XPATH, ".//a[contains(@class,'soundTitle__title')]/span"
        ).text
        posted = song_contents.find_element(
            By.XPATH, ".//time[contains(@class,'relativeTime')]/span"
        ).text
        plays = song_contents.find_element(
            By.XPATH, ".//span[contains(@class,'sc-ministats-small')]/span"
        ).text
        i += 1
        # `_` is an int produced by range(20), so it never compares equal to
        # the Exception class and this break is never taken.
        if _ == Exception:
            break
        option = {
            'Artist': artist,
            'Song_title': title,
            'Date': posted,
            'Streams': plays,
        }
        song_list.append(option)
        # The DataFrame is rebuilt and printed on every pass, so each print
        # shows all rows collected so far (the cumulative output shown below).
        df = pd.DataFrame(song_list)
        print(df)
except Exception:
    # Any error raised above ends the scrape silently; the browser is still
    # closed by the call below.
    pass
driver.quit()
输出
Stream Juju Bucks music | Listen to songs, albums, playlists for free on SoundCloud
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
2 Juju Bucks Party Ka Mngani Ft. X-Poll Posted 1 year ago 72 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
2 Juju Bucks Party Ka Mngani Ft. X-Poll Posted 1 year ago 72 plays
3 Juju Bucks Joy Ft. Black Sushi & Gavin Bowden Posted 1 year ago 122 plays
for 循环的 range 本身没有问题。问题在于:每次迭代都会向 song_list 追加一个新条目,而 song_list 是在循环之外定义的,所以每次打印出来的都是累积到目前为止的全部内容。把 song_list = [] 移到循环内部,打印语句就会按您想要的方式工作。
但是,当循环结束时,您将不再跟踪所有歌曲。您可能根本不想在循环内打印。在循环外打印一次。
您应该把创建 DataFrame 的赋值语句(以及打印语句)移到 for 循环之外:
for _ in range(20):
    ...  # locate the list item and build `option` as in the original loop body
    song_list.append(option)

# Build and print the DataFrame once, after the loop has collected every row,
# instead of rebuilding and printing it on each iteration.
df = pd.DataFrame(song_list)
print(df)
如何让这个 for 循环在继续使用 range 的同时,不再重复输出列表中之前已有的内容?现在循环每前进到下一个数字,都会把之前的结果重新输出一遍:不是一次性输出 0 到 20,而是依次输出 0-1、0-2、0-3、0-4……以此类推。我希望它只把 0 到 20 完整输出一次,而不是不断重复自己。
import time
from selenium import webdriver
import selenium
from selenium.webdriver.chrome import service
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
#class scraperdata():
# Start Chrome through the chromedriver binary at the given local path.
ser = Service("C:\Program Files (x86)\chromedriver.exe")
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(service=ser, options=options)

driver.get('https://soundcloud.com/jujubucks')
print(driver.title)

# Wait up to 30 seconds for the element with this id to become clickable,
# then click it.
wait = WebDriverWait(driver, 30)
wait.until(
    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
).click()

try:
    song_list = []
    i = 1  # position used in the XPath predicate below, starting at 1
    for _ in range(20):
        song_contents = driver.find_element(
            By.XPATH, f"//li[@class='soundList__item'][{i}]"
        )
        # Scroll the list item into view before reading its fields.
        driver.execute_script(
            "arguments[0].scrollIntoView(true);", song_contents
        )
        artist = song_contents.find_element(
            By.XPATH, ".//a[contains(@class,'soundTitle__username')]/span"
        ).text
        title = song_contents.find_element(
            By.XPATH, ".//a[contains(@class,'soundTitle__title')]/span"
        ).text
        posted = song_contents.find_element(
            By.XPATH, ".//time[contains(@class,'relativeTime')]/span"
        ).text
        plays = song_contents.find_element(
            By.XPATH, ".//span[contains(@class,'sc-ministats-small')]/span"
        ).text
        i += 1
        # `_` is an int produced by range(20), so it never compares equal to
        # the Exception class and this break is never taken.
        if _ == Exception:
            break
        option = {
            'Artist': artist,
            'Song_title': title,
            'Date': posted,
            'Streams': plays,
        }
        song_list.append(option)
        # The DataFrame is rebuilt and printed on every pass, so each print
        # shows all rows collected so far (the cumulative output shown below).
        df = pd.DataFrame(song_list)
        print(df)
except Exception:
    # Any error raised above ends the scrape silently; the browser is still
    # closed by the call below.
    pass
driver.quit()
输出
Stream Juju Bucks music | Listen to songs, albums, playlists for free on SoundCloud
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
2 Juju Bucks Party Ka Mngani Ft. X-Poll Posted 1 year ago 72 plays
Artist Song_title Date Streams
0 Juju Bucks Squad Too Deep Ft. Cool Prince (Outro) Posted 1 year ago 31 plays
1 Juju Bucks Tropikana ft. P-Dogg Amazing Posted 1 year ago 48 plays
2 Juju Bucks Party Ka Mngani Ft. X-Poll Posted 1 year ago 72 plays
3 Juju Bucks Joy Ft. Black Sushi & Gavin Bowden Posted 1 year ago 122 plays
for 循环的 range 本身没有问题。问题在于:每次迭代都会向 song_list 追加一个新条目,而 song_list 是在循环之外定义的,所以每次打印出来的都是累积到目前为止的全部内容。把 song_list = [] 移到循环内部,打印语句就会按您想要的方式工作。
但是,当循环结束时,您将不再跟踪所有歌曲。您可能根本不想在循环内打印。在循环外打印一次。
您应该把创建 DataFrame 的赋值语句(以及打印语句)移到 for 循环之外:
for _ in range(20):
    ...  # locate the list item and build `option` as in the original loop body
    song_list.append(option)

# Build and print the DataFrame once, after the loop has collected every row,
# instead of rebuilding and printing it on each iteration.
df = pd.DataFrame(song_list)
print(df)