Selenium 爬虫的问题:IndexError
Problem with Selenium scraper: IndexError
这是我的代码,
它用于从在线仓库中抓取各种数据,并将其导出为 CSV 文件:
class Selenium:
    """Scraper that logs in, scrolls the page to the bottom and writes product rows to a CSV file.

    All of the work is done inside ``__init__``; the helper functions defined
    there are local to it and close over the ``webdriver`` argument.
    """
    # Locator-method reminders and earlier selector experiments, left disabled:
    #find_element_by_name('')
    #find_element_by_xpath('')
    #find_elements_by_class_name('')
    #find_element_by_id('')
    # giacenza = webdriver.find_elements_by_css_selector('.tdLarghezzaArt > .clFascia .tdwithAut > .testoGiac')
    # giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
    # giacenza2 = webdriver.find_elements_by_class_name('testoGiacVal')
    def __init__(self, webdriver, x, y):
        """Run the whole scrape: login, cookie banner, scroll, export, close.

        webdriver: object used for every find_element*/execute_script call
            below (presumably a Selenium WebDriver instance already showing
            the login page -- confirm against the caller).
        x: text typed into the field named "username".
        y: text typed into the field named "password".
        """
        # Start the report from scratch: mode 'w' truncates, then the header row is written.
        with open('File-Results.csv', 'w') as f:
            f.write("Codice ;Prezzo; Giacenza \n")
        # Each helper below re-queries the page every time it is called.
        def testo_prezzo():
            # All elements with class 'testoPrezzo' (used for the price column).
            prezzo = webdriver.find_elements_by_class_name('testoPrezzo')
            return prezzo
        def giacenza():
            # All elements with class 'testoGiac' (compared against "Disponibile" below).
            giacenza = webdriver.find_elements_by_class_name('testoGiac')
            return giacenza
        def giacenza_2():
            # All elements matching '.testoGiacVal' (used when the label is not "Disponibile").
            giacenza2 = webdriver.find_elements_by_css_selector('.testoGiacVal')
            return giacenza2
        # self.giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
        def codice_prodotto():
            # All elements with class 'testo_codiceArt' (used for the code column).
            codice = webdriver.find_elements_by_class_name('testo_codiceArt')
            return codice
        def for_def():
            # Append one CSV row per 'testoGiac' element found on the page.
            # cnt = 0
            test = len(giacenza())
            with open('File-Results.csv', 'a') as f:
                for i in range(test):
                    if giacenza()[i].text == "Disponibile":
                        giac = giacenza()[i].text
                        print(1, codice_prodotto()[i].text + " " + giac)
                    else:
                        # This is the statement that raises IndexError: the loop
                        # bound comes from giacenza(), but the index is applied
                        # to giacenza_2(), which can hold fewer elements.
                        giac = giacenza_2()[i].text
                        print(2,codice_prodotto()[i].text + " " + giac)
                    f.write(
                        codice_prodotto()[i].text + ";" + testo_prezzo()[i].text + ";" + giac + "\n")
        # Fill in the login form. Note the attributes store the return value of
        # send_keys()/click(), not the located elements.
        self.username = webdriver.find_element_by_name("username").send_keys(x)
        self.password = webdriver.find_element_by_name("password").send_keys(y)
        time.sleep(3)
        # XPath matches any element whose class attribute contains the token "buttonLogin".
        self.login = webdriver.find_element_by_xpath(
            '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
        time.sleep(5)
        self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
        time.sleep(5)
        # Seconds to wait after each scroll before re-measuring the page height.
        self.SCROLL_PAUSE_TIME = 20
        self.last_height = webdriver.execute_script("return document.body.scrollHeight")
        # Keep scrolling to the bottom until the document height stops changing.
        while True:
            webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(self.SCROLL_PAUSE_TIME)
            self.new_height = webdriver.execute_script("return document.body.scrollHeight")
            if self.new_height == self.last_height:
                break
            self.last_height = self.new_height
        # Page height is stable: export the rows, then close the window and notify the user.
        for_def()
        time.sleep(5)
        webdriver.close()
        self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")
循环结束时,对于没有库存数值('giacenza')的产品,输出应为 'Disponibile';
对于有库存数值的产品,输出应为该数值。
但程序没有继续运行,而是崩溃并返回如下错误:
giac = giacenza_2()[i].text IndexError: list index out of range
`giacenza()` 和 `giacenza_2()` 返回的元素数量似乎有时不同,这就导致了该错误。
在按索引访问之前先检查 `giacenza_2()` 的长度,可以避免这个错误:
class Selenium:
    """Log in to the online warehouse, scroll the catalogue and export it to CSV.

    All of the work happens in ``__init__``: it writes the CSV header, logs in,
    dismisses the cookie banner, scrolls until the page stops growing, appends
    one row per product to ``File-Results.csv`` and finally closes the browser
    window.

    Note: the ``find_element_by_*`` / ``find_elements_by_*`` helpers used here
    were removed in Selenium 4.3, so this code needs an older Selenium release.
    """

    # Locator-method reminders and earlier selector experiments, left disabled:
    #find_element_by_name('')
    #find_element_by_xpath('')
    #find_elements_by_class_name('')
    #find_element_by_id('')
    # giacenza = webdriver.find_elements_by_css_selector('.tdLarghezzaArt > .clFascia .tdwithAut > .testoGiac')
    # giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
    # giacenza2 = webdriver.find_elements_by_class_name('testoGiacVal')

    def __init__(self, webdriver, x, y):
        """Run the whole scrape.

        Args:
            webdriver: driver object used for every lookup and script call
                (presumably a Selenium WebDriver already showing the login
                page -- confirm against the caller).
            x: text typed into the field named "username".
            y: text typed into the field named "password".
        """
        # Start the report from scratch: mode 'w' truncates, then the header row is written.
        with open('./File-Results.csv', 'w') as f:
            f.write("Codice ;Prezzo; Giacenza \n")

        def testo_prezzo():
            """Return every element with class 'testoPrezzo' (price column)."""
            return webdriver.find_elements_by_class_name('testoPrezzo')

        def giacenza():
            """Return every element with class 'testoGiac' (stock label)."""
            return webdriver.find_elements_by_class_name('testoGiac')

        def giacenza_2():
            """Return every element matching '.testoGiacVal' (stock value)."""
            return webdriver.find_elements_by_css_selector('.testoGiacVal')

        # self.giacenza = webdriver.find_elements_by_class_name('testoGiacVal')

        def codice_prodotto():
            """Return every element with class 'testo_codiceArt' (product code)."""
            return webdriver.find_elements_by_class_name('testo_codiceArt')

        def text_at(elements, index):
            """Return ``elements[index].text``, or '' when the list is too short."""
            return elements[index].text if index < len(elements) else ""

        def for_def():
            """Append one CSV row per stock label found on the page."""
            # Query the page once per column instead of on every access; the
            # scrolling has finished by the time this runs.
            stock_labels = giacenza()
            stock_values = giacenza_2()
            codes = codice_prodotto()
            prices = testo_prezzo()
            with open('File-Results.csv', 'a') as f:
                for i, label in enumerate(stock_labels):
                    label_text = label.text
                    code = text_at(codes, i)
                    if label_text == "Disponibile":
                        giac = label_text
                        print(1, code + " " + giac)
                    else:
                        # The value list can be shorter than the label list,
                        # so index it only when `i` is in range; otherwise
                        # fall back to the label's own text so `giac` is
                        # always bound and never carried over from a previous
                        # row.
                        # NOTE(review): this assumes '.testoGiacVal' entries
                        # line up positionally with 'testoGiac' entries --
                        # verify against the page markup.
                        if i < len(stock_values):
                            giac = stock_values[i].text
                        else:
                            giac = label_text
                        print(2, code + " " + giac)
                    f.write(code + ";" + text_at(prices, i) + ";" + giac + "\n")

        # Fill in the login form. The attributes store the return value of
        # send_keys()/click(), not the located elements.
        self.username = webdriver.find_element_by_name("username").send_keys(x)
        self.password = webdriver.find_element_by_name("password").send_keys(y)
        time.sleep(3)
        # XPath matches any element whose class attribute contains the token "buttonLogin".
        self.login = webdriver.find_element_by_xpath(
            '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
        time.sleep(5)
        self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
        time.sleep(5)
        # Seconds to wait after each scroll before re-measuring the page height.
        self.SCROLL_PAUSE_TIME = 20
        self.last_height = webdriver.execute_script("return document.body.scrollHeight")
        # Keep scrolling to the bottom until the document height stops changing.
        while True:
            webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(self.SCROLL_PAUSE_TIME)
            self.new_height = webdriver.execute_script("return document.body.scrollHeight")
            if self.new_height == self.last_height:
                break
            self.last_height = self.new_height
        # Page height is stable: export the rows, then close the window and notify the user.
        for_def()
        time.sleep(5)
        webdriver.close()
        self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")


# `webdriver`, `x` and `y` must be provided by the surrounding script (the
# driver object and the login credentials).
ob = Selenium(webdriver, x, y)
# There is no separate `ob.for_def()` call: `for_def` is local to `__init__`,
# which already runs it (and closes the browser) before returning.
这是我的代码,它用于从在线仓库中抓取各种数据,并将其导出为 CSV 文件:
class Selenium:
    """Scraper that logs in, scrolls the page to the bottom and writes product rows to a CSV file.

    All of the work is done inside ``__init__``; the helper functions defined
    there are local to it and close over the ``webdriver`` argument.
    """
    # Locator-method reminders and earlier selector experiments, left disabled:
    #find_element_by_name('')
    #find_element_by_xpath('')
    #find_elements_by_class_name('')
    #find_element_by_id('')
    # giacenza = webdriver.find_elements_by_css_selector('.tdLarghezzaArt > .clFascia .tdwithAut > .testoGiac')
    # giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
    # giacenza2 = webdriver.find_elements_by_class_name('testoGiacVal')
    def __init__(self, webdriver, x, y):
        """Run the whole scrape: login, cookie banner, scroll, export, close.

        webdriver: object used for every find_element*/execute_script call
            below (presumably a Selenium WebDriver instance already showing
            the login page -- confirm against the caller).
        x: text typed into the field named "username".
        y: text typed into the field named "password".
        """
        # Start the report from scratch: mode 'w' truncates, then the header row is written.
        with open('File-Results.csv', 'w') as f:
            f.write("Codice ;Prezzo; Giacenza \n")
        # Each helper below re-queries the page every time it is called.
        def testo_prezzo():
            # All elements with class 'testoPrezzo' (used for the price column).
            prezzo = webdriver.find_elements_by_class_name('testoPrezzo')
            return prezzo
        def giacenza():
            # All elements with class 'testoGiac' (compared against "Disponibile" below).
            giacenza = webdriver.find_elements_by_class_name('testoGiac')
            return giacenza
        def giacenza_2():
            # All elements matching '.testoGiacVal' (used when the label is not "Disponibile").
            giacenza2 = webdriver.find_elements_by_css_selector('.testoGiacVal')
            return giacenza2
        # self.giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
        def codice_prodotto():
            # All elements with class 'testo_codiceArt' (used for the code column).
            codice = webdriver.find_elements_by_class_name('testo_codiceArt')
            return codice
        def for_def():
            # Append one CSV row per 'testoGiac' element found on the page.
            # cnt = 0
            test = len(giacenza())
            with open('File-Results.csv', 'a') as f:
                for i in range(test):
                    if giacenza()[i].text == "Disponibile":
                        giac = giacenza()[i].text
                        print(1, codice_prodotto()[i].text + " " + giac)
                    else:
                        # This is the statement that raises IndexError: the loop
                        # bound comes from giacenza(), but the index is applied
                        # to giacenza_2(), which can hold fewer elements.
                        giac = giacenza_2()[i].text
                        print(2,codice_prodotto()[i].text + " " + giac)
                    f.write(
                        codice_prodotto()[i].text + ";" + testo_prezzo()[i].text + ";" + giac + "\n")
        # Fill in the login form. Note the attributes store the return value of
        # send_keys()/click(), not the located elements.
        self.username = webdriver.find_element_by_name("username").send_keys(x)
        self.password = webdriver.find_element_by_name("password").send_keys(y)
        time.sleep(3)
        # XPath matches any element whose class attribute contains the token "buttonLogin".
        self.login = webdriver.find_element_by_xpath(
            '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
        time.sleep(5)
        self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
        time.sleep(5)
        # Seconds to wait after each scroll before re-measuring the page height.
        self.SCROLL_PAUSE_TIME = 20
        self.last_height = webdriver.execute_script("return document.body.scrollHeight")
        # Keep scrolling to the bottom until the document height stops changing.
        while True:
            webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(self.SCROLL_PAUSE_TIME)
            self.new_height = webdriver.execute_script("return document.body.scrollHeight")
            if self.new_height == self.last_height:
                break
            self.last_height = self.new_height
        # Page height is stable: export the rows, then close the window and notify the user.
        for_def()
        time.sleep(5)
        webdriver.close()
        self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")
循环结束时,对于没有库存数值('giacenza')的产品,输出应为 'Disponibile';对于有库存数值的产品,输出应为该数值。
但程序没有继续运行,而是崩溃并返回如下错误:
giac = giacenza_2()[i].text IndexError: list index out of range
`giacenza()` 和 `giacenza_2()` 返回的元素数量似乎有时不同,这就导致了该错误。
在按索引访问之前先检查 `giacenza_2()` 的长度,可以避免这个错误:
class Selenium:
    """Log in to the online warehouse, scroll the catalogue and export it to CSV.

    All of the work happens in ``__init__``: it writes the CSV header, logs in,
    dismisses the cookie banner, scrolls until the page stops growing, appends
    one row per product to ``File-Results.csv`` and finally closes the browser
    window.

    Note: the ``find_element_by_*`` / ``find_elements_by_*`` helpers used here
    were removed in Selenium 4.3, so this code needs an older Selenium release.
    """

    # Locator-method reminders and earlier selector experiments, left disabled:
    #find_element_by_name('')
    #find_element_by_xpath('')
    #find_elements_by_class_name('')
    #find_element_by_id('')
    # giacenza = webdriver.find_elements_by_css_selector('.tdLarghezzaArt > .clFascia .tdwithAut > .testoGiac')
    # giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
    # giacenza2 = webdriver.find_elements_by_class_name('testoGiacVal')

    def __init__(self, webdriver, x, y):
        """Run the whole scrape.

        Args:
            webdriver: driver object used for every lookup and script call
                (presumably a Selenium WebDriver already showing the login
                page -- confirm against the caller).
            x: text typed into the field named "username".
            y: text typed into the field named "password".
        """
        # Start the report from scratch: mode 'w' truncates, then the header row is written.
        with open('./File-Results.csv', 'w') as f:
            f.write("Codice ;Prezzo; Giacenza \n")

        def testo_prezzo():
            """Return every element with class 'testoPrezzo' (price column)."""
            return webdriver.find_elements_by_class_name('testoPrezzo')

        def giacenza():
            """Return every element with class 'testoGiac' (stock label)."""
            return webdriver.find_elements_by_class_name('testoGiac')

        def giacenza_2():
            """Return every element matching '.testoGiacVal' (stock value)."""
            return webdriver.find_elements_by_css_selector('.testoGiacVal')

        # self.giacenza = webdriver.find_elements_by_class_name('testoGiacVal')

        def codice_prodotto():
            """Return every element with class 'testo_codiceArt' (product code)."""
            return webdriver.find_elements_by_class_name('testo_codiceArt')

        def text_at(elements, index):
            """Return ``elements[index].text``, or '' when the list is too short."""
            return elements[index].text if index < len(elements) else ""

        def for_def():
            """Append one CSV row per stock label found on the page."""
            # Query the page once per column instead of on every access; the
            # scrolling has finished by the time this runs.
            stock_labels = giacenza()
            stock_values = giacenza_2()
            codes = codice_prodotto()
            prices = testo_prezzo()
            with open('File-Results.csv', 'a') as f:
                for i, label in enumerate(stock_labels):
                    label_text = label.text
                    code = text_at(codes, i)
                    if label_text == "Disponibile":
                        giac = label_text
                        print(1, code + " " + giac)
                    else:
                        # The value list can be shorter than the label list,
                        # so index it only when `i` is in range; otherwise
                        # fall back to the label's own text so `giac` is
                        # always bound and never carried over from a previous
                        # row.
                        # NOTE(review): this assumes '.testoGiacVal' entries
                        # line up positionally with 'testoGiac' entries --
                        # verify against the page markup.
                        if i < len(stock_values):
                            giac = stock_values[i].text
                        else:
                            giac = label_text
                        print(2, code + " " + giac)
                    f.write(code + ";" + text_at(prices, i) + ";" + giac + "\n")

        # Fill in the login form. The attributes store the return value of
        # send_keys()/click(), not the located elements.
        self.username = webdriver.find_element_by_name("username").send_keys(x)
        self.password = webdriver.find_element_by_name("password").send_keys(y)
        time.sleep(3)
        # XPath matches any element whose class attribute contains the token "buttonLogin".
        self.login = webdriver.find_element_by_xpath(
            '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
        time.sleep(5)
        self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
        time.sleep(5)
        # Seconds to wait after each scroll before re-measuring the page height.
        self.SCROLL_PAUSE_TIME = 20
        self.last_height = webdriver.execute_script("return document.body.scrollHeight")
        # Keep scrolling to the bottom until the document height stops changing.
        while True:
            webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(self.SCROLL_PAUSE_TIME)
            self.new_height = webdriver.execute_script("return document.body.scrollHeight")
            if self.new_height == self.last_height:
                break
            self.last_height = self.new_height
        # Page height is stable: export the rows, then close the window and notify the user.
        for_def()
        time.sleep(5)
        webdriver.close()
        self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")


# `webdriver`, `x` and `y` must be provided by the surrounding script (the
# driver object and the login credentials).
ob = Selenium(webdriver, x, y)
# There is no separate `ob.for_def()` call: `for_def` is local to `__init__`,
# which already runs it (and closes the browser) before returning.