Selenium 爬虫的问题:IndexError

Problem with selenium scraper, indexError

这是我的代码,它用于从一个在线仓库网站抓取各种数据,并将结果导出为 CSV 文件:

class Selenium:
# NOTE(review): as pasted, `def __init__` below starts at column 0, so it is
# not inside this class body; the class-level indentation appears to have been
# lost. `time` and `messagebox` are used below but imported outside this
# snippet.

#find_element_by_name('')
#find_element_by_xpath('')
#find_elements_by_class_name('')
#find_element_by_id('')
# giacenza = webdriver.find_elements_by_css_selector('.tdLarghezzaArt > .clFascia .tdwithAut > .testoGiac')
# giacenza = webdriver.find_elements_by_class_name('testoGiacVal')
# giacenza2 = webdriver.find_elements_by_class_name('testoGiacVal')

# Runs the whole scrape: writes the CSV header, fills the "username" and
# "password" fields with `x` and `y`, clicks the login and cookie buttons,
# scrolls to the bottom until the page height stops changing (calling
# `for_def()` after every scroll that changed the height), closes the browser
# and shows a message box.
#
# webdriver: object providing find_element(s)_by_*, execute_script and close.
# x: text typed into the element named "username".
# y: text typed into the element named "password".
def __init__(self, webdriver, x, y):
    # Truncate/create the results file and write the header row. The helper
    # functions below are defined inside this `with` block, so the handle `f`
    # stays open until the last `def` has been executed.
    with open('File-Results.csv', 'w') as f:
        f.write("Codice ;Prezzo; Giacenza \n")

        # Each helper re-queries the page on every call and returns the
        # current list of matching elements.
        def testo_prezzo():
            prezzo = webdriver.find_elements_by_class_name('testoPrezzo')
            return prezzo

        def giacenza():
            giacenza = webdriver.find_elements_by_class_name('testoGiac')
            return giacenza

        def giacenza_2():
            giacenza2 = webdriver.find_elements_by_css_selector('.testoGiacVal')
            return giacenza2
        # self.giacenza = webdriver.find_elements_by_class_name('testoGiacVal')

        def codice_prodotto():
            codice = webdriver.find_elements_by_class_name('testo_codiceArt')
            return codice

        # Appends one "code;price;stock" line per `testoGiac` element to the
        # results file and prints the code and stock value to stdout.
        def for_def():
            # cnt = 0
            # The loop bound is the number of `testoGiac` elements only; the
            # other three lists are indexed with the same `i` without any
            # length check.
            test = len(giacenza())
            with open('File-Results.csv', 'a') as f:
                for i in range(test):
                    if giacenza()[i].text == "Disponibile":
                        giac = giacenza()[i].text
                        print(1, codice_prodotto()[i].text + " " + giac)
                    else:
                        # Raises IndexError when there are fewer
                        # `.testoGiacVal` elements than `testoGiac` elements.
                        giac = giacenza_2()[i].text
                        print(2,codice_prodotto()[i].text + " " + giac)

                    f.write(
                        codice_prodotto()[i].text + ";" + testo_prezzo()[i].text + ";" + giac + "\n")

    # Log in: type the credentials, then click the element whose class list
    # contains "buttonLogin".
    self.username = webdriver.find_element_by_name("username").send_keys(x)
    self.password = webdriver.find_element_by_name("password").send_keys(y)
    time.sleep(3)
    self.login = webdriver.find_element_by_xpath(
        '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
    time.sleep(5)
    self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
    time.sleep(5)

    # Seconds to wait after each scroll before measuring the page height.
    self.SCROLL_PAUSE_TIME = 20

    self.last_height = webdriver.execute_script("return document.body.scrollHeight")

    # Scroll to the bottom repeatedly until the page height stops changing.
    while True:

        webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        time.sleep(self.SCROLL_PAUSE_TIME)

        self.new_height = webdriver.execute_script("return document.body.scrollHeight")
        if self.new_height == self.last_height:
            break
        self.last_height = self.new_height

        # Only reached when the height changed; the iteration that breaks out
        # of the loop does not call for_def().
        for_def()

    time.sleep(5)
    webdriver.close()

    self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")

在循环结束时,对于没有 'giacenza'(库存数量)的产品,输出的值应为 'Disponibile';对于具有 'giacenza' 的产品,输出的值应为其 'giacenza' 数值。

但程序并没有继续运行,而是崩溃并返回以下错误:

giac = giacenza_2()[i].text IndexError: list index out of range

giacenza() 和 giacenza_2() 返回的列表似乎有时包含不同数量的元素,这会导致该错误。在用索引访问 giacenza_2() 的结果之前先检查其长度,有助于避免这个错误:

class Selenium:
    """Log in to the site, scroll the listing and export it to a CSV file.

    Constructing an instance performs the whole run: it writes the CSV
    header, logs in, dismisses the cookie banner, scrolls to the bottom of
    the page until its height stops changing (exporting the visible rows
    after every scroll that changed the height), closes the browser and
    finally shows a message box.

    The module-level names ``time`` and ``messagebox`` must be imported by
    the surrounding file.
    """

    # Single source of truth for the output path (header and rows).
    RESULTS_FILE = 'File-Results.csv'
    # Status text that is written as-is instead of a stock quantity.
    AVAILABLE = "Disponibile"

    def __init__(self, webdriver, x, y):
        """Run the complete login / scroll / export sequence.

        Args:
            webdriver: Driver object providing ``find_element(s)_by_*``,
                ``execute_script`` and ``close``.
            x: Text typed into the element named ``username``.
            y: Text typed into the element named ``password``.
        """
        self.webdriver = webdriver

        # Create/truncate the results file and close it again right away;
        # rows are appended later by for_def().
        with open(self.RESULTS_FILE, 'w') as f:
            f.write("Codice ;Prezzo; Giacenza \n")

        self.username = webdriver.find_element_by_name("username").send_keys(x)
        self.password = webdriver.find_element_by_name("password").send_keys(y)
        time.sleep(3)
        self.login = webdriver.find_element_by_xpath(
            '//*[contains(concat( " ", @class, " " ), concat( " ", "buttonLogin", " " ))]').click()
        time.sleep(5)
        self.cookie = webdriver.find_element_by_class_name("buttonCookie").click()
        time.sleep(5)

        # Seconds to wait after each scroll before measuring the page height.
        self.SCROLL_PAUSE_TIME = 20

        self.last_height = webdriver.execute_script("return document.body.scrollHeight")

        # Scroll to the bottom repeatedly until the page height stops changing.
        while True:
            webdriver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            time.sleep(self.SCROLL_PAUSE_TIME)

            self.new_height = webdriver.execute_script("return document.body.scrollHeight")
            if self.new_height == self.last_height:
                break
            self.last_height = self.new_height

            # NOTE(review): kept where the original called it. Rows are
            # appended after every scroll that changed the height, so rows
            # already on the page are exported again, and nothing is exported
            # if the height never changes. Confirm this is intended.
            self.for_def()

        time.sleep(5)
        webdriver.close()

        self.end_msg = messagebox.showinfo(title="\t", message="Grazie per aver utilizzato ReadyDrop")

    @staticmethod
    def _stock_text(status, quantities, index):
        """Return the stock text to export for one row.

        Args:
            status: Text of the row's ``testoGiac`` element.
            quantities: Texts of all ``.testoGiacVal`` elements on the page.
            index: Position of the row in the listing.

        Returns:
            ``status`` when the row is "Disponibile" or when ``quantities``
            has no entry at ``index``; otherwise ``quantities[index]``.
        """
        # The bounds check is what prevents the IndexError: the page can hold
        # fewer `.testoGiacVal` elements than `testoGiac` elements.
        if status == Selenium.AVAILABLE or index >= len(quantities):
            return status
        return quantities[index]

    def for_def(self):
        """Append one ``code;price;stock`` line per listed row to the CSV.

        Each element list is fetched once per call. Rows are paired by
        position and iteration stops at the shortest of the code, price and
        status lists, so a missing element cannot raise IndexError.
        """
        driver = self.webdriver
        codes = [el.text for el in driver.find_elements_by_class_name('testo_codiceArt')]
        prices = [el.text for el in driver.find_elements_by_class_name('testoPrezzo')]
        statuses = [el.text for el in driver.find_elements_by_class_name('testoGiac')]
        # NOTE(review): quantities are looked up by row position, as before.
        # If the page renders `.testoGiacVal` only for some rows, the values
        # may belong to a different row. Confirm against the page markup.
        quantities = [el.text for el in driver.find_elements_by_css_selector('.testoGiacVal')]

        with open(self.RESULTS_FILE, 'a') as f:
            for i, (code, price, status) in enumerate(zip(codes, prices, statuses)):
                giac = self._stock_text(status, quantities, i)
                # Same console trace as before: 1 = available, 2 = other.
                print(1 if status == self.AVAILABLE else 2, code + " " + giac)
                f.write(code + ";" + price + ";" + giac + "\n")


if __name__ == "__main__":
    # The constructor already performs the export and closes the browser, so
    # no further call on the instance is needed.
    # `webdriver`, `x` and `y` must be defined by the surrounding script.
    ob = Selenium(webdriver, x, y)