Return None - 通过 Python 抓取 p

Return None - scraping p by Python

我想抓取库存产品的名称和数量,但是 return None.

# Scrape product rows (title, name, link, price, stock) from Insight's
# TP-Link search results, one page per iteration, into a list of dicts.
# NOTE(review): `item.find("p", string="Insight Part")` returns None here
# because `string=` requires an exact full-text match — this is the bug
# the answer below addresses.
data_insight = []

for n in range(pagenum):
    # `currentPage` is 1-based, hence n+1 in the query payload.
    pages_url = f"https://www.insight.com/en_US/search.html?qtype=all&q=tp-link&qsrc=k&pq=%7B%22pageSize%22%3A100%2C%22currentPage%22%3A{n+1}%2C%22shownFlag%22%3Afalse%2C%22priceRangeLower%22%3Anull%2C%22priceRangeUpper%22%3Anull%2C%22cmtStandards%22%3Atrue%2C%22categoryId%22%3Anull%2C%22setType%22%3Anull%2C%22setId%22%3Anull%2C%22shared%22%3Anull%2C%22groupId%22%3Anull%2C%22cmtCustomerNumber%22%3Anull%2C%22groupName%22%3Anull%2C%22fromLicense%22%3Atrue%2C%22licenseContractIds%22%3Anull%2C%22assortmentIds%22%3Anull%2C%22controller%22%3Anull%2C%22fromcs%22%3Afalse%2C%22searchTerms%22%3A%7B%22TP-LINK%2520TECHNOLOGY%22%3A%7B%22field%22%3A%22field%22%2C%22value%22%3A%22A-HYBRIS-ManufacturerId~0007045098%22%7D%7D%2C%22sortBy%22%3A%22BestMatch%22%7D"
    driver.get(pages_url)
    # Wait until the JS-rendered product containers are in the DOM.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '[class="prod-section-container"]')))
    html = driver.page_source
    soup = Soup(html)

    for item in soup.select('[class="prod-section-container"]'):
        data_insight.append({
            'title' : item.find("a", class_="select-prod").text,
            # Exact-match string search — returns None unless the <p>'s
            # entire text is exactly "Insight Part".
            'name' : item.find("p", string="Insight Part"),
            'link' : item.find("a", class_="select-prod")['href'],
            'price' : item.find("span", class_="c-currency__value"),
            'stock' : item.find("p", class_="prod-stock").text
        })

df_insight = pd.DataFrame(data_insight)
# drop_duplicates() is NOT in-place; the result must be reassigned,
# otherwise the deduplication is silently discarded.
df_insight = df_insight.drop_duplicates()
df_insight

会发生什么?

使用 string= 参数您搜索的是字符串而不是标签,但这种搜索方式要求文本完全匹配——元素的全部文本必须正好等于 "Insight Part",否则返回 None。

如何修复?

您可以选择 string=re.compile(),但这只是第二好的解决方案。

更好的方法是调整您的 css selector 以获得所有 itemprop 属性:

soup.select('#js-search-product-items [itemprop="itemListElement"]')

例子

...
# Scrape product rows from Insight's TP-Link search results using the
# itemprop microdata attributes, which avoids the exact-match pitfall of
# BeautifulSoup's string= argument.
data_insight = []

for n in range(1):
    # `currentPage` is 1-based, hence n+1 in the query payload.
    pages_url = f"https://www.insight.com/en_US/search.html?qtype=all&q=tp-link&qsrc=k&pq=%7B%22pageSize%22%3A100%2C%22currentPage%22%3A{n+1}%2C%22shownFlag%22%3Afalse%2C%22priceRangeLower%22%3Anull%2C%22priceRangeUpper%22%3Anull%2C%22cmtStandards%22%3Atrue%2C%22categoryId%22%3Anull%2C%22setType%22%3Anull%2C%22setId%22%3Anull%2C%22shared%22%3Anull%2C%22groupId%22%3Anull%2C%22cmtCustomerNumber%22%3Anull%2C%22groupName%22%3Anull%2C%22fromLicense%22%3Atrue%2C%22licenseContractIds%22%3Anull%2C%22assortmentIds%22%3Anull%2C%22controller%22%3Anull%2C%22fromcs%22%3Afalse%2C%22searchTerms%22%3A%7B%22TP-LINK%2520TECHNOLOGY%22%3A%7B%22field%22%3A%22field%22%2C%22value%22%3A%22A-HYBRIS-ManufacturerId~0007045098%22%7D%7D%2C%22sortBy%22%3A%22BestMatch%22%7D"
    driver.get(pages_url)
    # Wait until the JS-rendered product containers are in the DOM.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '[class="prod-section-container"]')))
    html = driver.page_source
    soup = Soup(html)

    # Select each product via its schema.org microdata attribute instead
    # of matching text with string= (which requires an exact match).
    for item in soup.select('#js-search-product-items [itemprop="itemListElement"]'):

        data_insight.append({
            'title' : item.find(attrs={'itemprop':'name'}).text,
            'sku' : item.find(attrs={'itemprop':'sku'}).text,
            # NOTE(review): .text on the itemprop="url" node assumes the
            # URL is the element's text content — confirm against the
            # page markup (an href/content attribute may be needed).
            'link' : item.find(attrs={'itemprop':'url'}).text,
            'price' : item.find("span", class_="c-currency__value").text,
            # Keep only the leading quantity token, e.g. "376" from "376 in stock".
            'stock' : item.find("p", class_="prod-stock").get_text(strip=True).split(' ')[0]
        })

df_insight = pd.DataFrame(data_insight)
# drop_duplicates() is NOT in-place; the result must be reassigned,
# otherwise the deduplication is silently discarded.
df_insight = df_insight.drop_duplicates()
df_insight

输出