Unable to print a tag with href using Selenium python
I want to extract a link from https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html, but I am unable to print the tag with the href using the code below:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pandas as pd
import time
import re
url = 'https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html'
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
options.add_argument("--remote-debugging-port=9222")
driver = webdriver.Chrome('F:/chromedriver',options=options)
driver.get(url)
time.sleep(6)
pageSource = driver.page_source
soup = BeautifulSoup(pageSource, 'html.parser')
installment = soup.find_all('img', {'class': 'afterpay-image'})
installmenta = installment.find('a', href=True)
print(installmenta)
First, find_all() returns a list of elements, so you need to iterate over it. Second, the anchor tag you are searching for is not a child of the image tag; it sits at the same level in the tree (it is a sibling), as the short sketch below illustrates.
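To see the difference on a tiny, self-contained example (the markup and the Afterpay URL below are hypothetical, modeled only on the class name from your code):

from bs4 import BeautifulSoup

html = """
<p class="afterpay">
    <img class="afterpay-image" src="afterpay.svg"/>
    <a href="https://www.afterpay.com/terms">Afterpay terms</a>
</p>
"""
soup = BeautifulSoup(html, 'html.parser')
img = soup.find('img', {'class': 'afterpay-image'})

# The <img> tag has no children, so searching inside it finds nothing
print(img.find('a', href=True))      # None

# find_next() walks forward in the document and reaches the sibling <a>
print(img.find_next('a')['href'])    # https://www.afterpay.com/terms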
You have two solutions:
#1 - Iterate and use find_next():
installment = soup.find_all('img', {'class': 'afterpay-image'})
for item in installment:
    print(item.find_next('a')['href'])
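Note that find_next('a') does not look inside the <img> tag (which has no children); it returns the first <a> that appears after it in document order. That works when the link directly follows the image, as it appears to on this page, but it would also match an unrelated link if the structure were different, which is why the stricter sibling selector in #2 can be safer.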
#2 - Use a CSS selector with select():
installment = soup.select('img.afterpay-image + a[href]')
for item in installment:
    print(item['href'])
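As a side note (a sketch, not part of the original answer): since the page is already loaded in Selenium, you could also apply the same sibling selector directly through the driver and skip BeautifulSoup. The selector relies on the same assumption about the page's markup as above.

from selenium.webdriver.common.by import By

# Grab every <a href> that immediately follows an afterpay image
links = driver.find_elements(By.CSS_SELECTOR, 'img.afterpay-image + a[href]')
for link in links:
    print(link.get_attribute('href'))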