如何从 page_source 获取 iframe 源
How to get iframe source from page_source
您好,我尝试从 page_source 中提取 link,我的代码是:
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import html5lib
# Location of the local chromedriver binary, passed positionally to webdriver.Chrome.
driver_path = r"C:\Users6\Desktop\New folder (8)\chromedriver.exe"
driver = webdriver.Chrome(driver_path)
try:
    # Everything after the browser starts runs inside the try block so that
    # the finally clause closes the browser even when navigation fails.
    driver.implicitly_wait(10)
    driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
    # Fixed pause kept from the original script, presumably to let the page
    # finish loading before the iframes are counted.
    time.sleep(4)
    iframes = driver.find_elements_by_tag_name('iframe')
    for index in range(len(iframes)):
        # Frames are entered by position, so only the index is needed here.
        driver.switch_to.frame(index)
        # your work to extract link
        text = driver.find_element_by_tag_name('body').text
        print(text)
        # Go back to the top-level document before entering the next frame.
        driver.switch_to.default_content()

    # Dump the HTML of the top-level document once all frames have been visited.
    output = driver.page_source
    print(output)
finally:
    driver.quit()
现在我只想抓取这个 link
试试:
# Look up the element whose id is 'iframevideo' through the existing `driver`.
element = driver.find_element_by_id('iframevideo')
# Read that element's `src` attribute; this is the value kept as the link.
link = element.get_attribute('src')
试试下面的脚本来获取您想要解析的链接。您无需切换到 iframe 即可获得该链接。硬编码延迟始终是解析任何动态内容的最差选择:如果链接在 5 秒之后才出现怎么办?我在下面的脚本中使用了显式等待(Explicit Wait),以使其更加健壮。
from selenium import webdriver
from selenium.webdriver.support import ui
driver = webdriver.Chrome()
try:
    # Explicit wait: poll for up to 10 seconds instead of sleeping a fixed time.
    wait = ui.WebDriverWait(driver, 10)
    driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
    # The callable receives the driver given to WebDriverWait; until() returns
    # the element as soon as the lookup succeeds and raises if the timeout
    # elapses first.
    elem = wait.until(lambda d: d.find_element_by_id("iframevideo"))
    print(elem.get_attribute("src"))
finally:
    # Close the browser even when the page fails to load or the wait times out.
    driver.quit()
输出:
https://openload.co/embed/8wVwFQEP1Sw
您好,我尝试从 page_source 中提取 link,我的代码是:
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import html5lib
# Location of the local chromedriver binary, passed positionally to webdriver.Chrome.
driver_path = r"C:\Users6\Desktop\New folder (8)\chromedriver.exe"
driver = webdriver.Chrome(driver_path)
try:
    # Everything after the browser starts runs inside the try block so that
    # the finally clause closes the browser even when navigation fails.
    driver.implicitly_wait(10)
    driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
    # Fixed pause kept from the original script, presumably to let the page
    # finish loading before the iframes are counted.
    time.sleep(4)
    iframes = driver.find_elements_by_tag_name('iframe')
    for index in range(len(iframes)):
        # Frames are entered by position, so only the index is needed here.
        driver.switch_to.frame(index)
        # your work to extract link
        text = driver.find_element_by_tag_name('body').text
        print(text)
        # Go back to the top-level document before entering the next frame.
        driver.switch_to.default_content()

    # Dump the HTML of the top-level document once all frames have been visited.
    output = driver.page_source
    print(output)
finally:
    driver.quit()
现在我只想抓取这个 link
试试:
# Look up the element whose id is 'iframevideo' through the existing `driver`.
element = driver.find_element_by_id('iframevideo')
# Read that element's `src` attribute; this is the value kept as the link.
link = element.get_attribute('src')
试试下面的脚本来获取您想要解析的链接。您无需切换到 iframe 即可获得该链接。硬编码延迟始终是解析任何动态内容的最差选择:如果链接在 5 秒之后才出现怎么办?我在下面的脚本中使用了显式等待(Explicit Wait),以使其更加健壮。
from selenium import webdriver
from selenium.webdriver.support import ui
driver = webdriver.Chrome()
try:
    # Explicit wait: poll for up to 10 seconds instead of sleeping a fixed time.
    wait = ui.WebDriverWait(driver, 10)
    driver.get("https://www.milversite.club/milver/outsiders-1x01-video_060893d7a.html")
    # The callable receives the driver given to WebDriverWait; until() returns
    # the element as soon as the lookup succeeds and raises if the timeout
    # elapses first.
    elem = wait.until(lambda d: d.find_element_by_id("iframevideo"))
    print(elem.get_attribute("src"))
finally:
    # Close the browser even when the page fails to load or the wait times out.
    driver.quit()
输出:
https://openload.co/embed/8wVwFQEP1Sw