无法加载 <tbody>(使用 Python 抓取 table)
Cannot load <tbody> when scraping a table with Python
我正在尝试抓取某个网站上的 table。问题是获取到的 HTML 代码中没有 <tbody>。我尝试了直接发送请求(urllib)和 Selenium,但结果总是一样的。有人知道原因吗?
这是使用 urllib.request 的代码。网站:https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances
from urllib.request import Request, urlopen

import bs4

link = "https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances"

# Send an explicit browser-like User-Agent header with the request.
req = Request(link, headers={"User-Agent": "Mozilla/5.0"})

# Use the response as a context manager so the HTTP connection is closed
# deterministically instead of being left to the garbage collector.
with urlopen(req) as response:
    webpage = response.read()

# Only the single document returned for `link` is parsed here; no scripts
# are executed and no embedded frames are fetched.
soup = bs4.BeautifulSoup(webpage, "html.parser")
print(soup)
这是使用 Selenium 的代码:
import time

import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
# Edge has its own Service class; the Chrome one was being used with the
# Edge driver executable.
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# webdriver_manager downloads a matching msedgedriver and returns its path.
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")

# Fixed delay before snapshotting the page source.
time.sleep(7)
html = driver.page_source
soup = bs4.BeautifulSoup(html, "lxml")
print(soup)

# NOTE(review): this selector is evaluated against the top-level document
# only. The answer that follows in this file reports that the table lives
# inside an iframe, which would explain why no <tbody> is found here.
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(
        (By.CSS_SELECTOR, ".table > tbody:nth-child(2)")
    )
)
print(driver.page_source)
您尝试访问的 table 在 iframe 中。您需要切换到该 iframe 才能访问该元素:
import time

import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
# Edge has its own Service class; the Chrome one was being used with the
# Edge driver executable.
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# webdriver_manager downloads a matching msedgedriver and returns its path.
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")

# Fixed delay before snapshotting the top-level page source.
time.sleep(7)
html = driver.page_source
soup = bs4.BeautifulSoup(html, "lxml")
print(soup)

# Wait until the iframe is available and move the driver's focus into it;
# element lookups after this point are resolved inside the iframe.
WebDriverWait(driver, 10).until(
    EC.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, "iframe#tokeholdersiframe")
    )
)

# With focus inside the iframe, wait for the table body to become visible.
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(
        (By.CSS_SELECTOR, ".table > tbody:nth-child(2)")
    )
)

# page_source is read while the driver is still focused on the iframe.
print(driver.page_source)
完成后,您必须使用以下语句切换回默认内容:
# Move the driver's focus out of the iframe and back to the top-level document.
driver.switch_to.default_content()
我正在尝试抓取某个网站上的 table。问题是获取到的 HTML 代码中没有 <tbody>。我尝试了直接发送请求(urllib)和 Selenium,但结果总是一样的。有人知道原因吗?
这是使用 urllib.request 的代码。网站:https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances
from urllib.request import Request, urlopen

import bs4

link = "https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances"

# Send an explicit browser-like User-Agent header with the request.
req = Request(link, headers={"User-Agent": "Mozilla/5.0"})

# Use the response as a context manager so the HTTP connection is closed
# deterministically instead of being left to the garbage collector.
with urlopen(req) as response:
    webpage = response.read()

# Only the single document returned for `link` is parsed here; no scripts
# are executed and no embedded frames are fetched.
soup = bs4.BeautifulSoup(webpage, "html.parser")
print(soup)
这是使用 Selenium 的代码:
import time

import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
# Edge has its own Service class; the Chrome one was being used with the
# Edge driver executable.
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# webdriver_manager downloads a matching msedgedriver and returns its path.
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")

# Fixed delay before snapshotting the page source.
time.sleep(7)
html = driver.page_source
soup = bs4.BeautifulSoup(html, "lxml")
print(soup)

# NOTE(review): this selector is evaluated against the top-level document
# only. The answer that follows in this file reports that the table lives
# inside an iframe, which would explain why no <tbody> is found here.
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(
        (By.CSS_SELECTOR, ".table > tbody:nth-child(2)")
    )
)
print(driver.page_source)
您尝试访问的 table 在 iframe 中。您需要切换到该 iframe 才能访问该元素:
import time

import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
# Edge has its own Service class; the Chrome one was being used with the
# Edge driver executable.
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# webdriver_manager downloads a matching msedgedriver and returns its path.
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
driver.get("https://bscscan.com/token/0xAdeaE50E0097fBf8139Bdff45e7ed00de4b14170#balances")

# Fixed delay before snapshotting the top-level page source.
time.sleep(7)
html = driver.page_source
soup = bs4.BeautifulSoup(html, "lxml")
print(soup)

# Wait until the iframe is available and move the driver's focus into it;
# element lookups after this point are resolved inside the iframe.
WebDriverWait(driver, 10).until(
    EC.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, "iframe#tokeholdersiframe")
    )
)

# With focus inside the iframe, wait for the table body to become visible.
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(
        (By.CSS_SELECTOR, ".table > tbody:nth-child(2)")
    )
)

# page_source is read while the driver is still focused on the iframe.
print(driver.page_source)
完成后,您必须使用以下语句切换回默认内容:
driver.switch_to.default_content()