Selenium chromedriver error: The AudioContext was not allowed to start. It must be resumed (or created) after a user gesture on the page
Selenium chromedriver error: The AudioContext was not allowed to start. It must be resumed (or created) after a user gesture on the page
我正在尝试访问此网站上的数据:
https://vemcount.app/embed/widget/uOCRuLPangWo5fT?locale=en
到目前为止我的代码如下:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
def configure_driver():
    """Create and return a headless Chrome WebDriver.

    Returns:
        A ``selenium.webdriver.Chrome`` instance configured with the
        options below. The caller is responsible for shutting it down.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--use-fake-ui-for-media-stream")
    # Chrome's autoplay policy is what blocks an AudioContext created before
    # any user gesture; this switch relaxes that policy. Note that the
    # "AudioContext was not allowed to start" line is logged at INFO level
    # by the page's own script, so it is a console message, not a driver
    # failure.
    chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
    driver = webdriver.Chrome(
        # Raw string: in a normal literal "\U" begins a unicode escape, so
        # "C:\Users\..." is a SyntaxError in Python 3.
        executable_path=r"C:\Users\uqiabde1\Downloads\chromedriver.exe",
        options=chrome_options,
    )
    return driver
def getNumber(driver):
    """Open the Vemcount widget page and print every counter value found.

    Args:
        driver: A Selenium WebDriver used to load and render the page.

    Returns:
        None. Each value is printed as ``{"title": <text>}``. If the
        container does not become visible within 10 seconds, a message is
        printed and the function returns early.
    """
    # Step 1: Go to website
    driver.get("https://vemcount.app/embed/widget/uOCRuLPangWo5fT?locale=en")

    # Wait for the element to load. "flex items-center" is a pair of CSS
    # class names, not an element id, so it has to be matched with a CSS
    # selector; looking it up by id can never succeed and always timed out.
    # The find_element_by_* helper style is kept to match the Selenium
    # version this script was written for.
    try:
        WebDriverWait(driver, 10).until(
            lambda s: s.find_element_by_css_selector(
                "div.flex.items-center"
            ).is_displayed()
        )
    except TimeoutException:
        print("TimeoutException: Element not found")
        return None

    # Step 2: Create a parse tree of page sources after searching
    soup = BeautifulSoup(driver.page_source, "lxml")

    # Step 3: Iterate over the search result and fetch the number
    number_needed = "p span"
    for number in soup.select("div.items-center"):
        value = number.select_one(number_needed)
        if value is None:
            # Not every matching container holds a "p span"; skip those
            # instead of failing on None.text.
            continue
        print({
            "title": value.text,
        })
# create the driver object.
driver = configure_driver()
try:
    getNumber(driver)
finally:
    # Shut the driver down even if scraping raised. quit() ends the whole
    # session and the chromedriver process, whereas close() only closes the
    # current window.
    driver.quit()
我在 chromedriver 中收到以下错误
[0414/150051.086:INFO:CONSOLE(2)] "The AudioContext was not allowed to start. It must be resumed (or created) after a user gesture on the page. https://goo[dot]gl/7K7WLu", source: https://vemcount.app/build/embed.js?id=2ff0173dd78c5c1f99c6 (2)
我不确定使用哪个 chrome_option 来绕过这个错误。我尝试了一些,例如
--no-user-gesture-required
和
--disable-gesture-requirement-for-presentation
非常感谢您的帮助。
谢谢。
如果您遇到了同样的问题，或者正在为动态网页抓取而苦恼：我找到了一种无需使用 selenium 和 bs4 即可抓取数据的方法。
我改用了 playwright，它更直接，并且提供了一个非常好用的 inner_html() 函数，可以直接读取动态生成的 HTML 内容。以下是参考代码。
# Part of this script was written with outside help (the source link is
# missing from the post).
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    # slow_mo delays every Playwright operation by 1000 ms; kept from the
    # original script, presumably to give the widget time to render.
    browser = p.chromium.launch(slow_mo=1000)
    try:
        page = browser.new_page()
        page.goto('https://vemcount.app/embed/widget/uOCRuLPangWo5fT')
        # wait_for_selector blocks until the element is present and visible.
        # query_selector returns None right away when the element has not
        # rendered yet, which made the inner_html() call fail.
        central = page.wait_for_selector("p.w-full span")
        print({'central': central.inner_html()})
    finally:
        # Always release the browser, even when navigation or the wait fails.
        browser.close()
如果有更好的方法,我很乐意听取您的建议。
此致，
一名 Python 初学者。 :)
我正在尝试访问此网站上的数据: https://vemcount.app/embed/widget/uOCRuLPangWo5fT?locale=en
到目前为止我的代码如下:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
def configure_driver():
    """Create and return a headless Chrome WebDriver.

    Returns:
        A ``selenium.webdriver.Chrome`` instance configured with the
        options below. The caller is responsible for shutting it down.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--use-fake-ui-for-media-stream")
    # Chrome's autoplay policy is what blocks an AudioContext created before
    # any user gesture; this switch relaxes that policy. Note that the
    # "AudioContext was not allowed to start" line is logged at INFO level
    # by the page's own script, so it is a console message, not a driver
    # failure.
    chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
    driver = webdriver.Chrome(
        # Raw string: in a normal literal "\U" begins a unicode escape, so
        # "C:\Users\..." is a SyntaxError in Python 3.
        executable_path=r"C:\Users\uqiabde1\Downloads\chromedriver.exe",
        options=chrome_options,
    )
    return driver
def getNumber(driver):
    """Open the Vemcount widget page and print every counter value found.

    Args:
        driver: A Selenium WebDriver used to load and render the page.

    Returns:
        None. Each value is printed as ``{"title": <text>}``. If the
        container does not become visible within 10 seconds, a message is
        printed and the function returns early.
    """
    # Step 1: Go to website
    driver.get("https://vemcount.app/embed/widget/uOCRuLPangWo5fT?locale=en")

    # Wait for the element to load. "flex items-center" is a pair of CSS
    # class names, not an element id, so it has to be matched with a CSS
    # selector; looking it up by id can never succeed and always timed out.
    # The find_element_by_* helper style is kept to match the Selenium
    # version this script was written for.
    try:
        WebDriverWait(driver, 10).until(
            lambda s: s.find_element_by_css_selector(
                "div.flex.items-center"
            ).is_displayed()
        )
    except TimeoutException:
        print("TimeoutException: Element not found")
        return None

    # Step 2: Create a parse tree of page sources after searching
    soup = BeautifulSoup(driver.page_source, "lxml")

    # Step 3: Iterate over the search result and fetch the number
    number_needed = "p span"
    for number in soup.select("div.items-center"):
        value = number.select_one(number_needed)
        if value is None:
            # Not every matching container holds a "p span"; skip those
            # instead of failing on None.text.
            continue
        print({
            "title": value.text,
        })
# create the driver object.
driver = configure_driver()
try:
    getNumber(driver)
finally:
    # Shut the driver down even if scraping raised. quit() ends the whole
    # session and the chromedriver process, whereas close() only closes the
    # current window.
    driver.quit()
我在 chromedriver 中收到以下错误
[0414/150051.086:INFO:CONSOLE(2)] "The AudioContext was not allowed to start. It must be resumed (or created) after a user gesture on the page. https://goo[dot]gl/7K7WLu", source: https://vemcount.app/build/embed.js?id=2ff0173dd78c5c1f99c6 (2)
我不确定使用哪个 chrome_option 来绕过这个错误。我尝试了一些,例如
--no-user-gesture-required
和
--disable-gesture-requirement-for-presentation
非常感谢您的帮助。 谢谢。
如果您遇到了同样的问题，或者正在为动态网页抓取而苦恼：我找到了一种无需使用 selenium 和 bs4 即可抓取数据的方法。
我改用了 playwright，它更直接，并且提供了一个非常好用的 inner_html() 函数，可以直接读取动态生成的 HTML 内容。以下是参考代码。
# Part of this script was written with outside help (the source link is
# missing from the post).
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    # slow_mo delays every Playwright operation by 1000 ms; kept from the
    # original script, presumably to give the widget time to render.
    browser = p.chromium.launch(slow_mo=1000)
    try:
        page = browser.new_page()
        page.goto('https://vemcount.app/embed/widget/uOCRuLPangWo5fT')
        # wait_for_selector blocks until the element is present and visible.
        # query_selector returns None right away when the element has not
        # rendered yet, which made the inner_html() call fail.
        central = page.wait_for_selector("p.w-full span")
        print({'central': central.inner_html()})
    finally:
        # Always release the browser, even when navigation or the wait fails.
        browser.close()
如果有更好的方法,我很乐意听取您的建议。
此致，
一名 Python 初学者。 :)