使用 chromedriver 使用 Selenium Python 截取整页的屏幕截图
Take screenshot of full page with Selenium Python with chromedriver
在尝试了各种方法之后...我偶然发现了这个页面,用 chromedriver、selenium 和 python 截取 full-page 屏幕截图。
原始代码见此处(here,原文链接已丢失)。(我把那篇文章中的代码复制在了下面)
它使用 PIL,效果很好!但是,存在一个问题……它捕获固定的 headers 并在整个页面上重复,并且在页面更改期间还遗漏了页面的某些部分。示例 url 以截取屏幕截图:
http://www.w3schools.com/js/default.asp
如何避免使用此代码重复 headers... 或者是否有更好的选择仅使用 python... (我不知道java也不想用java).
请看下面的当前结果截图和示例代码。
test.py
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
import unittest
import util
class Test(unittest.TestCase):
    """Demo test case: drive Chrome and produce a full-page screenshot."""

    def setUp(self):
        """Start a fresh Chrome session for the test."""
        chrome = webdriver.Chrome()
        self.driver = chrome

    def tearDown(self):
        """Shut the Chrome session down once the test has run."""
        browser = self.driver
        browser.quit()

    def test_fullpage_screenshot(self):
        """Open the sample page and stitch a document-height screenshot."""
        # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
        browser = self.driver
        browser.get("http://www.w3schools.com/js/default.asp")
        util.fullpage_screenshot(browser, "test.png")
# Run the test case when executed as a script. Only the program name is
# forwarded as argv, so unittest ignores any other command-line arguments.
if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])
util.py
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
    """Save a screenshot of the whole page by stitching viewport-sized tiles.

    The page is divided into viewport-sized rectangles; each one is scrolled
    into view, captured, and pasted into one image covering the document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the reported viewport width or height is not positive
            (the tiling would otherwise never terminate).

    Note:
        All sizes are CSS pixels. NOTE(review): on high-DPI displays the
        screenshots are presumably larger than the CSS viewport, which this
        function does not compensate for.
    """
    from io import BytesIO  # function-scope import: decode captures in memory

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height, viewport_width, viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport must have a positive size, got {0}x{1}".format(viewport_width, viewport_height)
        )

    # Tile the document row by row with viewport-sized rectangles
    # (left, top, right, bottom), clipped to the document size.
    rectangles = []
    for top in range(0, total_height, viewport_height):
        bottom = min(top + viewport_height, total_height)
        for left in range(0, total_width, viewport_width):
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))

    stitched_image = Image.new('RGB', (total_width, total_height))
    for part, rectangle in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
        print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))
        time.sleep(0.2)

        # The browser clamps scrolling at the end of the document, so the
        # last row/column is captured from a smaller offset than requested.
        # Ask for the real position instead of guessing it.
        scroll_x, scroll_y = driver.execute_script("return [window.pageXOffset, window.pageYOffset]")
        offset = (int(round(scroll_x)), int(round(scroll_y)))

        print("Capturing part {0} ...".format(part))
        # Decode the PNG in memory: no temporary part_N.png files are left
        # behind in the working directory if a later step fails.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        print("Adding to stitched image with offset ({0}, {1})".format(offset[0], offset[1]))
        stitched_image.paste(screenshot, offset)

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True
您可以通过更改屏幕截图前 header 的 CSS 来实现:
# Make the sticky header scroll with the document so it is captured once
# instead of being repeated in every tile. find_element(by, value) replaces
# find_element_by_id, which no longer exists in current Selenium releases.
topnav = driver.find_element("id", "topnav")
driver.execute_script("arguments[0].setAttribute('style', 'position: absolute; top: 0px;')", topnav)
编辑:将此行放在 window 滚动之后:
# Same header fix done purely in JavaScript: look the element up by id and
# override its inline style. NOTE(review): the script throws if the page has
# no element with id "topnav".
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
因此在您的 util.py 中它将是:
# Excerpt from the stitching loop: scroll the tile into view first ...
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
# ... then re-apply the header style before the tile is captured.
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
如果站点正在使用 header
标签,您可以使用 find_element_by_tag_name("header")
了解了@Moshisho的做法后
我的完整独立工作脚本是...(在每次滚动和定位后添加 sleep 0.2)
import sys
from selenium import webdriver
import util
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
    """Stitch a full-page screenshot, un-sticking the ``#topnav`` header.

    The page is divided into viewport-sized rectangles; each one is scrolled
    into view, the element with id ``topnav`` (if any) is switched to
    ``position: absolute`` so it is not repeated in every tile, and the
    capture is pasted into one image covering the document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the reported viewport width or height is not positive
            (the tiling would otherwise never terminate).
    """
    from io import BytesIO  # function-scope import: decode captures in memory

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height, viewport_width, viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport must have a positive size, got {0}x{1}".format(viewport_width, viewport_height)
        )

    # Guarded lookup: pages without a #topnav element must not raise a
    # JavaScript error in the middle of the capture.
    pin_header_js = (
        "var nav = document.getElementById('topnav');"
        "if (nav) { nav.setAttribute('style', 'position: absolute; top: 0px;'); }"
    )

    # Tile the document row by row with viewport-sized rectangles
    # (left, top, right, bottom), clipped to the document size.
    rectangles = []
    for top in range(0, total_height, viewport_height):
        bottom = min(top + viewport_height, total_height)
        for left in range(0, total_width, viewport_width):
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))

    stitched_image = Image.new('RGB', (total_width, total_height))
    for part, rectangle in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
        time.sleep(0.2)
        driver.execute_script(pin_header_js)
        time.sleep(0.2)
        print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))

        # The browser clamps scrolling at the end of the document, so ask
        # where the viewport really is rather than trusting the request.
        scroll_x, scroll_y = driver.execute_script("return [window.pageXOffset, window.pageYOffset]")
        offset = (int(round(scroll_x)), int(round(scroll_y)))

        print("Capturing part {0} ...".format(part))
        # Decode the PNG in memory: no temporary part_N.png files are left
        # behind in the working directory if a later step fails.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        print("Adding to stitched image with offset ({0}, {1})".format(offset[0], offset[1]))
        stitched_image.paste(screenshot, offset)

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True
# Demo: generate a document-height screenshot of a sample page.
driver = webdriver.Chrome()
try:
    # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
    url = "http://www.w3schools.com/js/default.asp"
    driver.get(url)
    fullpage_screenshot(driver, "test1236.png")
finally:
    # Always end the session so no browser/driver process is left running.
    driver.quit()
我更改了 Python 3.6 的代码,也许对某人有用:
from selenium import webdriver
from sys import stdout
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import unittest
#from Login_Page import Login_Page
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from io import BytesIO
from PIL import Image
def testdenovoUIavailable(self):
    """Open a page in Firefox and save a stitched full-height screenshot.

    The page is scrolled one screenshot-height at a time; every capture is
    kept together with the vertical position the browser actually scrolled
    to, and the slices are pasted at those positions into ``test.png``.

    Side effects:
        Sets ``self.driver`` to the new Firefox session (it is not quit
        here) and writes ``test.png`` in the working directory.
    """
    # Raw string: in a normal literal "\f" would be read as a form feed,
    # corrupting the path to firefox.exe.
    binary = FirefoxBinary(r"C:\Mozilla Firefox\firefox.exe")
    self.driver = webdriver.Firefox(firefox_binary=binary)
    verbose = 0

    # open page
    self.driver.get("http://yandex.ru")

    # A fixed header can be hidden here before capturing, e.g. by setting
    # display:none on its element via execute_script.

    # get total height of page
    js = 'return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);'
    scrollheight = self.driver.execute_script(js)
    if verbose > 0:
        print(scrollheight)

    slices = []
    positions = []
    offset = 0
    # separate full screen in parts and make printscreens
    while offset < scrollheight:
        if verbose > 0:
            print(offset)

        self.driver.execute_script("window.scrollTo(0, %s);" % offset)
        # The browser stops scrolling at the end of the document, so the
        # last slice is taken from a smaller offset than requested. Read the
        # real position back so that slice is pasted where it belongs.
        position = int(round(self.driver.execute_script("return window.pageYOffset;")))

        # create image (in Python 3.6 use BytesIO)
        img = Image.open(BytesIO(self.driver.get_screenshot_as_png()))
        slices.append(img)
        positions.append(position)
        offset += img.size[1]

        if verbose > 0:
            self.driver.get_screenshot_as_file('screen_%s.jpg' % (offset))
            print(scrollheight)

    # create the canvas and glue all slices together
    screenshot = Image.new('RGB', (slices[0].size[0], scrollheight))
    for img, position in zip(slices, positions):
        screenshot.paste(img, (0, position))
    screenshot.save('test.png')
from selenium import webdriver
# Screenshot the <body> element instead of the viewport; per the
# accompanying answer this saves the whole page with Firefox.
driver = webdriver.Firefox()
driver.get('https://developer.mozilla.org/')
# find_element(by, value) replaces find_element_by_tag_name, which no longer
# exists in current Selenium releases.
element = driver.find_element('tag name', 'body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
    file.write(element_png)
这对我有用。它将整个页面保存为屏幕截图。
有关更多信息,您可以阅读 api 文档:
http://selenium-python.readthedocs.io/api.html
# Locate <body> with find_element(by, value); the find_element_by_* helpers
# no longer exist in current Selenium releases.
element = driver.find_element('tag name', 'body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
    file.write(element_png)
前面第 2 行中建议的代码中存在错误。这是更正后的代码。作为菜鸟,还不能编辑我自己的post。
有时候直接对 body 元素截图并不能得到最好的结果。因此可以使用另一种方法:获取所有元素的高度并将它们相加,以此设置截图高度,如下所示:
# Add up the heights of all grandchildren of <html> and force that height
# onto <html> before capturing <body>.
elements = driver.find_elements("xpath", "/html/child::*/child::*")
# Keep every height: collecting them in a set would silently drop elements
# that happen to share the same height and under-count the total.
eheight = [round(e.size["height"]) for e in elements]
print(eheight)
total_height = sum(eheight)
driver.execute_script("document.getElementsByTagName('html')[0].setAttribute('style', 'height:"+str(total_height)+"px')")
element = driver.find_element('tag name', 'body')
element_png = element.screenshot_as_png
with open(fname, "wb") as file:
    file.write(element_png)
顺便说一句,它适用于 FF。
不确定人们是否仍然遇到这个问题。
我做了一个小技巧,效果很好,并且可以很好地与动态区域配合使用。希望对你有帮助
# 1. get dimensions
browser = webdriver.Chrome(options=options)
try:
    browser.set_window_size(default_width, default_height)
    browser.get(url)
    time.sleep(sometime)
    total_height = browser.execute_script("return document.body.parentNode.scrollHeight")
finally:
    browser.quit()

# 2. get screenshot in a second session whose window is as tall as the page
browser = webdriver.Chrome(options=options)
try:
    browser.set_window_size(default_width, total_height)
    browser.get(url)
    browser.save_screenshot(screenshot_path)
finally:
    # The second session was never closed before; end it so no
    # browser/driver process is left running.
    browser.quit()
稍微修改@ihightower和@A.Minachev的代码,使其在mac retina中工作:
import time
from PIL import Image
from io import BytesIO
def fullpage_screenshot(driver, file, scroll_delay=0.3):
    """Stitch a full-page screenshot, scaling offsets by devicePixelRatio.

    The page is scrolled vertically one viewport at a time; each capture is
    pasted at its scroll offset multiplied by ``window.devicePixelRatio`` so
    the slices line up on high-DPI ("retina") displays.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.
        scroll_delay: Seconds to wait after each scroll before capturing.

    Raises:
        AssertionError: If the page is wider than the viewport; only
            vertical scrolling is implemented.
        ValueError: If the reported viewport height is not positive (the
            scroll loop would otherwise never terminate).
    """
    device_pixel_ratio = driver.execute_script('return window.devicePixelRatio')
    total_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    viewport_height = driver.execute_script('return window.innerHeight')
    total_width = driver.execute_script('return document.body.offsetWidth')
    viewport_width = driver.execute_script("return document.body.clientWidth")

    # This implementation assumes viewport_width == total_width. Raised
    # explicitly (not via `assert`) so the check survives `python -O`.
    if viewport_width != total_width:
        raise AssertionError(
            "page width {0} differs from viewport width {1}".format(total_width, viewport_width)
        )
    if viewport_height <= 0:
        raise ValueError("viewport height must be positive, got {0}".format(viewport_height))

    def to_device_px(css_px):
        # devicePixelRatio may be fractional (e.g. 1.5); PIL needs integers.
        return int(round(css_px * device_pixel_ratio))

    # scroll the page, take screenshots and save screenshots to slices
    offset = 0  # height
    slices = {}
    while offset < total_height:
        if offset + viewport_height > total_height:
            # Last slice: align it with the bottom of the document.
            offset = max(total_height - viewport_height, 0)

        driver.execute_script('window.scrollTo({0}, {1})'.format(0, offset))
        time.sleep(scroll_delay)

        img = Image.open(BytesIO(driver.get_screenshot_as_png()))
        slices[offset] = img

        offset = offset + viewport_height

    # combine image slices
    stitched_image = Image.new('RGB', (to_device_px(total_width), to_device_px(total_height)))
    for offset, image in slices.items():
        stitched_image.paste(image, (0, to_device_px(offset)))
    stitched_image.save(file)


fullpage_screenshot(driver, 'test.png')
我修改了,让它只得到一次url。
# Load the page once, measure it, then grow the window to the page height.
browser = webdriver.Chrome(options=options)
try:
    browser.set_window_size(default_width, default_height)
    browser.get(url)
    height = browser.execute_script("return document.body.parentNode.scrollHeight")

    # 2. get screenshot
    browser.set_window_size(default_width, height)
    browser.save_screenshot(screenshot_path)
finally:
    # Quit even when a step above fails, so the browser is never leaked.
    browser.quit()
此答案在之前两个答案(原文中的链接已丢失)的基础上做了改进。
它采用无头模式,并且最初未设置 window-size 选项。在调用此函数之前,请确保页面已完全或充分加载。
它尝试将宽度和高度都设置为所需的值。整个页面的屏幕截图有时会包含不必要的垂直滚动条。通常避免滚动条的一种方法是截取 body 元素的屏幕截图。保存屏幕截图后,它会将尺寸恢复为原来的尺寸,否则下一张屏幕截图的尺寸可能无法正确设置。
最终,对于某些示例,此技术可能仍无法完美运行。
from selenium import webdriver
def save_screenshot(driver: 'webdriver.Chrome', path: str = '/tmp/screenshot.png') -> None:
    """Save a full-page screenshot by temporarily resizing the window.

    The window is resized to the document's scroll width/height, the
    ``<body>`` element is captured to *path*, and the original window size
    is restored afterwards, even if the capture fails.

    Args:
        driver: Chrome WebDriver with the page already loaded (per the
            accompanying answer, running headless).
        path: File the screenshot is written to.
    """
    original_size = driver.get_window_size()
    required_width = driver.execute_script('return document.body.parentNode.scrollWidth')
    required_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    driver.set_window_size(required_width, required_height)
    try:
        # driver.save_screenshot(path) would include the scrollbar;
        # capturing the body element avoids it. find_element(by, value)
        # replaces the removed find_element_by_tag_name helper.
        driver.find_element('tag name', 'body').screenshot(path)
    finally:
        # Restore the size so later screenshots start from a known state.
        driver.set_window_size(original_size['width'], original_size['height'])
如果使用 Python 早于 3.6,请从函数定义中删除类型注释。
为什么不只获取页面的宽度和高度然后调整驱动程序的大小?所以会是这样的
# Measure the page, make the window exactly that large, then capture it in
# a single shot (no stitching involved).
page_size = (
    driver.execute_script("return document.body.offsetWidth"),
    driver.execute_script("return document.body.scrollHeight"),
)
total_width, total_height = page_size
driver.set_window_size(*page_size)
driver.save_screenshot("SomeName.png")
这将制作整个页面的屏幕截图,而无需将不同的部分合并在一起。
屏幕截图仅限于视口,但您可以通过捕获 body
元素来解决此问题,因为网络驱动程序将捕获整个元素,即使它大于视口。这将使您不必处理滚动和拼接图像,但是您可能会发现页脚位置有问题(如下面的屏幕截图所示)。
使用 Chrome 驱动程序在 Windows 8 和 Mac High Sierra 上进行了测试。
from selenium import webdriver
# The URL was garbled to "whosebug.com" in this copy; the answer targets
# Stack Overflow.
url = 'https://stackoverflow.com/'
path = '/path/to/save/in/scrape.png'

driver = webdriver.Chrome()
try:
    driver.get(url)
    # find_element(by, value) replaces the removed find_element_by_tag_name.
    el = driver.find_element('tag name', 'body')
    el.screenshot(path)
finally:
    # Quit even when loading or capturing fails.
    driver.quit()
Returns:(全尺寸:https://i.stack.imgur.com/ppDiI.png)
您可以使用Splinter
Splinter 是现有浏览器自动化工具之上的抽象层,例如 Selenium
新版本 0.10.0
中有一项新功能 browser.screenshot(..., full=True)
。
full=True
选项将为您进行全屏捕获。
搞定了!!!效果非常好。
对于 NodeJS,但概念是相同的:
// Restyle <html> and <body> as table / table-row before capturing;
// presumably this makes <body> span the full content height (TODO confirm).
await driver.executeScript(`
document.documentElement.style.display = "table";
document.documentElement.style.width = "100%";
document.body.style.display = "table-row";
`);
// Capture the <body> element rather than the viewport.
await driver.findElement(By.css('body')).takeScreenshot();
关键是开启headless
模式!
无需拼接,无需两次加载页面。
完整的工作代码:
URL = 'http://www.w3schools.com/js/default.asp'

options = webdriver.ChromeOptions()
# Per the accompanying answer, headless mode is what makes this approach
# work. The flag is passed as an argument because the `headless` attribute
# is deprecated in newer Selenium releases.
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.get(URL)

    def S(X):
        """Return the <html> element's scroll<X>, X being 'Width' or 'Height'."""
        return driver.execute_script('return document.body.parentNode.scroll' + X)

    driver.set_window_size(S('Width'), S('Height'))  # May need manual adjustment
    # find_element(by, value) replaces the removed find_element_by_tag_name.
    driver.find_element('tag name', 'body').screenshot('web_screenshot.png')
finally:
    driver.quit()
这实际上与 @Acumenus 的 posted 相同,但略有改进。
我的发现总结
我决定 post 无论如何,因为我没有找到有关关闭 headless
模式(显示浏览器)以截取屏幕截图时发生的情况的解释。
正如我测试的那样(使用 Chrome WebDriver),如果打开 headless
模式,屏幕截图会根据需要保存。但是,如果关闭 headless
模式,保存的屏幕截图的宽度和高度大致正确,但结果会有所不同 case-by-case。通常,屏幕可见的页面上部会被保存,但图像的其余部分只是纯白色。还有一个案例是试图通过使用上面的 link 来保存这个 Stack Overflow 线程;甚至上半部分也没有保存,有趣的是现在它是透明的,而其余部分仍然是白色的。我注意到的最后一个案例只有一次给定 W3Schools link;那里没有白色部分,但页面的上部重复到最后,包括 header.
我希望这对许多由于某种原因没有得到预期结果的人有所帮助,因为我没有看到任何人明确解释过这种简单方法对 headless 模式的要求。
只有当我自己发现这个问题的解决方案时,我才发现 @vc2279 的一个 post 提到无头浏览器的 window 可以设置为任意大小(对于相反的情况似乎也是如此)。虽然,我的 post 中的解决方案改进了它不需要重复 browser/driver 打开或重新加载页面。
进一步的建议
如果对于某些页面它不适合您,我建议在获取页面大小之前尝试添加 time.sleep(seconds)
。另一种情况是,如果页面需要滚动到底部才能加载更多内容,这可以通过 post:
中的 scheight
方法解决
# Scroll through the page in many small steps; per the accompanying answer
# this is for pages that load more content while being scrolled.
# scheight is the divisor applied to document.body.scrollHeight; it starts
# at 0.1 and grows by 0.01 per step until it reaches 9.9.
scheight = .1
while scheight < 9.9:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
    scheight += .01
另请注意,对于某些页面,内容可能不在任何 top-level HTML 标记中,例如 <html>
或 <body>
,例如 YouTube 使用 <ytd-app>
标签。
最后一点,我发现有一个页面 "returned" 的屏幕截图仍然带有水平滚动条, window 的大小需要手动调整,即图像宽度需要增加 18 像素,例如所以:S('Width')+18
.
用 Python 实现起来很容易,但速度较慢
import os
from selenium import webdriver
from PIL import Image
def full_screenshot(driver: webdriver):
    """Return one stitched image of the whole page.

    Scrolls to the top, splits the document into viewport-sized tiles,
    captures every tile through a temporary ``part_N.png`` file and pastes
    it into a single RGB image, which is returned (nothing is saved).
    """
    driver.execute_script("window.scrollTo(0, 0)")
    page_w = driver.execute_script("return document.body.offsetWidth")
    page_h = driver.execute_script("return document.body.parentNode.scrollHeight")
    view_w = driver.execute_script("return document.body.clientWidth")
    view_h = driver.execute_script("return window.innerHeight")

    def starts(limit, step):
        # Yield 0, step, 2*step, ... for as long as the value is below limit.
        pos = 0
        while pos < limit:
            yield pos
            pos = pos + step

    # (left, top, right, bottom) for each tile, row by row, clipped to the page.
    tiles = [
        (x, y, min(x + view_w, page_w), min(y + view_h, page_h))
        for y in starts(page_h, view_h)
        for x in starts(page_w, view_w)
    ]

    canvas = Image.new('RGB', (page_w, page_h))
    for index, tile in enumerate(tiles):
        left, top = tile[0], tile[1]
        # The first tile is captured from the current scroll position.
        if index:
            driver.execute_script("window.scrollTo({0}, {1})".format(left, top))

        tmp_name = "part_{0}.png".format(index)
        driver.get_screenshot_as_file(tmp_name)
        shot = Image.open(tmp_name)

        # A tile reaching past the page bottom is pasted flush with the bottom.
        paste_y = page_h - view_h if top + view_h > page_h else top
        canvas.paste(shot, (left, paste_y))

        del shot
        os.remove(tmp_name)
    return canvas
我已经修改了@ihightower给出的答案,而不是在那个函数中保存截图,return网页的总高度和总宽度,然后将window大小设置为total高度和总宽度。
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def open_url(url):
    """Open *url* in headless Chrome and save ``screen.png``.

    Args:
        url: Address of the page to capture.
    """
    options = Options()
    # Pass the flag as an argument; the `headless` attribute is deprecated
    # in newer Selenium releases.
    options.add_argument('--headless')
    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        save_screenshot(driver, 'screen.png')
    finally:
        # The driver never leaves this function, so it must be closed here
        # or the browser process is leaked.
        driver.quit()
def save_screenshot(driver, file_name):
    """Resize the window to the page size and write a screenshot to *file_name*.

    The page size comes from ``scroll_down``; the PNG returned by the driver
    is decoded with PIL and saved under *file_name*.
    """
    page_height, page_width = scroll_down(driver)
    driver.set_window_size(page_width, page_height)
    png_stream = BytesIO(driver.get_screenshot_as_png())
    Image.open(png_stream).save(file_name)
    print(" screenshot saved ")
def scroll_down(driver, scroll_delay=0.5):
    """Scroll through the page one viewport at a time and report its size.

    The document is walked in viewport-sized steps, row by row, pausing
    after every scroll. The first tile is not scrolled to: the page is taken
    to be at its initial position.

    Args:
        driver: Selenium WebDriver with the page already loaded.
        scroll_delay: Seconds to wait after each scroll.

    Returns:
        ``(total_height, total_width)`` as measured before scrolling.

    Raises:
        ValueError: If the reported viewport width or height is not positive
            (the walk would otherwise never terminate).
    """
    import time  # function-scope import: the snippet's imports lack `time`

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport must have a positive size, got {0}x{1}".format(viewport_width, viewport_height)
        )

    first_tile = True
    for top in range(0, total_height, viewport_height):
        for left in range(0, total_width, viewport_width):
            if first_tile:
                first_tile = False
                continue
            driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
            time.sleep(scroll_delay)

    return (total_height, total_width)
# Example run: capture the Medium front page (open_url saves screen.png).
open_url("https://www.medium.com")
工作原理:尽可能设置浏览器高度...
#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def test_fullpage_screenshot(self=None):
    """Save a screenshot after making the headless window as tall as the page.

    The window height is set to the height of the tallest element on the
    page plus a 1000px margin, then a normal screenshot is taken.

    Args:
        self: Unused. Defaults to None so the function can also be called
            as a plain function, as the ``__main__`` guard below does.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("yoururlxxx")
        time.sleep(2)
        # the element with longest height on page
        ele = driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
        total_height = ele.size["height"] + 1000
        driver.set_window_size(1920, total_height)  # the trick
        time.sleep(2)
        driver.save_screenshot("screenshot1.png")
    finally:
        # Quit even when a step above fails.
        driver.quit()


if __name__ == "__main__":
    test_fullpage_screenshot()
这是我在 Stack Overflow 上的第一个答案。我是新手。
专家编码员引用的其他答案很棒,我什至没有参加比赛。我只想引用以下 link 采取的步骤:pypi.org
参考整页截图部分。
打开命令提示符并导航到 Python 的安装目录
cd "enter the directory"
使用 pip 安装模块
pip install Selenium-Screenshot
以上模块适用于 python 3。
安装模块后,通过在 python IDLE
中创建一个单独的文件来尝试以下代码
from Screenshot import Screenshot_Clipping
from selenium import webdriver
# Helper object from the Selenium-Screenshot package.
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://github.com/sam4u3/Selenium_Screenshot/tree/master/test"
driver.get(url)
# the line below makes taking & saving screenshots very easy.
# Its return value is printed below; presumably the saved image's path.
img_url=ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
print(img_url)
# Close the window, then end the WebDriver session.
driver.close()
driver.quit()
来源:https://pypi.org/project/Selenium-Screenshot/
from Screenshot import Screenshot_Clipping
from selenium import webdriver
import time
# Helper object from the Selenium-Screenshot package.
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://www.bbc.com/news/world-asia-china-51108726"
driver.get(url)
# Short pause before capturing; presumably to let the page finish loading.
time.sleep(1)
img_url = ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
# Close the window, then end the WebDriver session.
driver.close()
driver.quit()
整页截图不是W3C spec的一部分。然而,许多网络驱动程序实现了他们的 自己的 端点以获得真正的整页屏幕截图。我发现这种使用 geckodriver 的方法远远优于注入的“屏幕截图、滚动、拼接”方法,并且 far 比调整 window 在无头模式下。
示例:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
options = Options()
# Pass the flag as an argument; the `headless` attribute is deprecated in
# newer Selenium releases.
options.add_argument('-headless')
service = Service('/your/path/to/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
try:
    driver.get('https://www.nytimes.com/')
    # geckodriver-specific call that captures the whole page in one go.
    driver.get_full_page_screenshot_as_file('example.png')
finally:
    # quit() ends the session and the geckodriver process; close() only
    # closes the current window.
    driver.quit()
geckodriver (Firefox)
如果您使用的是 geckodriver,您可以点击这些功能:
driver.get_full_page_screenshot_as_file
driver.save_full_page_screenshot
driver.get_full_page_screenshot_as_png
driver.get_full_page_screenshot_as_base64
我已经测试并确认这些可以在 Selenium 4.07 上运行。我不相信这些功能包含在 Selenium 3 中。
我能找到的关于这些功能的最佳文档,是引入它们的那次合并(merge;原文链接已丢失)。
chromedriver(铬)
看来 chromedriver 已经实现了自己的整页截图功能:
https://chromium-review.googlesource.com/c/chromium/src/+/2300980
而且 Selenium 团队的目标似乎是在 Selenium 4 中获得支持:
对于Chrome,也可以使用Chrome DevTools Protocol:
import base64
...
# Ask DevTools for the page's layout metrics, then capture a clip covering
# the full content size, including the part outside the viewport.
page_rect = browser.driver.execute_cdp_cmd("Page.getLayoutMetrics", {})
screenshot = browser.driver.execute_cdp_cmd(
    "Page.captureScreenshot",
    {
        "format": "png",
        "captureBeyondViewport": True,
        "clip": {
            "width": page_rect["contentSize"]["width"],
            "height": page_rect["contentSize"]["height"],
            "x": 0,
            "y": 0,
            "scale": 1
        }
    })

with open(path, "wb") as file:
    # The protocol returns standard base64 (not the URL-safe alphabet),
    # so decode it with b64decode.
    file.write(base64.b64decode(screenshot["data"]))
这在无头和非无头模式下都有效。
我目前正在使用这种方法:
def take_screenshot(self, driver, screenshot_name="debug.png", settle_delay=2):
    """Resize the window to the body height (+1000px) and save a screenshot.

    Args:
        driver: Selenium WebDriver with the page already loaded.
        screenshot_name: File the screenshot is written to.
        settle_delay: Seconds to wait after resizing before capturing.

    Returns:
        The same *driver*, so calls can be chained.
    """
    # find_element(by, value) replaces the removed find_element_by_tag_name.
    elem = driver.find_element('tag name', 'body')
    total_height = elem.size["height"] + 1000
    driver.set_window_size(1920, total_height)
    time.sleep(settle_delay)
    driver.save_screenshot(screenshot_name)
    return driver
Python 使用 Selenium 4 和 Chrome 驱动程序
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time
import shutil
def take_full_page_screenshot():
    """Save ``screenshot.png`` of a page using headless Chrome (Selenium 4).

    A matching chromedriver is installed through webdriver_manager, the
    page is loaded, the window is resized to the ``<body>`` height plus a
    1000px margin, and a normal screenshot is taken.
    """
    # Install chrome driver
    chrome_driver_path = ChromeDriverManager().install()
    service = Service(chrome_driver_path)

    # setup chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--incognito')
    options.add_argument('--start-maximized')
    options.add_argument('--disable-gpu')

    # Hand the Service to the driver instead of starting it by hand and
    # passing the path positionally: the manually started service was never
    # used or stopped.
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # open url and wait for the page to load
        # (the URL was garbled to "whosebug.com" in this copy)
        driver.get('https://www.stackoverflow.com')
        time.sleep(2)

        # Use the <body> element's height, plus a margin, as window height.
        element = driver.find_element(By.TAG_NAME, 'body')
        total_height = element.size["height"] + 1000

        # set the window dimensions
        driver.set_window_size(1920, total_height)

        # save screenshot
        driver.save_screenshot("screenshot.png")
    finally:
        # quit driver, even when a step above fails
        driver.quit()


if __name__ == '__main__':
    take_full_page_screenshot()
在尝试了各种方法之后...我偶然发现了这个页面,用 chromedriver、selenium 和 python 截取 full-page 屏幕截图。
原始代码见此处(here,原文链接已丢失)。(我把那篇文章中的代码复制在了下面)
它使用 PIL,效果很好!但是,存在一个问题……它捕获固定的 headers 并在整个页面上重复,并且在页面更改期间还遗漏了页面的某些部分。示例 url 以截取屏幕截图:
http://www.w3schools.com/js/default.asp
如何避免使用此代码重复 headers... 或者是否有更好的选择仅使用 python... (我不知道java也不想用java).
请看下面的当前结果截图和示例代码。
test.py
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
import unittest
import util
class Test(unittest.TestCase):
    """Demo test case: drive Chrome and produce a full-page screenshot."""

    def setUp(self):
        """Start a fresh Chrome session for the test."""
        chrome = webdriver.Chrome()
        self.driver = chrome

    def tearDown(self):
        """Shut the Chrome session down once the test has run."""
        browser = self.driver
        browser.quit()

    def test_fullpage_screenshot(self):
        """Open the sample page and stitch a document-height screenshot."""
        # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
        browser = self.driver
        browser.get("http://www.w3schools.com/js/default.asp")
        util.fullpage_screenshot(browser, "test.png")
# Run the test case when executed as a script. Only the program name is
# forwarded as argv, so unittest ignores any other command-line arguments.
if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])
util.py
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
    """Save a screenshot of the whole page by stitching viewport-sized tiles.

    The page is divided into viewport-sized rectangles; each one is scrolled
    into view, captured, and pasted into one image covering the document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the reported viewport width or height is not positive
            (the tiling would otherwise never terminate).

    Note:
        All sizes are CSS pixels. NOTE(review): on high-DPI displays the
        screenshots are presumably larger than the CSS viewport, which this
        function does not compensate for.
    """
    from io import BytesIO  # function-scope import: decode captures in memory

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height, viewport_width, viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport must have a positive size, got {0}x{1}".format(viewport_width, viewport_height)
        )

    # Tile the document row by row with viewport-sized rectangles
    # (left, top, right, bottom), clipped to the document size.
    rectangles = []
    for top in range(0, total_height, viewport_height):
        bottom = min(top + viewport_height, total_height)
        for left in range(0, total_width, viewport_width):
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))

    stitched_image = Image.new('RGB', (total_width, total_height))
    for part, rectangle in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
        print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))
        time.sleep(0.2)

        # The browser clamps scrolling at the end of the document, so the
        # last row/column is captured from a smaller offset than requested.
        # Ask for the real position instead of guessing it.
        scroll_x, scroll_y = driver.execute_script("return [window.pageXOffset, window.pageYOffset]")
        offset = (int(round(scroll_x)), int(round(scroll_y)))

        print("Capturing part {0} ...".format(part))
        # Decode the PNG in memory: no temporary part_N.png files are left
        # behind in the working directory if a later step fails.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        print("Adding to stitched image with offset ({0}, {1})".format(offset[0], offset[1]))
        stitched_image.paste(screenshot, offset)

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True
您可以通过更改屏幕截图前 header 的 CSS 来实现:
# Make the sticky header scroll with the document so it is captured once
# instead of being repeated in every tile. find_element(by, value) replaces
# find_element_by_id, which no longer exists in current Selenium releases.
topnav = driver.find_element("id", "topnav")
driver.execute_script("arguments[0].setAttribute('style', 'position: absolute; top: 0px;')", topnav)
编辑:将此行放在 window 滚动之后:
# Same header fix done purely in JavaScript: look the element up by id and
# override its inline style. NOTE(review): the script throws if the page has
# no element with id "topnav".
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
因此在您的 util.py 中它将是:
# Excerpt from the stitching loop: scroll the tile into view first ...
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
# ... then re-apply the header style before the tile is captured.
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
如果站点正在使用 header
标签,您可以使用 find_element_by_tag_name("header")
了解了@Moshisho的做法后
我的完整独立工作脚本是...(在每次滚动和定位后添加 sleep 0.2)
import sys
from selenium import webdriver
import util
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
    """Stitch a full-page screenshot, un-sticking the ``#topnav`` header.

    The page is divided into viewport-sized rectangles; each one is scrolled
    into view, the element with id ``topnav`` (if any) is switched to
    ``position: absolute`` so it is not repeated in every tile, and the
    capture is pasted into one image covering the document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the reported viewport width or height is not positive
            (the tiling would otherwise never terminate).
    """
    from io import BytesIO  # function-scope import: decode captures in memory

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height, viewport_width, viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport must have a positive size, got {0}x{1}".format(viewport_width, viewport_height)
        )

    # Guarded lookup: pages without a #topnav element must not raise a
    # JavaScript error in the middle of the capture.
    pin_header_js = (
        "var nav = document.getElementById('topnav');"
        "if (nav) { nav.setAttribute('style', 'position: absolute; top: 0px;'); }"
    )

    # Tile the document row by row with viewport-sized rectangles
    # (left, top, right, bottom), clipped to the document size.
    rectangles = []
    for top in range(0, total_height, viewport_height):
        bottom = min(top + viewport_height, total_height)
        for left in range(0, total_width, viewport_width):
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))

    stitched_image = Image.new('RGB', (total_width, total_height))
    for part, rectangle in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
        time.sleep(0.2)
        driver.execute_script(pin_header_js)
        time.sleep(0.2)
        print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))

        # The browser clamps scrolling at the end of the document, so ask
        # where the viewport really is rather than trusting the request.
        scroll_x, scroll_y = driver.execute_script("return [window.pageXOffset, window.pageYOffset]")
        offset = (int(round(scroll_x)), int(round(scroll_y)))

        print("Capturing part {0} ...".format(part))
        # Decode the PNG in memory: no temporary part_N.png files are left
        # behind in the working directory if a later step fails.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        print("Adding to stitched image with offset ({0}, {1})".format(offset[0], offset[1]))
        stitched_image.paste(screenshot, offset)

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True
# Demo: generate a document-height screenshot of a sample page.
driver = webdriver.Chrome()
try:
    # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
    url = "http://www.w3schools.com/js/default.asp"
    driver.get(url)
    fullpage_screenshot(driver, "test1236.png")
finally:
    # Always end the session so no browser/driver process is left running.
    driver.quit()
我更改了 Python 3.6 的代码,也许对某人有用:
from selenium import webdriver
from sys import stdout
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import unittest
#from Login_Page import Login_Page
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from io import BytesIO
from PIL import Image
def testdenovoUIavailable(self):
    """Open a page in Firefox and save a stitched full-height screenshot.

    The page is scrolled one screenshot-height at a time; every capture is
    kept together with the vertical position the browser actually scrolled
    to, and the slices are pasted at those positions into ``test.png``.

    Side effects:
        Sets ``self.driver`` to the new Firefox session (it is not quit
        here) and writes ``test.png`` in the working directory.
    """
    # Raw string: in a normal literal "\f" would be read as a form feed,
    # corrupting the path to firefox.exe.
    binary = FirefoxBinary(r"C:\Mozilla Firefox\firefox.exe")
    self.driver = webdriver.Firefox(firefox_binary=binary)
    verbose = 0

    # open page
    self.driver.get("http://yandex.ru")

    # A fixed header can be hidden here before capturing, e.g. by setting
    # display:none on its element via execute_script.

    # get total height of page
    js = 'return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);'
    scrollheight = self.driver.execute_script(js)
    if verbose > 0:
        print(scrollheight)

    slices = []
    positions = []
    offset = 0
    # separate full screen in parts and make printscreens
    while offset < scrollheight:
        if verbose > 0:
            print(offset)

        self.driver.execute_script("window.scrollTo(0, %s);" % offset)
        # The browser stops scrolling at the end of the document, so the
        # last slice is taken from a smaller offset than requested. Read the
        # real position back so that slice is pasted where it belongs.
        position = int(round(self.driver.execute_script("return window.pageYOffset;")))

        # create image (in Python 3.6 use BytesIO)
        img = Image.open(BytesIO(self.driver.get_screenshot_as_png()))
        slices.append(img)
        positions.append(position)
        offset += img.size[1]

        if verbose > 0:
            self.driver.get_screenshot_as_file('screen_%s.jpg' % (offset))
            print(scrollheight)

    # create the canvas and glue all slices together
    screenshot = Image.new('RGB', (slices[0].size[0], scrollheight))
    for img, position in zip(slices, positions):
        screenshot.paste(img, (0, position))
    screenshot.save('test.png')
from selenium import webdriver
driver = webdriver.Firefox()
driver.get('https://developer.mozilla.org/')
element = driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
file.write(element_png)
这对我有用。它将整个页面保存为屏幕截图。 有关更多信息,您可以阅读 api 文档: http://selenium-python.readthedocs.io/api.html
element=driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
file.write(element_png)
前面第 2 行中建议的代码中存在错误。这是更正后的代码。作为菜鸟,还不能编辑我自己的post。
有时候截取 body 元素并不能得到最好的结果。因此可以使用另一种方法:获取所有元素的高度并将它们相加,以此设置捕获高度,如下所示:
# Sum the heights of all grandchildren of <html> and force the page to that
# height before capturing <body>.
element = driver.find_elements("xpath", "/html/child::*/child::*")
# A list, not a set: a set would keep only one of several elements that
# happen to share the same height, making the total too small.
eheight = [round(e.size["height"]) for e in element]
print(eheight)
total_height = sum(eheight)
driver.execute_script(
    "document.getElementsByTagName('html')[0].setAttribute('style', 'height:"
    + str(total_height) + "px')")
# find_element(by, value) is available in both Selenium 3 and 4, unlike the
# find_element_by_* helpers.
element = driver.find_element("tag name", "body")
element_png = element.screenshot_as_png
with open(fname, "wb") as file:
    file.write(element_png)
顺便说一句,它适用于 FF。
不确定人们是否仍然遇到这个问题。 我做了一个小技巧,效果很好,并且可以很好地与动态区域配合使用。希望对你有帮助
# 1. get dimensions
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
time.sleep(sometime)
total_height = browser.execute_script("return document.body.parentNode.scrollHeight")
browser.quit()
# 2. get screenshot
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, total_height)
browser.get(url)
browser.save_screenshot(screenshot_path)
稍微修改@ihightower和@A.Minachev的代码,使其在mac retina中工作:
import time
from PIL import Image
from io import BytesIO
def fullpage_screenshot(driver, file, scroll_delay=0.3):
    """Scroll through the page, capture each viewport and stitch them.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Path (or file object) the stitched image is saved to.
        scroll_delay: Seconds to wait after every scroll before capturing.

    Raises:
        AssertionError: If the page is wider than the viewport; only vertical
            scrolling is implemented.
        ValueError: If the browser reports a non-positive viewport height.
    """
    device_pixel_ratio = driver.execute_script('return window.devicePixelRatio')
    total_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    viewport_height = driver.execute_script('return window.innerHeight')
    total_width = driver.execute_script('return document.body.offsetWidth')
    viewport_width = driver.execute_script("return document.body.clientWidth")

    # This implementation assumes viewport_width == total_width.  Raised
    # explicitly so the check survives ``python -O`` (same exception type as
    # the assert statement it replaces).
    if viewport_width != total_width:
        raise AssertionError(
            'viewport width {0} != page width {1}'.format(viewport_width, total_width))
    if viewport_height <= 0:
        # A zero-height viewport would never advance the loop below.
        raise ValueError('viewport height must be positive')

    def to_device_px(css_px):
        # devicePixelRatio may be fractional (e.g. 1.5); Pillow needs ints.
        return int(round(css_px * device_pixel_ratio))

    # Scroll the page and keep one screenshot per scroll offset (CSS pixels).
    slices = {}
    offset = 0
    while offset < total_height:
        if offset + viewport_height > total_height:
            # Last slice: align it with the page bottom, but never scroll to
            # a negative offset when the page is shorter than the viewport.
            offset = max(0, total_height - viewport_height)
        driver.execute_script('window.scrollTo({0}, {1})'.format(0, offset))
        time.sleep(scroll_delay)
        slices[offset] = Image.open(BytesIO(driver.get_screenshot_as_png()))
        offset = offset + viewport_height

    # Combine the slices on a canvas measured in device pixels.
    stitched_image = Image.new(
        'RGB', (to_device_px(total_width), to_device_px(total_height)))
    for top, image in slices.items():
        stitched_image.paste(image, (0, to_device_px(top)))
    stitched_image.save(file)
fullpage_screenshot(driver, 'test.png')
我修改了上面“两次启动浏览器”的答案,只需启动一次浏览器并加载一次页面:
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
height = browser.execute_script("return document.body.parentNode.scrollHeight")
# 2. get screenshot
browser.set_window_size(default_width, height)
browser.save_screenshot(screenshot_path)
browser.quit()
此答案改进了之前的答案。
它假定使用无头模式,并且最初未设置 window-size 选项。在调用此函数之前,请确保页面已完全或充分加载。
它尝试将宽度和高度都设置为所需的值。整个页面的屏幕截图有时会包含不必要的垂直滚动条。通常避免滚动条的一种方法是截取 body 元素的屏幕截图。保存屏幕截图后,它会将尺寸恢复为原来的尺寸,否则下一张屏幕截图的尺寸可能无法正确设置。
最终,对于某些示例,此技术可能仍无法完美运行。
from selenium import webdriver
def save_screenshot(driver: webdriver.Chrome, path: str = '/tmp/screenshot.png') -> None:
    """Save a screenshot of the whole ``<body>`` element to *path*.

    The window is temporarily resized to the document's scroll width and
    height, the body element is captured, and the previous window size is
    restored afterwards -- even if taking the screenshot fails.

    Args:
        driver: WebDriver with the target page already loaded.
        path: Destination file for the PNG screenshot.
    """
    original_size = driver.get_window_size()
    required_width = driver.execute_script('return document.body.parentNode.scrollWidth')
    required_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    driver.set_window_size(required_width, required_height)
    try:
        # driver.save_screenshot(path) would include the scrollbar; capturing
        # the body element avoids it.  find_element(by, value) works on both
        # Selenium 3 and 4 (find_element_by_tag_name was removed in 4.3).
        driver.find_element('tag name', 'body').screenshot(path)
    finally:
        # Always restore the size so later screenshots are not affected.
        driver.set_window_size(original_size['width'], original_size['height'])
如果使用 Python 早于 3.6,请从函数定义中删除类型注释。
为什么不只获取页面的宽度和高度然后调整驱动程序的大小?所以会是这样的
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.scrollHeight")
driver.set_window_size(total_width, total_height)
driver.save_screenshot("SomeName.png")
这将制作整个页面的屏幕截图,而无需将不同的部分合并在一起。
屏幕截图仅限于视口,但您可以通过捕获 body
元素来解决此问题,因为网络驱动程序将捕获整个元素,即使它大于视口。这将使您不必处理滚动和拼接图像,但是您可能会发现页脚位置有问题(如下面的屏幕截图所示)。
使用 Chrome 驱动程序在 Windows 8 和 Mac High Sierra 上进行了测试。
from selenium import webdriver
url = 'https://whosebug.com/'
path = '/path/to/save/in/scrape.png'
driver = webdriver.Chrome()
driver.get(url)
el = driver.find_element_by_tag_name('body')
el.screenshot(path)
driver.quit()
Returns:(全尺寸:https://i.stack.imgur.com/ppDiI.png)
您可以使用Splinter
Splinter 是现有浏览器自动化工具之上的抽象层,例如 Selenium
新版本 0.10.0
中有一项新功能 browser.screenshot(..., full=True)
。
full=True
选项将为您进行全屏捕获。
搞定了!!!效果非常好
对于 NodeJS,但概念是相同的:
await driver.executeScript(`
document.documentElement.style.display = "table";
document.documentElement.style.width = "100%";
document.body.style.display = "table-row";
`);
await driver.findElement(By.css('body')).takeScreenshot();
关键是开启headless
模式!
无需拼接,无需两次加载页面。
完整的工作代码:
URL = 'http://www.w3schools.com/js/default.asp'
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)
driver.get(URL)
S = lambda X: driver.execute_script('return document.body.parentNode.scroll'+X)
driver.set_window_size(S('Width'),S('Height')) # May need manual adjustment
driver.find_element_by_tag_name('body').screenshot('web_screenshot.png')
driver.quit()
这实际上与 @Acumenus 的 posted 相同,但略有改进。
我的发现总结
我决定 post 无论如何,因为我没有找到有关关闭 headless
模式(显示浏览器)以截取屏幕截图时发生的情况的解释。
正如我测试的那样(使用 Chrome WebDriver),如果打开 headless
模式,屏幕截图会根据需要保存。但是,如果关闭 headless
模式,保存的屏幕截图的宽度和高度大致正确,但结果会有所不同 case-by-case。通常,屏幕可见的页面上部会被保存,但图像的其余部分只是纯白色。还有一个案例是试图通过使用上面的 link 来保存这个 Stack Overflow 线程;甚至上半部分也没有保存,有趣的是现在它是透明的,而其余部分仍然是白色的。我注意到的最后一个案例只有一次给定 W3Schools link;那里没有白色部分,但页面的上部重复到最后,包括 header.
我希望这对许多由于某种原因没有得到预期结果的人有所帮助,因为我没有看到任何人明确解释这种简单方法对 headless 模式的要求。只有当我自己发现这个问题的解决方案之后,我才发现 @vc2279 的一个 post 提到无头浏览器的 window 可以设置为任意大小(对于相反的情况似乎也是如此)。不过,我的 post 中的解决方案在此基础上做了改进:它不需要重复打开 browser/driver,也不需要重新加载页面。
进一步的建议
如果对于某些页面它不适合您,我建议在获取页面大小之前尝试添加 time.sleep(seconds)。另一种情况是,如果页面需要滚动到底部才能加载更多内容,这可以通过下面这个 post 中的 scheight 方法解决:
scheight = .1
while scheight < 9.9:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
scheight += .01
另请注意,对于某些页面,内容可能不在任何 top-level HTML 标记中,例如 <html>
或 <body>
,例如 YouTube 使用 <ytd-app>
标签。
最后一点,我发现有一个页面“返回”的屏幕截图仍然带有水平滚动条,此时 window 的大小需要手动调整,即图像宽度需要增加 18 像素,例如:S('Width')+18。
用 Python 实现很容易,但速度较慢
import os
from selenium import webdriver
from PIL import Image
def full_screenshot(driver: webdriver.Remote):
    """Return a stitched PIL image of the whole page shown by *driver*.

    The page is walked in viewport-sized tiles (row by row, left to right);
    each tile is captured in memory and pasted into one image of the full
    page size.

    Args:
        driver: WebDriver with the target page already loaded.

    Returns:
        A PIL ``Image`` of size ``(total_width, total_height)``.

    Raises:
        ValueError: If the browser reports a non-positive viewport size.

    NOTE(review): sizes are in CSS pixels; presumably this assumes a device
    pixel ratio of 1 -- confirm on HiDPI displays.
    """
    # Local import: the surrounding snippet only imports os, webdriver, Image.
    from io import BytesIO

    driver.execute_script("window.scrollTo(0, 0)")
    total_width = int(driver.execute_script("return document.body.offsetWidth"))
    total_height = int(driver.execute_script("return document.body.parentNode.scrollHeight"))
    viewport_width = int(driver.execute_script("return document.body.clientWidth"))
    viewport_height = int(driver.execute_script("return window.innerHeight"))
    if viewport_width <= 0 or viewport_height <= 0:
        # A zero-sized viewport would make the tiling loop never advance.
        raise ValueError("viewport size must be positive")

    # The browser cannot scroll past these positions, so the last row/column
    # is captured (and must be pasted) here instead of at the nominal offset.
    max_left = max(0, total_width - viewport_width)
    max_top = max(0, total_height - viewport_height)

    stitched_image = Image.new('RGB', (total_width, total_height))
    first_tile = True
    for top in range(0, total_height, viewport_height):
        for left in range(0, total_width, viewport_width):
            if not first_tile:
                # The first tile is already in view after scrollTo(0, 0).
                driver.execute_script(
                    "window.scrollTo({0}, {1})".format(left, top))
            first_tile = False
            # Capture in memory: no temporary part_N.png files are written,
            # so nothing is left behind if a later step fails.
            screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))
            stitched_image.paste(
                screenshot, (min(left, max_left), min(top, max_top)))
    return stitched_image
我修改了 @ihightower 给出的答案:不再在那个函数中保存截图,而是返回网页的总高度和总宽度,然后将 window 大小设置为该总高度和总宽度。
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def open_url(url):
    """Open *url* in headless Chrome and save a screenshot as ``screen.png``.

    The browser is always shut down afterwards, including when loading the
    page or taking the screenshot fails.

    Args:
        url: Address of the page to capture.
    """
    options = Options()
    # The command-line switch works across Selenium versions, unlike the
    # deprecated ``options.headless`` setter.
    options.add_argument('--headless')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        save_screenshot(driver, 'screen.png')
    finally:
        # Without this every call leaves a Chrome/chromedriver process behind.
        driver.quit()
def save_screenshot(driver, file_name):
    """Resize the window to the page size and save a screenshot.

    The page dimensions come from ``scroll_down``; the window is resized to
    them, the screenshot is decoded with PIL and written to *file_name*.

    Args:
        driver: WebDriver with the target page already loaded.
        file_name: Destination path of the image.
    """
    page_height, page_width = scroll_down(driver)
    driver.set_window_size(page_width, page_height)
    png_stream = BytesIO(driver.get_screenshot_as_png())
    Image.open(png_stream).save(file_name)
    print(" screenshot saved ")
def scroll_down(driver, scroll_delay=0.5):
    """Scroll through the page tile by tile and return its size.

    The page is visited in viewport-sized steps (row by row, left to right),
    pausing after each scroll.

    Args:
        driver: WebDriver with the target page already loaded.
        scroll_delay: Seconds to wait after each scroll.

    Returns:
        ``(total_height, total_width)`` as reported by the browser.

    Raises:
        ValueError: If the browser reports a non-positive viewport size.
    """
    # Local import: the surrounding snippet never imports ``time``, so the
    # original raised NameError as soon as a second tile was reached.
    import time

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    if viewport_width <= 0 or viewport_height <= 0:
        # A zero-sized viewport would make the loops below never advance.
        raise ValueError("viewport size must be positive")

    first_tile = True
    top = 0
    while top < total_height:
        left = 0
        while left < total_width:
            if not first_tile:
                # The first tile is the one already in view.
                driver.execute_script(
                    "window.scrollTo({0}, {1})".format(left, top))
                time.sleep(scroll_delay)
            first_tile = False
            left += viewport_width
        top += viewport_height
    return (total_height, total_width)
open_url("https://www.medium.com")
工作原理:把浏览器窗口的高度设置得尽可能大……
#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def test_fullpage_screenshot(self=None):
    """Save a full-page screenshot by making the headless window very tall.

    The window height is set to the height of the tallest element on the
    page plus a 1000 px margin, then an ordinary screenshot is saved to
    ``screenshot1.png``.

    Args:
        self: Unused.  It defaults to ``None`` so the function can be called
            both as a test method and directly, as the ``__main__`` guard
            below does (the original call failed with a missing argument).
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("yoururlxxx")
        time.sleep(2)
        # the element with longest height on page
        ele = driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
        total_height = ele.size["height"] + 1000
        driver.set_window_size(1920, total_height)  # the trick
        time.sleep(2)
        driver.save_screenshot("screenshot1.png")
    finally:
        # Shut the browser down even when a step above fails.
        driver.quit()


if __name__ == "__main__":
    test_fullpage_screenshot()
这是我在 Stack Overflow 上的第一个答案。我是新手。其他专家给出的答案都很棒,我甚至算不上参与竞争。我只想引用以下 link 中给出的步骤:pypi.org
参考整页截图部分。
打开命令提示符并导航到 Python 的安装目录
cd "enter the directory"
使用 pip 安装模块
pip install Selenium-Screenshot
以上模块适用于 Python 3。安装模块后,在 Python IDLE 中创建一个单独的文件,尝试以下代码:
from Screenshot import Screenshot_Clipping
from selenium import webdriver
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://github.com/sam4u3/Selenium_Screenshot/tree/master/test"
driver.get(url)
# the line below makes taking & saving screenshots very easy.
img_url=ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
print(img_url)
driver.close()
driver.quit()
来源:https://pypi.org/project/Selenium-Screenshot/
from Screenshot import Screenshot_Clipping
from selenium import webdriver
import time
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://www.bbc.com/news/world-asia-china-51108726"
driver.get(url)
time.sleep(1)
img_url = ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
driver.close()
driver.quit()
整页截图不是 W3C spec 的一部分。然而,许多 WebDriver 实现了它们自己的端点以获得真正的整页屏幕截图。我发现这种使用 geckodriver 的方法远远优于注入脚本的“截图、滚动、拼接”方法,也远远优于在无头模式下调整 window 大小的做法。
示例:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
options = Options()
options.headless = True
service = Service('/your/path/to/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
driver.get('https://www.nytimes.com/')
driver.get_full_page_screenshot_as_file('example.png')
driver.close()
geckodriver (Firefox)
如果您使用的是 geckodriver,您可以点击这些功能:
driver.get_full_page_screenshot_as_file
driver.save_full_page_screenshot
driver.get_full_page_screenshot_as_png
driver.get_full_page_screenshot_as_base64
我已经测试并确认这些可以在 Selenium 4.07 上运行。我不相信这些功能包含在 Selenium 3 中。
我能找到的关于这些的最佳文档在 merge
chromedriver(铬)
看来 chromedriver 已经实现了自己的整页截图功能:
https://chromium-review.googlesource.com/c/chromium/src/+/2300980
而且 Selenium 团队的目标似乎是在 Selenium 4 中获得支持:
对于Chrome,也可以使用Chrome DevTools Protocol:
import base64
...
# Ask Chrome for the size of the whole document, then capture that area in
# one DevTools call, including the part outside the visible viewport.
page_rect = browser.driver.execute_cdp_cmd("Page.getLayoutMetrics", {})
screenshot = browser.driver.execute_cdp_cmd(
    "Page.captureScreenshot",
    {
        "format": "png",
        "captureBeyondViewport": True,
        "clip": {
            "width": page_rect["contentSize"]["width"],
            "height": page_rect["contentSize"]["height"],
            "x": 0,
            "y": 0,
            "scale": 1
        }
    })
with open(path, "wb") as file:
    # The payload uses the standard Base64 alphabet ("+" and "/"), not the
    # URL-safe one, so decode it with b64decode.
    file.write(base64.b64decode(screenshot["data"]))
这在无头和非无头模式下都有效。
我目前正在使用这种方法:
def take_screenshot(self, driver, screenshot_name = "debug.png", settle_delay = 2):
    """Save a full-page screenshot by making the browser window very tall.

    The window is resized to 1920 px wide and the height of ``<body>`` plus
    a 1000 px margin before an ordinary screenshot is taken.

    Args:
        driver: WebDriver with the target page already loaded.
        screenshot_name: Destination file of the screenshot.
        settle_delay: Seconds to wait after resizing so the page can
            re-layout before it is captured.

    Returns:
        The same *driver*, so calls can be chained.
    """
    # Local import: the snippet this function comes from does not show an
    # ``import time``.
    import time

    # find_element(by, value) works on both Selenium 3 and 4;
    # find_element_by_tag_name was removed in Selenium 4.3.
    elem = driver.find_element('tag name', 'body')
    total_height = elem.size["height"] + 1000
    driver.set_window_size(1920, total_height)
    time.sleep(settle_delay)
    driver.save_screenshot(screenshot_name)
    return driver
Python 使用 Selenium 4 和 Chrome 驱动程序
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time
import shutil
def take_full_page_screenshot():
    """Save a full-page screenshot of Stack Overflow as ``screenshot.png``.

    Headless Chrome is started through webdriver-manager, the window is
    resized to 1920 px wide and the height of ``<body>`` plus a 1000 px
    margin, and an ordinary screenshot is saved.  The browser is always shut
    down afterwards.
    """
    # Install chrome driver
    chrome_driver_path = ChromeDriverManager().install()
    # Hand the Service to the driver instead of calling service.start():
    # starting it manually spawned a chromedriver process that was never
    # used or stopped, because the driver was built from the bare path.
    service = Service(chrome_driver_path)

    # setup chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--incognito')
    options.add_argument('--start-maximized')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # open url and wait for the page to load
        driver.get('https://www.stackoverflow.com')
        time.sleep(2)

        # find the element with longest height on page
        element = driver.find_element(By.TAG_NAME, 'body')
        total_height = element.size["height"] + 1000

        # set the window dimensions
        driver.set_window_size(1920, total_height)

        # save screenshot
        driver.save_screenshot("screenshot.png")
    finally:
        # quit driver even when a step above fails
        driver.quit()


if __name__ == '__main__':
    take_full_page_screenshot()