如何在 linux 服务器上使用 docker-compose 安装 phantomjs 和 selenium?
How to install phantomjs and selenium with docker-compose on linux server?
我正在为我的网络抓取工具使用 selenium 和 phantomjs。
一切都适用于我的测试 windows 应用程序。
尝试将此代码更新添加到我的主应用程序中,使用 docker-compose 部署,我得到了这个:
selenium.common.exceptions.WebDriverException: Message: 'phantomjs' executable needs to be in PATH.
我该如何解决这个问题?
目前我的 docker-compose.yml 有这个代码:
# docker-compose.yml as posted in the question (nesting restored).
version: '3.1'

services:
  tgbot:
    container_name: bot
    build:
      context: .
    command: python app.py
    restart: always
    environment:
      WEBAPP_PORT: 3001
    env_file:
      - ".env"
    # bot start after load db
    ports:
      - 8443:3001
    networks:
      - botnet

  # Note: this service declares no `networks:` entry, unlike tgbot.
  phantomjs:
    image: shufo/phantomjs
    command: --webdriver 8901

networks:
  botnet:
    driver: bridge
还有我的 python 代码:
from selenium import webdriver
# Starts PhantomJS locally; this is the call that raises
# "'phantomjs' executable needs to be in PATH" in the bot container.
driver = webdriver.PhantomJS()
Dockerfile:
# Base image: python:latest.
FROM python:latest
# Create /src and use it as the working directory.
RUN mkdir /src
WORKDIR /src
# Copy requirements.txt and install the Python packages it lists.
COPY requirements.txt /src
RUN pip install -r requirements.txt
# Copy the rest of the build context into /src.
COPY . /src
P.S. 我使用 phantomjs 是因为我要抓取的网页包含 JS,而用 chrome 无法正常工作。
您的配置有几个问题:
- 您的机器人代码在不同的容器中运行。不在启动 phantomjs 的那一个。这就是它找不到可执行文件的原因。
- 您运行的 phantomjs 容器与您的代码不在同一网络中。
- 有些无用的配置似乎是从其他示例中复制粘贴的。
- 您强制重新启动您的容器。即使在成功退出代码后它也会重新启动。
下面是如何运行一切的完整示例:
- 创建空文件夹 myfolder,并在其中放入 app.py,内容如下:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Connect to the WebDriver endpoint served by the `phantomjs` compose service
# (started with `--webdriver 8901`) instead of launching a local PhantomJS binary.
driver = webdriver.Remote(
    command_executor='http://phantomjs:8901/wd/hub/',
    desired_capabilities=DesiredCapabilities.PHANTOMJS,
)
- 将 requirements.txt 文件放入 myfolder
- 将以下 Dockerfile 添加到 myfolder:
# Image for the bot: only the Python dependencies are installed here.
# app.py is not copied into the image; docker-compose.yml mounts the folder at /apps.
FROM python:latest
WORKDIR /configs
# Copy requirements.txt into /configs and install the packages it lists.
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
- 将以下 docker-compose.yml 添加到 myfolder:
# Two services on one user-defined bridge network: the bot and a PhantomJS
# WebDriver server that the bot connects to remotely.
version: '3.1'

services:
  tgbot:
    build: .
    container_name: bot
    # Mount the project folder so app.py is available at /apps/app.py.
    volumes:
      - .:/apps
    command: python /apps/app.py
    # Start the phantomjs service before the bot.
    depends_on:
      - phantomjs
    networks:
      - botnet

  phantomjs:
    container_name: phantomjs
    image: shufo/phantomjs
    # WebDriver mode on port 8901; app.py connects to http://phantomjs:8901/wd/hub/.
    command: --webdriver 8901
    # Same network as tgbot, so the two containers can talk to each other.
    networks:
      - botnet

networks:
  botnet:
    driver: bridge
- 执行 cd myfolder,然后执行 docker-compose up
输出:
phantomjs | [INFO - 2020-11-10T15:18:11.049Z] GhostDriver - Main - running on port 8901
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - page.customHeaders: - {}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"linux-unknown-64bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: f2091fe0-2367-11eb-bcd7-956b9cd40e54
bot exited with code 0
我正在为我的网络抓取工具使用 selenium 和 phantomjs。
一切都适用于我的测试 windows 应用程序。
尝试将此代码更新添加到我的主应用程序中,使用 docker-compose 部署,我得到了这个:
selenium.common.exceptions.WebDriverException: Message: 'phantomjs' executable needs to be in PATH.
我该如何解决这个问题? 目前我的 docker-compose.yml 有这个代码:
# docker-compose.yml as posted in the question (nesting restored).
version: '3.1'

services:
  tgbot:
    container_name: bot
    build:
      context: .
    command: python app.py
    restart: always
    environment:
      WEBAPP_PORT: 3001
    env_file:
      - ".env"
    # bot start after load db
    ports:
      - 8443:3001
    networks:
      - botnet

  # Note: this service declares no `networks:` entry, unlike tgbot.
  phantomjs:
    image: shufo/phantomjs
    command: --webdriver 8901

networks:
  botnet:
    driver: bridge
还有我的 python 代码:
from selenium import webdriver
# Starts PhantomJS locally; this is the call that raises
# "'phantomjs' executable needs to be in PATH" in the bot container.
driver = webdriver.PhantomJS()
Dockerfile:
# Base image: python:latest.
FROM python:latest
# Create /src and use it as the working directory.
RUN mkdir /src
WORKDIR /src
# Copy requirements.txt and install the Python packages it lists.
COPY requirements.txt /src
RUN pip install -r requirements.txt
# Copy the rest of the build context into /src.
COPY . /src
P.S. 我使用 phantomjs 是因为我要抓取的网页包含 JS,而用 chrome 无法正常工作。
您的配置有几个问题:
- 您的机器人代码在不同的容器中运行。不在启动 phantomjs 的那一个。这就是它找不到可执行文件的原因。
- 您运行的 phantomjs 容器与您的代码不在同一网络中。
- 有些无用的配置似乎是从其他示例中复制粘贴的。
- 您强制重新启动您的容器。即使在成功退出代码后它也会重新启动。
下面是如何运行一切的完整示例:
- 创建空文件夹 myfolder,并在其中放入 app.py,内容如下:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Connect to the WebDriver endpoint served by the `phantomjs` compose service
# (started with `--webdriver 8901`) instead of launching a local PhantomJS binary.
driver = webdriver.Remote(
    command_executor='http://phantomjs:8901/wd/hub/',
    desired_capabilities=DesiredCapabilities.PHANTOMJS,
)
- 将 requirements.txt 文件放入 myfolder
- 将以下 Dockerfile 添加到 myfolder:
# Image for the bot: only the Python dependencies are installed here.
# app.py is not copied into the image; docker-compose.yml mounts the folder at /apps.
FROM python:latest
WORKDIR /configs
# Copy requirements.txt into /configs and install the packages it lists.
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
- 将以下 docker-compose.yml 添加到 myfolder:
# Two services on one user-defined bridge network: the bot and a PhantomJS
# WebDriver server that the bot connects to remotely.
version: '3.1'

services:
  tgbot:
    build: .
    container_name: bot
    # Mount the project folder so app.py is available at /apps/app.py.
    volumes:
      - .:/apps
    command: python /apps/app.py
    # Start the phantomjs service before the bot.
    depends_on:
      - phantomjs
    networks:
      - botnet

  phantomjs:
    container_name: phantomjs
    image: shufo/phantomjs
    # WebDriver mode on port 8901; app.py connects to http://phantomjs:8901/wd/hub/.
    command: --webdriver 8901
    # Same network as tgbot, so the two containers can talk to each other.
    networks:
      - botnet

networks:
  botnet:
    driver: bridge
- 执行 cd myfolder,然后执行 docker-compose up
输出:
phantomjs | [INFO - 2020-11-10T15:18:11.049Z] GhostDriver - Main - running on port 8901
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - page.customHeaders: - {}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] Session [f2091fe0-2367-11eb-bcd7-956b9cd40e54] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"linux-unknown-64bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
phantomjs | [INFO - 2020-11-10T15:18:11.425Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: f2091fe0-2367-11eb-bcd7-956b9cd40e54
bot exited with code 0