Python JavaScript Scraping: ConnectionRefusedError: [Errno 61] Connection refused
Python JavaScript Scraping: ConnectionRefusedError: [Errno 61] Connection refused
我正在尝试从 Yahoo Finance 上抓取一些信息。下面是我的简单代码:
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
from selenium import webdriver
webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
browser.get(url)
browser.quit
但我收到以下错误:
ConnectionRefusedError Traceback (most recent call last)
<ipython-input-12-f4d26b367ef7> in <module>()
8 webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
9
---> 10 browser.get(url)
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/webdriver.py in get(self, url)
266 Loads a web page in the current browser session.
267 """
--> 268 self.execute(Command.GET, {'url': url})
269
270 @property
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
252
253 params = self._wrap_value(params)
--> 254 response = self.command_executor.execute(driver_command, params)
255 if response:
256 self.error_handler.check_response(response)
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/remote_connection.py in execute(self, command, params)
462 path = string.Template(command_info[1]).substitute(params)
463 url = '%s%s' % (self._url, path)
--> 464 return self._request(command_info[0], url, body=data)
465
466 def _request(self, method, url, body=None):
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/remote_connection.py in _request(self, method, url, body)
485 body = None
486 try:
--> 487 self._conn.request(method, parsed_url.path, body, headers)
488 resp = self._conn.getresponse()
489 except (httplib.HTTPException, socket.error):
/anaconda/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
1237 encode_chunked=False):
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
1241 def _send_request(self, method, url, body, headers, encode_chunked):
/anaconda/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1283 # default charset of iso-8859-1.
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
1287 def getresponse(self):
/anaconda/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
1232 else:
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
1236 def request(self, method, url, body=None, headers={}, *,
/anaconda/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
1024 msg = b"\r\n".join(self._buffer)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
1028 if message_body is not None:
/anaconda/lib/python3.6/http/client.py in send(self, data)
962 if self.sock is None:
963 if self.auto_open:
--> 964 self.connect()
965 else:
966 raise NotConnected()
/anaconda/lib/python3.6/http/client.py in connect(self)
934 """Connect to the host and port specified in __init__."""
935 self.sock = self._create_connection(
--> 936 (self.host,self.port), self.timeout, self.source_address)
937 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
938
/anaconda/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
720
721 if err is not None:
--> 722 raise err
723 else:
724 raise error("getaddrinfo returns an empty list")
/anaconda/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
711 if source_address:
712 sock.bind(source_address)
713 sock.connect(sa)
714 return sock
715
ConnectionRefusedError: [Errno 61] Connection refused
我已经检查了有关此主题的其他问题,但找不到任何相关答案。我该如何纠正这个错误?
这是您问题的答案:
错误堆栈跟踪提示我们实际问题可能出在这行代码中:
webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
当您通过 browser 实例打开 URL(如 browser.get(url))时,必须先把 webdriver.PhantomJS(...) 的返回值赋给 browser,并为该实例配置 PhantomJS 可执行文件的绝对路径(absolute path),如下所示(在我的 Win8 机器上通过 PyCharm IDE 运行):
from selenium import webdriver
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
browser = webdriver.PhantomJS(executable_path="C:\Utility\phantomjs-2.1.1-windows\bin\phantomjs.exe")
browser.get(url)
# your code block
print(browser.title)
# your code block
browser.quit
我的控制台输出如下:
AAPL Key Statistics | Apple Inc. Stock - Yahoo Finance
如果这回答了您的问题,请告诉我。
我正在尝试从 Yahoo Finance 上抓取一些信息。下面是我的简单代码:
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
from selenium import webdriver
webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
browser.get(url)
browser.quit
但我收到以下错误:
ConnectionRefusedError Traceback (most recent call last)
<ipython-input-12-f4d26b367ef7> in <module>()
8 webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
9
---> 10 browser.get(url)
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/webdriver.py in get(self, url)
266 Loads a web page in the current browser session.
267 """
--> 268 self.execute(Command.GET, {'url': url})
269
270 @property
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
252
253 params = self._wrap_value(params)
--> 254 response = self.command_executor.execute(driver_command, params)
255 if response:
256 self.error_handler.check_response(response)
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/remote_connection.py in execute(self, command, params)
462 path = string.Template(command_info[1]).substitute(params)
463 url = '%s%s' % (self._url, path)
--> 464 return self._request(command_info[0], url, body=data)
465
466 def _request(self, method, url, body=None):
/anaconda/lib/python3.6/site-packages/selenium-3.4.3-py3.6.egg/selenium/webdriver/remote/remote_connection.py in _request(self, method, url, body)
485 body = None
486 try:
--> 487 self._conn.request(method, parsed_url.path, body, headers)
488 resp = self._conn.getresponse()
489 except (httplib.HTTPException, socket.error):
/anaconda/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
1237 encode_chunked=False):
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
1241 def _send_request(self, method, url, body, headers, encode_chunked):
/anaconda/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1283 # default charset of iso-8859-1.
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
1287 def getresponse(self):
/anaconda/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
1232 else:
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
1236 def request(self, method, url, body=None, headers={}, *,
/anaconda/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
1024 msg = b"\r\n".join(self._buffer)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
1028 if message_body is not None:
/anaconda/lib/python3.6/http/client.py in send(self, data)
962 if self.sock is None:
963 if self.auto_open:
--> 964 self.connect()
965 else:
966 raise NotConnected()
/anaconda/lib/python3.6/http/client.py in connect(self)
934 """Connect to the host and port specified in __init__."""
935 self.sock = self._create_connection(
--> 936 (self.host,self.port), self.timeout, self.source_address)
937 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
938
/anaconda/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
720
721 if err is not None:
--> 722 raise err
723 else:
724 raise error("getaddrinfo returns an empty list")
/anaconda/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
711 if source_address:
712 sock.bind(source_address)
713 sock.connect(sa)
714 return sock
715
ConnectionRefusedError: [Errno 61] Connection refused
我已经检查了有关此主题的其他问题,但找不到任何相关答案。我该如何纠正这个错误?
这是您问题的答案:
错误堆栈跟踪提示我们实际问题可能出在这行代码中:
webdriver.PhantomJS(executable_path="/anaconda/bin/phantomjs")
当您通过 browser 实例打开 URL(如 browser.get(url))时,必须先把 webdriver.PhantomJS(...) 的返回值赋给 browser,并为该实例配置 PhantomJS 可执行文件的绝对路径(absolute path),如下所示(在我的 Win8 机器上通过 PyCharm IDE 运行):
from selenium import webdriver
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
browser = webdriver.PhantomJS(executable_path="C:\Utility\phantomjs-2.1.1-windows\bin\phantomjs.exe")
browser.get(url)
# your code block
print(browser.title)
# your code block
browser.quit
我的控制台输出如下:
AAPL Key Statistics | Apple Inc. Stock - Yahoo Finance
如果这回答了您的问题,请告诉我。