urllib.error.HTTPError: HTTP Error 404: Not Found when using request.urlopen()
urllib.error.HTTPError: HTTP Error 404: Not Found when using request.urlopen()
我正在按照一个教程学习，但在调用 request.urlopen(url)
时出现了错误。我已经检查过这个 URL
(https://www.wsj.com/market-data/quotes/PH/XPHS/JFC/historical-prices/download?MOD_VIEW=page&num_rows=150&range_days=150&startDate=06/01/2020&endDate=07/05/2020)，
它本身是没有问题的。
这是我的代码:
from urllib import request
import datetime
def download_stock_from_day_until_today(stock_code, start_date):
    """Download WSJ historical prices for a stock and save them as ``asd.csv``.

    The requested range runs from ``start_date`` up to today's date.

    Args:
        stock_code: Ticker inserted into the WSJ quote URL, e.g. ``"JFC"``.
        start_date: First day of the range as a string; the end date is
            formatted as ``MM/DD/YYYY``, so the same form is expected here.

    Raises:
        urllib.error.HTTPError: If the server answers with an HTTP error
            status.
    """
    current_day = datetime.date.today()
    formatted_current_day = datetime.date.strftime(current_day, "%m/%d/%Y") #formats today's date for links
    #formatted url
    url = "https://www.wsj.com/market-data/quotes/PH/XPHS/"+ stock_code +"/historical-prices/download?MOD_VIEW=page&num_rows=150&range_days=150&startDate="+ start_date +"&endDate=" + formatted_current_day
    print(url)
    response = request.urlopen(url) #requests the csv file
    # Release the HTTP connection as soon as the payload has been read.
    with response:
        raw_csv = response.read() #reads the csv file
    dest_url = r'asd.csv'
    # response.read() returns bytes. Calling str() on bytes yields the
    # "b'...'" repr with escaped newlines, which is not valid CSV, so the
    # payload is written unchanged in binary mode instead.
    with open(dest_url, "wb") as fx:
        fx.write(raw_csv)
# Example run: fetch JFC prices starting from 06/01/2020 (MM/DD/YYYY).
download_stock_from_day_until_today("JFC", "06/01/2020")
我在控制台中得到的错误是:
Traceback (most recent call last):
File "C:/Users/Lathrix/PycharmProject/StockExcelDownloader/main.py", line 23, in <module>
download_stock_from_day_until_today("JFC", "06/01/2020")
File "C:/Users/Lathrix/PycharmProject/StockExcelDownloader/main.py", line 12, in download_stock_from_day_until_today
response = request.urlopen(url) #requests the csv file
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
看起来 wsj.com 会拒绝 urllib 默认的 User-Agent（形如 Python-urllib/3.x），所以返回了 404。
把发起请求的那一行改成下面这样，显式指定一个浏览器风格的 User-Agent：
response = request.urlopen(request.Request(url,headers={'User-Agent': 'Mozilla/5.0'}))
这样您的代码就可以正常工作了。
我正在按照一个教程学习，但在调用 request.urlopen(url)
时出现了错误。我已经检查过这个 URL
(https://www.wsj.com/market-data/quotes/PH/XPHS/JFC/historical-prices/download?MOD_VIEW=page&num_rows=150&range_days=150&startDate=06/01/2020&endDate=07/05/2020)，
它本身是没有问题的。
这是我的代码:
from urllib import request
import datetime
def download_stock_from_day_until_today(stock_code, start_date):
    """Download WSJ historical prices for a stock and save them as ``asd.csv``.

    The requested range runs from ``start_date`` up to today's date.

    Args:
        stock_code: Ticker inserted into the WSJ quote URL, e.g. ``"JFC"``.
        start_date: First day of the range as a string; the end date is
            formatted as ``MM/DD/YYYY``, so the same form is expected here.

    Raises:
        urllib.error.HTTPError: If the server answers with an HTTP error
            status.
    """
    current_day = datetime.date.today()
    formatted_current_day = datetime.date.strftime(current_day, "%m/%d/%Y") #formats today's date for links
    #formatted url
    url = "https://www.wsj.com/market-data/quotes/PH/XPHS/"+ stock_code +"/historical-prices/download?MOD_VIEW=page&num_rows=150&range_days=150&startDate="+ start_date +"&endDate=" + formatted_current_day
    print(url)
    response = request.urlopen(url) #requests the csv file
    # Release the HTTP connection as soon as the payload has been read.
    with response:
        raw_csv = response.read() #reads the csv file
    dest_url = r'asd.csv'
    # response.read() returns bytes. Calling str() on bytes yields the
    # "b'...'" repr with escaped newlines, which is not valid CSV, so the
    # payload is written unchanged in binary mode instead.
    with open(dest_url, "wb") as fx:
        fx.write(raw_csv)
# Example run: fetch JFC prices starting from 06/01/2020 (MM/DD/YYYY).
download_stock_from_day_until_today("JFC", "06/01/2020")
我在控制台中得到的错误是:
Traceback (most recent call last):
File "C:/Users/Lathrix/PycharmProject/StockExcelDownloader/main.py", line 23, in <module>
download_stock_from_day_until_today("JFC", "06/01/2020")
File "C:/Users/Lathrix/PycharmProject/StockExcelDownloader/main.py", line 12, in download_stock_from_day_until_today
response = request.urlopen(url) #requests the csv file
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\Lathrix\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
看起来 wsj.com 会拒绝 urllib 默认的 User-Agent（形如 Python-urllib/3.x），所以返回了 404。
把发起请求的那一行改成下面这样，显式指定一个浏览器风格的 User-Agent：
response = request.urlopen(request.Request(url,headers={'User-Agent': 'Mozilla/5.0'}))
这样您的代码就可以正常工作了。