Download NSE 2021 data using Python
I am having trouble accessing URLs like this one from Python code:
https://www1.nseindia.com/content/historical/EQUITIES/2021/JAN/cm01JAN2021bhav.csv.zip
This had worked for the past three years, until 31 December 2020. The site appears to have introduced some restriction.
There is a similar solution in VB: NSE ACCESS DENIED. The fix there is to add these headers:
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"
"Referer": "https://www1.nseindia.com/products/content/equities/equities/archieve_eq.htm"
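(For reference, the equivalent of that VB addition in Python's requests library would look roughly like this; the variable url is assumed to hold a bhavcopy link such as the one above:)

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11',
    'Referer': 'https://www1.nseindia.com/products/content/equities/equities/archieve_eq.htm',
}
response = requests.get(url, headers=headers)  # url: assumed download link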
The original code is here:
https://github.com/naveen7v/Bhavcopy/blob/master/Bhavcopy.py
It still does not work even after adding the following to the requests call:
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11'}
# print(Path)
a = requests.get(Path, headers=headers)  # headers must be passed by keyword; the second positional argument of get() is params
Can someone help?

One suggested workaround is to send a full set of browser-like headers together with a bm_sv cookie copied from a browser session, as in the following answer:
import io
import zipfile

import requests

def download_bhavcopy(formated_date):
    # formated_date is 'DD-MM-YYYY'; the archive URL needs
    # year/MON/foDDMONYYYYbhav.csv.zip, with month_dict mapping '01' -> 'JAN' etc.
    url = "https://www1.nseindia.com/content/historical/DERIVATIVES/{0}/{1}/fo{2}{1}{0}bhav.csv.zip".format(
        formated_date.split('-')[2],
        month_dict[formated_date.split('-')[1]],
        formated_date.split('-')[0])
    print(url)

    # Browser-like headers; without them the server rejects the request.
    hdr = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-IN,en;q=0.9,en-GB;q=0.8,en-US;q=0.7,hi;q=0.6',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'www1.nseindia.com',
        'Referer': 'https://www1.nseindia.com/products/content/derivatives/equities/fo.htm',
    }
    # bm_sv is a bot-protection cookie copied from a browser session; it
    # expires, so a stale value may need to be refreshed.
    cookie_dict = {'bm_sv': 'E2109FAE3F0EA09C38163BBF24DD9A7E~t53LAJFVQDcB/+q14T3amyom/sJ5dm1gV7z2R0E3DKg6WiKBpLgF0t1Mv32gad4CqvL3DIswsfAKTAHD16vNlona86iCn3267hHmZU/O7DrKPY73XE6C4p5geps7yRwXxoUOlsqqPtbPsWsxE7cyDxr6R+RFqYMoDc9XuhS7e18='}

    session = requests.Session()
    for name in cookie_dict:
        session.cookies.set(name, cookie_dict[name])
    response = session.get(url, headers=hdr)

    if response.status_code == 200:
        print('Success!')
    elif response.status_code == 404:
        print('Not Found.')
    else:
        print('response.status_code ', response.status_code)

    file_name = "none"
    try:
        # The response body is the zip archive itself; extract it in place.
        zipT = zipfile.ZipFile(io.BytesIO(response.content))
        zipT.extractall()
        file_name = zipT.filelist[0].filename
        print('file name ' + file_name)
    except zipfile.BadZipFile:
        # The download was not a valid zip (e.g. an HTML error page).
        print('Error: Zip file is corrupted')
    except zipfile.LargeZipFile:
        # Raised when the archive needs Zip64 support, which is not enabled here.
        print('Error: File size is too large')
    print(file_name)
    return file_name
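The function above assumes a month_dict that is not shown in the snippet. A minimal sketch of what it would need to contain, given that formated_date is split on '-' as day-month-year, together with an example call:

month_dict = {
    '01': 'JAN', '02': 'FEB', '03': 'MAR', '04': 'APR',
    '05': 'MAY', '06': 'JUN', '07': 'JUL', '08': 'AUG',
    '09': 'SEP', '10': 'OCT', '11': 'NOV', '12': 'DEC',
}

# Example: fetch the F&O bhavcopy for 1 January 2021 ('DD-MM-YYYY'),
# producing .../DERIVATIVES/2021/JAN/fo01JAN2021bhav.csv.zip.
print(download_bhavcopy('01-01-2021'))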
Inspect the link in your web browser and find the GET request for the download you need. Go to Headers and note the User-Agent, for example:
User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0
Now modify your code to:
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
}
result = requests.get(URL, headers=headers)
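Putting that together with the URL from the question and saving the download to disk (a sketch; whether the User-Agent alone is enough may depend on the site's current checks):

import requests

URL = 'https://www1.nseindia.com/content/historical/EQUITIES/2021/JAN/cm01JAN2021bhav.csv.zip'
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
}

result = requests.get(URL, headers=headers)
result.raise_for_status()  # fail on 403/404 instead of silently saving an error page

# Write the raw zip bytes to disk.
with open('cm01JAN2021bhav.csv.zip', 'wb') as f:
    f.write(result.content)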