无法使用第一次调用响应作为参数成功进行第二次调用
Unable to successfully make the second call using the first call response as parameters
我是 Python 的初学者,我正在尝试使用 python 访问以下数据。
1) https://www.nseindia.com/corporates/corporateHome.html,点击左面板'Corporate Information'下的'Corporate Announcements'。
2) 输入公司代码(例如KSCL)并选择公告期
3) 单击任何单独的行主题以获取更多详细信息
前两个步骤转换为下面的 url“https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp?period=More%20than%203%20Months&symbol=kscl&industry=&subject=”。这在我的 python 请求代码中工作正常。
但是我无法复制第三步,请求成功但我没有获取数据。以下是我正在使用的代码,我卡住了,请帮忙。
我比较了在浏览器中执行此操作时发出的所有请求头(headers)与我用 Python 发送的请求头,它们是一致的。我也尝试过发送 cookie,但没有用。我认为可能并不需要 cookie,因为禁用 cookie 后该网站在浏览器中仍然可以正常使用。我是在 Python 3.5 上运行这段代码的。
import requests as rq
from requests.utils import requote_uri
from requests_html import HTMLSession
import demjson as dj
from urllib.parse import quote
class BuyBack:
    """Scrape NSE corporate announcements and print the first one's detail page.

    Call ``start()`` to run the whole flow: it defines the request headers
    shared by every call, selects the symbol and fetches its announcements.
    """

    def start(self):
        """Define the headers used across all requests, then run the scrape."""
        self.req_headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
        }
        self.getAllSymbols()

    @staticmethod
    def _buildDetailUrl(pyAnnouncement):
        """Return the AnnouncementDetail.jsp URL for one announcement row.

        Args:
            pyAnnouncement: mapping with the 'sym', 'desc', 'date' and
                'seqId' keys of a decoded announcement row.

        Raises:
            KeyError: if one of those keys is missing.
            ValueError: if 'date' cannot be converted to an integer.
        """
        # Ordered pairs rather than a dict, so the query string is identical
        # on every run (dict order is arbitrary on Python 3.5).
        params = (
            ('symbol', pyAnnouncement['sym']),
            ('desc', pyAnnouncement['desc']),
            # The detail page is requested with the row's 'date' value minus
            # its last two digits; sending the raw value yields no data.
            ('tstamp', int(pyAnnouncement['date']) // 100),
            ('seqId', pyAnnouncement['seqId']),
        )
        # quote() encodes spaces as '%20'; str() is needed because quote()
        # rejects non-string values such as ints.
        query = '&'.join(
            '{}={}'.format(quote(key), quote(str(value)))
            for key, value in params
        )
        # Same 'www.' host as the Host header sent with the request.
        return ('https://www.nseindia.com/corporates/corpInfo/equities/'
                'AnnouncementDetail.jsp?' + query)

    def readAnnouncement(self, pyAnnouncement):
        """Fetch one announcement's detail page with requests_html.

        Prints the final URL, the raw response body and the request headers
        that were actually sent.
        """
        annReqUrl = self._buildDetailUrl(pyAnnouncement)
        # Work on a copy so the AJAX-specific headers below do not leak into
        # self.req_headers and every later request.
        headers = dict(self.req_headers)
        headers['Referer'] = 'https://www.nseindia.com/corporates/corpInfo/equities/Announcements.html'
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers['Host'] = 'www.nseindia.com'
        session = HTMLSession()
        r = session.get(annReqUrl, headers=headers)
        print(r.url)
        print(r.content)
        print(r.request.headers)

    def getAllSymbols(self):
        """Select the symbols to process; for now just run with one."""
        symbol = 'KSCL'
        self.getAnnouncements(symbol)

    def getAnnouncements(self, symbol):
        """Fetch the announcement list for ``symbol`` and read the first row.

        requests and demjson are used because the endpoint returns a
        JavaScript object rather than strict JSON.
        """
        # Open request to get everything.
        payload = {'symbol': symbol, 'Industry': '', 'ExDt': '', 'subject': ''}
        corporateActions_url = 'https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp'
        r = rq.get(corporateActions_url, headers=self.req_headers, params=payload)
        # Stays None when the response has no line long enough to decode.
        pyAnnouncements = None
        for line in r.iter_lines():
            lineAscii = line.decode("ascii")
            if len(lineAscii) > 5:
                pyAnnouncements = dj.decode(lineAscii)
        # NOTE: forwarding the response's Set-Cookie value as a Cookie header
        # was tried here and made no difference.
        if not pyAnnouncements or not pyAnnouncements['success']:
            return
        # Only the first announcement is read for now; the slice also copes
        # with an empty 'rows' list.
        for row in pyAnnouncements['rows'][:1]:
            self.readAnnouncement(row)
# Run the scraper: start() defines the shared request headers and then
# triggers the announcement fetch.
BuyBack_inst = BuyBack()
BuyBack_inst.start()
当我在浏览器中执行此流程时,第二次调用的响应中会包含一个指向另一个 PDF 的 href 链接。但是我用 Python 得到的响应中没有这个 href 链接。
对于给定的公司代码和公告期,我可以通过以下方式获取所有 PDF 的 href:
import demjson
import requests
from bs4 import BeautifulSoup
# Print the PDF link of every announcement for `symbol` in the chosen period.
symbol = 'KSCL'
# One Session is reused for the listing call and for every detail call.
s = requests.Session()
r = s.get("https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp"
          f"?period=Last%201%20Month&symbol={symbol}&industry=&subject=")
# demjson decodes the response body; each entry of 'rows' is one announcement.
for ann in demjson.decode(r.text.strip())['rows']:
    url = (
        "https://www.nseindia.com/corporates/corpInfo/equities/AnnouncementDetail.jsp?"
        f"symbol={ann['sym']}"
        f"&desc={ann['desc']}"
        # tstamp is the row's 'date' value with its last two digits dropped.
        f"&tstamp={int(ann['date']) // 100}"
        f"&seqId={ann['seqId']}"
    )
    soup = BeautifulSoup(s.get(url).content, 'html.parser')
    pdf_link = soup.select_one('.t1 a[href$=".pdf"]')
    # select_one() returns None when nothing matches; skip announcements
    # without a PDF link instead of crashing on None['href'].
    if pdf_link is not None:
        print(pdf_link['href'])
结果:
/corporate/KSCL_20122018134432_Outcome_046.pdf
/corporate/KSCL_20122018133033_Outcome_043.pdf
我是 Python 的初学者,我正在尝试使用 python 访问以下数据。
1) https://www.nseindia.com/corporates/corporateHome.html,点击左面板'Corporate Information'下的'Corporate Announcements'。 2) 输入公司代码(例如KSCL)并选择公告期 3) 单击任何单独的行主题以获取更多详细信息
前两个步骤转换为下面的 url“https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp?period=More%20than%203%20Months&symbol=kscl&industry=&subject=”。这在我的 python 请求代码中工作正常。
但是我无法复制第三步,请求成功但我没有获取数据。以下是我正在使用的代码,我卡住了,请帮忙。
我比较了在浏览器中执行此操作时发出的所有请求头(headers)与我用 Python 发送的请求头,它们是一致的。我也尝试过发送 cookie,但没有用。我认为可能并不需要 cookie,因为禁用 cookie 后该网站在浏览器中仍然可以正常使用。我是在 Python 3.5 上运行这段代码的。
import requests as rq
from requests.utils import requote_uri
from requests_html import HTMLSession
import demjson as dj
from urllib.parse import quote
class BuyBack:
    """Scrape NSE corporate announcements and print the first one's detail page.

    Call ``start()`` to run the whole flow: it defines the request headers
    shared by every call, selects the symbol and fetches its announcements.
    """

    def start(self):
        """Define the headers used across all requests, then run the scrape."""
        self.req_headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
        }
        self.getAllSymbols()

    @staticmethod
    def _buildDetailUrl(pyAnnouncement):
        """Return the AnnouncementDetail.jsp URL for one announcement row.

        Args:
            pyAnnouncement: mapping with the 'sym', 'desc', 'date' and
                'seqId' keys of a decoded announcement row.

        Raises:
            KeyError: if one of those keys is missing.
            ValueError: if 'date' cannot be converted to an integer.
        """
        # Ordered pairs rather than a dict, so the query string is identical
        # on every run (dict order is arbitrary on Python 3.5).
        params = (
            ('symbol', pyAnnouncement['sym']),
            ('desc', pyAnnouncement['desc']),
            # The detail page is requested with the row's 'date' value minus
            # its last two digits; sending the raw value yields no data.
            ('tstamp', int(pyAnnouncement['date']) // 100),
            ('seqId', pyAnnouncement['seqId']),
        )
        # quote() encodes spaces as '%20'; str() is needed because quote()
        # rejects non-string values such as ints.
        query = '&'.join(
            '{}={}'.format(quote(key), quote(str(value)))
            for key, value in params
        )
        # Same 'www.' host as the Host header sent with the request.
        return ('https://www.nseindia.com/corporates/corpInfo/equities/'
                'AnnouncementDetail.jsp?' + query)

    def readAnnouncement(self, pyAnnouncement):
        """Fetch one announcement's detail page with requests_html.

        Prints the final URL, the raw response body and the request headers
        that were actually sent.
        """
        annReqUrl = self._buildDetailUrl(pyAnnouncement)
        # Work on a copy so the AJAX-specific headers below do not leak into
        # self.req_headers and every later request.
        headers = dict(self.req_headers)
        headers['Referer'] = 'https://www.nseindia.com/corporates/corpInfo/equities/Announcements.html'
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers['Host'] = 'www.nseindia.com'
        session = HTMLSession()
        r = session.get(annReqUrl, headers=headers)
        print(r.url)
        print(r.content)
        print(r.request.headers)

    def getAllSymbols(self):
        """Select the symbols to process; for now just run with one."""
        symbol = 'KSCL'
        self.getAnnouncements(symbol)

    def getAnnouncements(self, symbol):
        """Fetch the announcement list for ``symbol`` and read the first row.

        requests and demjson are used because the endpoint returns a
        JavaScript object rather than strict JSON.
        """
        # Open request to get everything.
        payload = {'symbol': symbol, 'Industry': '', 'ExDt': '', 'subject': ''}
        corporateActions_url = 'https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp'
        r = rq.get(corporateActions_url, headers=self.req_headers, params=payload)
        # Stays None when the response has no line long enough to decode.
        pyAnnouncements = None
        for line in r.iter_lines():
            lineAscii = line.decode("ascii")
            if len(lineAscii) > 5:
                pyAnnouncements = dj.decode(lineAscii)
        # NOTE: forwarding the response's Set-Cookie value as a Cookie header
        # was tried here and made no difference.
        if not pyAnnouncements or not pyAnnouncements['success']:
            return
        # Only the first announcement is read for now; the slice also copes
        # with an empty 'rows' list.
        for row in pyAnnouncements['rows'][:1]:
            self.readAnnouncement(row)
# Run the scraper: start() defines the shared request headers and then
# triggers the announcement fetch.
BuyBack_inst = BuyBack()
BuyBack_inst.start()
当我在浏览器中执行此流程时,第二次调用的响应中会包含一个指向另一个 PDF 的 href 链接。但是我用 Python 得到的响应中没有这个 href 链接。
对于给定的公司代码和公告期,我可以通过以下方式获取所有 PDF 的 href:
import demjson
import requests
from bs4 import BeautifulSoup
# Print the PDF link of every announcement for `symbol` in the chosen period.
symbol = 'KSCL'
# One Session is reused for the listing call and for every detail call.
s = requests.Session()
r = s.get("https://www.nseindia.com/corporates/corpInfo/equities/getAnnouncements.jsp"
          f"?period=Last%201%20Month&symbol={symbol}&industry=&subject=")
# demjson decodes the response body; each entry of 'rows' is one announcement.
for ann in demjson.decode(r.text.strip())['rows']:
    url = (
        "https://www.nseindia.com/corporates/corpInfo/equities/AnnouncementDetail.jsp?"
        f"symbol={ann['sym']}"
        f"&desc={ann['desc']}"
        # tstamp is the row's 'date' value with its last two digits dropped.
        f"&tstamp={int(ann['date']) // 100}"
        f"&seqId={ann['seqId']}"
    )
    soup = BeautifulSoup(s.get(url).content, 'html.parser')
    pdf_link = soup.select_one('.t1 a[href$=".pdf"]')
    # select_one() returns None when nothing matches; skip announcements
    # without a PDF link instead of crashing on None['href'].
    if pdf_link is not None:
        print(pdf_link['href'])
结果:
/corporate/KSCL_20122018134432_Outcome_046.pdf
/corporate/KSCL_20122018133033_Outcome_043.pdf