Scrapy POST 请求表单数据无效
Scrapy POST request form data not working
我想从这个网站获取数据:
https://livedragon.vdsc.com.vn/general/intradayBoard.rv
这是我检查响应的代码:
import scrapy
from scrapy import FormRequest
from scrapy.shell import inspect_response
class VdscSpider(scrapy.Spider):
    """Spider from the question: POSTs the search form to the board page.

    NOTE(review): this is the non-working version under discussion — the
    form is posted to intradayBoard.rv (the HTML board page) rather than
    to the AJAX endpoint that actually serves the data, so the response
    carries nothing to scrape.
    """

    name = 'vdsc'
    start_urls = ['https://livedragon.vdsc.com.vn/general/intradayBoard.rv']

    def start_requests(self):
        # Browser-like User-Agent plus the URL-encoded form content type.
        headers= {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
        }
        # Stock code and board date the site's search form expects.
        formdata = {
            "stockCode": "AAA",
            "boardDate": "12/11/2021"
        }
        yield FormRequest(url='https://livedragon.vdsc.com.vn/general/intradayBoard.rv',
                          headers=headers,
                          formdata=formdata,
                          callback=self.parse_item
                          )

    def parse_item(self, response):
        # Drop into an interactive shell to inspect what came back.
        inspect_response(response, self)
当通过 scrapy shell 的 inspect_response(response) 检查时,它没有显示任何数据:
请看一下,帮我解决这个问题。谢谢。
- 您需要将请求发送至https://livedragon.vdsc.com.vn/general/intradaySearch.rv
- 从搜索页面获取headers。
- 您需要将方法设置为POST。
- 抓取 json 响应。
import scrapy
from scrapy import FormRequest
class VdscSpider(scrapy.Spider):
    """Scrape intraday price-board data for one stock from livedragon.vdsc.com.vn.

    The board page itself is HTML; the data comes from the AJAX search
    endpoint (intradaySearch.rv), which must be POSTed the stock code and
    board date and answers with JSON.
    """

    name = 'vdsc'
    start_urls = ['https://livedragon.vdsc.com.vn/general/intradayBoard.rv']

    # Keys copied verbatim from every record of the JSON 'list' array,
    # in the same order as the original hand-written item dict.
    ITEM_FIELDS = (
        "MatchedPrice", "OfferVol1", "OfferVol2", "FlrPrice", "OfferVol3",
        "BidVol1", "BidVol2", "FSellVol", "BidVol3", "OfferPrice3",
        "OfferPrice1", "OfferPrice2", "MatchedChange", "AvgPrice",
        "FloorCode", "HigPrice", "TradeTime", "MatchedVol", "BidPrice2",
        "BidPrice3", "Code", "FBuyVol", "CeiPrice", "BidPrice1",
        "LowPrice", "RefPrice", "AmPm", "MatchedTotalVol",
    )

    def parse(self, response):
        """POST the search form to the AJAX endpoint.

        Runs after the initial GET of the board page (start_urls), so any
        session cookies are already in place.
        """
        # Headers captured from the browser's request to the search page.
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.5",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "DNT": "1",
            "Host": "livedragon.vdsc.com.vn",
            "Origin": "https://livedragon.vdsc.com.vn",
            "Pragma": "no-cache",
            "Referer": "https://livedragon.vdsc.com.vn/general/intradayBoard.rv",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Sec-GPC": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
            "X-KL-Ajax-Request": "Ajax_Request",
            "X-Requested-With": "XMLHttpRequest"
        }
        search_page = 'https://livedragon.vdsc.com.vn/general/intradaySearch.rv'
        formdata = {
            "stockCode": "AAA",
            "boardDate": "12/11/2021"
        }
        # FormRequest already defaults to POST; kept explicit for clarity.
        yield FormRequest(url=search_page,
                          method='POST',
                          headers=headers,
                          formdata=formdata,
                          callback=self.parse_item
                          )

    def parse_item(self, response):
        """Yield one flat dict per record in the JSON response.

        Logs an error and stops when the API's own 'success' flag is
        missing or falsy.
        """
        json_data = response.json()
        # Truthiness test via .get() instead of the fragile `!= True`
        # comparison; also survives a response without a 'success' key.
        if not json_data.get('success'):
            self.logger.error("Couldn't retrieve json")
            return
        for item in json_data['list']:
            # One comprehension replaces the original 28-line literal dict;
            # key order is preserved by ITEM_FIELDS.
            yield {field: item[field] for field in self.ITEM_FIELDS}
输出
{'MatchedPrice': 18.5, 'OfferVol1': 461100, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 339900, 'BidVol1': 73500, 'BidVol2': 41100, 'FSellVol': 209200, 'BidVol3': 54900, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:54:37', 'MatchedVol': 100, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7234200}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
{'MatchedPrice': 18.5, 'OfferVol1': 438600, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 334900, 'BidVol1': 61800, 'BidVol2': 31800, 'FSellVol': 209200, 'BidVol3': 54500, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:54:56', 'MatchedVol': 200, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7257600}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
{'MatchedPrice': 18.5, 'OfferVol1': 437400, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 338000, 'BidVol1': 61900, 'BidVol2': 31800, 'FSellVol': 209200, 'BidVol3': 54500, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:55:14', 'MatchedVol': 200, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7257600}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
...
...
...
编辑:
在 settings.py、main.py 或 start_requests 函数中设置用户代理。
main.py 中的示例:
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Load the project settings and override the user agent before starting.
project_settings = get_project_settings()
project_settings['USER_AGENT'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/74.0.3729.169 Safari/537.36'
)

crawler = CrawlerProcess(project_settings)
crawler.crawl('vdsc')  # spider name registered via VdscSpider.name
crawler.start()
start_requests中的示例:
def start_requests(self):
    """Issue the initial requests with a browser-like User-Agent header."""
    ua_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
    }
    yield from (
        scrapy.Request(url=start_url, headers=ua_headers)
        for start_url in self.start_urls
    )
我想从这个网站获取数据:
https://livedragon.vdsc.com.vn/general/intradayBoard.rv
这是我检查响应的代码:
import scrapy
from scrapy import FormRequest
from scrapy.shell import inspect_response
class VdscSpider(scrapy.Spider):
    """Spider from the question: POSTs the search form to the board page.

    NOTE(review): this is the non-working version under discussion — the
    form is posted to intradayBoard.rv (the HTML board page) rather than
    to the AJAX endpoint that actually serves the data, so the response
    carries nothing to scrape.
    """

    name = 'vdsc'
    start_urls = ['https://livedragon.vdsc.com.vn/general/intradayBoard.rv']

    def start_requests(self):
        # Browser-like User-Agent plus the URL-encoded form content type.
        headers= {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
        }
        # Stock code and board date the site's search form expects.
        formdata = {
            "stockCode": "AAA",
            "boardDate": "12/11/2021"
        }
        yield FormRequest(url='https://livedragon.vdsc.com.vn/general/intradayBoard.rv',
                          headers=headers,
                          formdata=formdata,
                          callback=self.parse_item
                          )

    def parse_item(self, response):
        # Drop into an interactive shell to inspect what came back.
        inspect_response(response, self)
当通过 scrapy shell 的 inspect_response(response) 检查时,它没有显示任何数据:
请看一下,帮我解决这个问题。谢谢。
- 您需要将请求发送至https://livedragon.vdsc.com.vn/general/intradaySearch.rv
- 从搜索页面获取headers。
- 您需要将方法设置为POST。
- 抓取 json 响应。
import scrapy
from scrapy import FormRequest
class VdscSpider(scrapy.Spider):
    """Scrape intraday price-board data for one stock from livedragon.vdsc.com.vn.

    The board page itself is HTML; the data comes from the AJAX search
    endpoint (intradaySearch.rv), which must be POSTed the stock code and
    board date and answers with JSON.
    """

    name = 'vdsc'
    start_urls = ['https://livedragon.vdsc.com.vn/general/intradayBoard.rv']

    # Keys copied verbatim from every record of the JSON 'list' array,
    # in the same order as the original hand-written item dict.
    ITEM_FIELDS = (
        "MatchedPrice", "OfferVol1", "OfferVol2", "FlrPrice", "OfferVol3",
        "BidVol1", "BidVol2", "FSellVol", "BidVol3", "OfferPrice3",
        "OfferPrice1", "OfferPrice2", "MatchedChange", "AvgPrice",
        "FloorCode", "HigPrice", "TradeTime", "MatchedVol", "BidPrice2",
        "BidPrice3", "Code", "FBuyVol", "CeiPrice", "BidPrice1",
        "LowPrice", "RefPrice", "AmPm", "MatchedTotalVol",
    )

    def parse(self, response):
        """POST the search form to the AJAX endpoint.

        Runs after the initial GET of the board page (start_urls), so any
        session cookies are already in place.
        """
        # Headers captured from the browser's request to the search page.
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.5",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "DNT": "1",
            "Host": "livedragon.vdsc.com.vn",
            "Origin": "https://livedragon.vdsc.com.vn",
            "Pragma": "no-cache",
            "Referer": "https://livedragon.vdsc.com.vn/general/intradayBoard.rv",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Sec-GPC": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
            "X-KL-Ajax-Request": "Ajax_Request",
            "X-Requested-With": "XMLHttpRequest"
        }
        search_page = 'https://livedragon.vdsc.com.vn/general/intradaySearch.rv'
        formdata = {
            "stockCode": "AAA",
            "boardDate": "12/11/2021"
        }
        # FormRequest already defaults to POST; kept explicit for clarity.
        yield FormRequest(url=search_page,
                          method='POST',
                          headers=headers,
                          formdata=formdata,
                          callback=self.parse_item
                          )

    def parse_item(self, response):
        """Yield one flat dict per record in the JSON response.

        Logs an error and stops when the API's own 'success' flag is
        missing or falsy.
        """
        json_data = response.json()
        # Truthiness test via .get() instead of the fragile `!= True`
        # comparison; also survives a response without a 'success' key.
        if not json_data.get('success'):
            self.logger.error("Couldn't retrieve json")
            return
        for item in json_data['list']:
            # One comprehension replaces the original 28-line literal dict;
            # key order is preserved by ITEM_FIELDS.
            yield {field: item[field] for field in self.ITEM_FIELDS}
输出
{'MatchedPrice': 18.5, 'OfferVol1': 461100, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 339900, 'BidVol1': 73500, 'BidVol2': 41100, 'FSellVol': 209200, 'BidVol3': 54900, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:54:37', 'MatchedVol': 100, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7234200}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
{'MatchedPrice': 18.5, 'OfferVol1': 438600, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 334900, 'BidVol1': 61800, 'BidVol2': 31800, 'FSellVol': 209200, 'BidVol3': 54500, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:54:56', 'MatchedVol': 200, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7257600}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
{'MatchedPrice': 18.5, 'OfferVol1': 437400, 'OfferVol2': 160100, 'FlrPrice': 16.75, 'OfferVol3': 338000, 'BidVol1': 61900, 'BidVol2': 31800, 'FSellVol': 209200, 'BidVol3': 54500, 'OfferPrice3': 18.6, 'OfferPrice1': 18.5, 'OfferPrice2': 18.55, 'MatchedChange': 0.5, 'AvgPrice': 18.31, 'FloorCode': 'HSX', 'HigPrice': 18.7, 'TradeTime': '13:55:14', 'MatchedVol': 200, 'BidPrice2': 18.4, 'BidPrice3': 18.35, 'Code': 'AAA', 'FBuyVol': 75000, 'CeiPrice': 19.25, 'BidPrice1': 18.45, 'LowPrice': 17.9, 'RefPrice': 18.0, 'AmPm': 'PM', 'MatchedTotalVol': 7257600}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://livedragon.vdsc.com.vn/general/intradaySearch.rv>
...
...
...
编辑:
在 settings.py、main.py 或 start_requests 函数中设置用户代理。
main.py 中的示例:
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Load the project settings and override the user agent before starting.
project_settings = get_project_settings()
project_settings['USER_AGENT'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/74.0.3729.169 Safari/537.36'
)

crawler = CrawlerProcess(project_settings)
crawler.crawl('vdsc')  # spider name registered via VdscSpider.name
crawler.start()
start_requests中的示例:
def start_requests(self):
    """Issue the initial requests with a browser-like User-Agent header."""
    ua_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
    }
    yield from (
        scrapy.Request(url=start_url, headers=ua_headers)
        for start_url in self.start_urls
    )