如何在 scrapy_splash 中使用 SplashRequest 发送 post 请求
how to send a post request with SplashRequest in scrapy_splash
我尝试使用带有 endpoint='execute' 的 SplashRequest 和以下代码来发送 post 请求,但结果显示我没有成功。
import re
import sys
import os
import scrapy
from scrapy_splash import SplashRequest
from crawler.items import CrawlerItem
class Exp10itSpider(scrapy.Spider):
    """Spider from the question: attempts a POST through Splash's ``execute`` endpoint.

    NOTE(review): ``lua_script`` below only calls ``splash:go(args.url)`` and
    never reads ``args.http_method`` or ``args.body``, so the POST arguments
    handed to ``SplashRequest`` are not forwarded by the script. The code is
    kept as posted because it illustrates the problem being asked about.
    """

    name = "test"

    # Lua entry point executed by Splash: load the URL, wait, return the HTML.
    lua_script = """
function main(splash, args)
assert(splash:go(args.url))
assert(splash:wait(0.5))
return splash:html()
end
"""

    def start_requests(self):
        """Yield one SplashRequest per ``<url>^<body>`` entry."""
        targets = [
            'https://httpbin.org/post^sss=lalala'
        ]
        for entry in targets:
            # Entries without the "^" separator carry no POST body; skip them.
            if "^" not in entry:
                continue
            pieces = entry.split("^")
            endpoint_url = pieces[0]
            payload = pieces[1]
            splash_args = {
                'lua_source': self.lua_script,
                'http_method': 'POST',
                'body': payload,
            }
            yield SplashRequest(
                endpoint_url,
                self.parse,
                endpoint='execute',
                magic_response=True,
                meta={'handle_httpstatus_all': True},
                args=splash_args,
            )

    def parse(self, response):
        """Pause for the user, then print the raw response body."""
        input("body is:")
        raw_body = response.body
        print(raw_body)
但是,我无法成功发送 POST 请求:返回的内容显示请求并没有被正确地以 POST 方式发出。你能帮我在 scrapy_splash 中使用 SplashRequest 发送 POST 请求吗?
Splash 1.8+ is required to handle POST requests; in earlier Splash versions the `http_method` and `body` arguments are ignored. If you work with the `/execute` endpoint and want to support POST requests, you have to handle the `http_method` and `body` arguments in your Lua script manually.
文档中还给出了一个 POST 请求的示例。下面将其改写为一个可以直接运行的最小示例:
import scrapy
from scrapy_splash import SplashRequest
class SplashPostSpider(scrapy.Spider):
    """Minimal spider that sends a POST request through Splash's ``execute`` endpoint.

    The Lua script passes ``http_method`` and ``body`` from the request
    arguments on to ``splash:go``.
    """

    name = "splash_post"

    # Lua entry point executed by Splash: forwards url, http_method and body
    # to splash:go, waits, then returns a table holding the rendered HTML.
    lua_script = """
function main(splash, args)
assert(splash:go{
splash.args.url,
http_method=splash.args.http_method,
body=splash.args.body,
})
assert(splash:wait(0.5))
return {
html = splash:html(),
}
end
"""

    def start_requests(self):
        """Yield a single SplashRequest that POSTs ``foo=bar`` to httpbin."""
        target = 'https://httpbin.org/post'
        form_body = 'foo=bar'
        splash_args = {
            'lua_source': self.lua_script,
            'http_method': 'POST',
            'body': form_body,
        }
        yield SplashRequest(
            target,
            self.parse,
            endpoint='execute',
            magic_response=True,
            meta={'handle_httpstatus_all': True},
            args=splash_args,
        )

    def parse(self, response):
        """Print the raw response body."""
        raw_body = response.body
        print(raw_body)
我尝试使用带有 endpoint='execute' 的 SplashRequest 和以下代码来发送 post 请求,但结果显示我没有成功。
import re
import sys
import os
import scrapy
from scrapy_splash import SplashRequest
from crawler.items import CrawlerItem
class Exp10itSpider(scrapy.Spider):
    """Spider from the question: attempts a POST through Splash's ``execute`` endpoint.

    NOTE(review): ``lua_script`` below only calls ``splash:go(args.url)`` and
    never reads ``args.http_method`` or ``args.body``, so the POST arguments
    handed to ``SplashRequest`` are not forwarded by the script. The code is
    kept as posted because it illustrates the problem being asked about.
    """

    name = "test"

    # Lua entry point executed by Splash: load the URL, wait, return the HTML.
    lua_script = """
function main(splash, args)
assert(splash:go(args.url))
assert(splash:wait(0.5))
return splash:html()
end
"""

    def start_requests(self):
        """Yield one SplashRequest per ``<url>^<body>`` entry."""
        targets = [
            'https://httpbin.org/post^sss=lalala'
        ]
        for entry in targets:
            # Entries without the "^" separator carry no POST body; skip them.
            if "^" not in entry:
                continue
            pieces = entry.split("^")
            endpoint_url = pieces[0]
            payload = pieces[1]
            splash_args = {
                'lua_source': self.lua_script,
                'http_method': 'POST',
                'body': payload,
            }
            yield SplashRequest(
                endpoint_url,
                self.parse,
                endpoint='execute',
                magic_response=True,
                meta={'handle_httpstatus_all': True},
                args=splash_args,
            )

    def parse(self, response):
        """Pause for the user, then print the raw response body."""
        input("body is:")
        raw_body = response.body
        print(raw_body)
但是,我无法成功发送 POST 请求:返回的内容显示请求并没有被正确地以 POST 方式发出。你能帮我在 scrapy_splash 中使用 SplashRequest 发送 POST 请求吗?
Splash 1.8+ is required to handle POST requests; in earlier Splash versions the `http_method` and `body` arguments are ignored. If you work with the `/execute` endpoint and want to support POST requests, you have to handle the `http_method` and `body` arguments in your Lua script manually.
文档中还给出了一个 POST 请求的示例。下面将其改写为一个可以直接运行的最小示例:
import scrapy
from scrapy_splash import SplashRequest
class SplashPostSpider(scrapy.Spider):
    """Minimal spider that sends a POST request through Splash's ``execute`` endpoint.

    The Lua script passes ``http_method`` and ``body`` from the request
    arguments on to ``splash:go``.
    """

    name = "splash_post"

    # Lua entry point executed by Splash: forwards url, http_method and body
    # to splash:go, waits, then returns a table holding the rendered HTML.
    lua_script = """
function main(splash, args)
assert(splash:go{
splash.args.url,
http_method=splash.args.http_method,
body=splash.args.body,
})
assert(splash:wait(0.5))
return {
html = splash:html(),
}
end
"""

    def start_requests(self):
        """Yield a single SplashRequest that POSTs ``foo=bar`` to httpbin."""
        target = 'https://httpbin.org/post'
        form_body = 'foo=bar'
        splash_args = {
            'lua_source': self.lua_script,
            'http_method': 'POST',
            'body': form_body,
        }
        yield SplashRequest(
            target,
            self.parse,
            endpoint='execute',
            magic_response=True,
            meta={'handle_httpstatus_all': True},
            args=splash_args,
        )

    def parse(self, response):
        """Print the raw response body."""
        raw_body = response.body
        print(raw_body)