如何在 scrapy_splash 中使用 SplashRequest 发送 post 请求

how to send a post request with SplashRequest in scrapy_splash

我尝试使用带有 endpoint='execute' 的 SplashRequest 和以下代码来发送 post 请求,但结果显示我没有成功。

import re
import sys
import os
import scrapy
from scrapy_splash import SplashRequest
from crawler.items import CrawlerItem


class Exp10itSpider(scrapy.Spider):
    """Spider from the question: tries to send a POST request through Splash.

    Each entry in ``urls`` encodes a target URL and a POST body separated by
    a ``^`` character. The request is sent to Splash's ``execute`` endpoint
    together with ``lua_script``.
    """

    name = "test"
    # Lua script run by Splash. NOTE: it passes only ``args.url`` to
    # ``splash:go``; the ``http_method`` and ``body`` args supplied in
    # ``start_requests`` are never referenced by this script.
    lua_script = """
    function main(splash, args)
      assert(splash:go(args.url))
      assert(splash:wait(0.5))
      return splash:html()
    end
    """

    def start_requests(self):
        """Yield one SplashRequest per ``url^body`` entry in ``urls``."""
        urls = [
            'https://httpbin.org/post^sss=lalala'
        ]
        for url in urls:
            # Entries without a "^" separator are silently skipped (no else branch).
            if "^" in url:
                # Text before the first "^" is the target URL; the text between
                # the first and second "^" (if any) is used as the POST body.
                post_url_list = url.split("^")
                post_url = post_url_list[0]
                post_data = post_url_list[1]
                # 'http_method' and 'body' are forwarded to Splash as script
                # arguments alongside the Lua source.
                yield SplashRequest(post_url, self.parse, endpoint='execute', magic_response=True, meta={'handle_httpstatus_all': True}, args={'lua_source': self.lua_script, 'http_method': 'POST', 'body': post_data})

    def parse(self, response):
        """Pause for console input, then print the raw response body."""
        # input() blocks until Enter is pressed on stdin; its return value is discarded.
        input("body is:")
        print(response.body)

但是,我无法成功发送 POST 请求:返回的内容显示请求并没有以 POST 方式正确提交。你能帮我在 scrapy_splash 中发送 POST 请求吗?

在 scrapy_splash 中使用 SplashRequest 发送 POST 请求

以下内容引自文档(documentation):

Splash 1.8+ is required to handle POST requests; in earlier Splash versions http_method and body arguments are ignored. If you work with /execute endpoint and want to support POST requests you have to handle http_method and body arguments in your Lua script manually.

文档中还有一个 POST 请求的示例。下面将其改写为一个可以直接运行的最小示例:

import scrapy
from scrapy_splash import SplashRequest

class SplashPostSpider(scrapy.Spider):
    """Minimal spider that sends a POST request via Splash's execute endpoint."""

    name = "splash_post"

    # Lua entry point executed by Splash: forwards the request's http_method
    # and body arguments to splash:go, waits briefly, then returns the
    # rendered HTML in a table under the "html" key.
    lua_script = """
    function main(splash, args)
      assert(splash:go{
        splash.args.url,
        http_method=splash.args.http_method,
        body=splash.args.body,
      })
      assert(splash:wait(0.5))
      return {
        html = splash:html(),
      }
    end
    """

    def start_requests(self):
        """Yield a single SplashRequest that posts ``foo=bar`` to httpbin."""
        # Arguments handed to the Lua script; the script reads http_method
        # and body explicitly.
        splash_args = {
            'lua_source': self.lua_script,
            'http_method': 'POST',
            'body': 'foo=bar',
        }
        request_meta = {'handle_httpstatus_all': True}
        yield SplashRequest(
            'https://httpbin.org/post',
            self.parse,
            endpoint='execute',
            magic_response=True,
            meta=request_meta,
            args=splash_args,
        )

    def parse(self, response):
        """Print the raw body of the response returned by Splash."""
        raw_body = response.body
        print(raw_body)