scrapy 下一页重定向到主页

scrapy next page redirecting to home page

在我的 scrapy 代码中,我用 FormRequest 请求下一页,结果却被重定向到了主页;而在浏览器中,同样的操作会正常跳转到下一页。我认为我已经提交了所有的表单数据。

from scrapy.selector import Selector
from scrapy.http import Request, FormRequest
from scrapy.contrib.spiders import CrawlSpider

from scrapy.shell import inspect_response
class SampleSpider(CrawlSpider):
    """Spider that asks the job search results page for its next page.

    Hidden form inputs are read from each response and posted back to the
    same URL, with the ``next_page`` control named as the event target.
    """

    name = 'samplespider'
    start_urls = ['http://jobs.hiltonworldwide.com/en/jobs/job-search-results']

    def parse(self, response):
        """Yield a form POST that requests the next page of results.

        The POST's response is routed back to this same method, so every
        page that is parsed produces a request for the following one.
        Raises IndexError if any of the expected hidden inputs is absent.
        """
        page = Selector(response)
        # Debugging hook imported from scrapy.shell; runs before any field
        # is extracted.
        inspect_response(response)

        next_page_target = 'phmain_0$phmaincontent_0$phjobsearchresults_0$next_page'

        # Hidden inputs whose current values must be echoed back to the
        # server. They are extracted in this order; the first match of each
        # query is used.
        hidden_input_queries = (
            "//input[@name='__VIEWSTATE']/@value",
            "//input[@name='__EVENTVALIDATION']/@value",
            "//input[@name='phheader_0$hdnIPAddress']/@value",
            "//input[@name='phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageCount']/@value",
            "//input[@name='phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageIndex']/@value",
        )
        (
            view_state,
            event_validation,
            ip_address,
            page_count,
            page_index,
        ) = [page.xpath(query).extract()[0] for query in hidden_input_queries]

        # Key order is kept stable so the encoded POST body does not change.
        payload = {
            '__EVENTTARGET': next_page_target,
            '__EVENTARGUMENT': "",
            '__LASTFOCUS': "",
            '__VIEWSTATE': view_state,
            '__EVENTVALIDATION': event_validation,
            'phheader_0$hdnIPAddress': ip_address,
            'phmain_0$phbannerinfo_0$phcountryinfo_0$ddlCountry': "Worldwide",
            'phmain_0$phmaincontent_0$phjobsearchresults_0$albLanguage': "91351",
            'phmain_0$phmaincontent_0$phjobsearchresults_0$LoginEmail': "",
            'phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageCount': page_count,
            'phmain_0$phmaincontent_0$phjobsearchresults_0$hdnPageIndex': page_index,
            'phmain_0$phmaincontent_0$phjobsearch_0$ddlCity': "-1",
            'phmain_0$phmaincontent_0$phjobsearch_0$albBrands': "-1",
            'phmain_0$phmaincontent_0$phjobsearch_0$albTalentAreas': "-1",
        }

        next_page_request = FormRequest(
            'http://jobs.hiltonworldwide.com/en/jobs/job-search-results',
            formdata=payload,
            callback=self.parse,
        )
        yield next_page_request

我是否遗漏了什么,或者哪里做错了?正确的分页方法是什么?

我在尝试重现该问题时发现,在请求中指定 User-Agent 请求头(header)对我有效:

# Same next-page POST as in the spider's parse() method, with one addition:
# an explicit browser-style User-Agent header sent with the request.
# This fragment belongs inside parse(); `form_data` is the dict built there.
yield FormRequest(
    'http://jobs.hiltonworldwide.com/en/jobs/job-search-results',
    formdata=form_data,
    callback=self.parse,
    headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'}
)