为什么我的爬虫(spider)不执行 parse 函数,而换成另一个链接就可以正常工作?
Why is my spider not executing the parse function, even though it works with another link?
我正在尝试从此页面中提取一些数据,但 parse 函数没有被执行。我试过换成另一个 URL(例如 google.com),这时 parse 函数可以执行;但换回我真正需要的这个页面时,它就不执行了。
import scrapy
from datetime import date
from osp_scraper.spiders.CustomSpider import CustomSpider
class PrincetonSpider(CustomSpider):
    """Spider that walks the Princeton Blackboard course catalog.

    Starts from the catalog listing in ``start_urls`` and, for every row of
    the ``listContainer_databody`` table, follows the course link with
    ``search_syllabus`` as the callback.

    NOTE(review): ``search_syllabus`` is not defined in this class; it is
    presumably inherited from ``CustomSpider`` -- confirm.
    NOTE(review): if ``parse`` is never called for this host, the start
    request may have been dropped by Scrapy's robots.txt handling, which is
    controlled by the ``ROBOTSTXT_OBEY`` setting -- confirm in the crawl log.
    """

    name = "princeton"

    # Disabled alternative: a dated catalog search built from today's date.
    # year = date.today().year
    # month = date.today().month
    # day = date.today().day
    # start_urls = [f'https://blackboard.princeton.edu/webapps/blackboard/execute/viewCatalog?type=Course&command=NewSearch&searchField=CourseId&searchOperator=Contains&searchText=_&dateSearchOperator=LessThan&dateSearchDate_datetime={year}-{month}-{day}+9%3A33%3A00']
    start_urls = ['https://blackboard.princeton.edu/webapps/blackboard/execute/viewCatalog?type=Course']

    def parse(self, response):
        """Yield one follow-up request per course row of the catalog table.

        Args:
            response: The downloaded catalog page.

        Yields:
            A request for each course link found, with
            ``self.search_syllabus`` as its callback.
        """
        # Logged instead of printed so the marker shows up in the crawl log.
        self.logger.debug('parse called for %s', response.url)

        rows = response.xpath('//*[@id="listContainer_databody"]/tr')
        for course in rows:
            href = course.xpath('th/a/@href').get()
            if href is None:
                # A row without a link cannot be followed; response.follow
                # does not accept a missing URL, so skip the row.
                continue

            # No input() pause here: a blocking prompt inside a callback
            # stalls the whole crawl and fails when no terminal is attached.
            self.logger.debug(
                'course: %s', course.xpath('td[1]/span[2]/text()').get()
            )
            yield response.follow(
                url=href,
                callback=self.search_syllabus,
            )
# Start from a copy of the settings inherited from CustomSpider, then turn
# off robots.txt enforcement (Scrapy's ROBOTSTXT_OBEY setting).
# NOTE(review): presumably meant to live in the spider's class body -- confirm.
custom_settings = dict(CustomSpider.custom_settings)
custom_settings['ROBOTSTXT_OBEY'] = False
我加上了上面这段设置(将 ROBOTSTXT_OBEY 设为 False),现在可以正常运行了。
我正在尝试从此页面中提取一些数据,但 parse 函数没有被执行。我试过换成另一个 URL(例如 google.com),这时 parse 函数可以执行;但换回我真正需要的这个页面时,它就不执行了。
import scrapy
from datetime import date
from osp_scraper.spiders.CustomSpider import CustomSpider
class PrincetonSpider(CustomSpider):
    """Spider that walks the Princeton Blackboard course catalog.

    Starts from the catalog listing in ``start_urls`` and, for every row of
    the ``listContainer_databody`` table, follows the course link with
    ``search_syllabus`` as the callback.

    NOTE(review): ``search_syllabus`` is not defined in this class; it is
    presumably inherited from ``CustomSpider`` -- confirm.
    NOTE(review): if ``parse`` is never called for this host, the start
    request may have been dropped by Scrapy's robots.txt handling, which is
    controlled by the ``ROBOTSTXT_OBEY`` setting -- confirm in the crawl log.
    """

    name = "princeton"

    # Disabled alternative: a dated catalog search built from today's date.
    # year = date.today().year
    # month = date.today().month
    # day = date.today().day
    # start_urls = [f'https://blackboard.princeton.edu/webapps/blackboard/execute/viewCatalog?type=Course&command=NewSearch&searchField=CourseId&searchOperator=Contains&searchText=_&dateSearchOperator=LessThan&dateSearchDate_datetime={year}-{month}-{day}+9%3A33%3A00']
    start_urls = ['https://blackboard.princeton.edu/webapps/blackboard/execute/viewCatalog?type=Course']

    def parse(self, response):
        """Yield one follow-up request per course row of the catalog table.

        Args:
            response: The downloaded catalog page.

        Yields:
            A request for each course link found, with
            ``self.search_syllabus`` as its callback.
        """
        # Logged instead of printed so the marker shows up in the crawl log.
        self.logger.debug('parse called for %s', response.url)

        rows = response.xpath('//*[@id="listContainer_databody"]/tr')
        for course in rows:
            href = course.xpath('th/a/@href').get()
            if href is None:
                # A row without a link cannot be followed; response.follow
                # does not accept a missing URL, so skip the row.
                continue

            # No input() pause here: a blocking prompt inside a callback
            # stalls the whole crawl and fails when no terminal is attached.
            self.logger.debug(
                'course: %s', course.xpath('td[1]/span[2]/text()').get()
            )
            yield response.follow(
                url=href,
                callback=self.search_syllabus,
            )
# Start from a copy of the settings inherited from CustomSpider, then turn
# off robots.txt enforcement (Scrapy's ROBOTSTXT_OBEY setting).
# NOTE(review): presumably meant to live in the spider's class body -- confirm.
custom_settings = dict(CustomSpider.custom_settings)
custom_settings['ROBOTSTXT_OBEY'] = False
我加上了上面这段设置(将 ROBOTSTXT_OBEY 设为 False),现在可以正常运行了。