使用 Scrapy 从 table 中提取文本
Extract text from table with Scrapy
我正在尝试从此页面的 table 中提取职位:http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx
这是代码,但它总是 returns 空的。知道如何解决这个问题吗?
import os
from scrapy.spiders import CrawlSpider
from scrapy.selector import Selector
class mySpider(CrawlSpider):
name = "myspider"
allowed_domains = ["www.chalmers.se"]
start_urls = [
"http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx",
]
def parse(self, response):
sel = response.selector
# try to extract text from a tag inside <td>
for tr in sel.css("table#jobsTable>tbody>tr"):
my_title = tr.xpath('td[@class="jobitem"]/a/text()').extract()
print '================', my_title
我也尝试给出绝对 html 路径,如下所示但仍然是空标题:
my_title = response.xpath('/html/body/div/div[1]/div/div[11]/div/table/tbody/tr[1]/td[2]/a/text()').extract()
您的网站从另一个来源(使用 AJAX 调用加载它)高于职位 table。
所以你只需要从另一个开始 url:
start_urls = ['https://web103.reachmee.com/ext/I003/304/main?site=5&validator=a72aeedd63ec10de71e46f8d91d0d57c&lang=UK&ref=&ihelper=http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx']
我正在尝试从此页面的 table 中提取职位:http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx
这是代码,但它总是 returns 空的。知道如何解决这个问题吗?
import os
from scrapy.spiders import CrawlSpider
from scrapy.selector import Selector
class mySpider(CrawlSpider):
name = "myspider"
allowed_domains = ["www.chalmers.se"]
start_urls = [
"http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx",
]
def parse(self, response):
sel = response.selector
# try to extract text from a tag inside <td>
for tr in sel.css("table#jobsTable>tbody>tr"):
my_title = tr.xpath('td[@class="jobitem"]/a/text()').extract()
print '================', my_title
我也尝试给出绝对 html 路径,如下所示但仍然是空标题:
my_title = response.xpath('/html/body/div/div[1]/div/div[11]/div/table/tbody/tr[1]/td[2]/a/text()').extract()
您的网站从另一个来源(使用 AJAX 调用加载它)高于职位 table。 所以你只需要从另一个开始 url:
start_urls = ['https://web103.reachmee.com/ext/I003/304/main?site=5&validator=a72aeedd63ec10de71e46f8d91d0d57c&lang=UK&ref=&ihelper=http://www.chalmers.se/en/about-chalmers/Working-at-Chalmers/Vacancies/Pages/default.aspx']