Cheerio 对包含连字符和 '/' 的 class 返回 undefined
Cheerio returning undefined for class with hyphens and '/'
我正在使用 cheerio 抓取检查点。我想从 div 中获取 'checkpoint message' 和 'timestamp',目标是在每个父 div 中分别取出消息和时间戳。
下面这个选择器返回 undefined:$('.roster__item.roster__item--flex.5\/12.palm-1\/1')。
我究竟做错了什么?我知道需要转义连字符和正斜杠,但我已经无计可施了。
HTML 来自 API 回复:https://justpaste.it/35o9l
我的代码:
const req = require('rfr')
// const settings = req('/settings')
// libs
const select = req('/utils/common').select
// dependencies
const request = require('request')
const async = require('async')
const moment = require('moment')
const cheerio = require('cheerio')
/**
 * Requests the TNT France tracking page for one consignment.
 *
 * @param {string} tracking - Tracking number placed in the `bonTransport`
 *   query parameter.
 * @param {function(?Error, *=)} callback - Node-style callback. Called with
 *   the transport error from `request`, or with `null` and the response body.
 */
function callApi (tracking, callback) {
  const options = {
    method: 'GET',
    // Percent-encode the value so characters such as `&`, `#` or a space
    // cannot cut off or rewrite the remaining query parameters.
    url: `https://www.tnt.fr/public/suivi_colis/recherche/visubontransport.do?bonTransport=${encodeURIComponent(tracking)}&radiochoixrecherche=BT&radiochoixtypeexpedition=NAT`
  }
  request.get(options, (error, response, body) => {
    if (error) return callback(error)
    return callback(null, body)
  })
}
/**
 * Fetches the TNT tracking page for a shipment and turns its checkpoint rows
 * into a tracking update.
 *
 * @param {Array<{tracking_number: string}>} tracking - Only the first entry's
 *   `tracking_number` is read.
 * @param {function(?Error, Object=)} callback - Node-style callback. Receives
 *   the error, or `null` and `{ tracking_update: { tracking_number, slug,
 *   checkpoints } }`.
 */
function refresh (tracking, callback) {
  const tno = tracking[0].tracking_number
  async.waterfall([
    // Fetch the page and pass the loaded cheerio document to the next step.
    function get (waterfallCallback) {
      // callApi interpolates its first argument into the URL, so it must be
      // given the tracking number string, not the `tracking` array.
      callApi(tno, function (err, html) {
        if (err) return waterfallCallback(err)
        return waterfallCallback(null, cheerio.load(html))
      })
    },
    // Read one checkpoint (message + timestamp) from every `.roster` row.
    function transformHtml ($, waterfallCallback) {
      const checkpoints = []
      $('.roster').each(function (index, row) {
        const cells = $(row).children('.roster__item')
        // The column classes contain `/` ("5/12", "3/12"). Matching them as
        // whitespace-separated class tokens via [class~="…"] needs no
        // backslash escaping inside the selector string.
        const message = cells.filter('[class~="5/12"]').first().text().trim()
        const timestamp = cells.filter('[class~="3/12"]').first().text().trim()
        // Rows lacking either value are skipped rather than emitted half-empty.
        if (message && timestamp) checkpoints.push({ message, timestamp })
      })
      // NOTE(review): the checkpoint field names and the raw, unparsed
      // timestamp string are assumptions — confirm against the consumer of
      // `tracking_update` before relying on them.
      const update = {
        tracking_update: {
          tracking_number: tno,
          slug: 'tnt',
          checkpoints
        }
      }
      // Without this call the waterfall never completes and `callback`
      // is never invoked.
      return waterfallCallback(null, update)
    }
  ], function waterfallDone (err, updates) {
    callback(err, updates)
  })
}
这些是来自 HTML 的检查点:
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis livré</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 12:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Départ en livraison</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 08:06</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Arrivée à l'agence de destination</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 06:09</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis pris en compte par l'agence TNT</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 21:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">CAEN</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis en cours d'acheminement</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 20:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis chez l'expéditeur</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 00:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
我是这样做的,效果非常好:
let html = `<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis livré</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 12:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Départ en livraison</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 08:06</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Arrivée à l'agence de destination</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 06:09</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis pris en compte par l'agence TNT</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 21:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">CAEN</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis en cours d'acheminement</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 20:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis chez l'expéditeur</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 00:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>`;
const cheerio = require("cheerio");
const $ = cheerio.load(html);
// Inside a JavaScript string literal `\/` is simply `/`, so writing
// ".5\/12" never delivers a backslash to the selector parser. Matching the
// slash-containing class tokens with [class~="…"] avoids escaping entirely.
const d = $('.roster__item.roster__item--flex[class~="5/12"][class~="palm-1/1"]').html();
console.log(d);
// outputs: Colis livré
我正在使用 cheerio 抓取检查点。我想从 div 中获取 'checkpoint message' 和 'timestamp',目标是在每个父 div 中分别取出消息和时间戳。
下面这个选择器返回 undefined:$('.roster__item.roster__item--flex.5\/12.palm-1\/1')。
我究竟做错了什么?我知道需要转义连字符和正斜杠,但我已经无计可施了。
HTML 来自 API 回复:https://justpaste.it/35o9l
我的代码:
const req = require('rfr')
// const settings = req('/settings')
// libs
const select = req('/utils/common').select
// dependencies
const request = require('request')
const async = require('async')
const moment = require('moment')
const cheerio = require('cheerio')
/**
 * Requests the TNT France tracking page for one consignment.
 *
 * @param {string} tracking - Tracking number placed in the `bonTransport`
 *   query parameter.
 * @param {function(?Error, *=)} callback - Node-style callback. Called with
 *   the transport error from `request`, or with `null` and the response body.
 */
function callApi (tracking, callback) {
  const options = {
    method: 'GET',
    // Percent-encode the value so characters such as `&`, `#` or a space
    // cannot cut off or rewrite the remaining query parameters.
    url: `https://www.tnt.fr/public/suivi_colis/recherche/visubontransport.do?bonTransport=${encodeURIComponent(tracking)}&radiochoixrecherche=BT&radiochoixtypeexpedition=NAT`
  }
  request.get(options, (error, response, body) => {
    if (error) return callback(error)
    return callback(null, body)
  })
}
/**
 * Fetches the TNT tracking page for a shipment and turns its checkpoint rows
 * into a tracking update.
 *
 * @param {Array<{tracking_number: string}>} tracking - Only the first entry's
 *   `tracking_number` is read.
 * @param {function(?Error, Object=)} callback - Node-style callback. Receives
 *   the error, or `null` and `{ tracking_update: { tracking_number, slug,
 *   checkpoints } }`.
 */
function refresh (tracking, callback) {
  const tno = tracking[0].tracking_number
  async.waterfall([
    // Fetch the page and pass the loaded cheerio document to the next step.
    function get (waterfallCallback) {
      // callApi interpolates its first argument into the URL, so it must be
      // given the tracking number string, not the `tracking` array.
      callApi(tno, function (err, html) {
        if (err) return waterfallCallback(err)
        return waterfallCallback(null, cheerio.load(html))
      })
    },
    // Read one checkpoint (message + timestamp) from every `.roster` row.
    function transformHtml ($, waterfallCallback) {
      const checkpoints = []
      $('.roster').each(function (index, row) {
        const cells = $(row).children('.roster__item')
        // The column classes contain `/` ("5/12", "3/12"). Matching them as
        // whitespace-separated class tokens via [class~="…"] needs no
        // backslash escaping inside the selector string.
        const message = cells.filter('[class~="5/12"]').first().text().trim()
        const timestamp = cells.filter('[class~="3/12"]').first().text().trim()
        // Rows lacking either value are skipped rather than emitted half-empty.
        if (message && timestamp) checkpoints.push({ message, timestamp })
      })
      // NOTE(review): the checkpoint field names and the raw, unparsed
      // timestamp string are assumptions — confirm against the consumer of
      // `tracking_update` before relying on them.
      const update = {
        tracking_update: {
          tracking_number: tno,
          slug: 'tnt',
          checkpoints
        }
      }
      // Without this call the waterfall never completes and `callback`
      // is never invoked.
      return waterfallCallback(null, update)
    }
  ], function waterfallDone (err, updates) {
    callback(err, updates)
  })
}
这些是来自 HTML 的检查点:
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis livré</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 12:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Départ en livraison</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 08:06</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Arrivée à l'agence de destination</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 06:09</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis pris en compte par l'agence TNT</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 21:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">CAEN</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis en cours d'acheminement</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 20:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis chez l'expéditeur</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 00:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
我是这样做的,效果非常好:
let html = `<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis livré</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 12:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Départ en livraison</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 08:06</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Arrivée à l'agence de destination</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">02/09/2020 06:09</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">TREMBLAY</div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis pris en compte par l'agence TNT</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 21:08</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1">CAEN</div>
</div>
<div class="roster roster-palm 1/1 tnt-even" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis en cours d'acheminement</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 20:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>
<div class="roster roster-palm 1/1 tnt-odd" style="padding-top: 11px; padding-bottom: 11px; padding-left: 28px;">
<div class="roster__item roster__item--flex 5/12 palm-1/1" style="white-space: normal">Colis chez l'expéditeur</div>
<div class="roster__item roster__item--flex 3/12 palm-1/1">01/09/2020 00:00</div>
<div class="roster__item roster__item--flex 4/12 palm-1/1"></div>
</div>`;
const cheerio = require("cheerio");
const $ = cheerio.load(html);
// Inside a JavaScript string literal `\/` is simply `/`, so writing
// ".5\/12" never delivers a backslash to the selector parser. Matching the
// slash-containing class tokens with [class~="…"] avoids escaping entirely.
const d = $('.roster__item.roster__item--flex[class~="5/12"][class~="palm-1/1"]').html();
console.log(d);
// outputs: Colis livré