网页抓取后 cheerio 没有收到回复
cheerio not getting response back after web scraping
我想抓取一个网站的数据,所以我尝试使用 cheerio
npm package
选择器在 chrome 开发工具
中工作得很好
// CSS path to the commodity-name cells inside the price table.
const commoditySelector =
  "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
// Text of the matched cells as a single string, before splitting.
const commodityText = $(commoditySelector).text();
// One array entry per line of that text.
let commodity_array = commodityText.split("\n");
console.log(commodity_array);
但是在我的代码中使用它时,返回的是空响应
我的代码:
// Modules loaded by the script. `fs` and `json2csv` are loaded here but are
// not used anywhere in the code shown below.
const request = require("request-promise");
const cheerio = require("cheerio");
const fs = require("fs");
const json2csv = require("json2csv").Parser;

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page with browser-like headers, parse it with cheerio and print
// the text of the commodity cells split into lines.
(async () => {
  // Declared for collected rows; nothing is pushed into it in this snippet.
  let mandiData = [];

  // Headers sent with the request.
  const requestHeaders = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };

  // NOTE(review): `br` is advertised above while only `gzip: true` is set on
  // the request; confirm the client can decode every advertised encoding.
  const requestOptions = {
    uri: url,
    headers: requestHeaders,
    gzip: true,
  };

  const response = await request(requestOptions);
  let $ = cheerio.load(response);

  const commoditySelector =
    "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityText = $(commoditySelector).text();
  let commodity_array = commodityText.split("\n");
  console.log(commodity_array);
})();
我从中抓取数据的网站 url 是:https://www.commodityonline.com/mandiprices/
我是从 Hitesh Chaudhary 的 YouTube 频道(这个视频)了解到这种抓取方法的。
请求头有没有问题?
我是网络抓取的新手,所以我不明白我在哪一步做错了
在 HTTP headers 中,您指定了 "accept-encoding": "gzip, deflate, br",这意味着您告诉服务器可以返回压缩后的响应(gzip、deflate 或 Brotli)。Cheerio 需要的是未压缩的 HTML 文本;如果响应体没有被正确解压(例如服务器返回了客户端无法解码的 br 编码),Cheerio 就无法解析响应数据。
只需删除该 header(以及 gzip: true 选项)即可使其正常工作:
const request = require("request-promise");
const cheerio = require("cheerio");

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page, parse it with cheerio and print the text of the commodity
// cells split into lines. No accept-encoding header is sent here.
(async () => {
  const requestHeaders = {
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };
  const requestOptions = { uri: url, headers: requestHeaders };

  const response = await request(requestOptions);
  let $ = cheerio.load(response);

  const commoditySelector =
    "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityText = $(commoditySelector).text();
  let commodity_array = commodityText.split("\n");
  console.log(commodity_array);
})();
注意:request 已被弃用(deprecated)。一个不错的替代方案是 axios:
const axios = require("axios");
const cheerio = require("cheerio");

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Download the page, turn every price-table row into a plain object and
// print the resulting array. Failures are reported and set a non-zero exit
// code instead of surfacing as an unhandled promise rejection.
(async () => {
  const response = await axios.get(url);
  const $ = cheerio.load(response.data);

  // Must be declared: assigning to an undeclared name would create an
  // implicit global in sloppy mode and throw a ReferenceError in strict mode.
  const data = [];

  $("#tdm_base_scroll > div > div.dt_ta_09").each((_index, elm) => {
    // Two cells in a row share the dt_ta_14 class: the first is read as the
    // modal price and the second as the min/max price.
    const price = $("div.dt_ta_14", elm);
    data.push({
      commodity: $("div.dt_ta_10", elm).text().trim(),
      marketCenter: $("div.dt_ta_11", elm).text().trim(),
      variety: $("div.dt_ta_12", elm).text().trim(),
      arrivals: $("div.dt_ta_13", elm).text().trim(),
      modalPrice: $(price[0]).text().trim(),
      minMaxPrice: $(price[1]).text().trim(),
    });
  });

  console.log(data);
})().catch((err) => {
  console.error("Failed to scrape mandi prices:", err);
  process.exitCode = 1;
});
我想抓取一个网站的数据,所以我尝试使用 cheerio
npm package
选择器在 chrome 开发工具中工作得很好:
// CSS path to the commodity-name cells inside the price table.
const commoditySelector =
  "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
// Text of the matched cells as a single string, before splitting.
const commodityText = $(commoditySelector).text();
// One array entry per line of that text.
let commodity_array = commodityText.split("\n");
console.log(commodity_array);
但是在我的代码中使用它时,返回的是空响应
我的代码:
// Modules loaded by the script. `fs` and `json2csv` are loaded here but are
// not used anywhere in the code shown below.
const request = require("request-promise");
const cheerio = require("cheerio");
const fs = require("fs");
const json2csv = require("json2csv").Parser;

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page with browser-like headers, parse it with cheerio and print
// the text of the commodity cells split into lines.
(async () => {
  // Declared for collected rows; nothing is pushed into it in this snippet.
  let mandiData = [];

  // Headers sent with the request.
  const requestHeaders = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };

  // NOTE(review): `br` is advertised above while only `gzip: true` is set on
  // the request; confirm the client can decode every advertised encoding.
  const requestOptions = {
    uri: url,
    headers: requestHeaders,
    gzip: true,
  };

  const response = await request(requestOptions);
  let $ = cheerio.load(response);

  const commoditySelector =
    "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityText = $(commoditySelector).text();
  let commodity_array = commodityText.split("\n");
  console.log(commodity_array);
})();
我从中抓取数据的网站 url 是:https://www.commodityonline.com/mandiprices/
我是从 Hitesh Chaudhary 的 YouTube 频道(这个视频)了解到这种抓取方法的。
请求头有没有问题?
我是网络抓取的新手,所以我不明白我在哪一步做错了
在 HTTP headers 中,您指定了 "accept-encoding": "gzip, deflate, br",这意味着您告诉服务器可以返回压缩后的响应(gzip、deflate 或 Brotli)。Cheerio 需要的是未压缩的 HTML 文本;如果响应体没有被正确解压(例如服务器返回了客户端无法解码的 br 编码),Cheerio 就无法解析响应数据。
只需删除该 header(以及 gzip: true 选项)即可使其正常工作:
const request = require("request-promise");
const cheerio = require("cheerio");

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page, parse it with cheerio and print the text of the commodity
// cells split into lines. No accept-encoding header is sent here.
(async () => {
  const requestHeaders = {
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };
  const requestOptions = { uri: url, headers: requestHeaders };

  const response = await request(requestOptions);
  let $ = cheerio.load(response);

  const commoditySelector =
    "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityText = $(commoditySelector).text();
  let commodity_array = commodityText.split("\n");
  console.log(commodity_array);
})();
注意:request 已被弃用(deprecated)。一个不错的替代方案是 axios:
const axios = require("axios");
const cheerio = require("cheerio");

// Page that lists the mandi (market) prices.
const url = "https://www.commodityonline.com/mandiprices/";

// Download the page, turn every price-table row into a plain object and
// print the resulting array. Failures are reported and set a non-zero exit
// code instead of surfacing as an unhandled promise rejection.
(async () => {
  const response = await axios.get(url);
  const $ = cheerio.load(response.data);

  // Must be declared: assigning to an undeclared name would create an
  // implicit global in sloppy mode and throw a ReferenceError in strict mode.
  const data = [];

  $("#tdm_base_scroll > div > div.dt_ta_09").each((_index, elm) => {
    // Two cells in a row share the dt_ta_14 class: the first is read as the
    // modal price and the second as the min/max price.
    const price = $("div.dt_ta_14", elm);
    data.push({
      commodity: $("div.dt_ta_10", elm).text().trim(),
      marketCenter: $("div.dt_ta_11", elm).text().trim(),
      variety: $("div.dt_ta_12", elm).text().trim(),
      arrivals: $("div.dt_ta_13", elm).text().trim(),
      modalPrice: $(price[0]).text().trim(),
      minMaxPrice: $(price[1]).text().trim(),
    });
  });

  console.log(data);
})().catch((err) => {
  console.error("Failed to scrape mandi prices:", err);
  process.exitCode = 1;
});