从具有特定标题值的 table 中抓取数据并过滤特定行(Google Apps Script)
Scraping data from a table with a specific title value and filtering specific lines (Google Apps Script)
CheerioGS 的文档:
https://github.com/tani/cheeriogs
我们的想法是仅从标题为 Argentinos Jrs 的 table 中收集数据,并且不保存 info 列中值为 Away on International duty 的行。
注意:我确实需要按 Argentinos Jrs 这个值来定位 table,并按 Away on International duty 这个值来剔除行,因为这个 table 的位置不固定,行中的值也会变化。
我正在寻找的这个例子中的预期结果是这样的:
Carlos Quintana Mid August
Jonathan Sandoval Early August
网站link是这样的:
https://www.sportsgambler.com/injuries/football/argentina-superliga/
我在此保留网站当前的截图,这样即使之后数据发生变化,我的示例所依据的内容仍有记录:
我尝试的代码:
/**
 * Imports the "Argentinos Jrs" injury rows into the 'Dados Importados' sheet
 * using Cheerio selectors.
 *
 * Every `div.inj-container` matched by the selector below contributes one
 * sheet row: the text of its `span.inj-player` child in column 1 and the text
 * of its `span.inj-return.h-sm` child in column 2, starting at row 2.
 * Containers whose text includes 'Away on International duty' are excluded.
 *
 * @returns {void}
 */
function PaginaDoJogo() {
  const sheet = SpreadsheetApp.getActive().getSheetByName('Dados Importados');
  const url = 'https://www.sportsgambler.com/injuries/football/argentina-superliga/';
  const contentText = UrlFetchApp.fetch(url).getContentText();
  const $ = Cheerio.load(contentText);

  // `:contains` is a pseudo-class, so it needs its leading colon inside
  // `:not(...)`; `:not(contains("..."))` is not a valid selector.
  // NOTE(review): `div:contains(...)` matches every div whose text includes
  // the team name, ancestors included - confirm against the page markup that
  // this really scopes the rows to a single team.
  const rowSelector =
    'div:contains("Argentinos Jrs") > div > ' +
    'div.inj-container:not(:contains("Away on International duty"))';

  // Read both cells from the same container so that a player name can never
  // end up next to another player's return date.
  const values = [];
  $(rowSelector).each((index, element) => {
    const row = $(element);
    values.push([
      row.children('span.inj-player').text(),
      row.children('span.inj-return.h-sm').text(),
    ]);
  });

  // A range must span at least one row, so skip the write when nothing matched.
  if (values.length === 0) {
    return;
  }

  // One batched write instead of one spreadsheet call per cell.
  sheet.getRange(2, 1, values.length, 2).setValues(values);
}
/**
 * Imports the "Argentinos Jrs" injury rows from the sportsgambler page into
 * the 'Dados Importados' sheet.
 *
 * For every row of the team's section whose info text is not
 * 'Away on International duty', the player name and the return date are
 * written side by side, starting at row 2, column 1.
 *
 * @returns {void}
 * @throws {Error} If the team's section cannot be found in the fetched page.
 */
function PaginaDoJogo() {
  const sheet = SpreadsheetApp.getActive().getSheetByName('Dados Importados');
  const url = 'https://www.sportsgambler.com/injuries/football/argentina-superliga/';
  const content = UrlFetchApp.fetch(url).getContentText();

  // The team's block is taken from its name up to the next
  // "Livestream call to action" comment in the markup.
  const section = content.match(/Argentinos Jrs[\s\S]+?<!--Livestream call to action-->/);
  if (section === null) {
    throw new Error(`Section "Argentinos Jrs" not found at ${url}`);
  }

  // Captures per row: 1 = player name, 2 = info text, 3 = return date.
  const rowPattern = /<div[\s\S]+?<span class="inj-player">(.+?)<\/span>[\s\S]+?<span class="inj-info">(.+?)<\/span>[\s\S]+?<span class="inj-return h-sm">(.+?)<\/span>[\s\S]+?<\/div>/g;
  const values = [];
  let row;
  while ((row = rowPattern.exec(section[0])) !== null) {
    // 'Name' is presumably the table's header row; rows flagged as
    // international duty are the ones the sheet must not contain.
    if (row[1] !== 'Name' && row[2] !== 'Away on International duty') {
      values.push([row[1], row[3]]);
    }
  }

  // A range must span at least one row, so skip the write when nothing qualifies.
  if (values.length === 0) {
    return;
  }
  sheet.getRange(2, 1, values.length, 2).setValues(values);
}
CheerioGS 的文档:
https://github.com/tani/cheeriogs
我们的想法是仅从标题为 Argentinos Jrs 的 table 中收集数据,并且不保存 info 列中值为 Away on International duty 的行。
注意:我确实需要按 Argentinos Jrs 这个值来定位 table,并按 Away on International duty 这个值来剔除行,因为这个 table 的位置不固定,行中的值也会变化。
我正在寻找的这个例子中的预期结果是这样的:
Carlos Quintana Mid August
Jonathan Sandoval Early August
网站link是这样的:
https://www.sportsgambler.com/injuries/football/argentina-superliga/
我在此保留网站当前的截图,这样即使之后数据发生变化,我的示例所依据的内容仍有记录:
我尝试的代码:
/**
 * Imports the "Argentinos Jrs" injury rows into the 'Dados Importados' sheet
 * using Cheerio selectors.
 *
 * Every `div.inj-container` matched by the selector below contributes one
 * sheet row: the text of its `span.inj-player` child in column 1 and the text
 * of its `span.inj-return.h-sm` child in column 2, starting at row 2.
 * Containers whose text includes 'Away on International duty' are excluded.
 *
 * @returns {void}
 */
function PaginaDoJogo() {
  const sheet = SpreadsheetApp.getActive().getSheetByName('Dados Importados');
  const url = 'https://www.sportsgambler.com/injuries/football/argentina-superliga/';
  const contentText = UrlFetchApp.fetch(url).getContentText();
  const $ = Cheerio.load(contentText);

  // `:contains` is a pseudo-class, so it needs its leading colon inside
  // `:not(...)`; `:not(contains("..."))` is not a valid selector.
  // NOTE(review): `div:contains(...)` matches every div whose text includes
  // the team name, ancestors included - confirm against the page markup that
  // this really scopes the rows to a single team.
  const rowSelector =
    'div:contains("Argentinos Jrs") > div > ' +
    'div.inj-container:not(:contains("Away on International duty"))';

  // Read both cells from the same container so that a player name can never
  // end up next to another player's return date.
  const values = [];
  $(rowSelector).each((index, element) => {
    const row = $(element);
    values.push([
      row.children('span.inj-player').text(),
      row.children('span.inj-return.h-sm').text(),
    ]);
  });

  // A range must span at least one row, so skip the write when nothing matched.
  if (values.length === 0) {
    return;
  }

  // One batched write instead of one spreadsheet call per cell.
  sheet.getRange(2, 1, values.length, 2).setValues(values);
}
/**
 * Imports the "Argentinos Jrs" injury rows from the sportsgambler page into
 * the 'Dados Importados' sheet.
 *
 * For every row of the team's section whose info text is not
 * 'Away on International duty', the player name and the return date are
 * written side by side, starting at row 2, column 1.
 *
 * @returns {void}
 * @throws {Error} If the team's section cannot be found in the fetched page.
 */
function PaginaDoJogo() {
  const sheet = SpreadsheetApp.getActive().getSheetByName('Dados Importados');
  const url = 'https://www.sportsgambler.com/injuries/football/argentina-superliga/';
  const content = UrlFetchApp.fetch(url).getContentText();

  // The team's block is taken from its name up to the next
  // "Livestream call to action" comment in the markup.
  const section = content.match(/Argentinos Jrs[\s\S]+?<!--Livestream call to action-->/);
  if (section === null) {
    throw new Error(`Section "Argentinos Jrs" not found at ${url}`);
  }

  // Captures per row: 1 = player name, 2 = info text, 3 = return date.
  const rowPattern = /<div[\s\S]+?<span class="inj-player">(.+?)<\/span>[\s\S]+?<span class="inj-info">(.+?)<\/span>[\s\S]+?<span class="inj-return h-sm">(.+?)<\/span>[\s\S]+?<\/div>/g;
  const values = [];
  let row;
  while ((row = rowPattern.exec(section[0])) !== null) {
    // 'Name' is presumably the table's header row; rows flagged as
    // international duty are the ones the sheet must not contain.
    if (row[1] !== 'Name' && row[2] !== 'Away on International duty') {
      values.push([row[1], row[3]]);
    }
  }

  // A range must span at least one row, so skip the write when nothing qualifies.
  if (values.length === 0) {
    return;
  }
  sheet.getRange(2, 1, values.length, 2).setValues(values);
}