使用 Playwright 抓取标题,如何将输出写入 Markdown 文件(Console.log 有效)
Scrape headlines with Playwright, how to write output into Markdown file (Console.log is working)
所以我想抓取一些网站的标题并在我的网站上使用它们(使用 markdown 文件管理)。
该脚本可以正常运行,控制台中的输出看起来也没有问题。但是我无法把 for 循环产生的值写入 Markdown 文件。
曾尝试使用整个 object 并将变量名 ( ${list} ) 放入 markdown 中,还尝试将输出的单个节点写入 markdown ( ${firstItem} )。
一直在搜索 'write result javascript to markdown'、'print object to markdown'、'convert json to markdown',研究了字符串,并尝试了几个相关术语,但到目前为止没有结果。
如果您有任何提示或知道解决此问题的方法,我将不胜感激。
// Question script: loads https://books.toscrape.com/ with Playwright, collects
// the title and link of every `.product_pod` element, prints them to the
// console as Markdown links, and then writes a Markdown file.
const { chromium } = require('playwright');
const fs = require('fs');
(async () => {
const browser = await chromium.launch({ headless: true, slowMo: 250 });
// NOTE(review): `context` is never used below; the page is opened with
// browser.newPage() rather than through this context.
const context = await browser.newContext();
const page90 = await browser.newPage();
await page90.goto("https://books.toscrape.com/");
// The callback runs inside the page and returns an array of { title, url }
// objects: `title` is the `alt` attribute of the `.thumbnail` element and
// `url` is the `href` attribute of the first <a> inside each `.product_pod`.
const listcontent = await page90.evaluate(() => {
const data = [];
const books = document.querySelectorAll(".product_pod");
books.forEach((book) => {
let title = book.querySelector('.thumbnail').getAttribute("alt");
let url = book.querySelector('a').getAttribute("href");
data.push({
title,
url,
});
});
return data;
});
// console.log(listcontent);
// listcontent.forEach(item => console.log(item))
// [listcontent].forEach(console.log, console);
// [listcontent].forEach(console.log.bind(console));
// let list = {};
// Prints one Markdown link per entry; nothing is accumulated for later use,
// so these values are only ever visible on the console.
for (const {title: n, url: f} of listcontent) {
console.log('[' + n + '](' + f + ')');
// return list;
}
// let firstItem = listcontent[0]
// lists as json
console.log(listcontent)
// for (const property of listcontent) {
// console.log(`${property}: ${url[property]}`);
// }
// ${firstItem}
// ${list}
// Create Markdown with frontmatter part:
// NOTE(review): the template literal below holds only the two `---`
// front-matter delimiter lines; nothing from `listcontent` is interpolated,
// so the scraped links never reach the file. The promise returned by
// fs.promises.writeFile is also not awaited, so 'MD created' is logged
// without waiting for the write to finish.
fs.promises.writeFile(`file` + '.md', `---
---
`);
console.log('MD created');
await browser.close()
})()
首先,您的脚本缺少一些括号和大括号,下面这个版本可以正常运行:
const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Baseline scraper: opens https://books.toscrape.com/ in headless Chromium
 * and collects the title and link of every `.product_pod` element into
 * `listcontent`, an array of `{ title, url }` objects.
 *
 * The browser is closed in a `finally` block so it is shut down even when
 * navigation or evaluation throws, and a failure sets a non-zero exit code
 * instead of surfacing as an unhandled promise rejection.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    // Open the page inside the context created above rather than letting
    // browser.newPage() create a second, implicit context.
    const page90 = await context.newPage();
    await page90.goto('https://books.toscrape.com/');

    // Runs inside the page; only serializable data can be returned to Node.
    const listcontent = await page90.evaluate(() =>
      Array.from(document.querySelectorAll('.product_pod'), (book) => ({
        title: book.querySelector('.thumbnail').getAttribute('alt'),
        url: book.querySelector('a').getAttribute('href'),
      })),
    );

    // `listcontent` is only in scope here: any code that formats or saves
    // the result belongs at this point, before the browser is closed.
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
以 markdown 样式将文本保存到文件中的简单方法是构建特定格式的字符串,然后将其转储到文件中:
// Render each scraped { title, url } entry as a Markdown link followed by a
// blank line, then concatenate all entries into a single string.
const string = listcontent
  .map(({ title, url }) => `[${title}](${url})\n\n`)
  .join('');
然后写入文件:
// Synchronously write the assembled Markdown text to books.md; execution
// continues only after the call returns.
fs.writeFileSync('books.md', string);
如果你想要一个异步函数:
// Asynchronous variant, meant to run inside the async function that owns
// `browser` and `string`. The promise-based API is awaited so the success
// message is printed only after the write has finished, and the browser is
// closed in `finally` so a failed write cannot leave it running. A write
// error still propagates to the caller as a rejection.
try {
  await fs.promises.writeFile('books.md', string);
  console.log("File was saved.");
} finally {
  await browser.close();
}
所以整个脚本:
const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Scrapes https://books.toscrape.com/ with headless Chromium and writes one
 * Markdown link per book to books.md, each followed by a blank line.
 *
 * The browser is closed in a `finally` block so it is shut down even when
 * scraping or writing fails, and a failure sets a non-zero exit code instead
 * of surfacing as an unhandled promise rejection.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    // Open the page inside the context created above rather than letting
    // browser.newPage() create a second, implicit context.
    const page90 = await context.newPage();
    await page90.goto('https://books.toscrape.com/');

    // Runs inside the page; returns an array of { title, url } objects where
    // `url` is the raw `href` attribute value of the first link in the card.
    const listcontent = await page90.evaluate(() =>
      Array.from(document.querySelectorAll('.product_pod'), (book) => ({
        title: book.querySelector('.thumbnail').getAttribute('alt'),
        url: book.querySelector('a').getAttribute('href'),
      })),
    );

    // One "[title](url)" line per book, separated by blank lines.
    const string = listcontent
      .map(({ title, url }) => `[${title}](${url})\n\n`)
      .join('');

    // Synchronous write keeps the script simple; for a non-blocking write use
    // `await fs.promises.writeFile('books.md', string)` here instead.
    fs.writeFileSync('books.md', string);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
所以我想抓取一些网站的标题并在我的网站上使用它们(使用 markdown 文件管理)。 该脚本可以正常运行,控制台中的输出看起来也没有问题。但是我无法把 for 循环产生的值写入 Markdown 文件。
曾尝试使用整个 object 并将变量名 ( ${list} ) 放入 markdown 中,还尝试将输出的单个节点写入 markdown ( ${firstItem} )。 一直在搜索 'write result javascript to markdown'、'print object to markdown'、'convert json to markdown',研究了字符串,并尝试了几个相关术语,但到目前为止没有结果。
如果您有任何提示或知道解决此问题的方法,我将不胜感激。
// Question script: loads https://books.toscrape.com/ with Playwright, collects
// the title and link of every `.product_pod` element, prints them to the
// console as Markdown links, and then writes a Markdown file.
const { chromium } = require('playwright');
const fs = require('fs');
(async () => {
const browser = await chromium.launch({ headless: true, slowMo: 250 });
// NOTE(review): `context` is never used below; the page is opened with
// browser.newPage() rather than through this context.
const context = await browser.newContext();
const page90 = await browser.newPage();
await page90.goto("https://books.toscrape.com/");
// The callback runs inside the page and returns an array of { title, url }
// objects: `title` is the `alt` attribute of the `.thumbnail` element and
// `url` is the `href` attribute of the first <a> inside each `.product_pod`.
const listcontent = await page90.evaluate(() => {
const data = [];
const books = document.querySelectorAll(".product_pod");
books.forEach((book) => {
let title = book.querySelector('.thumbnail').getAttribute("alt");
let url = book.querySelector('a').getAttribute("href");
data.push({
title,
url,
});
});
return data;
});
// console.log(listcontent);
// listcontent.forEach(item => console.log(item))
// [listcontent].forEach(console.log, console);
// [listcontent].forEach(console.log.bind(console));
// let list = {};
// Prints one Markdown link per entry; nothing is accumulated for later use,
// so these values are only ever visible on the console.
for (const {title: n, url: f} of listcontent) {
console.log('[' + n + '](' + f + ')');
// return list;
}
// let firstItem = listcontent[0]
// lists as json
console.log(listcontent)
// for (const property of listcontent) {
// console.log(`${property}: ${url[property]}`);
// }
// ${firstItem}
// ${list}
// Create Markdown with frontmatter part:
// NOTE(review): the template literal below holds only the two `---`
// front-matter delimiter lines; nothing from `listcontent` is interpolated,
// so the scraped links never reach the file. The promise returned by
// fs.promises.writeFile is also not awaited, so 'MD created' is logged
// without waiting for the write to finish.
fs.promises.writeFile(`file` + '.md', `---
---
`);
console.log('MD created');
await browser.close()
})()
首先,您的脚本缺少一些括号和大括号,下面这个版本可以正常运行:
const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Baseline scraper: opens https://books.toscrape.com/ in headless Chromium
 * and collects the title and link of every `.product_pod` element into
 * `listcontent`, an array of `{ title, url }` objects.
 *
 * The browser is closed in a `finally` block so it is shut down even when
 * navigation or evaluation throws, and a failure sets a non-zero exit code
 * instead of surfacing as an unhandled promise rejection.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    // Open the page inside the context created above rather than letting
    // browser.newPage() create a second, implicit context.
    const page90 = await context.newPage();
    await page90.goto('https://books.toscrape.com/');

    // Runs inside the page; only serializable data can be returned to Node.
    const listcontent = await page90.evaluate(() =>
      Array.from(document.querySelectorAll('.product_pod'), (book) => ({
        title: book.querySelector('.thumbnail').getAttribute('alt'),
        url: book.querySelector('a').getAttribute('href'),
      })),
    );

    // `listcontent` is only in scope here: any code that formats or saves
    // the result belongs at this point, before the browser is closed.
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
以 markdown 样式将文本保存到文件中的简单方法是构建特定格式的字符串,然后将其转储到文件中:
// Render each scraped { title, url } entry as a Markdown link followed by a
// blank line, then concatenate all entries into a single string.
const string = listcontent
  .map(({ title, url }) => `[${title}](${url})\n\n`)
  .join('');
然后写入文件:
// Synchronously write the assembled Markdown text to books.md; execution
// continues only after the call returns.
fs.writeFileSync('books.md', string);
如果你想要一个异步函数:
// Asynchronous variant, meant to run inside the async function that owns
// `browser` and `string`. The promise-based API is awaited so the success
// message is printed only after the write has finished, and the browser is
// closed in `finally` so a failed write cannot leave it running. A write
// error still propagates to the caller as a rejection.
try {
  await fs.promises.writeFile('books.md', string);
  console.log("File was saved.");
} finally {
  await browser.close();
}
所以整个脚本:
const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Scrapes https://books.toscrape.com/ with headless Chromium and writes one
 * Markdown link per book to books.md, each followed by a blank line.
 *
 * The browser is closed in a `finally` block so it is shut down even when
 * scraping or writing fails, and a failure sets a non-zero exit code instead
 * of surfacing as an unhandled promise rejection.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    // Open the page inside the context created above rather than letting
    // browser.newPage() create a second, implicit context.
    const page90 = await context.newPage();
    await page90.goto('https://books.toscrape.com/');

    // Runs inside the page; returns an array of { title, url } objects where
    // `url` is the raw `href` attribute value of the first link in the card.
    const listcontent = await page90.evaluate(() =>
      Array.from(document.querySelectorAll('.product_pod'), (book) => ({
        title: book.querySelector('.thumbnail').getAttribute('alt'),
        url: book.querySelector('a').getAttribute('href'),
      })),
    );

    // One "[title](url)" line per book, separated by blank lines.
    const string = listcontent
      .map(({ title, url }) => `[${title}](${url})\n\n`)
      .join('');

    // Synchronous write keeps the script simple; for a non-blocking write use
    // `await fs.promises.writeFile('books.md', string)` here instead.
    fs.writeFileSync('books.md', string);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});