使用 Playwright 抓取标题,如何将输出写入 Markdown 文件(Console.log 有效)

Scrape headlines with Playwright, how to write output into Markdown file (Console.log is working)

我想抓取一些网站的标题,并在我自己的网站上使用它们(网站内容通过 Markdown 文件管理)。该脚本可以运行,并且控制台中的输出看起来没有问题。但是我无法将 for 循环中得到的值写入 Markdown 文件。

曾尝试使用整个 object 并将变量名 ( ${list} ) 放入 markdown 中,还尝试将输出的单个节点写入 markdown ( ${firstItem} )。 一直在搜索 'write result javascript to markdown'、'print object to markdown'、'convert json to markdown',研究了字符串,并尝试了几个相关术语,但到目前为止没有结果。

如果您有任何提示,或者知道解决此问题的方法,我将不胜感激。

const { chromium } = require('playwright');
const fs = require('fs');

// Question script: launch headless Chromium, scrape the title and link of every
// book on books.toscrape.com, print them to the console as Markdown links, and
// then create a Markdown file.
(async () => {
const browser = await chromium.launch({ headless: true, slowMo: 250 });
// NOTE: `context` is never used below; the page is opened directly on `browser`.
const context = await browser.newContext();
const page90 = await browser.newPage();

await page90.goto("https://books.toscrape.com/");
// The callback reads the page's DOM and returns an array of { title, url }
// objects, one per `.product_pod` element.
const listcontent = await page90.evaluate(() => {
const data = [];

const books = document.querySelectorAll(".product_pod");
books.forEach((book) => {
  // Title comes from the thumbnail's `alt` attribute; url is the raw `href`
  // attribute of the first <a> inside the product element.
  let title = book.querySelector('.thumbnail').getAttribute("alt");
  let url = book.querySelector('a').getAttribute("href");
  data.push({
    title,
    url,
  });
});
return data;
});

// console.log(listcontent);
// listcontent.forEach(item => console.log(item))
// [listcontent].forEach(console.log, console);
// [listcontent].forEach(console.log.bind(console));
// let list = {};

// Print each entry as a Markdown link: [title](url).
// The formatted text is only logged here; it is not stored in any variable.
for (const {title: n, url: f} of listcontent) {
console.log('[' + n + '](' + f + ')');
// return list;
}

//  let firstItem = listcontent[0]
// lists as json
console.log(listcontent)

// for (const property of listcontent) {
//   console.log(`${property}: ${url[property]}`);
// }


// ${firstItem} 
// ${list}

// Create Markdown with frontmatter part:
// NOTE: the promise returned by writeFile is not awaited, and the file body
// below contains only the two front-matter delimiters. None of the scraped
// values are interpolated into it, so they never reach the file.
fs.promises.writeFile(`file` + '.md', `---

---

`);

console.log('MD created');
await browser.close()
})()

首先,您的脚本缺少一些方括号和大括号,下面是修正后可以正常运行的版本:

const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Scrape the title and link of every book listed on books.toscrape.com.
 *
 * Launches headless Chromium, opens the page in an explicit browser context,
 * and collects one `{ title, url }` object per `.product_pod` element into
 * `listcontent`. The browser is always closed, even if navigation or
 * scraping throws.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });

  try {
    // Open the page on the context we created, so the context is actually used.
    const context = await browser.newContext();
    const page90 = await context.newPage();

    await page90.goto("https://books.toscrape.com/");

    // This callback is evaluated against the page's DOM; only its
    // serializable return value comes back to Node.
    const listcontent = await page90.evaluate(() => {
      const data = [];

      const books = document.querySelectorAll(".product_pod");
      books.forEach((book) => {
        const title = book.querySelector('.thumbnail').getAttribute("alt");
        const url = book.querySelector('a').getAttribute("href");
        data.push({
          title,
          url,
        });
      });
      return data;
    });

    // `listcontent` is ready here: build and write the Markdown at this
    // point, before the browser is closed in the `finally` below.
  } finally {
    // Await the close so the promise is not left floating and the browser
    // process is released on both success and failure.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

以 markdown 样式将文本保存到文件中的简单方法是构建特定格式的字符串,然后将其转储到文件中:

// Render every scraped book as a Markdown link followed by a blank line,
// then join the pieces into the single string that will be written to disk.
const string = listcontent
  .map(({ title, url }) => `[${title}](${url})\n\n`)
  .join('');

然后写入文件:

// Synchronously write the accumulated Markdown string to books.md.
fs.writeFileSync('books.md', string);

如果你想要一个异步函数:

// Callback-based variant: write books.md without blocking, and close the
// browser only once the write has completed successfully.
fs.writeFile('books.md', string, (err) => {
  if (err) {
    throw err;
  }

  console.log("File was saved.");
  browser.close();
});

因此,完整的脚本如下:

const { chromium } = require('playwright');
const fs = require('fs');

/**
 * Scrape every book's title and link from books.toscrape.com and save them
 * to `books.md` as Markdown links, one per paragraph.
 *
 * The browser is always closed, even if navigation, scraping, or the file
 * write throws; any failure is logged and reflected in the exit code.
 */
(async () => {
  const browser = await chromium.launch({ headless: true });

  try {
    // Open the page on the context we created, so the context is actually used.
    const context = await browser.newContext();
    const page90 = await context.newPage();

    await page90.goto("https://books.toscrape.com/");

    // This callback is evaluated against the page's DOM; only its
    // serializable return value comes back to Node.
    const listcontent = await page90.evaluate(() => {
      const data = [];

      const books = document.querySelectorAll(".product_pod");
      books.forEach((book) => {
        const title = book.querySelector('.thumbnail').getAttribute("alt");
        const url = book.querySelector('a').getAttribute("href");
        data.push({
          title,
          url,
        });
      });
      return data;
    });

    // Build the Markdown: "[title](url)" followed by a blank line per book.
    let string = '';
    for (const { title: n, url: f } of listcontent) {
      string += `[${n}](${f})\n\n`;
    }

    // Non-blocking alternative:
    //   await fs.promises.writeFile('books.md', string);
    fs.writeFileSync('books.md', string);
  } finally {
    // Await the close so the promise is not left floating and the browser
    // process is released on both success and failure.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});