在 google 翻译页面上抓取文本 - Javascript

Scraping text on google translate page - Javascript

我正在尝试从这个 URL 抓取一些文字:https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate

到目前为止我已经能取到其中一项,但不知道如何取到另外两项。

我正在尝试获取原文(eng)、汉字(character)和拼音(cn)

这是我目前的代码——eng 可以正常获取,但 cn 和 character 都取不到。

import fetch from "node-fetch"
import cheerio from "cheerio"

/**
 * Downloads a page and resolves with its body read as text.
 *
 * @param {string} URL - Address of the page to request.
 * @returns {Promise<string>} The response body as a string.
 */
const getRawData = async (URL) => {
  const response = await fetch(URL);
  const body = await response.text();
  return body;
};

// Page to scrape. Per its query parameters: sl=en, tl=zh-TW, text=Hello.
const URL = "https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate";

/**
 * Downloads the page at `URL`, parses it with cheerio, and logs three values
 * looked up by CSS selector: `eng`, `cn` and `character`.
 *
 * @returns {Promise<void>} Resolves after the values have been logged.
 */
const getData = async () => {
  const html = await getRawData(URL);
  const $ = cheerio.load(html);

  // Text of the first child node of the first `div.D5aOJc` match.
  const engNode = $("div.D5aOJc")[0];
  const eng = engNode.children[0].data;

  // NOTE(review): reads `.data` directly on the matched element rather than
  // on one of its child nodes, unlike the `eng` lookup above — confirm intent.
  const cnNode = $("div.kO6q6e")[0];
  const cn = cnNode.data;

  // NOTE(review): `.data` is read from the selection object itself, without
  // indexing into it or calling anything — confirm intent.
  const characterMatches = $("span.Q4iAWc");
  const character = characterMatches.data;

  console.log(eng, cn, character);
};

// Start the scrape. The returned promise is neither awaited nor given a
// rejection handler here.
getData();

正如 Sandun Isuru Niraj 所说,你需要使用 Puppeteer:翻译结果很可能是由页面上的 JavaScript 动态渲染的,仅靠 fetch 拿到的 HTML 中未必包含这些内容。下面是针对你的问题的解决方案:

const puppeteer = require("puppeteer");
// Text to translate, in its raw (unencoded) form.
const queryup = "Hello";
// Percent-encode the text for use as a single query-string value.
// encodeURIComponent is used instead of encodeURI because encodeURI leaves
// characters such as "&", "=", "+" and "#" unescaped, which would corrupt the
// `text=` parameter for any input containing them.
const query = encodeURIComponent(queryup);

/**
 * Opens the Google Translate page for the given text in a Puppeteer-driven
 * browser, reads three pieces of text from the rendered page and logs them
 * as `eng`, `cn` and `character`.
 *
 * @param {string} searchQuery - Text to translate. It is inserted verbatim
 *   into the `text=` query parameter, so it must already be URL-encoded.
 * @returns {Promise<void>} Resolves after the values are logged and the
 *   browser has been closed.
 * @throws Rejects with whatever error navigation, the selector wait or the
 *   element lookups raise; the browser is closed before the error propagates.
 */
async function GTranslate(searchQuery) {
  const browser = await puppeteer.launch({
    headless: false,
  });

  // try/finally guarantees the browser is closed even when a step below
  // fails (e.g. the selector wait times out); otherwise it would be left open.
  try {
    const page = await browser.newPage();
    await page.goto(`https://translate.google.ca/?sl=en&tl=zh-TW&text=${searchQuery}&op=translate`);
    // Wait until the element later read as `character` exists in the page
    // before querying anything.
    await page.waitForSelector(".Q4iAWc");

    const eng = await page.$eval(".D5aOJc.Hapztf", (el) => el.textContent);
    const cn = await page.$eval(".dePhmb .kO6q6e", (el) => el.textContent);
    const character = await page.$eval(".Q4iAWc", (el) => el.textContent);

    console.log(eng, cn, character);
  } finally {
    await browser.close();
  }
}

// Run the lookup with the encoded query. The returned promise is neither
// awaited nor given a rejection handler here.
GTranslate(query);

输出:

Hello Nǐ hǎo 你好