在 google 翻译页面上抓取文本 - Javascript
Scraping text on google translate page - Javascript
我正在尝试从这个 URL 抓取一些文本:https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate
到目前为止,我已经能取到其中一项,但不知道如何获取另外两项。
我想获取的是原文(eng)、汉字(character)和拼音(cn)。
这是我目前的代码——eng 可以正常获取,但 cn 和 character 始终取不到。
import fetch from "node-fetch"
import cheerio from "cheerio"
/**
 * Request a page and hand back its body as text.
 *
 * @param {string} URL - Address to request.
 * @returns {Promise<string>} Resolves with the response body read via `response.text()`.
 */
const getRawData = async (URL) => {
  const response = await fetch(URL);
  return response.text();
};
// Address of the page to scrape; its query string carries sl=en, tl=zh-TW and text=Hello.
// NOTE: this module-level binding shadows the built-in global `URL` constructor.
const URL = "https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate";
/**
 * Download the page at `URL`, parse it with Cheerio and log three pieces of text:
 * the source text (`eng`), the romanization (`cn`) and the translated characters
 * (`character`).
 *
 * Each value is read with `.first().text()`, so a selector that matches nothing
 * yields an empty string instead of throwing.
 *
 * NOTE(review): only the markup returned by the HTTP request is inspected. If the
 * page fills in the translation from client-side script, `cn` and `character`
 * will be empty here and a real browser is required - confirm.
 *
 * @returns {Promise<void>} Resolves once the three values have been logged.
 */
const getData = async () => {
  const data = await getRawData(URL);
  const $ = cheerio.load(data);

  // Element nodes have no `.data` property and `$(...).data` is a method, not
  // text; `.text()` is the Cheerio call that returns an element's text content.
  const eng = $("div.D5aOJc").first().text();
  const cn = $("div.kO6q6e").first().text();
  const character = $("span.Q4iAWc").first().text();

  console.log(eng, cn, character);
};
// Start the scrape. The rejection is handled explicitly so a network or parsing
// failure is reported and reflected in the exit code rather than left floating.
getData().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
正如 Sandun Isuru Niraj 所说,你需要使用 Puppeteer。下面是针对你的问题的解决方案:
const puppeteer = require("puppeteer");
// Text to translate, before URL encoding.
const queryup = "Hello";
// The value ends up inside a single query parameter, so it is encoded with
// encodeURIComponent: encodeURI leaves `&`, `=`, `#` and `+` untouched, which
// would let such characters break out of the `text` parameter.
const query = encodeURIComponent(queryup);
/**
 * Open the Google Translate page for `searchQuery` in a Puppeteer-driven browser
 * and log the source text, the romanization and the translated characters.
 *
 * @param {string} searchQuery - Text to translate. It is interpolated as-is into
 *   the `text` query parameter, so it must already be URL-encoded.
 * @returns {Promise<void>} Resolves after the values are logged and the browser
 *   has been closed; rejects if navigation or a selector lookup fails.
 */
async function GTranslate(searchQuery) {
  const browser = await puppeteer.launch({
    headless: false,
  });

  // try/finally guarantees the browser is closed even when navigation, the wait
  // or one of the lookups throws, so a failed run leaves no browser behind.
  try {
    const page = await browser.newPage();
    await page.goto(`https://translate.google.ca/?sl=en&tl=zh-TW&text=${searchQuery}&op=translate`);

    // Wait until the element holding the translated characters is present
    // before reading anything from the page.
    await page.waitForSelector(".Q4iAWc");

    const eng = await page.$eval(".D5aOJc.Hapztf", (el) => el.textContent);
    const cn = await page.$eval(".dePhmb .kO6q6e", (el) => el.textContent);
    const character = await page.$eval(".Q4iAWc", (el) => el.textContent);
    console.log(eng, cn, character);
  } finally {
    await browser.close();
  }
}
// Run the translation lookup. The rejection is handled explicitly so a launch,
// navigation or selector failure is reported and reflected in the exit code.
GTranslate(query).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
输出:
Hello Nǐ hǎo 你好
我正在尝试从这个 URL 抓取一些文本:https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate
到目前为止,我已经能取到其中一项,但不知道如何获取另外两项。
我想获取的是原文(eng)、汉字(character)和拼音(cn)。
这是我目前的代码——eng 可以正常获取,但 cn 和 character 始终取不到。
import fetch from "node-fetch"
import cheerio from "cheerio"
/**
 * Request a page and hand back its body as text.
 *
 * @param {string} URL - Address to request.
 * @returns {Promise<string>} Resolves with the response body read via `response.text()`.
 */
const getRawData = async (URL) => {
  const response = await fetch(URL);
  return response.text();
};
// Address of the page to scrape; its query string carries sl=en, tl=zh-TW and text=Hello.
// NOTE: this module-level binding shadows the built-in global `URL` constructor.
const URL = "https://translate.google.ca/?sl=en&tl=zh-TW&text=Hello&op=translate";
/**
 * Download the page at `URL`, parse it with Cheerio and log three pieces of text:
 * the source text (`eng`), the romanization (`cn`) and the translated characters
 * (`character`).
 *
 * Each value is read with `.first().text()`, so a selector that matches nothing
 * yields an empty string instead of throwing.
 *
 * NOTE(review): only the markup returned by the HTTP request is inspected. If the
 * page fills in the translation from client-side script, `cn` and `character`
 * will be empty here and a real browser is required - confirm.
 *
 * @returns {Promise<void>} Resolves once the three values have been logged.
 */
const getData = async () => {
  const data = await getRawData(URL);
  const $ = cheerio.load(data);

  // Element nodes have no `.data` property and `$(...).data` is a method, not
  // text; `.text()` is the Cheerio call that returns an element's text content.
  const eng = $("div.D5aOJc").first().text();
  const cn = $("div.kO6q6e").first().text();
  const character = $("span.Q4iAWc").first().text();

  console.log(eng, cn, character);
};
// Start the scrape. The rejection is handled explicitly so a network or parsing
// failure is reported and reflected in the exit code rather than left floating.
getData().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
正如 Sandun Isuru Niraj 所说,你需要使用 Puppeteer。下面是针对你的问题的解决方案:
const puppeteer = require("puppeteer");
// Text to translate, before URL encoding.
const queryup = "Hello";
// The value ends up inside a single query parameter, so it is encoded with
// encodeURIComponent: encodeURI leaves `&`, `=`, `#` and `+` untouched, which
// would let such characters break out of the `text` parameter.
const query = encodeURIComponent(queryup);
/**
 * Open the Google Translate page for `searchQuery` in a Puppeteer-driven browser
 * and log the source text, the romanization and the translated characters.
 *
 * @param {string} searchQuery - Text to translate. It is interpolated as-is into
 *   the `text` query parameter, so it must already be URL-encoded.
 * @returns {Promise<void>} Resolves after the values are logged and the browser
 *   has been closed; rejects if navigation or a selector lookup fails.
 */
async function GTranslate(searchQuery) {
  const browser = await puppeteer.launch({
    headless: false,
  });

  // try/finally guarantees the browser is closed even when navigation, the wait
  // or one of the lookups throws, so a failed run leaves no browser behind.
  try {
    const page = await browser.newPage();
    await page.goto(`https://translate.google.ca/?sl=en&tl=zh-TW&text=${searchQuery}&op=translate`);

    // Wait until the element holding the translated characters is present
    // before reading anything from the page.
    await page.waitForSelector(".Q4iAWc");

    const eng = await page.$eval(".D5aOJc.Hapztf", (el) => el.textContent);
    const cn = await page.$eval(".dePhmb .kO6q6e", (el) => el.textContent);
    const character = await page.$eval(".Q4iAWc", (el) => el.textContent);
    console.log(eng, cn, character);
  } finally {
    await browser.close();
  }
}
// Run the translation lookup. The rejection is handled explicitly so a launch,
// navigation or selector failure is reported and reflected in the exit code.
GTranslate(query).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
输出:
Hello Nǐ hǎo 你好