如何使用 ajax 在维基百科中获取特定信息
How to get a specific information in wikipedia with ajax
如何使用 AJAX 获取在绝大多数搜索(图中所示)中始终显示在右侧框中的信息?我已经使用了 API https://en.wikipedia.org/w/api.php 但我没有找到我想要的信息。
好的,说实话,我从来没有在 Stack Overflow 上花这么多时间回答一个问题,
所以下面给你一个可以运行的代码片段,虽然写得比较粗糙,但确实能用 :)
// Wikipedia article to query (the title exactly as it appears in the article URL).
const wiki_article_title = 'Grand_Theft_Auto_V';
// please check https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
// The title is percent-encoded so that names containing characters such as
// `&`, `#`, `+` or `?` cannot corrupt the query string.
const url_api = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(wiki_article_title)}&prop=text&formatversion=2&origin=*`;
/**
 * Extracts the infobox of a Wikipedia article as pretty-printed JSON.
 *
 * @param {Document} doc - Document whose first `<pre>` element holds the JSON
 *   text returned by the `action=parse` API call.
 * @returns {string} JSON (4-space indent) with `title`, `image_url` and one
 *   entry per labelled infobox row: a string, or an array of strings when the
 *   row contains a list. `title` / `image_url` are `null` when absent.
 * @throws {Error} When the JSON payload is missing, the API reported an error,
 *   or the article has no infobox.
 */
function extractInfoboxFromWiki(doc) {
  // here we extract the json provided by api
  const payload = doc.querySelector('pre');
  if (!payload) {
    throw new Error('API response contains no <pre> element holding the JSON payload');
  }
  const obj = JSON.parse(payload.innerText);
  const parsedPage = obj && obj.parse;
  if (!parsedPage || typeof parsedPage.text !== 'string') {
    // e.g. the API answers with { error: { info: ... } } instead of { parse: ... }
    const reason = obj && obj.error && obj.error.info;
    throw new Error(reason || 'API response does not contain "parse.text"');
  }

  // Line breaks are stripped from the markup (as before), so the extracted
  // labels and values contain none.
  const html = parsedPage.text.replace(/\n/g, '');

  // Parse into a separate document instead of assigning innerHTML on an
  // element of the live page, so the remote markup stays inert.
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  const infobox = parsed.querySelector('.infobox');
  if (!infobox) {
    throw new Error('No infobox found in this article');
  }

  // Only the infobox's own rows, independent of what its first child node is.
  const [titleRow, imageRow, ...dataRows] = infobox.querySelectorAll(':scope > tbody > tr');
  const output = {};

  // parse title (first row)
  const titleCell = titleRow ? titleRow.querySelector('th') : null;
  output['title'] = titleCell ? titleCell.innerText : null;

  // parse image url (second row)
  const imageLink = imageRow ? imageRow.querySelector('a') : null;
  output['image_url'] = imageLink ? imageLink.getAttribute('href') : null;

  // traverse the remaining rows to map captions with values
  for (const row of dataRows) {
    const label = row.querySelector('th');
    if (!label) {
      continue; // row without a caption: nothing to key the value on
    }
    if (row.querySelector('ul')) {
      output[label.innerText] = [...row.querySelectorAll('li')].map((li) => li.innerText);
      continue;
    }
    const cell = row.querySelector('td');
    if (cell) {
      output[label.innerText] = cell.innerText;
    }
  }

  // return beautified json
  return JSON.stringify(output, null, 4);
}
// Fetch the API response, extract the infobox and display it on the page.
fetch(url_api)
  .then((response) => {
    // fetch() only rejects on network failures; HTTP error statuses must be
    // checked explicitly.
    if (!response.ok) {
      throw new Error(`Wikipedia API request failed with HTTP status ${response.status}`);
    }
    return response.text();
  })
  .then((text) => {
    const parser = new DOMParser();
    const doc = parser.parseFromString(text, 'text/html');
    const WHAT_YOU_WANT = extractInfoboxFromWiki(doc);
    // Show the JSON through textContent rather than document.write(): the
    // values come from a remote page and must not be interpreted as markup.
    const pre = document.createElement('pre');
    pre.textContent = WHAT_YOU_WANT;
    document.body.appendChild(pre);
  })
  .catch((error) => {
    // Surface network, HTTP and parsing failures instead of leaving an
    // unhandled rejection.
    console.error('Could not load the Wikipedia infobox:', error);
  });
对于 GTA V 文章,您获得:
{
"title": "Grand Theft Auto V",
"image_url": "/wiki/File:Grand_Theft_Auto_V.png",
"Developer(s)": "Rockstar North[a]",
"Publisher(s)": "Rockstar Games",
"Producer(s)": [
"Leslie Benzies",
"Imran Sarwar"
],
"Designer(s)": [
"Leslie Benzies",
"Imran Sarwar"
],
"Programmer(s)": "Adam Fowler",
"Artist(s)": "Aaron Garbut",
"Writer(s)": [
"Dan Houser",
"Rupert Humphries",
"Michael Unsworth"
],
"Composer(s)": [
"Tangerine Dream",
"Woody Jackson",
"The Alchemist",
"Oh No"
],
"Series": "Grand Theft Auto",
"Engine": "RAGE",
"Platform(s)": [
"PlayStation 3",
"Xbox 360",
"PlayStation 4",
"Xbox One",
"Microsoft Windows",
"PlayStation 5",
"Xbox Series X/S"
],
"Release": [
"PS3, Xbox 360",
"17 September 2013",
"PS4, Xbox One",
"18 November 2014",
"Microsoft Windows",
"14 April 2015",
"PS5, Xbox Series X/S",
"Q3/Q4 2021"
],
"Genre(s)": "Action-adventure",
"Mode(s)": "Single-player, multiplayer"
}
我原以为我的代码无法用于外观不同的 wiki 页面,但事实并非如此,它仍然可以正常工作:
// Wikipedia article to query (the title exactly as it appears in the article URL).
const wiki_article_title = 'Richard_Stallman';
// please check https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
// The title is percent-encoded so that names containing characters such as
// `&`, `#`, `+` or `?` cannot corrupt the query string.
const url_api = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(wiki_article_title)}&prop=text&formatversion=2&origin=*`;
/**
 * Extracts the infobox of a Wikipedia article as pretty-printed JSON.
 *
 * @param {Document} doc - Document whose first `<pre>` element holds the JSON
 *   text returned by the `action=parse` API call.
 * @returns {string} JSON (4-space indent) with `title`, `image_url` and one
 *   entry per labelled infobox row: a string, or an array of strings when the
 *   row contains a list. `title` / `image_url` are `null` when absent.
 * @throws {Error} When the JSON payload is missing, the API reported an error,
 *   or the article has no infobox.
 */
function extractInfoboxFromWiki(doc) {
  // here we extract the json provided by api
  const payload = doc.querySelector('pre');
  if (!payload) {
    throw new Error('API response contains no <pre> element holding the JSON payload');
  }
  const obj = JSON.parse(payload.innerText);
  const parsedPage = obj && obj.parse;
  if (!parsedPage || typeof parsedPage.text !== 'string') {
    // e.g. the API answers with { error: { info: ... } } instead of { parse: ... }
    const reason = obj && obj.error && obj.error.info;
    throw new Error(reason || 'API response does not contain "parse.text"');
  }

  // Line breaks are stripped from the markup (as before), so the extracted
  // labels and values contain none.
  const html = parsedPage.text.replace(/\n/g, '');

  // Parse into a separate document instead of assigning innerHTML on an
  // element of the live page, so the remote markup stays inert.
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  const infobox = parsed.querySelector('.infobox');
  if (!infobox) {
    throw new Error('No infobox found in this article');
  }

  // Only the infobox's own rows, independent of what its first child node is.
  const [titleRow, imageRow, ...dataRows] = infobox.querySelectorAll(':scope > tbody > tr');
  const output = {};

  // parse title (first row)
  const titleCell = titleRow ? titleRow.querySelector('th') : null;
  output['title'] = titleCell ? titleCell.innerText : null;

  // parse image url (second row)
  const imageLink = imageRow ? imageRow.querySelector('a') : null;
  output['image_url'] = imageLink ? imageLink.getAttribute('href') : null;

  // traverse the remaining rows to map captions with values
  for (const row of dataRows) {
    const label = row.querySelector('th');
    if (!label) {
      continue; // row without a caption: nothing to key the value on
    }
    if (row.querySelector('ul')) {
      output[label.innerText] = [...row.querySelectorAll('li')].map((li) => li.innerText);
      continue;
    }
    const cell = row.querySelector('td');
    if (cell) {
      output[label.innerText] = cell.innerText;
    }
  }

  // return beautified json
  return JSON.stringify(output, null, 4);
}
// Fetch the API response, extract the infobox and display it on the page.
fetch(url_api)
  .then((response) => {
    // fetch() only rejects on network failures; HTTP error statuses must be
    // checked explicitly.
    if (!response.ok) {
      throw new Error(`Wikipedia API request failed with HTTP status ${response.status}`);
    }
    return response.text();
  })
  .then((text) => {
    const parser = new DOMParser();
    const doc = parser.parseFromString(text, 'text/html');
    const WHAT_YOU_WANT = extractInfoboxFromWiki(doc);
    // Show the JSON through textContent rather than document.write(): the
    // values come from a remote page and must not be interpreted as markup.
    const pre = document.createElement('pre');
    pre.textContent = WHAT_YOU_WANT;
    document.body.appendChild(pre);
  })
  .catch((error) => {
    // Surface network, HTTP and parsing failures instead of leaving an
    // unhandled rejection.
    console.error('Could not load the Wikipedia infobox:', error);
  });
如何使用 AJAX 获取在绝大多数搜索(图中所示)中始终显示在右侧框中的信息?我已经使用了 API https://en.wikipedia.org/w/api.php 但我没有找到我想要的信息。
好的,说实话,我从来没有在 Stack Overflow 上花这么多时间回答一个问题,
所以下面给你一个可以运行的代码片段,虽然写得比较粗糙,但确实能用 :)
// Wikipedia article to query (the title exactly as it appears in the article URL).
const wiki_article_title = 'Grand_Theft_Auto_V';
// please check https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
// The title is percent-encoded so that names containing characters such as
// `&`, `#`, `+` or `?` cannot corrupt the query string.
const url_api = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(wiki_article_title)}&prop=text&formatversion=2&origin=*`;
/**
 * Extracts the infobox of a Wikipedia article as pretty-printed JSON.
 *
 * @param {Document} doc - Document whose first `<pre>` element holds the JSON
 *   text returned by the `action=parse` API call.
 * @returns {string} JSON (4-space indent) with `title`, `image_url` and one
 *   entry per labelled infobox row: a string, or an array of strings when the
 *   row contains a list. `title` / `image_url` are `null` when absent.
 * @throws {Error} When the JSON payload is missing, the API reported an error,
 *   or the article has no infobox.
 */
function extractInfoboxFromWiki(doc) {
  // here we extract the json provided by api
  const payload = doc.querySelector('pre');
  if (!payload) {
    throw new Error('API response contains no <pre> element holding the JSON payload');
  }
  const obj = JSON.parse(payload.innerText);
  const parsedPage = obj && obj.parse;
  if (!parsedPage || typeof parsedPage.text !== 'string') {
    // e.g. the API answers with { error: { info: ... } } instead of { parse: ... }
    const reason = obj && obj.error && obj.error.info;
    throw new Error(reason || 'API response does not contain "parse.text"');
  }

  // Line breaks are stripped from the markup (as before), so the extracted
  // labels and values contain none.
  const html = parsedPage.text.replace(/\n/g, '');

  // Parse into a separate document instead of assigning innerHTML on an
  // element of the live page, so the remote markup stays inert.
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  const infobox = parsed.querySelector('.infobox');
  if (!infobox) {
    throw new Error('No infobox found in this article');
  }

  // Only the infobox's own rows, independent of what its first child node is.
  const [titleRow, imageRow, ...dataRows] = infobox.querySelectorAll(':scope > tbody > tr');
  const output = {};

  // parse title (first row)
  const titleCell = titleRow ? titleRow.querySelector('th') : null;
  output['title'] = titleCell ? titleCell.innerText : null;

  // parse image url (second row)
  const imageLink = imageRow ? imageRow.querySelector('a') : null;
  output['image_url'] = imageLink ? imageLink.getAttribute('href') : null;

  // traverse the remaining rows to map captions with values
  for (const row of dataRows) {
    const label = row.querySelector('th');
    if (!label) {
      continue; // row without a caption: nothing to key the value on
    }
    if (row.querySelector('ul')) {
      output[label.innerText] = [...row.querySelectorAll('li')].map((li) => li.innerText);
      continue;
    }
    const cell = row.querySelector('td');
    if (cell) {
      output[label.innerText] = cell.innerText;
    }
  }

  // return beautified json
  return JSON.stringify(output, null, 4);
}
// Fetch the API response, extract the infobox and display it on the page.
fetch(url_api)
  .then((response) => {
    // fetch() only rejects on network failures; HTTP error statuses must be
    // checked explicitly.
    if (!response.ok) {
      throw new Error(`Wikipedia API request failed with HTTP status ${response.status}`);
    }
    return response.text();
  })
  .then((text) => {
    const parser = new DOMParser();
    const doc = parser.parseFromString(text, 'text/html');
    const WHAT_YOU_WANT = extractInfoboxFromWiki(doc);
    // Show the JSON through textContent rather than document.write(): the
    // values come from a remote page and must not be interpreted as markup.
    const pre = document.createElement('pre');
    pre.textContent = WHAT_YOU_WANT;
    document.body.appendChild(pre);
  })
  .catch((error) => {
    // Surface network, HTTP and parsing failures instead of leaving an
    // unhandled rejection.
    console.error('Could not load the Wikipedia infobox:', error);
  });
对于 GTA V 文章,您获得:
{
"title": "Grand Theft Auto V",
"image_url": "/wiki/File:Grand_Theft_Auto_V.png",
"Developer(s)": "Rockstar North[a]",
"Publisher(s)": "Rockstar Games",
"Producer(s)": [
"Leslie Benzies",
"Imran Sarwar"
],
"Designer(s)": [
"Leslie Benzies",
"Imran Sarwar"
],
"Programmer(s)": "Adam Fowler",
"Artist(s)": "Aaron Garbut",
"Writer(s)": [
"Dan Houser",
"Rupert Humphries",
"Michael Unsworth"
],
"Composer(s)": [
"Tangerine Dream",
"Woody Jackson",
"The Alchemist",
"Oh No"
],
"Series": "Grand Theft Auto",
"Engine": "RAGE",
"Platform(s)": [
"PlayStation 3",
"Xbox 360",
"PlayStation 4",
"Xbox One",
"Microsoft Windows",
"PlayStation 5",
"Xbox Series X/S"
],
"Release": [
"PS3, Xbox 360",
"17 September 2013",
"PS4, Xbox One",
"18 November 2014",
"Microsoft Windows",
"14 April 2015",
"PS5, Xbox Series X/S",
"Q3/Q4 2021"
],
"Genre(s)": "Action-adventure",
"Mode(s)": "Single-player, multiplayer"
}
我原以为我的代码无法用于外观不同的 wiki 页面,但事实并非如此,它仍然可以正常工作:
// Wikipedia article to query (the title exactly as it appears in the article URL).
const wiki_article_title = 'Richard_Stallman';
// please check https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
// The title is percent-encoded so that names containing characters such as
// `&`, `#`, `+` or `?` cannot corrupt the query string.
const url_api = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(wiki_article_title)}&prop=text&formatversion=2&origin=*`;
/**
 * Extracts the infobox of a Wikipedia article as pretty-printed JSON.
 *
 * @param {Document} doc - Document whose first `<pre>` element holds the JSON
 *   text returned by the `action=parse` API call.
 * @returns {string} JSON (4-space indent) with `title`, `image_url` and one
 *   entry per labelled infobox row: a string, or an array of strings when the
 *   row contains a list. `title` / `image_url` are `null` when absent.
 * @throws {Error} When the JSON payload is missing, the API reported an error,
 *   or the article has no infobox.
 */
function extractInfoboxFromWiki(doc) {
  // here we extract the json provided by api
  const payload = doc.querySelector('pre');
  if (!payload) {
    throw new Error('API response contains no <pre> element holding the JSON payload');
  }
  const obj = JSON.parse(payload.innerText);
  const parsedPage = obj && obj.parse;
  if (!parsedPage || typeof parsedPage.text !== 'string') {
    // e.g. the API answers with { error: { info: ... } } instead of { parse: ... }
    const reason = obj && obj.error && obj.error.info;
    throw new Error(reason || 'API response does not contain "parse.text"');
  }

  // Line breaks are stripped from the markup (as before), so the extracted
  // labels and values contain none.
  const html = parsedPage.text.replace(/\n/g, '');

  // Parse into a separate document instead of assigning innerHTML on an
  // element of the live page, so the remote markup stays inert.
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  const infobox = parsed.querySelector('.infobox');
  if (!infobox) {
    throw new Error('No infobox found in this article');
  }

  // Only the infobox's own rows, independent of what its first child node is.
  const [titleRow, imageRow, ...dataRows] = infobox.querySelectorAll(':scope > tbody > tr');
  const output = {};

  // parse title (first row)
  const titleCell = titleRow ? titleRow.querySelector('th') : null;
  output['title'] = titleCell ? titleCell.innerText : null;

  // parse image url (second row)
  const imageLink = imageRow ? imageRow.querySelector('a') : null;
  output['image_url'] = imageLink ? imageLink.getAttribute('href') : null;

  // traverse the remaining rows to map captions with values
  for (const row of dataRows) {
    const label = row.querySelector('th');
    if (!label) {
      continue; // row without a caption: nothing to key the value on
    }
    if (row.querySelector('ul')) {
      output[label.innerText] = [...row.querySelectorAll('li')].map((li) => li.innerText);
      continue;
    }
    const cell = row.querySelector('td');
    if (cell) {
      output[label.innerText] = cell.innerText;
    }
  }

  // return beautified json
  return JSON.stringify(output, null, 4);
}
// Fetch the API response, extract the infobox and display it on the page.
fetch(url_api)
  .then((response) => {
    // fetch() only rejects on network failures; HTTP error statuses must be
    // checked explicitly.
    if (!response.ok) {
      throw new Error(`Wikipedia API request failed with HTTP status ${response.status}`);
    }
    return response.text();
  })
  .then((text) => {
    const parser = new DOMParser();
    const doc = parser.parseFromString(text, 'text/html');
    const WHAT_YOU_WANT = extractInfoboxFromWiki(doc);
    // Show the JSON through textContent rather than document.write(): the
    // values come from a remote page and must not be interpreted as markup.
    const pre = document.createElement('pre');
    pre.textContent = WHAT_YOU_WANT;
    document.body.appendChild(pre);
  })
  .catch((error) => {
    // Surface network, HTTP and parsing failures instead of leaving an
    // unhandled rejection.
    console.error('Could not load the Wikipedia infobox:', error);
  });