使用 Node.js 操作 HTML

html manipulation with Node JS

我想从某个来源(链接或文件等)获取 HTML,并从中提取值。HTML 的格式如下:

<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>

首先我使用了 cheerio。根据文档,我写了如下代码:

const cheerio = require('cheerio');

// Parse the markup held in `myhtml` (defined outside this snippet).
const $ = cheerio.load(myhtml);

// Select every <div> nested under #serp, then collect the child elements.
const serpDivs = $('#serp div');
const content = serpDivs.children();

console.log(content); // reported by the author as printing null

我按照同样的步骤尝试了 x-ray 和 jsdom,但它们打印的也都是空值。

我用下面的代码进行了尝试:

// Sample document: four <article> elements inside a <div> under <section id="serp">.
const myhtml = `<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>`;

const cheerio = require('cheerio');

// Parse the markup; `$` is the query function for the parsed document.
const $ = cheerio.load(myhtml);

// Select every <div> nested under #serp, then collect the child elements.
const serpDivs = $('#serp div');
const content = serpDivs.children();

// First log the raw selection object itself.
console.log(content);

// Then log the string that .html() yields for that selection.
const markup = content.html();
console.log(`html: ${markup}`);

它向控制台输出以下内容:

initialize {
  '0': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: null,
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '1': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '2': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '3': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n      ',
        parent: [Object],
        prev: [Circular],
        next: null } },
  options: 
   { withDomLvl1: true,
     normalizeWhitespace: false,
     xml: false,
     decodeEntities: true },
  _root: 
   initialize {
     '0': 
      { type: 'root',
        name: 'root',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: null,
        prev: null,
        next: null },
     options: 
      { withDomLvl1: true,
        normalizeWhitespace: false,
        xml: false,
        decodeEntities: true },
     length: 1,
     _root: [Circular] },
  length: 4,
  prevObject: 
   initialize {
     '0': 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     options: 
      { withDomLvl1: true,
        normalizeWhitespace: false,
        xml: false,
        decodeEntities: true },
     _root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] },
     length: 1,
     prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } }
html: a

Process finished with exit code 0