javascript,Hugo:将无序列表转换为 JSON

javascript, Hugo: convert unordered list to the JSON

TLDR

我要转换下面的代码

<nav id="TableOfContents">
  <ul>
    <li><a href="#js">JS</a>
      <ul>
        <li><a href="#h3-1">H3-1</a></li>
        <li><a href="#h3-2">H3-2</a></li>
      </ul>
    </li>
    <li><a href="#python">Python</a>
      <ul>
        <li><a href="#h3-1-1">H3-1</a></li>
        <li><a href="#h3-2-1">H3-2</a></li>
      </ul>
    </li>
  </ul>
</nav>

用 JavaScript(或 Hugo)转换成以下 JSON 格式

{"t": "root", "d": 0, "v": "", "c": [
    {"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
            {"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"}, 
            {"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
        ]
    }, 
    {"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
            {"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"}, 
            {"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
        ]
     }
]}

上面只是一个例子,你提供的函数应该可以将任何类似的结构转换成像上面那样的JSON字符串,没有任何问题(其实唯一的要求就是不要使用硬编码来完成)

您可以像下面这样输出您的代码

<!DOCTYPE html>
<html>
<head>
  <script>
    // Demo harness: pass the TOC markup to my_func and log what it returns.
    // my_func is a function declaration, so it is hoisted and can be called
    // here even though it is written further down.
    (()=>{
      var input_text = `<nav id="TableOfContents">...`;  // placeholder for the full <nav> markup
      var output = my_func(input_text);
      console.log(output); /* expected result: {"t": "root", "d": 0, "v": "", "c": [ ... */
    }
    )()

    // Stub for the requested converter: receives the TOC HTML as a string.
    // The body is intentionally left empty for the answer to fill in.
    function my_func(text) {
      /* ... */
    }
  </script>
</head>
</html>

说来话长

我想做什么?

我想在 Hugo 上使用思维导图(markmap,markmap-github),并把它应用到文章页(single.html)中作为目录(table of contents)。

Hugo 已经通过 TableOfContents 提供了 TOC

即My.md

## JS

### H3-1

### H3-2

## Python

### H3-1

### H3-2

然后{{ .TableOfContents }}会输出

<nav id="TableOfContents">
  <ul>
    <li><a href="#js">JS</a>
      <ul>
        <li><a href="#h3-1">H3-1</a></li>
        <li><a href="#h3-2">H3-2</a></li>
      </ul>
    </li>
    <li><a href="#python">Python</a>
      <ul>
        <li><a href="#h3-1-1">H3-1</a></li>
        <li><a href="#h3-2-1">H3-2</a></li>
      </ul>
    </li>
  </ul>
</nav>

但是,如果我使用 markmap,那么我必须为其提供一个 JSON 字符串,如下所示,

{"t": "root", "d": 0, "v": "", "c": [
    {"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
            {"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"}, 
            {"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
        ]
    }, 
    {"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
            {"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"}, 
            {"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
        ]
     }
]}

我做了什么?

我尝试只用 Hugo 的 functions 但失败了 (逻辑上可行,但语法上难以实现)

所以我寄希望于 JavaScript。但一直以来,我写 JS 都是找别人的代码来修改,而这个问题对我来说是全新的,无从下手(说实话,我是 JS 外行)。

下面是我用Python实现的。

from bs4 import BeautifulSoup, Tag
from typing import Union
import os
from pathlib import Path
import json


def main():
    """Convert a sample Hugo TOC to markmap JSON and open a preview page.

    Parses the embedded ``<nav>`` markup, builds the nested dict tree
    (root node plus one node per list item), prints it as a JSON string,
    writes ``test.temp.html`` through ``create_markmap_html`` and finally
    opens that file with the system's default handler.
    """
    data = """<nav id="TableOfContents">
      <ul>
        <li><a href="#js">JS</a>
          <ul>
            <li><a href="#h3-1">H3-1</a></li>
            <li><a href="#h3-2">H3-2</a></li>
          </ul>
        </li>
        <li><a href="#python">Python</a>
          <ul>
            <li><a href="#h3-1-1">H3-1</a></li>
            <li><a href="#h3-2-1">H3-2</a></li>
          </ul>
        </li>
      </ul>
    </nav>"""
    soup = BeautifulSoup(data, "lxml")

    sub_list = []
    cur_level = 0
    # The root node shares ``sub_list`` with toc2json, which fills it in place.
    dict_tree = dict(t='root', d=cur_level, v='', c=sub_list)
    root_ul: Tag = soup.find('ul')

    toc2json(root_ul, cur_level + 1, sub_list)  # <-- core

    toc_json: str = json.dumps(dict_tree)
    print(toc_json)
    output_file = Path('test.temp.html')
    create_markmap_html(toc_json, output_file)

    # os.startfile only exists on Windows; elsewhere fall back to the
    # standard-library browser launcher so the preview step does not raise.
    if hasattr(os, 'startfile'):
        os.startfile(output_file)
    else:
        import webbrowser
        webbrowser.open(output_file.resolve().as_uri())


def toc2json(tag: Tag, cur_level: int, sub_list: list) -> None:
    """Append one markmap node per list item of *tag* to *sub_list*.

    Each element child of *tag* becomes a dict ``{t: 'h', d: cur_level,
    v: <anchor html>}``. When the item holds a nested ``<ul>``, that list is
    converted recursively and stored under the ``c`` key.

    Args:
        tag: The ``<ul>`` element whose items are converted.
        cur_level: Depth value written to the ``d`` key of every node
            created at this level.
        sub_list: Output list; nodes are appended to it in document order.
    """
    for li in tag:
        if not isinstance(li, Tag):
            # Skip the text nodes (whitespace) between the list items.
            continue
        # Look at direct children only. A recursive search would make an item
        # that has no anchor of its own borrow the first anchor of its nested
        # list, duplicating that label one level up.
        a: Union[Tag, None] = li.find('a', recursive=False)
        ul: Union[Tag, None] = li.find('ul', recursive=False)
        # An item without an anchor gets an empty label instead of "None".
        cur_obj = dict(t='h', d=cur_level, v='' if a is None else str(a))
        if ul is not None:
            new_sub_list = []
            toc2json(ul, cur_level + 1, new_sub_list)
            cur_obj['c'] = new_sub_list
        sub_list.append(cur_obj)


def create_markmap_html(json_string: str, output_file: Path):
    """Write a standalone HTML page that renders *json_string* with markmap.

    The JSON text is substituted verbatim into an inline script, where it is
    passed as the data argument of ``Markmap.create``.

    Args:
        json_string: Serialized markmap node tree.
        output_file: Destination of the generated page (written as UTF-8).
    """
    import jinja2

    page_source = """
    <!DOCTYPE html>
    <html lang=en>
    <head>
      <script src="https://d3js.org/d3.v6.min.js"></script>
      <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
      <style>      
      .mindmap {
        width: 100vw;
        height: 100vh;
      }
      </style>
    </head>
    <body>
      <main>
        <svg id="mindmap-test" class="mindmap"></svg>
      </main>
      
      <script>
        (
          (e, json_data)=>{
            const{Markmap:r}=e();
            window.mm=r.create("svg#mindmap-test",null,json_data);
          }
        )(
          ()=>window.markmap,
          {{ input_json }}
         );
      </script>
    </body>
    </html>
    """
    rendered_page = jinja2.Template(page_source).render(input_json=json_string)

    with open(output_file, 'w', encoding='utf-8') as page_file:
        page_file.write(rendered_page)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

为了这一点点努力,请帮帮我。

如有必要,我很乐意继续改进代码:)

/**
 * Converts the HTML of a table of contents (nested <ul>/<li>/<a>) into the
 * node tree used by markmap: {t, d, v, c}.
 */
class Parser {
  /**
   * @param {string} htmlExpr HTML source of the table of contents.
   */
  constructor(htmlExpr){
    this.result = {t: 'root', d: 0, v: "", c:[]};
    this.data = htmlExpr.split('\n').map(row => row.trim()) // trimmed source lines
    this.open = RegExp('^<(?<main>[^\/>]*)>')           // some open tag
    this.close = RegExp('^<\/(?<main>[^>]*)>')          // some close tag
    this.target = RegExp('(?<main><a[^>]*>[^<]*<\/a>)') // what we looking for
  }

  /**
   * Classify a piece of markup.
   *
   * @param {string} str Markup to inspect.
   * @returns {Array} [open, close, value]: the content of a leading opening
   *   tag, the name of a leading closing tag and the first complete anchor
   *   element; each entry is null when its pattern does not match.
   */
  test(str){
    let [o, c, v] = [ // all matches
      this.open.exec(str),
      this.close.exec(str),
      this.target.exec(str)
    ];
    // extract tagNames and value
    if (o) o = o.groups.main
    if (c) c = c.groups.main
    if (v) v = v.groups.main
    return [o, c, v];
  }

  /**
   * Split one line into complete anchor elements and single tags, so that
   * several tags on the same line are all seen by parse().
   *
   * @param {string} line One trimmed source line.
   * @returns {string[]} Tokens in document order (empty when there are none).
   */
  _tokenize(line){
    return line.match(/<a\b[^>]*>[^<]*<\/a>|<[^>]*>/g) || [];
  }

  /**
   * Build the node tree from the source passed to the constructor.
   *
   * @returns {Object} The root node; the same object as this.result.
   */
  parse(){
    const parents = [];          // child arrays of the currently open <ul>s
    let lastNode = this.result;  // node that receives the next nested list
    for (const line of this.data) {
      for (const token of this._tokenize(line)) {
        const [o, c, v] = this.test(token)
        if (o === 'ul') {
          lastNode.c = []
          parents.push(lastNode.c)
        } else if (c === 'ul') {
          parents.pop()
        }
        // An anchor outside of any list has no parent to attach to: skip it
        // instead of failing on parents[-1].
        if (v && parents.length > 0) {
          lastNode = {t: 'h', d: parents.length, v}; // depth = open lists
          parents[parents.length - 1].push(lastNode) // insert to result
        }
      }
    }
    return this.result;
  }
}

// Demo: run Parser on a sample table of contents and print the resulting tree.
let htmlExpr = `<nav id="TableOfContents">
      <ul>
        <li><a href="#js">JS</a>
          <ul>
            <li><a href="#h3-1">H3-1</a></li>
            <li><a href="#h3-2">H3-2</a></li>
          </ul>
        </li>
        <li><a href="#python">Python</a>
          <ul>
            <li><a href="#h3-1-1">H3-1</a></li>
            <li><a href="#h3-2-1">H3-2</a></li>
          </ul>
        </li>
      </ul>
    </nav>`;

const parser = new Parser(htmlExpr);
const test = parser.parse();

// Pretty-print with a two-space indent.
const prettyTree = JSON.stringify(test, null, 2);
console.log(prettyTree);

我发现我的问题描述可能不够准确,

导致某些人使用特殊的解决方案来解决问题。

严格来说,它们没有问题,但这不适用于我的其他一些示例(参见 id=toc-compress 和 id=toc-double_link),

而且我自己找到了答案,可以满足我所有的需求,

请看如下

/**
 * Turns the DOM of a table-of-contents <nav> element into the node tree
 * used by markmap ({t, d, v, c}) and renders it.
 */
class Toc {
  /**
   * @param {Element} node_nav The <nav> element that contains the root <ul>.
   */
  constructor(node_nav){
    this.data = node_nav;
  }

  /**
   * Render dict_data into the <svg> with the given id and keep the created
   * markmap instance in window.mm.
   *
   * @param {string} svg_id Id of the target <svg> element.
   * @param {Object} dict_data Root node as returned by convert2dict().
   */
  create_mind_map(svg_id, dict_data){
    const {Markmap} = window.markmap;
    window.mm = Markmap.create("svg#"+svg_id, null, dict_data)
  }

  /**
   * Compute the label HTML of one list item.
   *
   * @param {Element} li The list item.
   * @param {Element[]} anchors The <a> elements that are direct children of li.
   * @returns {string} outerHTML of the (possibly relabelled) first anchor, or
   *   an empty string when the item has no anchor of its own.
   */
  _get_value(li, anchors){
    if (anchors.length === 0) {
      // Without this guard the item would crash on a missing element or use
      // its nested <ul> as the label.
      return '';
    }
    // If it contains two links (one is an internal link and the other is an external link, then the internal link is used as the primary link)
    const inner_a_copy = anchors[0].cloneNode(true);  // avoid modifying the original innerText
    const has_adjacent_links = RegExp('<a[^>]*>[^<]*<\/a><a[^>]*>[^<]*<\/a>').exec(li.innerHTML) != null;
    const outer_a = has_adjacent_links ? anchors[1] : undefined;
    if (outer_a !== undefined) {
      inner_a_copy.innerText = outer_a.innerText
    }
    return inner_a_copy.outerHTML;
  }

  /**
   * Append one node per <li> child of ul_node to c, recursing into the first
   * nested <ul> of every item.
   *
   * @param {Element} ul_node The list to convert.
   * @param {Object[]} c Output array that receives the nodes.
   * @param {number} cur_level Depth stored in the d field of the new nodes.
   */
  _get_element(ul_node, c, cur_level){
    const li_list = Array.from(ul_node.childNodes).filter(node => node.nodeName === 'LI')
    li_list.forEach(li => {
      const li_children = Array.from(li.childNodes);
      const anchors = li_children.filter(node => node.nodeName === 'A');
      const ul = li_children.filter(node => node.nodeName === 'UL');

      const item = {t: 'h', d: cur_level, v: this._get_value(li, anchors)};
      if (ul.length > 0){
        const sub_list = [];
        this._get_element(ul[0], sub_list, cur_level+1)
        item.c = sub_list;
      }
      c.push(item)
    });
  }

  /**
   * Convert the <nav> element passed to the constructor.
   *
   * @returns {Object} Root node {t: 'root', d: 0, v: "", c: [...]}; c stays
   *   empty when the <nav> has no <ul> child.
   */
  convert2dict(){
    const root_ul = Array.from(this.data.childNodes).find(node => node instanceof HTMLUListElement)
    const sub_c = []
    const result_dict = {t: 'root', d: 0, v: "", c:sub_c};
    const level = 1
    // A <nav> without a list (e.g. an unrelated navigation bar) yields an
    // empty tree instead of throwing.
    if (root_ul !== undefined) {
      this._get_element(root_ul, sub_c, level);
    }
    return result_dict
  }
}

/**
 * Create an SVG element and set its attributes.
 *
 * @param {string} n Tag name of the element, created in the SVG namespace.
 * @param {Object} v Attribute map; upper-case letters in a key are replaced
 *   by "-" plus the lower-case letter (e.g. strokeWidth -> stroke-width).
 * @returns {Element} The new element.
 */
function getNode(n, v) {
  const element = document.createElementNS("http://www.w3.org/2000/svg", n);
  for (const key in v) {
    const attr_name = key.replace(/[A-Z]/g, upper => "-" + upper.toLowerCase());
    element.setAttributeNS(null, attr_name, v[key]);
  }
  return element;
}

(() => {
  // For every <nav> in the document: convert its list to a node tree, append
  // a fresh <svg id="mindmapN"> to the <nav> and render the tree into it.
  Array.from(document.getElementsByTagName('nav')).forEach((node_nav, idx) => {
    const toc = new Toc(node_nav);
    const dict_data = toc.convert2dict();
    const id_name = `mindmap${idx}`;
    node_nav.appendChild(getNode("svg", {id: id_name}));
    toc.create_mind_map(id_name, dict_data);
  });
})();
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <!-- d3 v6 and markmap-view 0.2.0 are loaded from their CDNs. -->
  <script src="https://d3js.org/d3.v6.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
  <style>
    /* The sizing declarations are commented out, so this rule currently sets nothing. */
    .mindmap {
      /*
      width: 100vw;
      height: 100vh;
      */
    }
  </style>
</head>
<body>
  <!-- Case 1: pretty-printed list, two top-level items with two sub-items each. -->
  <nav id="toc-normal" class="mindmap">
    <ul>
      <li><a href="#js">JS</a>
        <ul>
          <li><a href="#h3-1">H3-1</a></li>
          <li><a href="#h3-2">H3-2</a></li>
        </ul>
      </li>
      <li><a href="#python">Python</a>
        <ul>
          <li><a href="#h3-1-1">H3-1</a></li>
          <li><a href="#h3-2-1">H3-2</a></li>
        </ul>
      </li>
    </ul>
  </nav>

  <!-- Case 2: the "JS" branch again, written on a single line without any whitespace. -->
  <nav id="toc-compress"><ul><li><a href="#js">JS</a><ul><li><a href="#h3-1">H3-1</a></li><li><a href="#h3-2">H3-2</a></li></ul></li></ul></nav>

  <!-- Case 3: the first item is written with two anchors, an internal "#js" link wrapping an external link. -->
  <nav id="toc-double_link">
    <ul>
      <li><a href="#js"><a href="https://www.w3schools.com/js/DEFAULT.asp">JS</a></a>
        <ul>
          <li><a href="#h3-1">H3-1</a></li>
          <li><a href="#h3-2">H3-2</a></li>
        </ul>
      </li>
    </ul>
  </nav>
</body>
</html>