javascript,Hugo:将无序列表转换为 JSON
javascript, Hugo: convert unordered list to the JSON
TLDR
我要转换下面的代码
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
转换成以下 JSON 格式(使用 JavaScript 或 Hugo 实现)
{"t": "root", "d": 0, "v": "", "c": [
{"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
]
},
{"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}
上面只是一个例子,你提供的函数应该可以将任何类似的结构转换成像上面那样的JSON字符串,没有任何问题(其实唯一的要求就是不要使用硬编码来完成)
您可以像下面这样输出您的代码
<!DOCTYPE html>
<html>
<head>
<script>
(()=>{
var input_text = `<nav id="TableOfContents">...`;
var output = my_func(input_text);
console.log(output); /* expected result: {"t": "root", "d": 0, "v": "", "c": [ ... */
}
)()
function my_func(text) {
/* ... */
}
</script>
</head>
</html>
说来话长
我想做什么?
我想在 Hugo 上使用思维导图(markmap,markmap-github),并把它应用到文章页面(single.html)中,作为目录(table of contents)。
Hugo 已经通过 TableOfContents 提供了 TOC,
例如 My.md:
## JS
### H3-1
### H3-2
## Python
### H3-1
### H3-2
然后{{ .TableOfContents }}
会输出
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
但是,如果我使用 markmap,那么我必须为其提供一个 JSON 字符串,如下所示,
{"t": "root", "d": 0, "v": "", "c": [
{"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
]
},
{"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}
我做了什么?
我尝试只用 Hugo 的 functions 但失败了
(逻辑上可行,但语法上难以实现)
所以我寄希望于javascript,但是一直以来,我的JS都是找别人的代码修改,而且
这个例子对我来说是一个全新的例子;无从下手(说实话我是JS外行)
下面是我用Python实现的。
from bs4 import BeautifulSoup, Tag
from typing import Union
import os
from pathlib import Path
import json
def main():
    """Convert a sample table of contents to markmap JSON and preview it.

    Parses the hard-coded ``<nav>`` snippet, prints the resulting JSON string,
    writes a stand-alone HTML page to ``test.temp.html`` and opens that page.
    """
    import webbrowser  # local import: only needed for the non-Windows preview path

    data = """<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>"""
    soup = BeautifulSoup(data, "lxml")
    sub_list = []
    cur_level = 0
    # The root node shares `sub_list` with toc2json, which fills it in place.
    dict_tree = dict(t='root', d=cur_level, v='', c=sub_list)
    root_ul: Tag = soup.find('ul')
    toc2json(root_ul, cur_level + 1, sub_list)  # <-- core
    toc_json: str = json.dumps(dict_tree)
    print(toc_json)
    output_file = Path('test.temp.html')
    create_markmap_html(toc_json, output_file)
    # os.startfile is only available on Windows; fall back to the browser
    # launcher everywhere else instead of failing with AttributeError.
    if hasattr(os, 'startfile'):
        os.startfile(output_file)
    else:
        webbrowser.open(output_file.resolve().as_uri())
def toc2json(tag: Tag, cur_level: int, sub_list):
    """Append one markmap node per element child of ``tag`` to ``sub_list``.

    Every node is ``{'t': 'h', 'd': cur_level, 'v': str(<first anchor>)}``.
    When the child contains a ``<ul>``, the node additionally gets ``'c'``:
    the list built recursively from that ``<ul>`` at ``cur_level + 1``.
    """
    for child in tag:
        # Children that are not Tag instances (e.g. text between elements) are skipped.
        if isinstance(child, Tag):
            anchor: Tag = child.find('a')
            nested = child.find('ul')
            node = dict(t='h', d=cur_level, v=str(anchor))
            if nested:
                children = []
                toc2json(nested, cur_level + 1, children)
                node['c'] = children
            sub_list.append(node)
def create_markmap_html(json_string: str, output_file: Path):
    """Write an HTML page that renders ``json_string`` as a markmap mind map.

    The JSON text is substituted for ``{{ input_json }}`` inside the inline
    script of the template, where it becomes the data argument passed to
    ``Markmap.create``. The rendered page is written to ``output_file`` as UTF-8.
    """
    import jinja2

    page_source = """
<!DOCTYPE html>
<html lang=en>
<head>
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
width: 100vw;
height: 100vh;
}
</style>
</head>
<body>
<main>
<svg id="mindmap-test" class="mindmap"></svg>
</main>
<script>
(
(e, json_data)=>{
const{Markmap:r}=e();
window.mm=r.create("svg#mindmap-test",null,json_data);
}
)(
()=>window.markmap,
{{ input_json }}
);
</script>
</body>
</html>
"""
    context = dict(input_json=json_string)
    rendered_page = jinja2.Template(page_source).render(context)
    with open(output_file, 'w', encoding='utf-8') as handle:
        handle.write(rendered_page)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
看在我已经做了这些努力的份上,请帮帮我。
如有必要,我很乐意继续改进代码 :)
/**
 * Regex-driven converter from a table-of-contents snippet (nested
 * <ul>/<li>/<a>) to the markmap node tree `{t, d, v, c}`.
 *
 * Only three kinds of tokens are significant: an opening <ul>, a closing
 * </ul> and a complete `<a ...>text</a>` element. Everything else (<nav>,
 * <li>, whitespace) is ignored, so pretty-printed and minified markup
 * produce the same tree.
 */
class Parser {
  /**
   * @param {string} htmlExpr - HTML source of the table of contents.
   */
  constructor(htmlExpr){
    this.result = {t: 'root', d: 0, v: "", c:[]};
    this.data = htmlExpr.split('\n').map(row => row.trim()); // trimmed lines
    this.open = /^<(?<main>[^\/>]*)>/;   // an opening tag at the start of a string
    this.close = /^<\/(?<main>[^>]*)>/;  // a closing tag at the start of a string
    this.target = /(?<main><a[^>]*>[^<]*<\/a>)/; // first anchor with plain-text content
  }

  /**
   * Classifies a single fragment of markup.
   *
   * @param {string} str - fragment to inspect.
   * @returns {Array<string|null>} `[open, close, anchor]`: the content of a
   *   leading opening tag, the name of a leading closing tag, and the first
   *   complete anchor element; each entry is `null` when absent.
   */
  test(str){
    const pick = (match) => (match ? match.groups.main : match);
    return [
      pick(this.open.exec(str)),
      pick(this.close.exec(str)),
      pick(this.target.exec(str)),
    ];
  }

  /**
   * Builds the node tree from the markup given to the constructor.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} the root node
   *   (the same object as `this.result`).
   */
  parse(){
    // Scan the whole document instead of one tag per line: a line-based scan
    // breaks as soon as several tags share a line (minified output).
    const token = /<ul\b[^>]*>|<\/ul\s*>|<a\b[^>]*>[^<]*<\/a>/gi;
    const html = this.data.join('\n');
    const parents = []; // stack of the `c` arrays of the currently open lists
    let lastNode = this.result;

    for (const [text] of html.matchAll(token)) {
      const lower = text.toLowerCase();
      if (lower.startsWith('</ul')) {
        parents.pop();
      } else if (lower.startsWith('<ul')) {
        // A list belongs to the most recently emitted node (or the root).
        lastNode.c = [];
        parents.push(lastNode.c);
      } else if (parents.length > 0) {
        // Depth equals the number of open lists; anchors outside any list
        // are skipped instead of crashing on a missing parent.
        lastNode = {t: 'h', d: parents.length, v: text};
        parents[parents.length - 1].push(lastNode);
      }
    }
    return this.result;
  }
}
// Sample input: the example table-of-contents markup from the question.
let htmlExpr = `<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>`
// Parse the sample and print the resulting tree as JSON with 2-space indentation.
const parser = new Parser(htmlExpr);
const test = parser.parse();
console.log(JSON.stringify(test, null, 2))
我发现我的问题描述可能不够准确,
导致有些人用只针对该示例的特殊解法来解决问题。
严格来说,这些解法没有错,但它们不适用于我的其他一些示例(参见 id=toc-compress 和 id=toc-double_link)。
后来我自己找到了能满足我所有需求的答案,
请看下面的代码:
/**
 * Converts a table-of-contents <nav> element (nested <ul>/<li>/<a>) into the
 * `{t, d, v, c}` node tree passed to markmap, and renders that tree.
 */
class Toc {
  /**
   * @param {Element} node_nav - the <nav> element whose direct child <ul> is the root list.
   */
  constructor(node_nav){
    this.data = node_nav;
  }

  /**
   * Renders `dict_data` into the <svg> with id `svg_id` through the global
   * `window.markmap` object and stores the created instance on `window.mm`.
   *
   * @param {string} svg_id - id of the target <svg> element.
   * @param {object} dict_data - node tree as returned by `convert2dict`.
   */
  create_mind_map(svg_id, dict_data){
    const {Markmap} = window.markmap;
    window.mm = Markmap.create("svg#"+svg_id, null, dict_data);
  }

  /**
   * Returns the HTML used as the label of one <li>.
   *
   * The label is a copy of the first element child. When the <li> markup holds
   * two directly adjacent anchors (internal link followed by external link),
   * the first anchor's href is kept and the second anchor's text is used.
   * An <li> without an element child, or whose first element is the nested
   * <ul> itself, has no label of its own and yields an empty string.
   *
   * @param {Element} li
   * @returns {string}
   */
  _label_of(li){
    const first = li.firstElementChild;
    if (first == null || first.nodeName === 'UL') {
      return '';
    }
    const label = first.cloneNode(true); // copy, so the page itself is not modified
    const has_double_link = /<a[^>]*>[^<]*<\/a><a[^>]*>[^<]*<\/a>/.test(li.innerHTML);
    if (has_double_link) {
      const second_anchor = Array.from(li.childNodes).filter(node => node.nodeName === 'A')[1];
      if (second_anchor !== undefined) {
        label.innerText = second_anchor.innerText;
      }
    }
    return label.outerHTML;
  }

  /**
   * Appends one node per direct <li> child of `ul_node` to `c`, recursing into
   * the first direct <ul> child of each <li> with `cur_level + 1`.
   *
   * @param {Element} ul_node - list to walk.
   * @param {Array} c - array receiving the nodes (mutated in place).
   * @param {number} cur_level - depth value stored in `d` for these nodes.
   */
  _get_element(ul_node, c, cur_level){
    const children_named = (node, name) =>
      Array.from(node.childNodes).filter(child => child.nodeName === name);

    for (const li of children_named(ul_node, 'LI')) {
      const entry = {t: 'h', d: cur_level, v: this._label_of(li)};
      const [sub_ul] = children_named(li, 'UL');
      if (sub_ul !== undefined) {
        entry.c = [];
        this._get_element(sub_ul, entry.c, cur_level + 1);
      }
      c.push(entry);
    }
  }

  /**
   * Builds the node tree for the wrapped <nav>.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} root node; `c` is
   *   empty when the <nav> has no direct <ul> child.
   */
  convert2dict(){
    const result_dict = {t: 'root', d: 0, v: "", c: []};
    const root_ul = Array.from(this.data.childNodes)
      .find(node => node instanceof HTMLUListElement);
    // A <nav> without a list (e.g. site navigation) produces an empty tree
    // instead of a TypeError.
    if (root_ul !== undefined) {
      this._get_element(root_ul, result_dict.c, 1);
    }
    return result_dict;
  }
};
/**
 * Creates an element in the SVG namespace and applies the given attributes.
 *
 * Attribute keys are converted from camelCase to kebab-case
 * (`strokeWidth` -> `stroke-width`). Every uppercase letter is rewritten, so a
 * key such as `viewBox` is emitted as `view-box`.
 *
 * @param {string} n - tag name, e.g. "svg".
 * @param {object} [v] - attribute name/value pairs.
 * @returns {Element} the newly created element.
 */
function getNode(n, v) {
  const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
  const toKebabCase = (key) => key.replace(/[A-Z]/g, (upper) => "-" + upper.toLowerCase());

  const element = document.createElementNS(SVG_NAMESPACE, n);
  for (const key in v) {
    element.setAttributeNS(null, toKebabCase(key), v[key]);
  }
  return element;
}
// Page bootstrap: for every <nav> on the page, convert its list to a markmap
// tree, append a new <svg id="mindmapN"> to that nav and render the tree into it.
(() => {
  const navElements = document.getElementsByTagName('nav');
  let counter = 0;
  for (const navElement of navElements) {
    const toc = new Toc(navElement);
    const tree = toc.convert2dict();
    const svgId = 'mindmap' + counter.toString();
    navElement.appendChild(getNode("svg", {id: svgId}));
    toc.create_mind_map(svgId, tree);
    counter += 1;
  }
})();
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
/*
width: 100vw;
height: 100vh;
*/
}
</style>
</head>
<body>
<nav id="toc-normal" class="mindmap">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
<nav id="toc-compress"><ul><li><a href="#js">JS</a><ul><li><a href="#h3-1">H3-1</a></li><li><a href="#h3-2">H3-2</a></li></ul></li></ul></nav>
<nav id="toc-double_link">
<ul>
<li><a href="#js"><a href="https://www.w3schools.com/js/DEFAULT.asp">JS</a></a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
</body>
</html>
TLDR
我要转换下面的代码
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
转换成以下 JSON 格式(使用 JavaScript 或 Hugo 实现)
{"t": "root", "d": 0, "v": "", "c": [
{"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
]
},
{"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}
上面只是一个例子,你提供的函数应该可以将任何类似的结构转换成像上面那样的JSON字符串,没有任何问题(其实唯一的要求就是不要使用硬编码来完成)
您可以像下面这样输出您的代码
<!DOCTYPE html>
<html>
<head>
<script>
(()=>{
var input_text = `<nav id="TableOfContents">...`;
var output = my_func(input_text);
console.log(output); /* expected result: {"t": "root", "d": 0, "v": "", "c": [ ... */
}
)()
function my_func(text) {
/* ... */
}
</script>
</head>
</html>
说来话长
我想做什么?
我想在 Hugo 上使用思维导图(markmap,markmap-github),并把它应用到文章页面(single.html)中,作为目录(table of contents)。
Hugo 已经通过 TableOfContents 提供了 TOC,
例如 My.md:
## JS
### H3-1
### H3-2
## Python
### H3-1
### H3-2
然后{{ .TableOfContents }}
会输出
<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
但是,如果我使用 markmap,那么我必须为其提供一个 JSON 字符串,如下所示,
{"t": "root", "d": 0, "v": "", "c": [
{"t": "h", "d": 1, "v": "<a href=\"#js\">JS</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2\">H3-2</a>"}
]
},
{"t": "h", "d": 1, "v": "<a href=\"#python\">Python</a>", "c": [
{"t": "h", "d": 2, "v": "<a href=\"#h3-1-1\">H3-1</a>"},
{"t": "h", "d": 2, "v": "<a href=\"#h3-2-1\">H3-2</a>"}
]
}
]}
我做了什么?
我尝试只用 Hugo 的 functions 但失败了 (逻辑上可行,但语法上难以实现)
所以我寄希望于javascript,但是一直以来,我的JS都是找别人的代码修改,而且 这个例子对我来说是一个全新的例子;无从下手(说实话我是JS外行)
下面是我用Python实现的。
from bs4 import BeautifulSoup, Tag
from typing import Union
import os
from pathlib import Path
import json
def main():
    """Convert a sample table of contents to markmap JSON and preview it.

    Parses the hard-coded ``<nav>`` snippet, prints the resulting JSON string,
    writes a stand-alone HTML page to ``test.temp.html`` and opens that page.
    """
    import webbrowser  # local import: only needed for the non-Windows preview path

    data = """<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>"""
    soup = BeautifulSoup(data, "lxml")
    sub_list = []
    cur_level = 0
    # The root node shares `sub_list` with toc2json, which fills it in place.
    dict_tree = dict(t='root', d=cur_level, v='', c=sub_list)
    root_ul: Tag = soup.find('ul')
    toc2json(root_ul, cur_level + 1, sub_list)  # <-- core
    toc_json: str = json.dumps(dict_tree)
    print(toc_json)
    output_file = Path('test.temp.html')
    create_markmap_html(toc_json, output_file)
    # os.startfile is only available on Windows; fall back to the browser
    # launcher everywhere else instead of failing with AttributeError.
    if hasattr(os, 'startfile'):
        os.startfile(output_file)
    else:
        webbrowser.open(output_file.resolve().as_uri())
def toc2json(tag: Tag, cur_level: int, sub_list):
    """Append one markmap node per element child of ``tag`` to ``sub_list``.

    Every node is ``{'t': 'h', 'd': cur_level, 'v': str(<first anchor>)}``.
    When the child contains a ``<ul>``, the node additionally gets ``'c'``:
    the list built recursively from that ``<ul>`` at ``cur_level + 1``.
    """
    for child in tag:
        # Children that are not Tag instances (e.g. text between elements) are skipped.
        if isinstance(child, Tag):
            anchor: Tag = child.find('a')
            nested = child.find('ul')
            node = dict(t='h', d=cur_level, v=str(anchor))
            if nested:
                children = []
                toc2json(nested, cur_level + 1, children)
                node['c'] = children
            sub_list.append(node)
def create_markmap_html(json_string: str, output_file: Path):
    """Write an HTML page that renders ``json_string`` as a markmap mind map.

    The JSON text is substituted for ``{{ input_json }}`` inside the inline
    script of the template, where it becomes the data argument passed to
    ``Markmap.create``. The rendered page is written to ``output_file`` as UTF-8.
    """
    import jinja2

    page_source = """
<!DOCTYPE html>
<html lang=en>
<head>
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
width: 100vw;
height: 100vh;
}
</style>
</head>
<body>
<main>
<svg id="mindmap-test" class="mindmap"></svg>
</main>
<script>
(
(e, json_data)=>{
const{Markmap:r}=e();
window.mm=r.create("svg#mindmap-test",null,json_data);
}
)(
()=>window.markmap,
{{ input_json }}
);
</script>
</body>
</html>
"""
    context = dict(input_json=json_string)
    rendered_page = jinja2.Template(page_source).render(context)
    with open(output_file, 'w', encoding='utf-8') as handle:
        handle.write(rendered_page)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
看在我已经做了这些努力的份上,请帮帮我。
如有必要,我很乐意继续改进代码 :)
/**
 * Regex-driven converter from a table-of-contents snippet (nested
 * <ul>/<li>/<a>) to the markmap node tree `{t, d, v, c}`.
 *
 * Only three kinds of tokens are significant: an opening <ul>, a closing
 * </ul> and a complete `<a ...>text</a>` element. Everything else (<nav>,
 * <li>, whitespace) is ignored, so pretty-printed and minified markup
 * produce the same tree.
 */
class Parser {
  /**
   * @param {string} htmlExpr - HTML source of the table of contents.
   */
  constructor(htmlExpr){
    this.result = {t: 'root', d: 0, v: "", c:[]};
    this.data = htmlExpr.split('\n').map(row => row.trim()); // trimmed lines
    this.open = /^<(?<main>[^\/>]*)>/;   // an opening tag at the start of a string
    this.close = /^<\/(?<main>[^>]*)>/;  // a closing tag at the start of a string
    this.target = /(?<main><a[^>]*>[^<]*<\/a>)/; // first anchor with plain-text content
  }

  /**
   * Classifies a single fragment of markup.
   *
   * @param {string} str - fragment to inspect.
   * @returns {Array<string|null>} `[open, close, anchor]`: the content of a
   *   leading opening tag, the name of a leading closing tag, and the first
   *   complete anchor element; each entry is `null` when absent.
   */
  test(str){
    const pick = (match) => (match ? match.groups.main : match);
    return [
      pick(this.open.exec(str)),
      pick(this.close.exec(str)),
      pick(this.target.exec(str)),
    ];
  }

  /**
   * Builds the node tree from the markup given to the constructor.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} the root node
   *   (the same object as `this.result`).
   */
  parse(){
    // Scan the whole document instead of one tag per line: a line-based scan
    // breaks as soon as several tags share a line (minified output).
    const token = /<ul\b[^>]*>|<\/ul\s*>|<a\b[^>]*>[^<]*<\/a>/gi;
    const html = this.data.join('\n');
    const parents = []; // stack of the `c` arrays of the currently open lists
    let lastNode = this.result;

    for (const [text] of html.matchAll(token)) {
      const lower = text.toLowerCase();
      if (lower.startsWith('</ul')) {
        parents.pop();
      } else if (lower.startsWith('<ul')) {
        // A list belongs to the most recently emitted node (or the root).
        lastNode.c = [];
        parents.push(lastNode.c);
      } else if (parents.length > 0) {
        // Depth equals the number of open lists; anchors outside any list
        // are skipped instead of crashing on a missing parent.
        lastNode = {t: 'h', d: parents.length, v: text};
        parents[parents.length - 1].push(lastNode);
      }
    }
    return this.result;
  }
}
// Sample input: the example table-of-contents markup from the question.
let htmlExpr = `<nav id="TableOfContents">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>`
// Parse the sample and print the resulting tree as JSON with 2-space indentation.
const parser = new Parser(htmlExpr);
const test = parser.parse();
console.log(JSON.stringify(test, null, 2))
我发现我的问题描述可能不够准确,
导致有些人用只针对该示例的特殊解法来解决问题。
严格来说,这些解法没有错,但它们不适用于我的其他一些示例(参见 id=toc-compress 和 id=toc-double_link)。
后来我自己找到了能满足我所有需求的答案,
请看下面的代码:
/**
 * Converts a table-of-contents <nav> element (nested <ul>/<li>/<a>) into the
 * `{t, d, v, c}` node tree passed to markmap, and renders that tree.
 */
class Toc {
  /**
   * @param {Element} node_nav - the <nav> element whose direct child <ul> is the root list.
   */
  constructor(node_nav){
    this.data = node_nav;
  }

  /**
   * Renders `dict_data` into the <svg> with id `svg_id` through the global
   * `window.markmap` object and stores the created instance on `window.mm`.
   *
   * @param {string} svg_id - id of the target <svg> element.
   * @param {object} dict_data - node tree as returned by `convert2dict`.
   */
  create_mind_map(svg_id, dict_data){
    const {Markmap} = window.markmap;
    window.mm = Markmap.create("svg#"+svg_id, null, dict_data);
  }

  /**
   * Returns the HTML used as the label of one <li>.
   *
   * The label is a copy of the first element child. When the <li> markup holds
   * two directly adjacent anchors (internal link followed by external link),
   * the first anchor's href is kept and the second anchor's text is used.
   * An <li> without an element child, or whose first element is the nested
   * <ul> itself, has no label of its own and yields an empty string.
   *
   * @param {Element} li
   * @returns {string}
   */
  _label_of(li){
    const first = li.firstElementChild;
    if (first == null || first.nodeName === 'UL') {
      return '';
    }
    const label = first.cloneNode(true); // copy, so the page itself is not modified
    const has_double_link = /<a[^>]*>[^<]*<\/a><a[^>]*>[^<]*<\/a>/.test(li.innerHTML);
    if (has_double_link) {
      const second_anchor = Array.from(li.childNodes).filter(node => node.nodeName === 'A')[1];
      if (second_anchor !== undefined) {
        label.innerText = second_anchor.innerText;
      }
    }
    return label.outerHTML;
  }

  /**
   * Appends one node per direct <li> child of `ul_node` to `c`, recursing into
   * the first direct <ul> child of each <li> with `cur_level + 1`.
   *
   * @param {Element} ul_node - list to walk.
   * @param {Array} c - array receiving the nodes (mutated in place).
   * @param {number} cur_level - depth value stored in `d` for these nodes.
   */
  _get_element(ul_node, c, cur_level){
    const children_named = (node, name) =>
      Array.from(node.childNodes).filter(child => child.nodeName === name);

    for (const li of children_named(ul_node, 'LI')) {
      const entry = {t: 'h', d: cur_level, v: this._label_of(li)};
      const [sub_ul] = children_named(li, 'UL');
      if (sub_ul !== undefined) {
        entry.c = [];
        this._get_element(sub_ul, entry.c, cur_level + 1);
      }
      c.push(entry);
    }
  }

  /**
   * Builds the node tree for the wrapped <nav>.
   *
   * @returns {{t: string, d: number, v: string, c: Array}} root node; `c` is
   *   empty when the <nav> has no direct <ul> child.
   */
  convert2dict(){
    const result_dict = {t: 'root', d: 0, v: "", c: []};
    const root_ul = Array.from(this.data.childNodes)
      .find(node => node instanceof HTMLUListElement);
    // A <nav> without a list (e.g. site navigation) produces an empty tree
    // instead of a TypeError.
    if (root_ul !== undefined) {
      this._get_element(root_ul, result_dict.c, 1);
    }
    return result_dict;
  }
};
/**
 * Creates an element in the SVG namespace and applies the given attributes.
 *
 * Attribute keys are converted from camelCase to kebab-case
 * (`strokeWidth` -> `stroke-width`). Every uppercase letter is rewritten, so a
 * key such as `viewBox` is emitted as `view-box`.
 *
 * @param {string} n - tag name, e.g. "svg".
 * @param {object} [v] - attribute name/value pairs.
 * @returns {Element} the newly created element.
 */
function getNode(n, v) {
  const SVG_NAMESPACE = "http://www.w3.org/2000/svg";
  const toKebabCase = (key) => key.replace(/[A-Z]/g, (upper) => "-" + upper.toLowerCase());

  const element = document.createElementNS(SVG_NAMESPACE, n);
  for (const key in v) {
    element.setAttributeNS(null, toKebabCase(key), v[key]);
  }
  return element;
}
// Page bootstrap: for every <nav> on the page, convert its list to a markmap
// tree, append a new <svg id="mindmapN"> to that nav and render the tree into it.
(() => {
  const navElements = document.getElementsByTagName('nav');
  let counter = 0;
  for (const navElement of navElements) {
    const toc = new Toc(navElement);
    const tree = toc.convert2dict();
    const svgId = 'mindmap' + counter.toString();
    navElement.appendChild(getNode("svg", {id: svgId}));
    toc.create_mind_map(svgId, tree);
    counter += 1;
  }
})();
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/markmap-view@0.2.0"></script>
<style>
.mindmap {
/*
width: 100vw;
height: 100vh;
*/
}
</style>
</head>
<body>
<nav id="toc-normal" class="mindmap">
<ul>
<li><a href="#js">JS</a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
<li><a href="#python">Python</a>
<ul>
<li><a href="#h3-1-1">H3-1</a></li>
<li><a href="#h3-2-1">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
<nav id="toc-compress"><ul><li><a href="#js">JS</a><ul><li><a href="#h3-1">H3-1</a></li><li><a href="#h3-2">H3-2</a></li></ul></li></ul></nav>
<nav id="toc-double_link">
<ul>
<li><a href="#js"><a href="https://www.w3schools.com/js/DEFAULT.asp">JS</a></a>
<ul>
<li><a href="#h3-1">H3-1</a></li>
<li><a href="#h3-2">H3-2</a></li>
</ul>
</li>
</ul>
</nav>
</body>
</html>