用于提取 HTML 标记子元素的正则表达式?
regex for extracting HTML tag child elements?
我在 HTML 字符串中有以下代码。
<h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3><h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3>
我想提取以下标签:
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
我写了以下正则表达式:
<h3[^>]+?>(.*)<\/h3>
但是它返回了错误的结果:
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3><h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
请帮我提取标签。
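原来的 `.*` 是贪婪匹配,会一直匹配到最后一个 `</h3>`,所以两个 <h3> 被合并成了一个结果。请改用非贪婪匹配(`[^$]+?` 匹配除 `$` 以外的任意字符,包括换行符),这样每个 <h3> 会被单独匹配: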
<h3[^>]+?>([^$]+?)<\/h3>
此处示例:
你可以试试:
/**
 * Finds every `<a ...>...</a>` element in an HTML string and writes each
 * one to the document, followed by a `<br>`.
 *
 * The match is non-greedy, so each anchor is reported separately even when
 * several anchors appear in the same string. Nested anchors are not
 * supported (a match ends at the first `</a>` encountered).
 *
 * NOTE: the matched markup is written with `document.write` as raw HTML,
 * so only pass markup you trust.
 *
 * @param {string} str - HTML source to scan.
 * @returns {void}
 */
function getA(str) {
  // A fresh regex literal per call keeps the stateful `lastIndex` of the
  // `g` flag from leaking between invocations.
  var regex = /<a\s+[\s\S]+?<\/a>/g;
  // Declared locally: assigning to an undeclared name would create an
  // implicit global, and throws a ReferenceError in strict mode / modules.
  var found;
  while ((found = regex.exec(str)) !== null) {
    document.write(found[0] + '<br>');
  }
}
// Sample markup: two adjacent <h3> headings, each wrapping a single anchor.
// The pieces are joined with an empty separator because every piece already
// carries its own trailing newline.
var str = [
  '<h3 class="large lheight20 margintop10">\n',
  '<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">\n',
  '<span>get the content</span>\n',
  '</a>\n',
  '\n',
  '</h3><h3 class="large lheight20 margintop10">\n',
  '<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">\n',
  '<span>get the content</span>\n',
  '</a>\n',
  '\n',
  '</h3>'
].join('');

// Write every anchor found in the sample markup to the document.
getA(str);
我在 HTML 字符串中有以下代码。
<h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3><h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3>
我想提取以下标签:
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
我写了以下正则表达式:
<h3[^>]+?>(.*)<\/h3>
但是它返回了错误的结果:
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
</h3><h3 class="large lheight20 margintop10">
<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">
<span>get the content</span>
</a>
请帮我提取标签。
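原来的 `.*` 是贪婪匹配,会一直匹配到最后一个 `</h3>`,所以两个 <h3> 被合并成了一个结果。请改用非贪婪匹配(`[^$]+?` 匹配除 `$` 以外的任意字符,包括换行符),这样每个 <h3> 会被单独匹配: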
<h3[^>]+?>([^$]+?)<\/h3>
此处示例:
你可以试试:
/**
 * Finds every `<a ...>...</a>` element in an HTML string and writes each
 * one to the document, followed by a `<br>`.
 *
 * The match is non-greedy, so each anchor is reported separately even when
 * several anchors appear in the same string. Nested anchors are not
 * supported (a match ends at the first `</a>` encountered).
 *
 * NOTE: the matched markup is written with `document.write` as raw HTML,
 * so only pass markup you trust.
 *
 * @param {string} str - HTML source to scan.
 * @returns {void}
 */
function getA(str) {
  // A fresh regex literal per call keeps the stateful `lastIndex` of the
  // `g` flag from leaking between invocations.
  var regex = /<a\s+[\s\S]+?<\/a>/g;
  // Declared locally: assigning to an undeclared name would create an
  // implicit global, and throws a ReferenceError in strict mode / modules.
  var found;
  while ((found = regex.exec(str)) !== null) {
    document.write(found[0] + '<br>');
  }
}
// Sample markup: two adjacent <h3> headings, each wrapping a single anchor.
// The pieces are joined with an empty separator because every piece already
// carries its own trailing newline.
var str = [
  '<h3 class="large lheight20 margintop10">\n',
  '<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">\n',
  '<span>get the content</span>\n',
  '</a>\n',
  '\n',
  '</h3><h3 class="large lheight20 margintop10">\n',
  '<a href="https://google.com" class="marginright5 link linkWithHash detailsLink">\n',
  '<span>get the content</span>\n',
  '</a>\n',
  '\n',
  '</h3>'
].join('');

// Write every anchor found in the sample markup to the document.
getA(str);