使用正则表达式在 JavaScript 中选取几种不同类型标签之间的文本

Select text between several different kinds of tags in javascript by using regex

我正在尝试在 HTML、CSS 和 Javascript 中编写一些代码。 我在使用正则表达式时遇到了一些问题。

让我举一个简单的例子来解释我的问题,因为我找不到解决方案。

<script>
var str = "I am <b>a tennis player</b> but  I like also playing <i>football</i> and <i>rugby</i>, I am  <b>34</b> years old, I like <u>cooking</u> even if there is nothing in common with <i>tennis</i>, <i>football</i> or <i>rugby</i>.";

// Step 1: collect every complete <b>...</b> span; the lazy (.*?) stops at the
// first closing tag, so each bold element is matched separately.
var boldSpans = str.match(/<b>(.*?)<\/b>/g);

// Step 2: drop the opening and closing tag from each span, keeping the text.
var result = boldSpans.map(function (span) {
   return span.replace(/<\/?b>/g, '');
});

alert(result);

</script>

您可能已经猜到了,我想选取标签 <b></b>、<i></i>、<u></u> 之间的所有文本。更清楚地说,我希望能够选取 "a tennis player"、"football"、"rugby"、"34"、"cooking" 等。

目前,我只处理了一个标签。当我尝试几个时,我失败了。我没有正则表达式方面的经验(我没有在这个领域学习和工作),我在互联网上找到的课程也没有回答我的问题。我认为组合三个正则表达式并不困难,但我迷失了 clast、AND 或 OR 等。:/

查看下面的代码:

var str = "I am <b>a tennis player</b> but  I like also playing <i>football</i> and <i>rugby</i>, I am  <b>34</b> years old, I like <u>cooking</u> even if there is nothing in common with <i>tennis</i>, <i>football</i> or <i>rugby</i>.";

// Match any <b>, <i> or <u> element. The \1 backreference forces the closing
// tag to repeat the tag name captured by the first group, so <b> only pairs
// with </b>. The lazy (.*?) stops at the first such closing tag.
// match() returns null when nothing matches, hence the `|| []` fallback.
var result = (str.match(/<(b|i|u)>(.*?)<\/\1>/g) || []).map(function(val){
   // Strip the opening and closing tags, keeping only the text in between.
   return val.replace(/<\/?[biu]>/g,'');
});

alert(result);

您可以使用以下正则表达式来提取元素的 innerText。

/<([biu])>(.*?)<\/\1>/gi

解释:

  1. <([biu])>:匹配 < 后跟 b/i/u>。也可以写成 <(b|i|u)> 并将 tagName 放在第一个捕获的组中。
  2. (.*?): Non-greedy(非贪婪)匹配。在满足条件的前提下匹配尽可能少的字符。
  3. <\/\1>:匹配 </ 后跟第一个捕获组(参见上面的 #1)匹配到的标签名,再后跟 >。从而匹配对应的结束标记。
  4. gi: g: 匹配所有可能结果的全局标志。 i: Case-insensitive 匹配。

var str = "I am <b>a tennis player</b> but  I like also playing <i>football</i> and <i>rugby</i>, I am  <b>34</b> years old, I like <u>cooking</u> even if there is nothing in common with <i>tennis</i>, <i>football</i> or <i>rugby</i>.";

// Group 1 captures the tag name (b, i or u); \1 requires the closing tag to
// use that same name. Group 2 lazily captures the text between the tags.
var regex = /<([biu])>(.*?)<\/\1>/gi;
var result = [];
// Declared explicitly so the loop does not create an implicit global
// (which is a ReferenceError in strict mode and in ES modules).
var match;

// With the g flag, exec() resumes from regex.lastIndex on each call and
// returns null once no further match exists, which ends the loop.
while ((match = regex.exec(str)) !== null) {
    result.push(match[2]);
}

console.log(result);
// Show the extracted texts on the page as JSON indented by four spaces.
document.body.innerHTML = '<pre>' + JSON.stringify(result, null, 4) + '</pre>';


您也可以使用jQuery。

var str = "I am <b>a tennis player</b> but  I like also playing <i>football</i> and <i>rugby</i>, I am  <b>34</b> years old, I like <u>cooking</u> even if there is nothing in common with <i>tennis</i>, <i>football</i> or <i>rugby</i>.";

// Load the markup into a freshly created <div>, select its b, i and u
// descendants, and unwrap the jQuery collection into a plain array.
var taggedElements = $('<div/>').html(str).find('b, i, u').get();

// Copy each element's innerText into the result list, in collection order.
var result = [];
for (var k = 0; k < taggedElements.length; k += 1) {
    result.push(taggedElements[k].innerText);
}

console.log(result);
$('body').html('<pre>' + JSON.stringify(result, null, 4) + '</pre>');
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>

使用纯 JS 的 DOM 解析,可以轻松获取 u、b、i 标签中的所有文本:

/**
 * Collects the innerText of every element with the given tag name that is
 * found inside an HTML string.
 *
 * @param {string} str - HTML markup to search.
 * @param {string} tag - Tag name to look up, e.g. "b".
 * @returns {Array} The innerText of each matching element, in the order
 *   getElementsByTagName yields them.
 */
function getTagTexts(str, tag) {
  // Build a container that is never attached to the page and let the
  // browser parse the markup, wrapped in a placeholder <faketag> element.
  var container = document.createElement('html');
  container.innerHTML = '<faketag>' + str + '</faketag>';

  var matches = container.getElementsByTagName(tag);
  var texts = [];
  for (var idx = 0; idx < matches.length; idx += 1) {
    texts.push(matches[idx].innerText);
  }
  return texts;
}

var txt = "I am <b>a tennis player</b> but  I like also playing <i>football</i> and <i>rugby</i>, I am  <b>34</b> years old, I like <u>cooking</u> even if there is nothing in common with <i>tennis</i>, <i>football</i> or <i>rugby</i>.";

// Each lookup parses the text independently, so the call order is irrelevant.
var arrayB = getTagTexts(txt, "b");
var arrayU = getTagTexts(txt, "u");
var arrayI = getTagTexts(txt, "i");

// Append one JSON line per tag type to the page, in i, u, b order.
// Expected: ["football", "rugby", "tennis", "football", "rugby"]
document.body.innerHTML += JSON.stringify(arrayI, null, 4) + "<br/>";
// Expected: ["cooking"]
document.body.innerHTML += JSON.stringify(arrayU, null, 4) + "<br/>";
// Expected: ["a tennis player", "34"]
document.body.innerHTML += JSON.stringify(arrayB, null, 4);

请注意,如果您需要解析没有 html/body 标签的 HTML 片段,则 faketag 是必需的。