如何获取并替换字符串中位于两个字符串之间的文本?(ASP.NET Core)
How can I get and replace texts between two strings in a string? Asp Core
我有一个 HTML 字符串,需要修改它,为所有标题(header)标签添加链接。例如:
第一个HTML
<h1> Title 1 </h1>
<p>Lorem ipsum dolor...</p>
<h2> Title 2 </h2>
<h2> Title 2 Different </h2>
我想要的HTML
<div><a class="header_link" href="#my_slugged__link_by_title"> <h1> Title 1 </h1> </a> </div>
<p>Lorem ipsum dolor...</p>
<div><a class="header_link" href="#my_slugged__link_by_title_2"> <h2> Title 2 </h2> </a> </div>
<div><a class="header_link" href="#my_slugged__link_by_title_2_different"> <h2> Title 2 Different </h2> </a> </div>
**my_slugged__link_by_title** --> I would like to create hash permalinks by Titles. (h1, h2, ...)
例如,newDescription 就是我的 HTML 字符串。
// Sample input: two <h2> titles that should each receive a backlink.
newDescription = "<h2> TEST </h2> <h2> TEST 2</h2>";
下面的代码只对单个标题有效:
// Regex.Match returns only the first match; Groups[1] is the text captured between "<h2> " and "</h2>".
var oneTitle = Regex.Match(newDescription, @"<h2> (.+?)</h2>").Groups[1].Value
如何获取并替换所有的标题?
// Attempt to prepend a backlink to every <h2>.
// NOTE(review): Regex.Match yields a single (first) match, and .Groups enumerates that match's
// groups (group 0 = whole match, group 1 = captured title) rather than every <h2> in the string.
foreach (var item in Regex.Match(newDescription, @"<h2> (.+?)</h2>").Groups)
{
// Backlink markup; the current group's string form is concatenated into the href.
string header_link = "<div class=\"blog_header__backlink_item\"><a href=\"#" + item + "\"><i class=\"fas fa-link\"></i></a></div>";
// string.Replace substitutes every "<h2>" occurrence, so each iteration prefixes all headers at once.
newDescription = newDescription.Replace("<h2>", header_link + "<h2>");
}
您不应该尝试用字符串方法来解析或替换 HTML;即使使用正则表达式,一旦 HTML 变得复杂,这项任务也会过于复杂。请使用 HtmlAgilityPack(演示):
// Sample input as a verbatim string: one <h1>, one paragraph and two <h2> headings.
string html = @"<h1> Title 1 </h1>
<p>Lorem ipsum dolor...</p>
<h2> Title 2 </h2>
<h2> Title 2 Different </h2>";
// Rewrite the sample so that each heading is wrapped in a link.
string resultHtml = ReplaceHeaderHtml(html);
/// <summary>
/// Wraps every h1-h4 heading in a div containing an anchor with class "header_link"
/// whose href is "#" followed by a slug built from the heading's own text.
/// </summary>
/// <param name="html">HTML fragment to process.</param>
/// <returns>
/// The rewritten HTML, or <paramref name="html"/> unchanged when it is null/empty,
/// contains no headings, or every heading is already wrapped.
/// </returns>
private static string ReplaceHeaderHtml(string html)
{
    // Null or empty input has no headings to wrap.
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var xpath = "//*[self::h1 or self::h2 or self::h3 or self::h4]";
    HtmlNodeCollection headers = doc.DocumentNode.SelectNodes(xpath);
    if (headers == null || headers.Count == 0)
    {
        return html;
    }

    // Skip headings that are already linked: either nested inside an <a> (the layout this
    // method produces) or directly preceded by one (the layout an earlier version produced).
    var headerList = headers
        .Where(node =>
            !"a".Equals(node.ParentNode?.OriginalName, StringComparison.OrdinalIgnoreCase) &&
            !"a".Equals(node.PreviousSibling?.OriginalName, StringComparison.OrdinalIgnoreCase))
        .ToList();
    if (!headerList.Any())
    {
        return html;
    }

    foreach (HtmlNode header in headerList)
    {
        // Build the slug from the visible title: lower-case letters/digits,
        // with every run of other characters collapsed into a single underscore.
        string title = HtmlEntity.DeEntitize(header.InnerText) ?? string.Empty;
        char[] normalized = title
            .Select(c => char.IsLetterOrDigit(c) ? char.ToLowerInvariant(c) : ' ')
            .ToArray();
        string slug = string.Join(
            "_",
            new string(normalized).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
        if (slug.Length == 0)
        {
            // A heading without usable text still needs a non-empty fragment.
            slug = header.Name;
        }

        HtmlNode anchor = doc.CreateElement("a");
        anchor.Attributes.Add("class", "header_link");
        anchor.Attributes.Add("href", "#" + slug);

        HtmlNode div = doc.CreateElement("div");
        div.AppendChild(anchor);

        // Put the wrapper where the heading was, then move the heading inside the anchor
        // so the result is <div><a ...><hN>...</hN></a></div>.
        header.ParentNode.ReplaceChild(div, header);
        anchor.AppendChild(header);
    }

    return doc.DocumentNode.OuterHtml;
}
我有一个 HTML 字符串,需要修改它,为所有标题(header)标签添加链接。例如:
第一个HTML
<h1> Title 1 </h1>
<p>Lorem ipsum dolor...</p>
<h2> Title 2 </h2>
<h2> Title 2 Different </h2>
我想要的HTML
<div><a class="header_link" href="#my_slugged__link_by_title"> <h1> Title 1 </h1> </a> </div>
<p>Lorem ipsum dolor...</p>
<div><a class="header_link" href="#my_slugged__link_by_title_2"> <h2> Title 2 </h2> </a> </div>
<div><a class="header_link" href="#my_slugged__link_by_title_2_different"> <h2> Title 2 Different </h2> </a> </div>
**my_slugged__link_by_title** --> I would like to create hash permalinks by Titles. (h1, h2, ...)
例如,newDescription 就是我的 HTML 字符串。
// Sample input: two <h2> titles that should each receive a backlink.
newDescription = "<h2> TEST </h2> <h2> TEST 2</h2>";
下面的代码只对单个标题有效:
// Regex.Match returns only the first match; Groups[1] is the text captured between "<h2> " and "</h2>".
var oneTitle = Regex.Match(newDescription, @"<h2> (.+?)</h2>").Groups[1].Value
如何获取并替换所有的标题?
// Attempt to prepend a backlink to every <h2>.
// NOTE(review): Regex.Match yields a single (first) match, and .Groups enumerates that match's
// groups (group 0 = whole match, group 1 = captured title) rather than every <h2> in the string.
foreach (var item in Regex.Match(newDescription, @"<h2> (.+?)</h2>").Groups)
{
// Backlink markup; the current group's string form is concatenated into the href.
string header_link = "<div class=\"blog_header__backlink_item\"><a href=\"#" + item + "\"><i class=\"fas fa-link\"></i></a></div>";
// string.Replace substitutes every "<h2>" occurrence, so each iteration prefixes all headers at once.
newDescription = newDescription.Replace("<h2>", header_link + "<h2>");
}
您不应该尝试用字符串方法来解析或替换 HTML;即使使用正则表达式,一旦 HTML 变得复杂,这项任务也会过于复杂。请使用 HtmlAgilityPack(演示):
// Sample input as a verbatim string: one <h1>, one paragraph and two <h2> headings.
string html = @"<h1> Title 1 </h1>
<p>Lorem ipsum dolor...</p>
<h2> Title 2 </h2>
<h2> Title 2 Different </h2>";
// Rewrite the sample so that each heading is wrapped in a link.
string resultHtml = ReplaceHeaderHtml(html);
/// <summary>
/// Wraps every h1-h4 heading in a div containing an anchor with class "header_link"
/// whose href is "#" followed by a slug built from the heading's own text.
/// </summary>
/// <param name="html">HTML fragment to process.</param>
/// <returns>
/// The rewritten HTML, or <paramref name="html"/> unchanged when it is null/empty,
/// contains no headings, or every heading is already wrapped.
/// </returns>
private static string ReplaceHeaderHtml(string html)
{
    // Null or empty input has no headings to wrap.
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var xpath = "//*[self::h1 or self::h2 or self::h3 or self::h4]";
    HtmlNodeCollection headers = doc.DocumentNode.SelectNodes(xpath);
    if (headers == null || headers.Count == 0)
    {
        return html;
    }

    // Skip headings that are already linked: either nested inside an <a> (the layout this
    // method produces) or directly preceded by one (the layout an earlier version produced).
    var headerList = headers
        .Where(node =>
            !"a".Equals(node.ParentNode?.OriginalName, StringComparison.OrdinalIgnoreCase) &&
            !"a".Equals(node.PreviousSibling?.OriginalName, StringComparison.OrdinalIgnoreCase))
        .ToList();
    if (!headerList.Any())
    {
        return html;
    }

    foreach (HtmlNode header in headerList)
    {
        // Build the slug from the visible title: lower-case letters/digits,
        // with every run of other characters collapsed into a single underscore.
        string title = HtmlEntity.DeEntitize(header.InnerText) ?? string.Empty;
        char[] normalized = title
            .Select(c => char.IsLetterOrDigit(c) ? char.ToLowerInvariant(c) : ' ')
            .ToArray();
        string slug = string.Join(
            "_",
            new string(normalized).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
        if (slug.Length == 0)
        {
            // A heading without usable text still needs a non-empty fragment.
            slug = header.Name;
        }

        HtmlNode anchor = doc.CreateElement("a");
        anchor.Attributes.Add("class", "header_link");
        anchor.Attributes.Add("href", "#" + slug);

        HtmlNode div = doc.CreateElement("div");
        div.AppendChild(anchor);

        // Put the wrapper where the heading was, then move the heading inside the anchor
        // so the result is <div><a ...><hN>...</hN></a></div>.
        header.ParentNode.ReplaceChild(div, header);
        anchor.AppendChild(header);
    }

    return doc.DocumentNode.OuterHtml;
}