使用 C# HtmlAgilityPack 生成保留 html 标签的较短文本预览
Using C# htmlagilitypack for a shorter preview version of text with html
我想用 htmlagilitypack 完成的事情是制作内容的较短版本,同时保持其 html。用于预览用途。
例如,假设我有 "1234567890<div></div>1234567890"
,但长度设置为最大 11。这应该给出 1234567890<div></div>1
的结果。
我目前不确定该如何实现,因为 html 标签必须保持有效并原样保留,同时在计算带有 html 内容的文本的最大长度时应当忽略这些标签。正则表达式 (RegEx) 不是可行的解决方案。
有什么想法吗?
您可以使用下面这个方法,它会返回一个新的 HtmlAgilityPack.HtmlDocument
:
/// <summary>
/// Builds a shortened copy of an HTML document for preview purposes.
/// The markup is kept; characters are removed from the text nodes of the copy,
/// starting with the last text node, until the surplus over
/// <paramref name="maxTextLength"/> (measured on the copy's InnerText) is gone.
/// </summary>
/// <param name="orginal">Document to shorten. The trimming is applied to a copy made with CopyFrom.</param>
/// <param name="maxTextLength">Maximum number of text characters (markup excluded) to keep.</param>
/// <returns>A new document containing the trimmed copy.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="orginal"/> is null.</exception>
public static HtmlAgilityPack.HtmlDocument GetPreview(HtmlAgilityPack.HtmlDocument orginal, int maxTextLength)
{
    if (orginal == null)
    {
        throw new System.ArgumentNullException("orginal");
    }

    var docPreview = new HtmlAgilityPack.HtmlDocument();
    // NOTE(review): the original author reports that passing false here performs a deep copy
    // (documentation bug in HtmlAgilityPack) - confirm against the library version in use.
    docPreview.DocumentNode.CopyFrom(orginal.DocumentNode, false);

    // Number of text characters that still have to be removed.
    int remainingDelete = docPreview.DocumentNode.InnerText.Length - maxTextLength;
    if (remainingDelete <= 0)
    {
        return docPreview; // already short enough
    }

    // Select every text node, including whitespace-only ones: they are counted by InnerText,
    // so they must also be eligible for trimming. Otherwise visible text gets removed
    // in their place.
    HtmlNodeCollection allTextNodes = docPreview.DocumentNode.SelectNodes("//text()");
    if (allTextNodes == null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        return docPreview;
    }

    // Walk the text nodes backwards so the beginning of the document is preserved.
    for (int i = allTextNodes.Count - 1; i >= 0 && remainingDelete > 0; i--)
    {
        HtmlTextNode textNode = allTextNodes[i] as HtmlTextNode;
        if (textNode == null)
        {
            continue;
        }

        string text = textNode.Text;
        // Remove the whole node's text, or only the surplus if that is smaller.
        int removeLetterCount = remainingDelete < text.Length ? remainingDelete : text.Length;
        textNode.Text = text.Substring(0, text.Length - removeLetterCount);
        remainingDelete -= removeLetterCount;
    }

    return docPreview;
}
用您的示例来演示:
// Reproduce the example from the question: ten characters, an empty <div>, ten more characters.
var sourceDocument = new HtmlAgilityPack.HtmlDocument();
sourceDocument.LoadHtml("1234567890<div></div>1234567890");

// Keep at most 11 text characters; the markup itself is retained.
HtmlAgilityPack.HtmlDocument shortened = GetPreview(sourceDocument, 11);
string previewHtml = shortened.DocumentNode.InnerHtml;
Console.WriteLine(previewHtml); // 1234567890<div></div>1
我想用 htmlagilitypack 完成的事情是制作内容的较短版本,同时保持其 html。用于预览用途。
例如,假设我有 "1234567890<div></div>1234567890"
,但长度设置为最大 11。这应该给出 1234567890<div></div>1
的结果。
我目前不确定该如何实现,因为 html 标签必须保持有效并原样保留,同时在计算带有 html 内容的文本的最大长度时应当忽略这些标签。正则表达式 (RegEx) 不是可行的解决方案。有什么想法吗?
您可以使用下面这个方法,它会返回一个新的 HtmlAgilityPack.HtmlDocument
:
/// <summary>
/// Builds a shortened copy of an HTML document for preview purposes.
/// The markup is kept; characters are removed from the text nodes of the copy,
/// starting with the last text node, until the surplus over
/// <paramref name="maxTextLength"/> (measured on the copy's InnerText) is gone.
/// </summary>
/// <param name="orginal">Document to shorten. The trimming is applied to a copy made with CopyFrom.</param>
/// <param name="maxTextLength">Maximum number of text characters (markup excluded) to keep.</param>
/// <returns>A new document containing the trimmed copy.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="orginal"/> is null.</exception>
public static HtmlAgilityPack.HtmlDocument GetPreview(HtmlAgilityPack.HtmlDocument orginal, int maxTextLength)
{
    if (orginal == null)
    {
        throw new System.ArgumentNullException("orginal");
    }

    var docPreview = new HtmlAgilityPack.HtmlDocument();
    // NOTE(review): the original author reports that passing false here performs a deep copy
    // (documentation bug in HtmlAgilityPack) - confirm against the library version in use.
    docPreview.DocumentNode.CopyFrom(orginal.DocumentNode, false);

    // Number of text characters that still have to be removed.
    int remainingDelete = docPreview.DocumentNode.InnerText.Length - maxTextLength;
    if (remainingDelete <= 0)
    {
        return docPreview; // already short enough
    }

    // Select every text node, including whitespace-only ones: they are counted by InnerText,
    // so they must also be eligible for trimming. Otherwise visible text gets removed
    // in their place.
    HtmlNodeCollection allTextNodes = docPreview.DocumentNode.SelectNodes("//text()");
    if (allTextNodes == null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        return docPreview;
    }

    // Walk the text nodes backwards so the beginning of the document is preserved.
    for (int i = allTextNodes.Count - 1; i >= 0 && remainingDelete > 0; i--)
    {
        HtmlTextNode textNode = allTextNodes[i] as HtmlTextNode;
        if (textNode == null)
        {
            continue;
        }

        string text = textNode.Text;
        // Remove the whole node's text, or only the surplus if that is smaller.
        int removeLetterCount = remainingDelete < text.Length ? remainingDelete : text.Length;
        textNode.Text = text.Substring(0, text.Length - removeLetterCount);
        remainingDelete -= removeLetterCount;
    }

    return docPreview;
}
用您的示例来演示:
// Reproduce the example from the question: ten characters, an empty <div>, ten more characters.
var sourceDocument = new HtmlAgilityPack.HtmlDocument();
sourceDocument.LoadHtml("1234567890<div></div>1234567890");

// Keep at most 11 text characters; the markup itself is retained.
HtmlAgilityPack.HtmlDocument shortened = GetPreview(sourceDocument, 11);
string previewHtml = shortened.DocumentNode.InnerHtml;
Console.WriteLine(previewHtml); // 1234567890<div></div>1