如何使用 C# 将 html 标记替换为另一个字符串?
How to replace an HTML tag with another string using C#?
我有一段 C# 代码,可以读取 html 文件并将其内容以字符串(string/text)的形式返回。
我需要做的一件事是解析 html 字符串,查找所有 <embed> 标签,获取其 "src" 属性的值,然后用 src 属性所指向文件的内容替换整个 <embed> 标签。
我正在尝试使用 HtmlAgilityPack 来解析 html 代码。
我唯一不会做的是如何用另一个字符串替换 <embed> 标签,最后把不含 <embed> 标签的新字符串返回给用户。
这是我目前所写的代码:
/// <summary>
/// Parses <paramref name="content"/> as HTML, visits every &lt;embed&gt; element and,
/// for each one whose "src" attribute ends with ".html", loads the referenced file
/// through GetContent. The loaded text is not inserted into the document yet,
/// so the input is handed back unchanged.
/// </summary>
/// <param name="content">The HTML markup to scan; may be null.</param>
/// <returns>The original <paramref name="content"/>, or null when it is null.</returns>
protected string ParseContent(string content)
{
    // Nothing to parse
    if (content == null)
    {
        return null;
    }

    // Build the parser and feed it the markup
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    // Every <embed> element in the document (an empty sequence simply skips the loop)
    IEnumerable<HtmlNode> embedNodes = document.DocumentNode.Descendants("embed");

    foreach (HtmlNode embedNode in embedNodes)
    {
        // Ignore <embed> elements that carry no source
        if (!embedNode.Attributes.Contains("src"))
        {
            continue;
        }

        string source = embedNode.Attributes["src"].Value;

        // Only embedded ".html" files are of interest
        if (source.EndsWith(".html"))
        {
            var newContent = GetContent(source);
            //Here I need to be able to replace the entire embedNode with the newContent
        }
    }

    return content;
}
/// <summary>
/// Reads the entire text of the file located at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>The file's text, or null when no file exists at that path.</returns>
protected string GetContent(string path)
{
    // A missing file is reported as null rather than by reading it
    return System.IO.File.Exists(path)
        ? System.IO.File.ReadAllText(path)
        : null;
}
如何用字符串替换 <embed> 标签?
我想你可以尝试获取当前 <embed> 节点的父节点,
然后把父节点下的这个 <embed> 子节点替换为新节点:
// Load the text that will take the place of the <embed> element
var newContent = GetContent(embedNode.Attributes["src"].Value);
// The swap has to be performed by the node that owns the <embed> element
var ParentNodeT = embedNode.ParentNode;
// Wrap the loaded text in a <p> element and build a node from that markup
var newNodeTtext = "<p>" + newContent + "</p>";
var newNodeT = HtmlNode.CreateNode(newNodeTtext);
// ReplaceChild(newChild, oldChild): put the new node where the <embed> element was
ParentNodeT.ReplaceChild(newNodeT, embedNode);
我解决了。
感谢 @COlD TOLD,他建议我把可枚举对象(IEnumerable)转换为列表(List),这样在遍历时修改文档节点就不会影响枚举。
这是我最终的代码:
/// <summary>
/// Replaces every &lt;embed&gt; element whose "src" attribute ends with ".html" by a
/// &lt;div&gt; that holds the HTML-encoded text of the referenced file.
/// </summary>
/// <param name="content">The HTML markup to process; may be null.</param>
/// <returns>
/// null when <paramref name="content"/> is null; the untouched
/// <paramref name="content"/> when it contains no &lt;embed&gt; element; otherwise the
/// serialized document after the replacements.
/// </returns>
protected string ParseContent(string content)
{
    if (content == null)
    {
        return null;
    }

    // Create the parser and load the markup
    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    // Snapshot the matches into a list: the loop below changes the node tree,
    // which must not happen while Descendants() is still being enumerated.
    List<HtmlNode> embedNodes = document.DocumentNode.Descendants("embed").ToList();

    // Without any <embed> element the input is returned exactly as received
    if (embedNodes.Count == 0)
    {
        return content;
    }

    foreach (HtmlNode embedNode in embedNodes)
    {
        // Make sure there is a source
        if (!embedNode.Attributes.Contains("src"))
        {
            continue;
        }

        string source = embedNode.Attributes["src"].Value;

        // Only ".html" sources are embedded; ordinal comparison keeps the check culture-independent
        if (!source.EndsWith(".html", System.StringComparison.Ordinal))
        {
            continue;
        }

        // Full path of the file to embed
        string embedPath = customBase + source;

        // null means the file does not exist; the <embed> element is then left in place
        string newContent = GetContent(embedPath);
        if (newContent == null)
        {
            continue;
        }

        // Placeholder <div> that receives the file text
        HtmlNode newNode = document.CreateElement("div");
        // NOTE(review): the text is HTML-encoded, so markup inside the file ends up as
        // literal text rather than as elements - confirm this is intended.
        newNode.InnerHtml = HtmlAgilityPack.HtmlDocument.HtmlEncode(newContent);

        // Swap through the element's own parent so that <embed> elements nested at any
        // depth are handled, not only direct children of the document node.
        embedNode.ParentNode.ReplaceChild(newNode, embedNode);
    }

    return document.DocumentNode.OuterHtml;
}
我有一段 C# 代码,可以读取 html 文件并将其内容以字符串(string/text)的形式返回。
我需要做的一件事是解析 html 字符串,查找所有 <embed> 标签,获取其 "src" 属性的值,然后用 src 属性所指向文件的内容替换整个 <embed> 标签。
我正在尝试使用 HtmlAgilityPack 来解析 html 代码。
我唯一不会做的是如何用另一个字符串替换 <embed> 标签,最后把不含 <embed> 标签的新字符串返回给用户。
这是我目前所写的代码:
/// <summary>
/// Parses <paramref name="content"/> as HTML, visits every &lt;embed&gt; element and,
/// for each one whose "src" attribute ends with ".html", loads the referenced file
/// through GetContent. The loaded text is not inserted into the document yet,
/// so the input is handed back unchanged.
/// </summary>
/// <param name="content">The HTML markup to scan; may be null.</param>
/// <returns>The original <paramref name="content"/>, or null when it is null.</returns>
protected string ParseContent(string content)
{
    // Nothing to parse
    if (content == null)
    {
        return null;
    }

    // Build the parser and feed it the markup
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    // Every <embed> element in the document (an empty sequence simply skips the loop)
    IEnumerable<HtmlNode> embedNodes = document.DocumentNode.Descendants("embed");

    foreach (HtmlNode embedNode in embedNodes)
    {
        // Ignore <embed> elements that carry no source
        if (!embedNode.Attributes.Contains("src"))
        {
            continue;
        }

        string source = embedNode.Attributes["src"].Value;

        // Only embedded ".html" files are of interest
        if (source.EndsWith(".html"))
        {
            var newContent = GetContent(source);
            //Here I need to be able to replace the entire embedNode with the newContent
        }
    }

    return content;
}
/// <summary>
/// Reads the entire text of the file located at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>The file's text, or null when no file exists at that path.</returns>
protected string GetContent(string path)
{
    // A missing file is reported as null rather than by reading it
    return System.IO.File.Exists(path)
        ? System.IO.File.ReadAllText(path)
        : null;
}
如何用字符串替换 <embed> 标签?
我想你可以尝试获取当前 <embed> 节点的父节点,
然后把父节点下的这个 <embed> 子节点替换为新节点:
// Load the text that will take the place of the <embed> element
var newContent = GetContent(embedNode.Attributes["src"].Value);
// The swap has to be performed by the node that owns the <embed> element
var ParentNodeT = embedNode.ParentNode;
// Wrap the loaded text in a <p> element and build a node from that markup
var newNodeTtext = "<p>" + newContent + "</p>";
var newNodeT = HtmlNode.CreateNode(newNodeTtext);
// ReplaceChild(newChild, oldChild): put the new node where the <embed> element was
ParentNodeT.ReplaceChild(newNodeT, embedNode);
我解决了。
感谢 @COlD TOLD,他建议我把可枚举对象(IEnumerable)转换为列表(List),这样在遍历时修改文档节点就不会影响枚举。
这是我最终的代码:
/// <summary>
/// Replaces every &lt;embed&gt; element whose "src" attribute ends with ".html" by a
/// &lt;div&gt; that holds the HTML-encoded text of the referenced file.
/// </summary>
/// <param name="content">The HTML markup to process; may be null.</param>
/// <returns>
/// null when <paramref name="content"/> is null; the untouched
/// <paramref name="content"/> when it contains no &lt;embed&gt; element; otherwise the
/// serialized document after the replacements.
/// </returns>
protected string ParseContent(string content)
{
    if (content == null)
    {
        return null;
    }

    // Create the parser and load the markup
    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    // Snapshot the matches into a list: the loop below changes the node tree,
    // which must not happen while Descendants() is still being enumerated.
    List<HtmlNode> embedNodes = document.DocumentNode.Descendants("embed").ToList();

    // Without any <embed> element the input is returned exactly as received
    if (embedNodes.Count == 0)
    {
        return content;
    }

    foreach (HtmlNode embedNode in embedNodes)
    {
        // Make sure there is a source
        if (!embedNode.Attributes.Contains("src"))
        {
            continue;
        }

        string source = embedNode.Attributes["src"].Value;

        // Only ".html" sources are embedded; ordinal comparison keeps the check culture-independent
        if (!source.EndsWith(".html", System.StringComparison.Ordinal))
        {
            continue;
        }

        // Full path of the file to embed
        string embedPath = customBase + source;

        // null means the file does not exist; the <embed> element is then left in place
        string newContent = GetContent(embedPath);
        if (newContent == null)
        {
            continue;
        }

        // Placeholder <div> that receives the file text
        HtmlNode newNode = document.CreateElement("div");
        // NOTE(review): the text is HTML-encoded, so markup inside the file ends up as
        // literal text rather than as elements - confirm this is intended.
        newNode.InnerHtml = HtmlAgilityPack.HtmlDocument.HtmlEncode(newContent);

        // Swap through the element's own parent so that <embed> elements nested at any
        // depth are handled, not only direct children of the document node.
        embedNode.ParentNode.ReplaceChild(newNode, embedNode);
    }

    return document.DocumentNode.OuterHtml;
}