c# 使用 ExCSS 解析 css
c# parse css using ExCSS
我刚才搜索了如何解析 css 并且我找到了 ExCSS https://github.com/TylerBrinks/ExCSS
我有一个 HTML 文件,需要获取其中 .csFC2BB1D1 这个类的字体样式(font-style),
即 italic。
为了读取 HTML,我使用 Html Agility Pack;解析 CSS 则使用 ExCSS。
这是我的代码
// Load the HTML document found at htmlLocation (Html Agility Pack).
HtmlDocument doc = new HtmlDocument();
doc.Load(htmlLocation);
// ExCSS parser used to turn CSS text into a stylesheet object.
var parser = new Parser();
// First <style> element in the document; FirstOrDefault yields null when there is none,
// in which case the next line would throw.
var data = doc.DocumentNode.Descendants("style").FirstOrDefault();
// NOTE(review): OuterHtml is the element's markup including the <style ...> tag itself,
// so the parser is given more than the CSS text alone.
var stylesheet = parser.Parse(data.OuterHtml);
// Print the selector of every parsed style rule, one per line.
foreach (var item in stylesheet.StyleRules
.Select(r => r.Selector)) {
Console.WriteLine(item);
}
循环语句的输出是
style type>.csC67CFA75
.cs3B0A1ABE
.cs6B2A4BAA
.cs7FB5C607
.csB0E2188C
.cs619CFE26
.cs80D9435B
.csE163F6C2
.cs5B41FA1C
.csC4CFBF3A
.csFC2BB1D1
.csC8468922
.cs21FA5D81
.cs95A8AE3D
.csCC736C83
.cs116BBDE0
.cs137E84BF
.cs6E4FDAEF
.cs92C3DA2B
.cs794D75A2
.csE6E4F8C4
为什么输出中除了选择器(selector)之外,还包含文本 style type>?
这是我的 html 文档
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>
</title>
<style type="text/css">
.csC67CFA75{text-align:center;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.cs3B0A1ABE{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.cs6B2A4BAA{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:6.5pt;font-weight:normal;font-style:normal;}
.cs7FB5C607{text-align:justify;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.csB0E2188C{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:bold;font-style:normal;}
.cs619CFE26{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:italic;}
.cs80D9435B{text-align:justify;text-indent:0pt;margin:0pt 0pt 0pt 0pt}
.csE163F6C2{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:bold;font-style:italic;}
.cs5B41FA1C{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:normal;}
.csC4CFBF3A{color:#000000;background-color:transparent;font-family:Courier New;font-size:9pt;font-weight:normal;font-style:normal;}
.csFC2BB1D1{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:20pt;font-weight:normal;font-style:italic;}
.csC8468922{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:11pt;font-weight:normal;font-style:italic;}
.cs21FA5D81{text-align:right;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.cs95A8AE3D{color:#FF0000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.csCC736C83{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:italic;}
.cs116BBDE0{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:7pt;font-weight:normal;font-style:normal;}
.cs137E84BF{text-align:justify;text-indent:0pt;margin:0pt 36pt 0pt 36pt}
.cs6E4FDAEF{color:#000000;background-color:#FFFF00;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.cs92C3DA2B{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;text-decoration: underline;}
.cs794D75A2{color:#FFFF00;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:normal;}
.csE6E4F8C4{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:8pt;font-weight:normal;font-style:normal;}
</style>
</head>
<body>
</body>
</html>
因为您传给解析器的不只是 CSS,还包含外层的 style 标签本身。
应改用 InnerHtml 而不是 OuterHtml,这样只会传入 style 标签内部的内容。
然后你可以通过如下方式获取目标字体样式:
// Narrow the stylesheet's rules down to those whose selector is `.csFC2BB1D1`.
var matchingRules = stylesheet.StyleRules
    .Where(rule => rule.Selector.ToString() == ".csFC2BB1D1");
// Read the value of the `font-style` declaration from the first matching rule.
// Both First() calls throw when nothing matches.
var fontStyle = matchingRules
    .Select(rule => rule.Declarations.First(decl => decl.Name == "font-style").Term.ToString())
    .First();
虽然在这种情况下我更喜欢查询语法:
// Query-syntax form: for the rule(s) selected by `.csFC2BB1D1`, walk their
// declarations, keep only `font-style`, and take the first value
// (FirstOrDefault gives null when nothing matches).
var fontStyle = (from styleRule in stylesheet.StyleRules
                 where styleRule.Selector.ToString() == ".csFC2BB1D1"
                 from decl in styleRule.Declarations
                 where decl.Name == "font-style"
                 select decl.Term.ToString())
                .FirstOrDefault();
我刚才搜索了如何解析 css 并且我找到了 ExCSS https://github.com/TylerBrinks/ExCSS
我有一个 HTML 文件,需要获取其中 .csFC2BB1D1 这个类的字体样式(font-style),
即 italic。
为了读取 HTML,我使用 Html Agility Pack;解析 CSS 则使用 ExCSS。
这是我的代码
// Load the HTML document found at htmlLocation (Html Agility Pack).
HtmlDocument doc = new HtmlDocument();
doc.Load(htmlLocation);
// ExCSS parser used to turn CSS text into a stylesheet object.
var parser = new Parser();
// First <style> element in the document; FirstOrDefault yields null when there is none,
// in which case the next line would throw.
var data = doc.DocumentNode.Descendants("style").FirstOrDefault();
// NOTE(review): OuterHtml is the element's markup including the <style ...> tag itself,
// so the parser is given more than the CSS text alone.
var stylesheet = parser.Parse(data.OuterHtml);
// Print the selector of every parsed style rule, one per line.
foreach (var item in stylesheet.StyleRules
.Select(r => r.Selector)) {
Console.WriteLine(item);
}
循环语句的输出是
style type>.csC67CFA75
.cs3B0A1ABE
.cs6B2A4BAA
.cs7FB5C607
.csB0E2188C
.cs619CFE26
.cs80D9435B
.csE163F6C2
.cs5B41FA1C
.csC4CFBF3A
.csFC2BB1D1
.csC8468922
.cs21FA5D81
.cs95A8AE3D
.csCC736C83
.cs116BBDE0
.cs137E84BF
.cs6E4FDAEF
.cs92C3DA2B
.cs794D75A2
.csE6E4F8C4
为什么输出中除了选择器(selector)之外,还包含文本 style type>?
这是我的 html 文档
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>
</title>
<style type="text/css">
.csC67CFA75{text-align:center;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.cs3B0A1ABE{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.cs6B2A4BAA{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:6.5pt;font-weight:normal;font-style:normal;}
.cs7FB5C607{text-align:justify;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.csB0E2188C{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:bold;font-style:normal;}
.cs619CFE26{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:italic;}
.cs80D9435B{text-align:justify;text-indent:0pt;margin:0pt 0pt 0pt 0pt}
.csE163F6C2{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:bold;font-style:italic;}
.cs5B41FA1C{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:normal;}
.csC4CFBF3A{color:#000000;background-color:transparent;font-family:Courier New;font-size:9pt;font-weight:normal;font-style:normal;}
.csFC2BB1D1{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:20pt;font-weight:normal;font-style:italic;}
.csC8468922{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:11pt;font-weight:normal;font-style:italic;}
.cs21FA5D81{text-align:right;text-indent:36pt;margin:0pt 0pt 0pt 0pt}
.cs95A8AE3D{color:#FF0000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.csCC736C83{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:italic;}
.cs116BBDE0{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:7pt;font-weight:normal;font-style:normal;}
.cs137E84BF{text-align:justify;text-indent:0pt;margin:0pt 36pt 0pt 36pt}
.cs6E4FDAEF{color:#000000;background-color:#FFFF00;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;}
.cs92C3DA2B{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:14pt;font-weight:normal;font-style:normal;text-decoration: underline;}
.cs794D75A2{color:#FFFF00;background-color:transparent;font-family:Times New Roman;font-size:9pt;font-weight:normal;font-style:normal;}
.csE6E4F8C4{color:#000000;background-color:transparent;font-family:Times New Roman;font-size:8pt;font-weight:normal;font-style:normal;}
</style>
</head>
<body>
</body>
</html>
因为您传给解析器的不只是 CSS,还包含外层的 style 标签本身。
应改用 InnerHtml 而不是 OuterHtml,这样只会传入 style 标签内部的内容。
然后你可以通过如下方式获取目标字体样式:
// Narrow the stylesheet's rules down to those whose selector is `.csFC2BB1D1`.
var matchingRules = stylesheet.StyleRules
    .Where(rule => rule.Selector.ToString() == ".csFC2BB1D1");
// Read the value of the `font-style` declaration from the first matching rule.
// Both First() calls throw when nothing matches.
var fontStyle = matchingRules
    .Select(rule => rule.Declarations.First(decl => decl.Name == "font-style").Term.ToString())
    .First();
虽然在这种情况下我更喜欢查询语法:
// Query-syntax form: for the rule(s) selected by `.csFC2BB1D1`, walk their
// declarations, keep only `font-style`, and take the first value
// (FirstOrDefault gives null when nothing matches).
var fontStyle = (from styleRule in stylesheet.StyleRules
                 where styleRule.Selector.ToString() == ".csFC2BB1D1"
                 from decl in styleRule.Declarations
                 where decl.Name == "font-style"
                 select decl.Term.ToString())
                .FirstOrDefault();