如何以编程方式通过 KB 编号获取 Microsoft 知识库文章的标题?
How can I programmatically get the title of Microsoft knowledge base article by KB number?
我正在尝试开发一个 C# 程序,用来获取可用 Windows 更新的列表,并查找对应的知识库文章以检索每个更新的标题。(否则,它们看起来都只是含义不明的"Update for Windows Server (KBxxxxx)"。)
我尝试检索每篇知识库文章的 HTML,但 HTML 中没有标题(我猜他们正在使用 angular 构建页面)
这是一个例子:https://support.microsoft.com/en-us/kb/3102429
当我查看页面源代码时,浏览器中显示的文章标题并没有出现在 HTML 中的任何地方。
有什么好的方法吗?
如果您可以通过某种方式从 Windows 更新中获取 KB 编号,那么应该可以通过以下 URL 访问该文章:
https://support.microsoft.com/en-us/kb/YOUR_KB_NUMBER
而 id="mt5" 的元素似乎就是标题。
编辑:
我的错,id 确实会变化;class="section kb-article spacer-84-top" 的 <section> 的第一个子元素才是标题,但这也可能会改变......(就这样吧 :)
正如 canon 在 Aybe 回答下的评论中所指出的那样,KB 页面是在页面加载之后通过脚本加载内容的,因此无法通过编程方式轻松获取这些内容。
但是您可以直接使用 API link
https://support.microsoft.com/app/content/api/content/help/en-us/4034733
对于 2017 年 8 月之后发布的修补程序,新的 API link 似乎是 https://support.microsoft.com/app/content/api/content/help/en-us/4034733。
对于 2017 年 2 月之后发布的修补程序,新的 API link 似乎是 https://support.microsoft.com/api/content/help/3115489。
例如,如果您使用 Python 加载 JSON 数据,那么您可以在 "details" 下找到标题和其他有用信息。特别是,
d["details"]["id"] == u'3115489'
d["details"]["title"] == u'February 7, 2017, update for Office 2013 (KB3115489)'
d["details"]["publishedOn"] == u'2017-02-07T17:05:19.000368Z'
仅供参考:在开启开发者工具的 Chrome 中加载 URL https://support.microsoft.com/kb/3115489 时,网络活动中会显示一个来自 api/content/help 的 XHR 传输:
我发现他们现在会在初始载荷中放入一段预取脚本,其中包含一些有用的 JSON。(实际上,这就是 b.mcewan 在当前被采纳的回答中提到的那段 JSON。)
因为我已准备好使用这些内容....这里是一些代码的 link,这些代码将收集您机器上已安装的修补程序并提供一些详细信息,包括 KB 标题。
代码可以在 LINQPad 中运行:
http://share.linqpad.net/l6tdxc.linq
如果你不使用 LINQPad,下面就是相关例程。ParseTitle 使用一些自动生成的类来反序列化 JSON。您需要删除 .Dump() 扩展方法调用和对 Hyperlinq 类的引用,并以其他方式呈现数据。(编辑:ArticleInfo 类公开的不仅仅是知识库文章标题,还包括有关修补程序功能的详细信息、如何获取和安装它等等。)
/// <summary>
/// LINQPad entry point: lists the hotfixes reported by WMI (Win32_QuickFixEngineering),
/// downloads the support page of each one and dumps a table with the KB title,
/// publish date, install delay and an on-demand view of the article body.
/// </summary>
/// <remarks>
/// NOTE(review): this calls <c>ParseInfo(url, html)</c>, which is not defined in the
/// visible source (only <c>ParseTitle</c> is shown) — confirm the helper exists and
/// returns an <c>ArticleInfo</c>.
/// </remarks>
void Main()
{
    const string query = "SELECT HotFixID, InstalledOn, InstalledBy, Description, Caption, * FROM Win32_QuickFixEngineering";

    // Captions are assumed to look like "http://support.microsoft.com/?kbid=NNNNNNN";
    // everything after this marker is treated as the KB number.
    const string kbIdMarker = "?kbid=";

    // One client for the whole run (instead of one per row), disposed once rendering is done.
    using (var web = new WebClient())
    {
        // A failed download for a single KB must not abort the whole report;
        // a null result is filtered out by the query below.
        string TryDownload(string address)
        {
            try
            {
                return web.DownloadString(address);
            }
            catch (WebException)
            {
                return null;
            }
        }

        var result =
            (from ManagementObject quickfix in new ManagementObjectSearcher(query).Get() //.AsParallel()
             let installedOn = Convert.ToDateTime(quickfix["InstalledOn"])
             orderby installedOn descending
             let input = quickfix["Caption"]?.ToString() ?? string.Empty
             let markerAt = input.IndexOf(kbIdMarker, StringComparison.Ordinal)
             // Rows without a recognisable KB link cannot be looked up; skip them instead of throwing.
             where markerAt >= 0
             let id = input.Substring(markerAt + kbIdMarker.Length)
             let url = $"{input.Replace("microsoft.com/?kbid=", "microsoft.com/en-us/help/")}/kb{id}"
             let html = TryDownload(url)
             where !string.IsNullOrEmpty(html)
             let kbInfo = ParseInfo(url, html)
             where kbInfo != null
             let pub = kbInfo.Details.PublishedOn
             let title = kbInfo.Details.Title
             // The article body is only rendered when the user expands the "More...." link.
             let desc = Util.OnDemand("More....", () =>
                 Util.RawHtml(string.Join(Environment.NewLine,
                     kbInfo.Details.Body
                         .Select(i => $"<span class=typeglyphx>{i.Title}</span>{i.Content.Single()}"))))
             select
                 new
                 {
                     HotFixID = Util.RawHtml($"<span class=typeglyphx>{quickfix["HotFixID"]?.ToString()}</span>"),
                     Published = pub.Date,
                     InstalledOn = quickfix["InstalledOn"]?.ToString(),
                     // TimeSpan.Days is an int, so very large spans no longer overflow a 16-bit conversion.
                     InstallDelay = $"{(installedOn.Date - pub.Date).Days} days",
                     InstalledBy = quickfix["InstalledBy"]?.ToString(),
                     Description = new Hyperlinq(quickfix["Description"]?.ToString()),
                     // Decide on the title itself: the wrapped HTML object is never null,
                     // so the previous "??" fallback could not trigger.
                     Title = string.IsNullOrEmpty(title)
                         ? (object)$"{url} [Could not obtain KB title]"
                         : Util.RawHtml($"<span class=typeglyphx>{title}</span>"),
                     Body = desc,
                     Link = new Hyperlinq(url),
                 }
            )
            // Materialise while the WebClient is still alive; the query is otherwise lazy.
            .ToList()
            .Dump(1);
    }
}
#nullable enable
/// <summary>
/// Extracts the KB article title from the HTML of a support article page by locating the
/// script element that assigns <c>microsoft.support.prefetchedArticle</c>, cutting the
/// JSON payload out of it and deserializing that payload into an <c>ArticleInfo</c>.
/// </summary>
/// <param name="html">Raw HTML of the article page.</param>
/// <returns>
/// The article title, or <c>null</c> when the prefetch script is missing or ambiguous,
/// the payload cannot be located, or the JSON cannot be deserialized.
/// </returns>
string? ParseTitle ( string html )
{
    const string searchToken = "microsoft.support.prefetchedArticle = (function() ";

    // Number of characters that follow the JSON payload inside the script element.
    // NOTE(review): value carried over unchanged; it is tied to the page markup — confirm.
    const int trailerLength = 28;

    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var scripts = doc.DocumentNode.SelectNodes("//script");
    if (scripts == null)
    {
        return null;
    }

    // Exactly one script is expected to carry the prefetched article.
    var candidates = scripts
        .Select(i => i.OuterHtml)
        .Where(text => text.Contains(searchToken))
        .Take(2)
        .ToList();
    if (candidates.Count != 1)
    {
        return null;
    }

    var nuggets = candidates[0];

    // The payload starts right after the first ':' that follows the token.
    var tokenAt = nuggets.IndexOf(searchToken, StringComparison.Ordinal);
    var colonAt = nuggets.IndexOf(':', tokenAt + searchToken.Length);
    if (colonAt < 0)
    {
        return null;
    }

    var start = colonAt + 1;
    var length = nuggets.Length - start - trailerLength;
    if (length <= 0)
    {
        return null;
    }

    var json = nuggets.Substring(start, length);
    try
    {
        var articleInfo = MSKBPreFetched.ArticleInfo.FromJson(json);
        return articleInfo?.Details?.Title;
    }
    catch (Exception)
    {
        // Best effort: an unparsable payload is reported and treated as "no title".
        json.DumpTrace("could not deserialize the json for this article"); // LP only
        return null;
    }
}
#nullable disable
// <auto-generated />
// json2csharp
// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do:
//
// using MSKBPreFetched;
//
// var articleInfo = ArticleInfo.FromJson(jsonString);
namespace MSKBPreFetched
{
using System;
using System.Collections.Generic;
using System.Globalization;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
/// <summary>
/// Root object of the article JSON; binds the "sideNav", "details" and "_ts" properties.
/// </summary>
public partial class ArticleInfo
{
[JsonProperty("sideNav")]
// The string-to-long converter is commented out here, so "sideNav" is bound as a plain string.
//[JsonConverter(typeof(ParseStringConverter))]
public string SideNav { get; set; }
// Article metadata and content (title, publish date, body sections, ...).
[JsonProperty("details")]
public Details Details { get; set; }
// Bound to "_ts"; presumably a timestamp — TODO confirm meaning and unit.
[JsonProperty("_ts")]
public long Ts { get; set; }
}
/// <summary>
/// The "details" object of an article: identification, title, dates, body sections
/// and support-area classification, each bound to the JSON property named in its attribute.
/// </summary>
public partial class Details
{
[JsonProperty("subType")]
public string SubType { get; set; }
[JsonProperty("heading")]
public string Heading { get; set; }
[JsonProperty("description")]
public string Description { get; set; }
// Ordered list of body sections (see Body).
[JsonProperty("body")]
public List<Body> Body { get; set; }
[JsonProperty("urltitle")]
public string Urltitle { get; set; }
[JsonProperty("keywords")]
public List<string> Keywords { get; set; }
[JsonProperty("keywordsLower")]
public List<string> KeywordsLower { get; set; }
[JsonProperty("os")]
public List<object> Os { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
// The JSON value is read as a string and converted to long by ParseStringConverter.
[JsonProperty("id")]
[JsonConverter(typeof(ParseStringConverter))]
public long Id { get; set; }
[JsonProperty("locale")]
public string Locale { get; set; }
// The article title.
[JsonProperty("title")]
public string Title { get; set; }
[JsonProperty("titleLower")]
public string TitleLower { get; set; }
[JsonProperty("published")]
public bool Published { get; set; }
[JsonProperty("createdOn")]
public DateTimeOffset CreatedOn { get; set; }
// Publication timestamp of the article.
[JsonProperty("publishedOn")]
public DateTimeOffset PublishedOn { get; set; }
[JsonProperty("version")]
public long Version { get; set; }
[JsonProperty("eolProject")]
public string EolProject { get; set; }
[JsonProperty("supportAreaPaths")]
public List<Guid> SupportAreaPaths { get; set; }
[JsonProperty("supportAreaPathNodes")]
public List<PrimarySupportAreaPath> SupportAreaPathNodes { get; set; }
[JsonProperty("disableVAPopup")]
public bool DisableVaPopup { get; set; }
[JsonProperty("primarySupportAreaPath")]
public List<PrimarySupportAreaPath> PrimarySupportAreaPath { get; set; }
[JsonProperty("isContentLocaleFallback")]
public bool IsContentLocaleFallback { get; set; }
[JsonProperty("contentLocale")]
public string ContentLocale { get; set; }
}
/// <summary>
/// One section of an article body: section metadata, a section title and a list of
/// content strings.
/// </summary>
public partial class Body
{
[JsonProperty("meta")]
public Meta Meta { get; set; }
[JsonProperty("title")]
public string Title { get; set; }
// Content fragments of the section; presumably HTML markup — TODO confirm.
[JsonProperty("content")]
public List<string> Content { get; set; }
}
/// <summary>
/// The "meta" object attached to a body section; binds "type", "products",
/// "supportAreaPaths", "isInternalContent" and "id".
/// </summary>
public partial class Meta
{
[JsonProperty("type")]
public string Type { get; set; }
// Element type is not known to the generated model, hence List<object>.
[JsonProperty("products")]
public List<object> Products { get; set; }
[JsonProperty("supportAreaPaths")]
public List<object> SupportAreaPaths { get; set; }
[JsonProperty("isInternalContent")]
public bool IsInternalContent { get; set; }
// Unlike Details.Id, this id is kept as a string (no converter applied).
[JsonProperty("id")]
public string Id { get; set; }
}
/// <summary>
/// A support-area node with an id, an optional parent id, a name, a type and a "tree" list.
/// Used for both "supportAreaPathNodes" and "primarySupportAreaPath" in Details.
/// </summary>
public partial class PrimarySupportAreaPath
{
[JsonProperty("id")]
public Guid Id { get; set; }
// Optional: null values are ignored for this property, so a missing parent stays null.
[JsonProperty("parent", NullValueHandling = NullValueHandling.Ignore)]
public Guid? Parent { get; set; }
[JsonProperty("name")]
public string Name { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
[JsonProperty("tree")]
public List<object> Tree { get; set; }
}
public partial class ArticleInfo
{
    /// <summary>
    /// Deserializes an article JSON document into an <see cref="ArticleInfo"/> using the
    /// shared serializer settings.
    /// </summary>
    /// <param name="json">The JSON text to deserialize.</param>
    /// <returns>The deserialized article object.</returns>
    public static ArticleInfo FromJson(string json)
    {
        var sharedSettings = MSKBPreFetched.Converter.Settings;
        return JsonConvert.DeserializeObject<ArticleInfo>(json, sharedSettings);
    }
}
/// <summary>Serialization helpers for <see cref="ArticleInfo"/>.</summary>
public static class Serialize
{
    /// <summary>
    /// Serializes the article back to JSON text using the shared serializer settings.
    /// </summary>
    /// <param name="self">The article to serialize.</param>
    /// <returns>The JSON representation of <paramref name="self"/>.</returns>
    public static string ToJson(this ArticleInfo self)
    {
        var sharedSettings = MSKBPreFetched.Converter.Settings;
        return JsonConvert.SerializeObject(self, sharedSettings);
    }
}
/// <summary>Holds the serializer settings shared by FromJson and ToJson.</summary>
internal static class Converter
{
    /// <summary>
    /// Settings that ignore metadata properties, turn off automatic date parsing and
    /// register an ISO date converter that assumes universal time.
    /// </summary>
    public static readonly JsonSerializerSettings Settings = BuildSettings();

    // Builds the shared settings instance once, during type initialization.
    private static JsonSerializerSettings BuildSettings()
    {
        var configured = new JsonSerializerSettings();
        configured.MetadataPropertyHandling = MetadataPropertyHandling.Ignore;
        configured.DateParseHandling = DateParseHandling.None;

        var isoDates = new IsoDateTimeConverter();
        isoDates.DateTimeStyles = DateTimeStyles.AssumeUniversal;
        configured.Converters.Add(isoDates);

        return configured;
    }
}
/// <summary>
/// JSON converter for <c>long</c> / <c>long?</c> members whose value is transported as a
/// JSON string: reads the string and parses it to a number, and writes the number back
/// as a string.
/// </summary>
internal class ParseStringConverter : JsonConverter
{
    /// <summary>Shared instance of the converter.</summary>
    public static readonly ParseStringConverter Singleton = new ParseStringConverter();

    /// <summary>Returns true for <c>long</c> and <c>long?</c> only.</summary>
    public override bool CanConvert(Type t) => t == typeof(long) || t == typeof(long?);

    /// <summary>
    /// Reads a JSON string and converts it to a <c>long</c>; a JSON null yields <c>null</c>.
    /// </summary>
    /// <exception cref="JsonSerializationException">
    /// The string is not a valid integer. (Derives from <see cref="Exception"/>, so callers
    /// that caught the previous plain exception still catch it.)
    /// </exception>
    public override object ReadJson(JsonReader reader, Type t, object existingValue, JsonSerializer serializer)
    {
        if (reader.TokenType == JsonToken.Null)
        {
            return null;
        }

        var value = serializer.Deserialize<string>(reader);

        // JSON is machine-readable data: parse independently of the current culture.
        if (long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed))
        {
            return parsed;
        }

        throw new JsonSerializationException("Cannot unmarshal type long");
    }

    /// <summary>
    /// Writes the <c>long</c> value as a JSON string, or a JSON null when the value is null.
    /// </summary>
    public override void WriteJson(JsonWriter writer, object untypedValue, JsonSerializer serializer)
    {
        if (untypedValue == null)
        {
            serializer.Serialize(writer, null);
            return;
        }

        var value = (long)untypedValue;
        // Culture-invariant so the emitted digits do not depend on the machine's locale.
        serializer.Serialize(writer, value.ToString(CultureInfo.InvariantCulture));
    }
}
}
我正在尝试开发一个 C# 程序,用来获取可用 Windows 更新的列表,并查找对应的知识库文章以检索每个更新的标题。(否则,它们看起来都只是含义不明的"Update for Windows Server (KBxxxxx)"。)
我尝试检索每篇知识库文章的 HTML,但 HTML 中没有标题(我猜他们正在使用 angular 构建页面)
这是一个例子:https://support.microsoft.com/en-us/kb/3102429
当我查看页面源代码时,浏览器中显示的文章标题并没有出现在 HTML 中的任何地方。有什么好的方法吗?
如果您可以通过某种方式从 Windows 更新中获取 KB 编号,那么应该可以通过以下 URL 访问该文章:
https://support.microsoft.com/en-us/kb/YOUR_KB_NUMBER
而 id="mt5" 的元素似乎就是标题。
编辑:
我的错,id 确实会变化;class="section kb-article spacer-84-top" 的 <section> 的第一个子元素才是标题,但这也可能会改变......(就这样吧 :)
正如 canon 在 Aybe 回答下的评论中所指出的那样,KB 页面是在页面加载之后通过脚本加载内容的,因此无法通过编程方式轻松获取这些内容。
但是您可以直接使用 API link https://support.microsoft.com/app/content/api/content/help/en-us/4034733
对于 2017 年 8 月之后发布的修补程序,新的 API link 似乎是 https://support.microsoft.com/app/content/api/content/help/en-us/4034733。
对于 2017 年 2 月之后发布的修补程序,新的 API link 似乎是 https://support.microsoft.com/api/content/help/3115489。
例如,如果您使用 Python 加载 JSON 数据,那么您可以在 "details" 下找到标题和其他有用信息。特别是,
d["details"]["id"] == u'3115489'
d["details"]["title"] == u'February 7, 2017, update for Office 2013 (KB3115489)'
d["details"]["publishedOn"] == u'2017-02-07T17:05:19.000368Z'
仅供参考:在开启开发者工具的 Chrome 中加载 URL https://support.microsoft.com/kb/3115489 时,网络活动中会显示一个来自 api/content/help 的 XHR 传输:
我发现他们现在会在初始载荷中放入一段预取脚本,其中包含一些有用的 JSON。(实际上,这就是 b.mcewan 在当前被采纳的回答中提到的那段 JSON。)
因为我已准备好使用这些内容....这里是一些代码的 link,这些代码将收集您机器上已安装的修补程序并提供一些详细信息,包括 KB 标题。
代码可以在 LINQPad 中运行:http://share.linqpad.net/l6tdxc.linq
如果你不使用 LINQPad,下面就是相关例程。ParseTitle 使用一些自动生成的类来反序列化 JSON。您需要删除 .Dump() 扩展方法调用和对 Hyperlinq 类的引用,并以其他方式呈现数据。(编辑:ArticleInfo 类公开的不仅仅是知识库文章标题,还包括有关修补程序功能的详细信息、如何获取和安装它等等。)
/// <summary>
/// LINQPad entry point: lists the hotfixes reported by WMI (Win32_QuickFixEngineering),
/// downloads the support page of each one and dumps a table with the KB title,
/// publish date, install delay and an on-demand view of the article body.
/// </summary>
/// <remarks>
/// NOTE(review): this calls <c>ParseInfo(url, html)</c>, which is not defined in the
/// visible source (only <c>ParseTitle</c> is shown) — confirm the helper exists and
/// returns an <c>ArticleInfo</c>.
/// </remarks>
void Main()
{
    const string query = "SELECT HotFixID, InstalledOn, InstalledBy, Description, Caption, * FROM Win32_QuickFixEngineering";

    // Captions are assumed to look like "http://support.microsoft.com/?kbid=NNNNNNN";
    // everything after this marker is treated as the KB number.
    const string kbIdMarker = "?kbid=";

    // One client for the whole run (instead of one per row), disposed once rendering is done.
    using (var web = new WebClient())
    {
        // A failed download for a single KB must not abort the whole report;
        // a null result is filtered out by the query below.
        string TryDownload(string address)
        {
            try
            {
                return web.DownloadString(address);
            }
            catch (WebException)
            {
                return null;
            }
        }

        var result =
            (from ManagementObject quickfix in new ManagementObjectSearcher(query).Get() //.AsParallel()
             let installedOn = Convert.ToDateTime(quickfix["InstalledOn"])
             orderby installedOn descending
             let input = quickfix["Caption"]?.ToString() ?? string.Empty
             let markerAt = input.IndexOf(kbIdMarker, StringComparison.Ordinal)
             // Rows without a recognisable KB link cannot be looked up; skip them instead of throwing.
             where markerAt >= 0
             let id = input.Substring(markerAt + kbIdMarker.Length)
             let url = $"{input.Replace("microsoft.com/?kbid=", "microsoft.com/en-us/help/")}/kb{id}"
             let html = TryDownload(url)
             where !string.IsNullOrEmpty(html)
             let kbInfo = ParseInfo(url, html)
             where kbInfo != null
             let pub = kbInfo.Details.PublishedOn
             let title = kbInfo.Details.Title
             // The article body is only rendered when the user expands the "More...." link.
             let desc = Util.OnDemand("More....", () =>
                 Util.RawHtml(string.Join(Environment.NewLine,
                     kbInfo.Details.Body
                         .Select(i => $"<span class=typeglyphx>{i.Title}</span>{i.Content.Single()}"))))
             select
                 new
                 {
                     HotFixID = Util.RawHtml($"<span class=typeglyphx>{quickfix["HotFixID"]?.ToString()}</span>"),
                     Published = pub.Date,
                     InstalledOn = quickfix["InstalledOn"]?.ToString(),
                     // TimeSpan.Days is an int, so very large spans no longer overflow a 16-bit conversion.
                     InstallDelay = $"{(installedOn.Date - pub.Date).Days} days",
                     InstalledBy = quickfix["InstalledBy"]?.ToString(),
                     Description = new Hyperlinq(quickfix["Description"]?.ToString()),
                     // Decide on the title itself: the wrapped HTML object is never null,
                     // so the previous "??" fallback could not trigger.
                     Title = string.IsNullOrEmpty(title)
                         ? (object)$"{url} [Could not obtain KB title]"
                         : Util.RawHtml($"<span class=typeglyphx>{title}</span>"),
                     Body = desc,
                     Link = new Hyperlinq(url),
                 }
            )
            // Materialise while the WebClient is still alive; the query is otherwise lazy.
            .ToList()
            .Dump(1);
    }
}
#nullable enable
/// <summary>
/// Extracts the KB article title from the HTML of a support article page by locating the
/// script element that assigns <c>microsoft.support.prefetchedArticle</c>, cutting the
/// JSON payload out of it and deserializing that payload into an <c>ArticleInfo</c>.
/// </summary>
/// <param name="html">Raw HTML of the article page.</param>
/// <returns>
/// The article title, or <c>null</c> when the prefetch script is missing or ambiguous,
/// the payload cannot be located, or the JSON cannot be deserialized.
/// </returns>
string? ParseTitle ( string html )
{
    const string searchToken = "microsoft.support.prefetchedArticle = (function() ";

    // Number of characters that follow the JSON payload inside the script element.
    // NOTE(review): value carried over unchanged; it is tied to the page markup — confirm.
    const int trailerLength = 28;

    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var scripts = doc.DocumentNode.SelectNodes("//script");
    if (scripts == null)
    {
        return null;
    }

    // Exactly one script is expected to carry the prefetched article.
    var candidates = scripts
        .Select(i => i.OuterHtml)
        .Where(text => text.Contains(searchToken))
        .Take(2)
        .ToList();
    if (candidates.Count != 1)
    {
        return null;
    }

    var nuggets = candidates[0];

    // The payload starts right after the first ':' that follows the token.
    var tokenAt = nuggets.IndexOf(searchToken, StringComparison.Ordinal);
    var colonAt = nuggets.IndexOf(':', tokenAt + searchToken.Length);
    if (colonAt < 0)
    {
        return null;
    }

    var start = colonAt + 1;
    var length = nuggets.Length - start - trailerLength;
    if (length <= 0)
    {
        return null;
    }

    var json = nuggets.Substring(start, length);
    try
    {
        var articleInfo = MSKBPreFetched.ArticleInfo.FromJson(json);
        return articleInfo?.Details?.Title;
    }
    catch (Exception)
    {
        // Best effort: an unparsable payload is reported and treated as "no title".
        json.DumpTrace("could not deserialize the json for this article"); // LP only
        return null;
    }
}
#nullable disable
// <auto-generated />
// json2csharp
// To parse this JSON data, add NuGet 'Newtonsoft.Json' then do:
//
// using MSKBPreFetched;
//
// var articleInfo = ArticleInfo.FromJson(jsonString);
namespace MSKBPreFetched
{
using System;
using System.Collections.Generic;
using System.Globalization;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
/// <summary>
/// Root object of the article JSON; binds the "sideNav", "details" and "_ts" properties.
/// </summary>
public partial class ArticleInfo
{
[JsonProperty("sideNav")]
// The string-to-long converter is commented out here, so "sideNav" is bound as a plain string.
//[JsonConverter(typeof(ParseStringConverter))]
public string SideNav { get; set; }
// Article metadata and content (title, publish date, body sections, ...).
[JsonProperty("details")]
public Details Details { get; set; }
// Bound to "_ts"; presumably a timestamp — TODO confirm meaning and unit.
[JsonProperty("_ts")]
public long Ts { get; set; }
}
/// <summary>
/// The "details" object of an article: identification, title, dates, body sections
/// and support-area classification, each bound to the JSON property named in its attribute.
/// </summary>
public partial class Details
{
[JsonProperty("subType")]
public string SubType { get; set; }
[JsonProperty("heading")]
public string Heading { get; set; }
[JsonProperty("description")]
public string Description { get; set; }
// Ordered list of body sections (see Body).
[JsonProperty("body")]
public List<Body> Body { get; set; }
[JsonProperty("urltitle")]
public string Urltitle { get; set; }
[JsonProperty("keywords")]
public List<string> Keywords { get; set; }
[JsonProperty("keywordsLower")]
public List<string> KeywordsLower { get; set; }
[JsonProperty("os")]
public List<object> Os { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
// The JSON value is read as a string and converted to long by ParseStringConverter.
[JsonProperty("id")]
[JsonConverter(typeof(ParseStringConverter))]
public long Id { get; set; }
[JsonProperty("locale")]
public string Locale { get; set; }
// The article title.
[JsonProperty("title")]
public string Title { get; set; }
[JsonProperty("titleLower")]
public string TitleLower { get; set; }
[JsonProperty("published")]
public bool Published { get; set; }
[JsonProperty("createdOn")]
public DateTimeOffset CreatedOn { get; set; }
// Publication timestamp of the article.
[JsonProperty("publishedOn")]
public DateTimeOffset PublishedOn { get; set; }
[JsonProperty("version")]
public long Version { get; set; }
[JsonProperty("eolProject")]
public string EolProject { get; set; }
[JsonProperty("supportAreaPaths")]
public List<Guid> SupportAreaPaths { get; set; }
[JsonProperty("supportAreaPathNodes")]
public List<PrimarySupportAreaPath> SupportAreaPathNodes { get; set; }
[JsonProperty("disableVAPopup")]
public bool DisableVaPopup { get; set; }
[JsonProperty("primarySupportAreaPath")]
public List<PrimarySupportAreaPath> PrimarySupportAreaPath { get; set; }
[JsonProperty("isContentLocaleFallback")]
public bool IsContentLocaleFallback { get; set; }
[JsonProperty("contentLocale")]
public string ContentLocale { get; set; }
}
/// <summary>
/// One section of an article body: section metadata, a section title and a list of
/// content strings.
/// </summary>
public partial class Body
{
[JsonProperty("meta")]
public Meta Meta { get; set; }
[JsonProperty("title")]
public string Title { get; set; }
// Content fragments of the section; presumably HTML markup — TODO confirm.
[JsonProperty("content")]
public List<string> Content { get; set; }
}
/// <summary>
/// The "meta" object attached to a body section; binds "type", "products",
/// "supportAreaPaths", "isInternalContent" and "id".
/// </summary>
public partial class Meta
{
[JsonProperty("type")]
public string Type { get; set; }
// Element type is not known to the generated model, hence List<object>.
[JsonProperty("products")]
public List<object> Products { get; set; }
[JsonProperty("supportAreaPaths")]
public List<object> SupportAreaPaths { get; set; }
[JsonProperty("isInternalContent")]
public bool IsInternalContent { get; set; }
// Unlike Details.Id, this id is kept as a string (no converter applied).
[JsonProperty("id")]
public string Id { get; set; }
}
/// <summary>
/// A support-area node with an id, an optional parent id, a name, a type and a "tree" list.
/// Used for both "supportAreaPathNodes" and "primarySupportAreaPath" in Details.
/// </summary>
public partial class PrimarySupportAreaPath
{
[JsonProperty("id")]
public Guid Id { get; set; }
// Optional: null values are ignored for this property, so a missing parent stays null.
[JsonProperty("parent", NullValueHandling = NullValueHandling.Ignore)]
public Guid? Parent { get; set; }
[JsonProperty("name")]
public string Name { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
[JsonProperty("tree")]
public List<object> Tree { get; set; }
}
public partial class ArticleInfo
{
    /// <summary>
    /// Deserializes an article JSON document into an <see cref="ArticleInfo"/> using the
    /// shared serializer settings.
    /// </summary>
    /// <param name="json">The JSON text to deserialize.</param>
    /// <returns>The deserialized article object.</returns>
    public static ArticleInfo FromJson(string json)
    {
        var sharedSettings = MSKBPreFetched.Converter.Settings;
        return JsonConvert.DeserializeObject<ArticleInfo>(json, sharedSettings);
    }
}
/// <summary>Serialization helpers for <see cref="ArticleInfo"/>.</summary>
public static class Serialize
{
    /// <summary>
    /// Serializes the article back to JSON text using the shared serializer settings.
    /// </summary>
    /// <param name="self">The article to serialize.</param>
    /// <returns>The JSON representation of <paramref name="self"/>.</returns>
    public static string ToJson(this ArticleInfo self)
    {
        var sharedSettings = MSKBPreFetched.Converter.Settings;
        return JsonConvert.SerializeObject(self, sharedSettings);
    }
}
/// <summary>Holds the serializer settings shared by FromJson and ToJson.</summary>
internal static class Converter
{
    /// <summary>
    /// Settings that ignore metadata properties, turn off automatic date parsing and
    /// register an ISO date converter that assumes universal time.
    /// </summary>
    public static readonly JsonSerializerSettings Settings = BuildSettings();

    // Builds the shared settings instance once, during type initialization.
    private static JsonSerializerSettings BuildSettings()
    {
        var configured = new JsonSerializerSettings();
        configured.MetadataPropertyHandling = MetadataPropertyHandling.Ignore;
        configured.DateParseHandling = DateParseHandling.None;

        var isoDates = new IsoDateTimeConverter();
        isoDates.DateTimeStyles = DateTimeStyles.AssumeUniversal;
        configured.Converters.Add(isoDates);

        return configured;
    }
}
/// <summary>
/// JSON converter for <c>long</c> / <c>long?</c> members whose value is transported as a
/// JSON string: reads the string and parses it to a number, and writes the number back
/// as a string.
/// </summary>
internal class ParseStringConverter : JsonConverter
{
    /// <summary>Shared instance of the converter.</summary>
    public static readonly ParseStringConverter Singleton = new ParseStringConverter();

    /// <summary>Returns true for <c>long</c> and <c>long?</c> only.</summary>
    public override bool CanConvert(Type t) => t == typeof(long) || t == typeof(long?);

    /// <summary>
    /// Reads a JSON string and converts it to a <c>long</c>; a JSON null yields <c>null</c>.
    /// </summary>
    /// <exception cref="JsonSerializationException">
    /// The string is not a valid integer. (Derives from <see cref="Exception"/>, so callers
    /// that caught the previous plain exception still catch it.)
    /// </exception>
    public override object ReadJson(JsonReader reader, Type t, object existingValue, JsonSerializer serializer)
    {
        if (reader.TokenType == JsonToken.Null)
        {
            return null;
        }

        var value = serializer.Deserialize<string>(reader);

        // JSON is machine-readable data: parse independently of the current culture.
        if (long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed))
        {
            return parsed;
        }

        throw new JsonSerializationException("Cannot unmarshal type long");
    }

    /// <summary>
    /// Writes the <c>long</c> value as a JSON string, or a JSON null when the value is null.
    /// </summary>
    public override void WriteJson(JsonWriter writer, object untypedValue, JsonSerializer serializer)
    {
        if (untypedValue == null)
        {
            serializer.Serialize(writer, null);
            return;
        }

        var value = (long)untypedValue;
        // Culture-invariant so the emitted digits do not depend on the machine's locale.
        serializer.Serialize(writer, value.ToString(CultureInfo.InvariantCulture));
    }
}
}