如何通过抓取某些内容页面来获取 Twitch 上主播的名字
How to get the name of a streamer on Twitch by scraping certain content pages
我想获取一个随机的 Twitch 主播。
所以我认为解析“http://twitch.tv/directory/random”html 是最简单的,但不幸的是这些元素隐藏在其他地方。
在 C# 中处理此问题的最佳方法是什么?
希望你能帮助我,谢谢!
编辑:我的主要问题是,在 twitch.tv/directory/random 页面的 HTML 源代码中看不到页面上显示的主播名字。因此,仅靠下载该 URL 行不通,我需要另一种方式来获取该网站上的数据。所以我要找的是一种用 C# 抓取 twitch.tv/directory/random、从而获取该分类下热门主播名字的方法。
您正在尝试做的是 web scraping。通常服务供应商不理解这一点。
不过我在 GitHub 上找到了 Twitch-API,它也许能帮你达成目标。如果你想在 C# 中使用该 API,我推荐使用 RestSharp(NuGet 上也有)来实现对 API 的 HTTP 调用。
在快速浏览了 API 公开的方法后,这个方法(GET /search/channels)可能会有所帮助:
它返回如下的 JSON 对象:
{
"channels": [
{
"mature": false,
"status": "test status",
"broadcaster_language": "en",
"display_name": "test_channel",
"game": "StarCraft II: Heart of the Swarm",
"delay": 0,
"language": "en",
"_id": 12345,
"name": "test_channel",
"created_at": "2007-05-22T10:39:54Z",
"updated_at": "2015-02-12T04:15:49Z",
"logo": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_image-94a42b3a13c31c02-300x300.jpeg",
"banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_header_image-08dd874c17f39837-640x125.png",
"video_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_offline_image-b314c834d210dc1a-640x360.png",
"background": null,
"profile_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_banner-6936c61353e4aeed-480.png",
"profile_banner_background_color": "null",
"partner": true,
"url": "http://www.twitch.tv/test_channel",
"views": 49144894,
"followers": 215780,
"_links": {
"self": "https://api.twitch.tv/kraken/channels/test_channel",
"follows": "https://api.twitch.tv/kraken/channels/test_channel/follows",
"commercial": "https://api.twitch.tv/kraken/channels/test_channel/commercial",
"stream_key": "https://api.twitch.tv/kraken/channels/test_channel/stream_key",
"chat": "https://api.twitch.tv/kraken/chat/test_channel",
"features": "https://api.twitch.tv/kraken/channels/test_channel/features",
"subscriptions": "https://api.twitch.tv/kraken/channels/test_channel/subscriptions",
"editors": "https://api.twitch.tv/kraken/channels/test_channel/editors",
"teams": "https://api.twitch.tv/kraken/channels/test_channel/teams",
"videos": "https://api.twitch.tv/kraken/channels/test_channel/videos"
}
}
],
"_total": 42679,
"_links": {
"self": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=0&q=starcraft",
"next": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=10&q=starcraft"
}
}
现在使用 json2csharp,我们可以把它转换成类层次结构:
/// <summary>
/// Links attached to a single channel entry: the channel-level
/// <c>_links</c> object of the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one, so they are
/// lower-case / snake_case rather than PascalCase.
/// </remarks>
public class Links
{
    /// <summary>Value of the JSON key <c>self</c>.</summary>
    public string self { get; set; }

    /// <summary>Value of the JSON key <c>follows</c>.</summary>
    public string follows { get; set; }

    /// <summary>Value of the JSON key <c>commercial</c>.</summary>
    public string commercial { get; set; }

    /// <summary>Value of the JSON key <c>stream_key</c>.</summary>
    public string stream_key { get; set; }

    /// <summary>Value of the JSON key <c>chat</c>.</summary>
    public string chat { get; set; }

    /// <summary>Value of the JSON key <c>features</c>.</summary>
    public string features { get; set; }

    /// <summary>Value of the JSON key <c>subscriptions</c>.</summary>
    public string subscriptions { get; set; }

    /// <summary>Value of the JSON key <c>editors</c>.</summary>
    public string editors { get; set; }

    /// <summary>Value of the JSON key <c>teams</c>.</summary>
    public string teams { get; set; }

    /// <summary>Value of the JSON key <c>videos</c>.</summary>
    public string videos { get; set; }
}
/// <summary>
/// One element of the <c>channels</c> array in the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one, so they are
/// lower-case / snake_case rather than PascalCase.
/// </remarks>
public class Channel
{
    /// <summary>Value of the JSON key <c>mature</c>.</summary>
    public bool mature { get; set; }

    /// <summary>Value of the JSON key <c>status</c>.</summary>
    public string status { get; set; }

    /// <summary>Value of the JSON key <c>broadcaster_language</c>.</summary>
    public string broadcaster_language { get; set; }

    /// <summary>Value of the JSON key <c>display_name</c>.</summary>
    public string display_name { get; set; }

    /// <summary>Value of the JSON key <c>game</c>.</summary>
    public string game { get; set; }

    /// <summary>Value of the JSON key <c>delay</c>.</summary>
    public int delay { get; set; }

    /// <summary>Value of the JSON key <c>language</c>.</summary>
    public string language { get; set; }

    /// <summary>Value of the JSON key <c>_id</c>.</summary>
    public int _id { get; set; }

    /// <summary>Value of the JSON key <c>name</c>.</summary>
    public string name { get; set; }

    /// <summary>Value of the JSON key <c>created_at</c>, kept as the raw string.</summary>
    public string created_at { get; set; }

    /// <summary>Value of the JSON key <c>updated_at</c>, kept as the raw string.</summary>
    public string updated_at { get; set; }

    /// <summary>Value of the JSON key <c>logo</c>.</summary>
    public string logo { get; set; }

    /// <summary>Value of the JSON key <c>banner</c>.</summary>
    public string banner { get; set; }

    /// <summary>Value of the JSON key <c>video_banner</c>.</summary>
    public string video_banner { get; set; }

    /// <summary>
    /// Value of the JSON key <c>background</c>; typed as <see cref="object"/>
    /// because the sample response only shows <c>null</c> for it.
    /// </summary>
    public object background { get; set; }

    /// <summary>Value of the JSON key <c>profile_banner</c>.</summary>
    public string profile_banner { get; set; }

    /// <summary>Value of the JSON key <c>profile_banner_background_color</c>.</summary>
    public string profile_banner_background_color { get; set; }

    /// <summary>Value of the JSON key <c>partner</c>.</summary>
    public bool partner { get; set; }

    /// <summary>Value of the JSON key <c>url</c>.</summary>
    public string url { get; set; }

    /// <summary>Value of the JSON key <c>views</c>.</summary>
    public int views { get; set; }

    /// <summary>Value of the JSON key <c>followers</c>.</summary>
    public int followers { get; set; }

    /// <summary>Value of the channel-level JSON key <c>_links</c>.</summary>
    public Links _links { get; set; }
}
/// <summary>
/// Links of the search result as a whole: the top-level <c>_links</c>
/// object of the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one.
/// </remarks>
public class Links2
{
    /// <summary>Value of the JSON key <c>self</c>.</summary>
    public string self { get; set; }

    /// <summary>Value of the JSON key <c>next</c>.</summary>
    public string next { get; set; }
}
/// <summary>
/// Root object of the search response: the matching channels, the total
/// count and the top-level links.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one.
/// </remarks>
public class Channels
{
    /// <summary>Value of the JSON key <c>channels</c>.</summary>
    public List<Channel> channels { get; set; }

    /// <summary>Value of the JSON key <c>_total</c>.</summary>
    public int _total { get; set; }

    /// <summary>Value of the top-level JSON key <c>_links</c>.</summary>
    public Links2 _links { get; set; }
}
安装 RestSharp 和 Json.NET 之后,对该特定端点的调用可能如下所示:
/// <summary>
/// Sample client for the Twitch "kraken" REST API built on RestSharp and Json.NET.
/// It sends an authorization request, remembers the cookies of a successful
/// response and replays them on a channel search.
/// </summary>
public class TwitchSampleImplementation
{
    // reference: baseUrl => https://github.com/justintv/Twitch-API#formats
    private const string BaseUrl = "https://api.twitch.tv/kraken";

    // Cookies collected by Authenticate. Starts out empty (never null) so that
    // GetChannel can run even when no authentication has succeeded yet.
    private readonly IList<RestResponseCookie> _cookies = new List<RestResponseCookie>();

    /// <summary>
    /// Authenticates with placeholder credentials, searches for channels and
    /// writes the comma-separated display names of the hits to the console.
    /// </summary>
    /// <exception cref="HttpRequestException">
    /// One of the two HTTP calls did not answer with status 200 (OK).
    /// </exception>
    public void AuthenticateAndGetChannels()
    {
        // this is where your login credentials go which can be acquired here
        // => https://github.com/justintv/Twitch-API/blob/master/authentication.md#developer-setup
        Authenticate("yourClientId", "thatUrl", null, "thatState");

        var channels = GetChannel("popularChannel");

        // Deserialization can yield null (empty body) or a result without a list.
        var names = channels == null || channels.channels == null
            ? string.Empty
            : string.Join(",", channels.channels.Select(c => c.display_name));

        // The "{0}" placeholder is required; without it the names are silently dropped.
        Console.WriteLine(string.Format("Channels: {0}", names));
    }

    /// <summary>
    /// Sends the authorization request and stores the cookies of the response.
    /// </summary>
    /// <param name="clientId">Value sent as <c>client_id</c>.</param>
    /// <param name="registeredRedirectURI">Value sent as <c>redirect_uri</c>.</param>
    /// <param name="scopes">
    /// Scopes to request; <c>null</c> is treated like an empty list.
    /// </param>
    /// <param name="state">Value sent as <c>state</c>.</param>
    /// <exception cref="HttpRequestException">
    /// The server did not answer with status 200 (OK).
    /// </exception>
    public void Authenticate(string clientId,
        string registeredRedirectURI, List<string> scopes, string state)
    {
        // reference: https://github.com/justintv/Twitch-API/blob/master/authentication.md
        // NOTE(review): scopes are joined with ','. OAuth 2.0 (RFC 6749, section 3.3)
        // defines "scope" as space-delimited - confirm against the Twitch docs.
        var scopeValue = scopes == null ? string.Empty : string.Join(",", scopes);

        var client = new RestClient(BaseUrl);
        var request = new RestRequest("oauth2/authorize", Method.POST); // try Method.GET if that doesn't work

        var type = ParameterType.GetOrPost;
        var parameters = new List<Parameter>
        {
            new Parameter { Name = "client_id", Value = clientId, Type = type },
            new Parameter { Name = "redirect_uri", Value = registeredRedirectURI, Type = type },
            new Parameter { Name = "scope", Value = scopeValue, Type = type },
            new Parameter { Name = "state", Value = state, Type = type }
        };
        foreach (var parameter in parameters)
        {
            request.AddParameter(parameter);
        }

        // reference: https://github.com/justintv/Twitch-API#api-versions-and-mime-types
        request.RequestFormat = DataFormat.Json;

        var response = client.Execute(request);
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw CreateRequestException(response);
        }

        foreach (var cookie in response.Cookies)
        {
            _cookies.Add(cookie);
        }
    }

    /// <summary>
    /// Searches channels matching <paramref name="searchTerm"/>.
    /// </summary>
    /// <param name="searchTerm">
    /// Text to search for; it is URL-escaped before being placed in the query
    /// string. <c>null</c> is sent as an empty term.
    /// </param>
    /// <returns>The deserialized search response.</returns>
    /// <exception cref="HttpRequestException">
    /// The server did not answer with status 200 (OK).
    /// </exception>
    public Channels GetChannel(string searchTerm)
    {
        var client = new RestClient(BaseUrl);

        // reference: https://github.com/justintv/Twitch-API/blob/master/v3_resources/search.md#get-searchchannels
        // reference: http://restsharp.org/
        // Escape the term so spaces, '&', '#', '+' etc. cannot break or alter the query.
        var escapedTerm = Uri.EscapeDataString(searchTerm ?? string.Empty);
        var request = new RestRequest(string.Format("search/channels?q={0}", escapedTerm));

        foreach (var cookie in _cookies)
        {
            request.AddCookie(cookie.Name, cookie.Value);
        }

        var response = client.Execute(request);
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw CreateRequestException(response);
        }

        return JsonConvert.DeserializeObject<Channels>(response.Content);
    }

    /// <summary>
    /// Builds the exception thrown for any response whose status is not 200 (OK).
    /// </summary>
    /// <param name="response">The failed response.</param>
    /// <returns>An exception carrying the response body and status code.</returns>
    private static HttpRequestException CreateRequestException(IRestResponse response)
    {
        // Formatting the enum value directly prints its name when defined and the
        // raw number otherwise, so the code is never lost from the message.
        return new HttpRequestException(string.Format(
            "Exception '{0}' with status code '{1}' occurred.",
            response.Content, response.StatusCode));
    }
}
我还没有尝试调用该代码,但它应该可以帮助您解决问题。
另外,用 Google 搜索 "twitch scrapers" 也可能找到你想要的结果。祝你搜寻顺利,好运。希望这对你有帮助。 ;)
我想获取一个随机的 Twitch 主播。 所以我认为解析“http://twitch.tv/directory/random”的 HTML 是最简单的,但不幸的是这些元素隐藏在其他地方。 在 C# 中处理此问题的最佳方法是什么?
希望你能帮助我,谢谢!
编辑:我的主要问题是,在 twitch.tv/directory/random 页面的 HTML 源代码中看不到页面上显示的主播名字。因此,仅靠下载该 URL 行不通,我需要另一种方式来获取该网站上的数据。所以我要找的是一种用 C# 抓取 twitch.tv/directory/random、从而获取该分类下热门主播名字的方法。
您正在尝试做的是 web scraping。通常服务供应商不理解这一点。
不过我在 GitHub 上找到了 Twitch-API,它也许能帮你达成目标。如果你想在 C# 中使用该 API,我推荐使用 RestSharp(NuGet 上也有)来实现对 API 的 HTTP 调用。
在快速浏览了 API 公开的方法后,这个方法(GET /search/channels)可能会有所帮助:
它返回如下的 JSON 对象:
{
"channels": [
{
"mature": false,
"status": "test status",
"broadcaster_language": "en",
"display_name": "test_channel",
"game": "StarCraft II: Heart of the Swarm",
"delay": 0,
"language": "en",
"_id": 12345,
"name": "test_channel",
"created_at": "2007-05-22T10:39:54Z",
"updated_at": "2015-02-12T04:15:49Z",
"logo": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_image-94a42b3a13c31c02-300x300.jpeg",
"banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_header_image-08dd874c17f39837-640x125.png",
"video_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_offline_image-b314c834d210dc1a-640x360.png",
"background": null,
"profile_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_banner-6936c61353e4aeed-480.png",
"profile_banner_background_color": "null",
"partner": true,
"url": "http://www.twitch.tv/test_channel",
"views": 49144894,
"followers": 215780,
"_links": {
"self": "https://api.twitch.tv/kraken/channels/test_channel",
"follows": "https://api.twitch.tv/kraken/channels/test_channel/follows",
"commercial": "https://api.twitch.tv/kraken/channels/test_channel/commercial",
"stream_key": "https://api.twitch.tv/kraken/channels/test_channel/stream_key",
"chat": "https://api.twitch.tv/kraken/chat/test_channel",
"features": "https://api.twitch.tv/kraken/channels/test_channel/features",
"subscriptions": "https://api.twitch.tv/kraken/channels/test_channel/subscriptions",
"editors": "https://api.twitch.tv/kraken/channels/test_channel/editors",
"teams": "https://api.twitch.tv/kraken/channels/test_channel/teams",
"videos": "https://api.twitch.tv/kraken/channels/test_channel/videos"
}
}
],
"_total": 42679,
"_links": {
"self": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=0&q=starcraft",
"next": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=10&q=starcraft"
}
}
现在使用 json2csharp,我们可以把它转换成类层次结构:
/// <summary>
/// Links attached to a single channel entry: the channel-level
/// <c>_links</c> object of the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one, so they are
/// lower-case / snake_case rather than PascalCase.
/// </remarks>
public class Links
{
    /// <summary>Value of the JSON key <c>self</c>.</summary>
    public string self { get; set; }

    /// <summary>Value of the JSON key <c>follows</c>.</summary>
    public string follows { get; set; }

    /// <summary>Value of the JSON key <c>commercial</c>.</summary>
    public string commercial { get; set; }

    /// <summary>Value of the JSON key <c>stream_key</c>.</summary>
    public string stream_key { get; set; }

    /// <summary>Value of the JSON key <c>chat</c>.</summary>
    public string chat { get; set; }

    /// <summary>Value of the JSON key <c>features</c>.</summary>
    public string features { get; set; }

    /// <summary>Value of the JSON key <c>subscriptions</c>.</summary>
    public string subscriptions { get; set; }

    /// <summary>Value of the JSON key <c>editors</c>.</summary>
    public string editors { get; set; }

    /// <summary>Value of the JSON key <c>teams</c>.</summary>
    public string teams { get; set; }

    /// <summary>Value of the JSON key <c>videos</c>.</summary>
    public string videos { get; set; }
}
/// <summary>
/// One element of the <c>channels</c> array in the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one, so they are
/// lower-case / snake_case rather than PascalCase.
/// </remarks>
public class Channel
{
    /// <summary>Value of the JSON key <c>mature</c>.</summary>
    public bool mature { get; set; }

    /// <summary>Value of the JSON key <c>status</c>.</summary>
    public string status { get; set; }

    /// <summary>Value of the JSON key <c>broadcaster_language</c>.</summary>
    public string broadcaster_language { get; set; }

    /// <summary>Value of the JSON key <c>display_name</c>.</summary>
    public string display_name { get; set; }

    /// <summary>Value of the JSON key <c>game</c>.</summary>
    public string game { get; set; }

    /// <summary>Value of the JSON key <c>delay</c>.</summary>
    public int delay { get; set; }

    /// <summary>Value of the JSON key <c>language</c>.</summary>
    public string language { get; set; }

    /// <summary>Value of the JSON key <c>_id</c>.</summary>
    public int _id { get; set; }

    /// <summary>Value of the JSON key <c>name</c>.</summary>
    public string name { get; set; }

    /// <summary>Value of the JSON key <c>created_at</c>, kept as the raw string.</summary>
    public string created_at { get; set; }

    /// <summary>Value of the JSON key <c>updated_at</c>, kept as the raw string.</summary>
    public string updated_at { get; set; }

    /// <summary>Value of the JSON key <c>logo</c>.</summary>
    public string logo { get; set; }

    /// <summary>Value of the JSON key <c>banner</c>.</summary>
    public string banner { get; set; }

    /// <summary>Value of the JSON key <c>video_banner</c>.</summary>
    public string video_banner { get; set; }

    /// <summary>
    /// Value of the JSON key <c>background</c>; typed as <see cref="object"/>
    /// because the sample response only shows <c>null</c> for it.
    /// </summary>
    public object background { get; set; }

    /// <summary>Value of the JSON key <c>profile_banner</c>.</summary>
    public string profile_banner { get; set; }

    /// <summary>Value of the JSON key <c>profile_banner_background_color</c>.</summary>
    public string profile_banner_background_color { get; set; }

    /// <summary>Value of the JSON key <c>partner</c>.</summary>
    public bool partner { get; set; }

    /// <summary>Value of the JSON key <c>url</c>.</summary>
    public string url { get; set; }

    /// <summary>Value of the JSON key <c>views</c>.</summary>
    public int views { get; set; }

    /// <summary>Value of the JSON key <c>followers</c>.</summary>
    public int followers { get; set; }

    /// <summary>Value of the channel-level JSON key <c>_links</c>.</summary>
    public Links _links { get; set; }
}
/// <summary>
/// Links of the search result as a whole: the top-level <c>_links</c>
/// object of the search response.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one.
/// </remarks>
public class Links2
{
    /// <summary>Value of the JSON key <c>self</c>.</summary>
    public string self { get; set; }

    /// <summary>Value of the JSON key <c>next</c>.</summary>
    public string next { get; set; }
}
/// <summary>
/// Root object of the search response: the matching channels, the total
/// count and the top-level links.
/// </summary>
/// <remarks>
/// Property names deliberately mirror the JSON keys one-to-one.
/// </remarks>
public class Channels
{
    /// <summary>Value of the JSON key <c>channels</c>.</summary>
    public List<Channel> channels { get; set; }

    /// <summary>Value of the JSON key <c>_total</c>.</summary>
    public int _total { get; set; }

    /// <summary>Value of the top-level JSON key <c>_links</c>.</summary>
    public Links2 _links { get; set; }
}
安装 RestSharp 和 Json.NET 之后,对该特定端点的调用可能如下所示:
/// <summary>
/// Sample client for the Twitch "kraken" REST API built on RestSharp and Json.NET.
/// It sends an authorization request, remembers the cookies of a successful
/// response and replays them on a channel search.
/// </summary>
public class TwitchSampleImplementation
{
    // reference: baseUrl => https://github.com/justintv/Twitch-API#formats
    private const string BaseUrl = "https://api.twitch.tv/kraken";

    // Cookies collected by Authenticate. Starts out empty (never null) so that
    // GetChannel can run even when no authentication has succeeded yet.
    private readonly IList<RestResponseCookie> _cookies = new List<RestResponseCookie>();

    /// <summary>
    /// Authenticates with placeholder credentials, searches for channels and
    /// writes the comma-separated display names of the hits to the console.
    /// </summary>
    /// <exception cref="HttpRequestException">
    /// One of the two HTTP calls did not answer with status 200 (OK).
    /// </exception>
    public void AuthenticateAndGetChannels()
    {
        // this is where your login credentials go which can be acquired here
        // => https://github.com/justintv/Twitch-API/blob/master/authentication.md#developer-setup
        Authenticate("yourClientId", "thatUrl", null, "thatState");

        var channels = GetChannel("popularChannel");

        // Deserialization can yield null (empty body) or a result without a list.
        var names = channels == null || channels.channels == null
            ? string.Empty
            : string.Join(",", channels.channels.Select(c => c.display_name));

        // The "{0}" placeholder is required; without it the names are silently dropped.
        Console.WriteLine(string.Format("Channels: {0}", names));
    }

    /// <summary>
    /// Sends the authorization request and stores the cookies of the response.
    /// </summary>
    /// <param name="clientId">Value sent as <c>client_id</c>.</param>
    /// <param name="registeredRedirectURI">Value sent as <c>redirect_uri</c>.</param>
    /// <param name="scopes">
    /// Scopes to request; <c>null</c> is treated like an empty list.
    /// </param>
    /// <param name="state">Value sent as <c>state</c>.</param>
    /// <exception cref="HttpRequestException">
    /// The server did not answer with status 200 (OK).
    /// </exception>
    public void Authenticate(string clientId,
        string registeredRedirectURI, List<string> scopes, string state)
    {
        // reference: https://github.com/justintv/Twitch-API/blob/master/authentication.md
        // NOTE(review): scopes are joined with ','. OAuth 2.0 (RFC 6749, section 3.3)
        // defines "scope" as space-delimited - confirm against the Twitch docs.
        var scopeValue = scopes == null ? string.Empty : string.Join(",", scopes);

        var client = new RestClient(BaseUrl);
        var request = new RestRequest("oauth2/authorize", Method.POST); // try Method.GET if that doesn't work

        var type = ParameterType.GetOrPost;
        var parameters = new List<Parameter>
        {
            new Parameter { Name = "client_id", Value = clientId, Type = type },
            new Parameter { Name = "redirect_uri", Value = registeredRedirectURI, Type = type },
            new Parameter { Name = "scope", Value = scopeValue, Type = type },
            new Parameter { Name = "state", Value = state, Type = type }
        };
        foreach (var parameter in parameters)
        {
            request.AddParameter(parameter);
        }

        // reference: https://github.com/justintv/Twitch-API#api-versions-and-mime-types
        request.RequestFormat = DataFormat.Json;

        var response = client.Execute(request);
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw CreateRequestException(response);
        }

        foreach (var cookie in response.Cookies)
        {
            _cookies.Add(cookie);
        }
    }

    /// <summary>
    /// Searches channels matching <paramref name="searchTerm"/>.
    /// </summary>
    /// <param name="searchTerm">
    /// Text to search for; it is URL-escaped before being placed in the query
    /// string. <c>null</c> is sent as an empty term.
    /// </param>
    /// <returns>The deserialized search response.</returns>
    /// <exception cref="HttpRequestException">
    /// The server did not answer with status 200 (OK).
    /// </exception>
    public Channels GetChannel(string searchTerm)
    {
        var client = new RestClient(BaseUrl);

        // reference: https://github.com/justintv/Twitch-API/blob/master/v3_resources/search.md#get-searchchannels
        // reference: http://restsharp.org/
        // Escape the term so spaces, '&', '#', '+' etc. cannot break or alter the query.
        var escapedTerm = Uri.EscapeDataString(searchTerm ?? string.Empty);
        var request = new RestRequest(string.Format("search/channels?q={0}", escapedTerm));

        foreach (var cookie in _cookies)
        {
            request.AddCookie(cookie.Name, cookie.Value);
        }

        var response = client.Execute(request);
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw CreateRequestException(response);
        }

        return JsonConvert.DeserializeObject<Channels>(response.Content);
    }

    /// <summary>
    /// Builds the exception thrown for any response whose status is not 200 (OK).
    /// </summary>
    /// <param name="response">The failed response.</param>
    /// <returns>An exception carrying the response body and status code.</returns>
    private static HttpRequestException CreateRequestException(IRestResponse response)
    {
        // Formatting the enum value directly prints its name when defined and the
        // raw number otherwise, so the code is never lost from the message.
        return new HttpRequestException(string.Format(
            "Exception '{0}' with status code '{1}' occurred.",
            response.Content, response.StatusCode));
    }
}
我还没有尝试调用该代码,但它应该可以帮助您解决问题。
另外,用 Google 搜索 "twitch scrapers" 也可能找到你想要的结果。祝你搜寻顺利,好运。希望这对你有帮助。 ;)