使用 C# 的 Azure 文本分析给出错误
Azure Text Analytics using C# giving errors
我正在尝试使用从 github 获得的代码:https://github.com/liamca/azure-search-machine-learning-text-analytics 并且索引的创建工作完美,但关键字部分给了我一个 403 - 禁止访问:访问被拒绝错误。这发生在 TextExtractionHelper class 的以下代码行中:
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
根据评论中的信息,我在 link: https://datamarket.azure.com/account/keys 创建了一个帐户,并使用了它提供的密钥,但出现了上述错误。
如果您不想从 github 下载,请使用以下代码:
class Program
{
static string searchServiceName = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/search-what-is-azure-search/
static string searchServiceAPIKey = "<removed>";
static string azureMLTextAnalyticsKey = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main(string[] args)
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = TextExtraction.ProcessText(azureMLTextAnalyticsKey, filetext);
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
下面是TextExtractionHelperclass:
/// <summary>
/// This is a sample program that shows how to use the Azure ML Text Analytics app (https://datamarket.azure.com/dataset/amla/text-analytics)
/// </summary>
public class TextExtraction
{
private const string ServiceBaseUri = "https://api.datamarket.azure.com/";
public static KeyPhraseResult ProcessText(string accountKey, string inputText)
{
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
string inputTextEncoded = HttpUtility.UrlEncode(inputText);
httpClient.BaseAddress = new Uri(ServiceBaseUri);
string creds = "AccountKey:" + accountKey;
string authorizationHeader = "Basic " + Convert.ToBase64String(Encoding.ASCII.GetBytes(creds));
httpClient.DefaultRequestHeaders.Add("Authorization", authorizationHeader);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
// get key phrases
string keyPhrasesRequest = "data.ashx/amla/text-analytics/v1/GetKeyPhrases?Text=" + inputTextEncoded;
Task<HttpResponseMessage> responseTask = httpClient.GetAsync(keyPhrasesRequest);
responseTask.Wait();
HttpResponseMessage response = responseTask.Result;
Task<string> contentTask = response.Content.ReadAsStringAsync();
contentTask.Wait();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
keyPhraseResult = JsonConvert.DeserializeObject<KeyPhraseResult>(content);
}
return keyPhraseResult;
}
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
/// <summary>
/// Class to hold result of Sentiment call
/// </summary>
public class SentimentResult
{
public double Score { get; set; }
}
/// <summary>
/// Class to hold result of Language detection call
/// </summary>
public class LanguageResult
{
public bool UnknownLanguage { get; set; }
public IList<DetectedLanguage> DetectedLanguages { get; set; }
}
/// <summary>
/// Class to hold information about a single detected language
/// </summary>
public class DetectedLanguage
{
public string Name { get; set; }
/// <summary>
/// This is the short ISO 639-1 standard form of representing
/// all languages. The short form is a 2 letter representation of the language.
/// en = English, fr = French for example
/// </summary>
public string Iso6391Name { get; set; }
public double Score { get; set; }
}
更新
在使用不同的示例代码并尝试将它们组合在一起几个小时之后,我终于得到了一些 "kind of" 的东西。这是我的所有代码:
class Program
{
static string searchServiceName = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/search-what-is-azure-search/
static string searchServiceAPIKey = "<removed>";
//static string azureMLTextAnalyticsKey = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main()
{
MakeRequests();
Console.WriteLine("Hit ENTER to exit...");
Console.ReadLine();
}
static async void MakeRequests()
{
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = await TextExtraction.ProcessText();
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
这是我的 TextExtractionHelper class:
public class TextExtraction
{
static string azureMLTextAnalyticsKey = "<Removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
private const string ServiceBaseUri = "https://westus.api.cognitive.microsoft.com/";
public static async Task<KeyPhraseResult> ProcessText()
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
httpClient.BaseAddress = new Uri(ServiceBaseUri);
// Request headers.
httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", azureMLTextAnalyticsKey);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
"{\"id\":\"1\",\"text\":\"" + filetext + "\"},]}");
//byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
// "{\"id\":\"1\",\"text\":\"Build great search experiences for your web and mobile apps." +
// "Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios." +
// "Reduce complexity with a fully managed service. " +
// "Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.\"}," +
// "]}");
// Detect key phrases:
var keyPhrasesRequest = "text/analytics/v2.0/keyPhrases";
//var response = await CallEndpoint(httpClient, uri, byteData);
// get key phrases
using (var getcontent = new ByteArrayContent(byteData))
{
getcontent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
var response = await httpClient.PostAsync(keyPhrasesRequest, getcontent);
Task<string> contentTask = response.Content.ReadAsStringAsync();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
keyPhraseResult = JsonConvert.DeserializeObject<KeyPhraseResult>(content);
//return await response.Content.ReadAsStringAsync();
}
}
return keyPhraseResult;
}
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
/// <summary>
/// Class to hold result of Sentiment call
/// </summary>
public class SentimentResult
{
public double Score { get; set; }
}
/// <summary>
/// Class to hold result of Language detection call
/// </summary>
public class LanguageResult
{
public bool UnknownLanguage { get; set; }
public IList<DetectedLanguage> DetectedLanguages { get; set; }
}
/// <summary>
/// Class to hold information about a single detected language
/// </summary>
public class DetectedLanguage
{
public string Name { get; set; }
/// <summary>
/// This is the short ISO 639-1 standard form of representing
/// all languages. The short form is a 2 letter representation of the language.
/// en = English, fr = French for example
/// </summary>
public string Iso6391Name { get; set; }
public double Score { get; set; }
}
所以我现在可以从文本中提取关键短语了!但是,现在我遇到了一个问题,似乎 JSON 字符串没有被反序列化,我的 keyPhraseResult 现在得到一个空值。
我错过了什么?
如果有人能提供帮助,我将不胜感激。
谢谢!
所以我成功了!在这个 link: 的帮助下,我发布了它以简化我现在发生问题的地方。
所以这段代码的作用如下:
- 在 Azure 中创建一个名为 textanalytics 的索引。
- 正在创建所提供文本的 JSON 字符串。
- 检索关键短语并将它们添加到上面第 1 点中创建的索引中。
以下是我的全部代码,以防对其他人有帮助:
(请确保您添加了来自 Nuget 包的相关引用:Microsoft.Azure.Search 和 Newtonsoft.Json)
Program.cs(这是一个控制台应用程序):
using Microsoft.Azure.Search;
using System;
using System.Configuration;
using System.IO;
using System.Threading;
namespace AzureSearchTextAnalytics
{
class Program
{
static string searchServiceName = "<removed>"; // This is the Azure Search service name that you create in Azure
static string searchServiceAPIKey = "<removed>"; // This is the Primary key that is provided after creating a Azure Search Service
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main()
{
MakeRequests();
Console.WriteLine("Hit ENTER to exit...");
Console.ReadLine();
}
static async void MakeRequests()
{
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = await TextExtraction.ProcessText();
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
}
我的TextExtractionHelper.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
using System.Web;
using Newtonsoft.Json;
using System.Configuration; // get it from http://www.newtonsoft.com/json
using Newtonsoft.Json.Linq;
namespace AzureSearchTextAnalytics
{
/// </summary>
public class TextExtraction
{
static string azureMLTextAnalyticsKey = "<removed>"; // This key you will get when you have added TextAnalytics in Azure.
private const string ServiceBaseUri = "https://westus.api.cognitive.microsoft.com/"; //This you will get when you have added TextAnalytics in Azure
public static async Task<KeyPhraseResult> ProcessText()
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
httpClient.BaseAddress = new Uri(ServiceBaseUri);
// Request headers.
httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", azureMLTextAnalyticsKey);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
"{\"id\":\"1\",\"text\":\"" + filetext + "\"},]}");
// Detect key phrases:
var keyPhrasesRequest = "text/analytics/v2.0/keyPhrases";
// get key phrases
using (var getcontent = new ByteArrayContent(byteData))
{
getcontent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
var response = await httpClient.PostAsync(keyPhrasesRequest, getcontent);
Task<string> contentTask = response.Content.ReadAsStringAsync();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
var result = JsonConvert.DeserializeObject<RootObject>(content);
keyPhraseResult.KeyPhrases = result.documents[0].keyPhrases;
}
}
return keyPhraseResult;
}
}
public class Documents
{
public List<string> keyPhrases { get; set; }
public string id { get; set; }
}
public class RootObject
{
public List<Documents> documents { get; set; }
public List<object> errors { get; set; }
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
}
AzureSearch.cs:
using Microsoft.Azure.Search;
using Microsoft.Azure.Search.Models;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AzureSearchTextAnalytics
{
public class AzureSearch
{
public static void CreateIndex(SearchServiceClient serviceClient, string indexName)
{
if (serviceClient.Indexes.Exists(indexName))
{
serviceClient.Indexes.Delete(indexName);
}
var definition = new Index()
{
Name = indexName,
Fields = new[]
{
new Field("fileId", DataType.String) { IsKey = true },
new Field("fileText", DataType.String) { IsSearchable = true, IsFilterable = false, IsSortable = false, IsFacetable = false },
new Field("keyPhrases", DataType.Collection(DataType.String)) { IsSearchable = true, IsFilterable = true, IsFacetable = true }
}
};
serviceClient.Indexes.Create(definition);
}
public static void UploadDocuments(SearchIndexClient indexClient, string fileId, KeyPhraseResult keyPhraseResult)
{
List<IndexAction> indexOperations = new List<IndexAction>();
var doc = new Document();
doc.Add("fileId", fileId);
doc.Add("keyPhrases", keyPhraseResult.KeyPhrases.ToList());
indexOperations.Add(IndexAction.Upload(doc));
try
{
indexClient.Documents.Index(new IndexBatch(indexOperations));
}
catch (IndexBatchException e)
{
// Sometimes when your Search service is under load, indexing will fail for some of the documents in
// the batch. Depending on your application, you can take compensating actions like delaying and
// retrying. For this simple demo, we just log the failed document keys and continue.
Console.WriteLine(
"Failed to index some of the documents: {0}",
String.Join(", ", e.IndexingResults.Where(r => !r.Succeeded).Select(r => r.Key)));
}
}
public static void SearchDocuments(SearchIndexClient indexClient, string searchText)
{
// Search using the supplied searchText and output documents that match
try
{
var sp = new SearchParameters();
DocumentSearchResult<OCRTextIndex> response = indexClient.Documents.Search<OCRTextIndex>(searchText, sp);
foreach (SearchResult<OCRTextIndex> result in response.Results)
{
Console.WriteLine("File ID: {0}", result.Document.fileId);
Console.WriteLine("Key Phrases: {0}", string.Join(",", result.Document.keyPhrases));
}
}
catch (Exception e)
{
Console.WriteLine("Failed search: {0}", e.Message.ToString());
}
}
}
}
DataModel.cs
using Microsoft.Azure.Search.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AzureSearchTextAnalytics
{
[SerializePropertyNamesAsCamelCase]
public class OCRTextIndex
{
public string fileId { get; set; }
public string[] keyPhrases { get; set; }
}
}
我正在尝试使用从 github 获得的代码:https://github.com/liamca/azure-search-machine-learning-text-analytics 并且索引的创建工作完美,但关键字部分给了我一个 403 - 禁止访问:访问被拒绝错误。这发生在 TextExtractionHelper class 的以下代码行中:
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
根据评论中的信息,我在 link: https://datamarket.azure.com/account/keys 创建了一个帐户,并使用了它提供的密钥,但出现了上述错误。
如果您不想从 github 下载,请使用以下代码:
class Program
{
static string searchServiceName = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/search-what-is-azure-search/
static string searchServiceAPIKey = "<removed>";
static string azureMLTextAnalyticsKey = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main(string[] args)
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = TextExtraction.ProcessText(azureMLTextAnalyticsKey, filetext);
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
下面是TextExtractionHelperclass:
/// <summary>
/// This is a sample program that shows how to use the Azure ML Text Analytics app (https://datamarket.azure.com/dataset/amla/text-analytics)
/// </summary>
public class TextExtraction
{
private const string ServiceBaseUri = "https://api.datamarket.azure.com/";
public static KeyPhraseResult ProcessText(string accountKey, string inputText)
{
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
string inputTextEncoded = HttpUtility.UrlEncode(inputText);
httpClient.BaseAddress = new Uri(ServiceBaseUri);
string creds = "AccountKey:" + accountKey;
string authorizationHeader = "Basic " + Convert.ToBase64String(Encoding.ASCII.GetBytes(creds));
httpClient.DefaultRequestHeaders.Add("Authorization", authorizationHeader);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
// get key phrases
string keyPhrasesRequest = "data.ashx/amla/text-analytics/v1/GetKeyPhrases?Text=" + inputTextEncoded;
Task<HttpResponseMessage> responseTask = httpClient.GetAsync(keyPhrasesRequest);
responseTask.Wait();
HttpResponseMessage response = responseTask.Result;
Task<string> contentTask = response.Content.ReadAsStringAsync();
contentTask.Wait();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
keyPhraseResult = JsonConvert.DeserializeObject<KeyPhraseResult>(content);
}
return keyPhraseResult;
}
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
/// <summary>
/// Class to hold result of Sentiment call
/// </summary>
public class SentimentResult
{
public double Score { get; set; }
}
/// <summary>
/// Class to hold result of Language detection call
/// </summary>
public class LanguageResult
{
public bool UnknownLanguage { get; set; }
public IList<DetectedLanguage> DetectedLanguages { get; set; }
}
/// <summary>
/// Class to hold information about a single detected language
/// </summary>
public class DetectedLanguage
{
public string Name { get; set; }
/// <summary>
/// This is the short ISO 639-1 standard form of representing
/// all languages. The short form is a 2 letter representation of the language.
/// en = English, fr = French for example
/// </summary>
public string Iso6391Name { get; set; }
public double Score { get; set; }
}
更新
在使用不同的示例代码并尝试将它们组合在一起几个小时之后,我终于得到了一些 "kind of" 的东西。这是我的所有代码:
class Program
{
static string searchServiceName = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/search-what-is-azure-search/
static string searchServiceAPIKey = "<removed>";
//static string azureMLTextAnalyticsKey = "<removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main()
{
MakeRequests();
Console.WriteLine("Hit ENTER to exit...");
Console.ReadLine();
}
static async void MakeRequests()
{
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = await TextExtraction.ProcessText();
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
这是我的 TextExtractionHelper class:
public class TextExtraction
{
static string azureMLTextAnalyticsKey = "<Removed>"; // Learn more here: https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-text-analytics/
private const string ServiceBaseUri = "https://westus.api.cognitive.microsoft.com/";
public static async Task<KeyPhraseResult> ProcessText()
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
httpClient.BaseAddress = new Uri(ServiceBaseUri);
// Request headers.
httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", azureMLTextAnalyticsKey);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
"{\"id\":\"1\",\"text\":\"" + filetext + "\"},]}");
//byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
// "{\"id\":\"1\",\"text\":\"Build great search experiences for your web and mobile apps." +
// "Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios." +
// "Reduce complexity with a fully managed service. " +
// "Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.\"}," +
// "]}");
// Detect key phrases:
var keyPhrasesRequest = "text/analytics/v2.0/keyPhrases";
//var response = await CallEndpoint(httpClient, uri, byteData);
// get key phrases
using (var getcontent = new ByteArrayContent(byteData))
{
getcontent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
var response = await httpClient.PostAsync(keyPhrasesRequest, getcontent);
Task<string> contentTask = response.Content.ReadAsStringAsync();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
keyPhraseResult = JsonConvert.DeserializeObject<KeyPhraseResult>(content);
//return await response.Content.ReadAsStringAsync();
}
}
return keyPhraseResult;
}
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
/// <summary>
/// Class to hold result of Sentiment call
/// </summary>
public class SentimentResult
{
public double Score { get; set; }
}
/// <summary>
/// Class to hold result of Language detection call
/// </summary>
public class LanguageResult
{
public bool UnknownLanguage { get; set; }
public IList<DetectedLanguage> DetectedLanguages { get; set; }
}
/// <summary>
/// Class to hold information about a single detected language
/// </summary>
public class DetectedLanguage
{
public string Name { get; set; }
/// <summary>
/// This is the short ISO 639-1 standard form of representing
/// all languages. The short form is a 2 letter representation of the language.
/// en = English, fr = French for example
/// </summary>
public string Iso6391Name { get; set; }
public double Score { get; set; }
}
所以我现在可以从文本中提取关键短语了!但是,现在我遇到了一个问题,似乎 JSON 字符串没有被反序列化,我的 keyPhraseResult 现在得到一个空值。
我错过了什么?
如果有人能提供帮助,我将不胜感激。
谢谢!
所以我成功了!在这个 link:
所以这段代码的作用如下:
- 在 Azure 中创建一个名为 textanalytics 的索引。
- 正在创建所提供文本的 JSON 字符串。
- 检索关键短语并将它们添加到上面第 1 点中创建的索引中。
以下是我的全部代码,以防对其他人有帮助:
(请确保您添加了来自 Nuget 包的相关引用:Microsoft.Azure.Search 和 Newtonsoft.Json)
Program.cs(这是一个控制台应用程序):
using Microsoft.Azure.Search;
using System;
using System.Configuration;
using System.IO;
using System.Threading;
namespace AzureSearchTextAnalytics
{
class Program
{
static string searchServiceName = "<removed>"; // This is the Azure Search service name that you create in Azure
static string searchServiceAPIKey = "<removed>"; // This is the Primary key that is provided after creating a Azure Search Service
static string indexName = "textanalytics";
static SearchServiceClient serviceClient = new SearchServiceClient(searchServiceName, new SearchCredentials(searchServiceAPIKey));
static SearchIndexClient indexClient = serviceClient.Indexes.GetClient(indexName);
static void Main()
{
MakeRequests();
Console.WriteLine("Hit ENTER to exit...");
Console.ReadLine();
}
static async void MakeRequests()
{
// Note, this will create a new Azure Search Index for the text and the key phrases
Console.WriteLine("Creating Azure Search index...");
AzureSearch.CreateIndex(serviceClient, indexName);
// Apply the Machine Learning Text Extraction to retrieve only the key phrases
Console.WriteLine("Extracting key phrases from processed text... \r\n");
KeyPhraseResult keyPhraseResult = await TextExtraction.ProcessText();
Console.WriteLine("Found the following phrases... \r\n");
foreach (var phrase in keyPhraseResult.KeyPhrases)
Console.WriteLine(phrase);
// Take the resulting key phrases to a new Azure Search Index
// It is highly recommended that you upload documents in batches rather
// individually like is done here
Console.WriteLine("Uploading extracted text to Azure Search...\r\n");
AzureSearch.UploadDocuments(indexClient, "1", keyPhraseResult);
Console.WriteLine("Wait 5 seconds for content to become searchable...\r\n");
Thread.Sleep(5000);
// Execute a test search
Console.WriteLine("Execute Search...");
AzureSearch.SearchDocuments(indexClient, "Azure Search");
Console.WriteLine("All done. Press any key to continue.");
Console.ReadLine();
}
}
}
我的TextExtractionHelper.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
using System.Web;
using Newtonsoft.Json;
using System.Configuration; // get it from http://www.newtonsoft.com/json
using Newtonsoft.Json.Linq;
namespace AzureSearchTextAnalytics
{
/// </summary>
public class TextExtraction
{
static string azureMLTextAnalyticsKey = "<removed>"; // This key you will get when you have added TextAnalytics in Azure.
private const string ServiceBaseUri = "https://westus.api.cognitive.microsoft.com/"; //This you will get when you have added TextAnalytics in Azure
public static async Task<KeyPhraseResult> ProcessText()
{
string filetext = "Build great search experiences for your web and mobile apps. " +
"Many applications use search as the primary interaction pattern for their users. When it comes to search, user expectations are high. They expect great relevance, suggestions, near-instantaneous responses, multiple languages, faceting, and more. Azure Search makes it easy to add powerful and sophisticated search capabilities to your website or application. The integrated Microsoft natural language stack, also used in Bing and Office, has been improved over 16 years of development. Quickly and easily tune search results, and construct rich, fine-tuned ranking models to tie search results to business goals. Reliable throughput and storage provide fast search indexing and querying to support time-sensitive search scenarios. " +
"Reduce complexity with a fully managed service. " +
"Azure Search removes the complexity of setting up and managing your own search index. This fully managed service helps you avoid the hassle of dealing with index corruption, service availability, scaling, and service updates. Create multiple indexes with no incremental cost per index. Easily scale up or down as the traffic and data volume of your application changes.";
KeyPhraseResult keyPhraseResult = new KeyPhraseResult();
using (var httpClient = new HttpClient())
{
httpClient.BaseAddress = new Uri(ServiceBaseUri);
// Request headers.
httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", azureMLTextAnalyticsKey);
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
byte[] byteData = Encoding.UTF8.GetBytes("{\"documents\":[" +
"{\"id\":\"1\",\"text\":\"" + filetext + "\"},]}");
// Detect key phrases:
var keyPhrasesRequest = "text/analytics/v2.0/keyPhrases";
// get key phrases
using (var getcontent = new ByteArrayContent(byteData))
{
getcontent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
var response = await httpClient.PostAsync(keyPhrasesRequest, getcontent);
Task<string> contentTask = response.Content.ReadAsStringAsync();
string content = contentTask.Result;
if (!response.IsSuccessStatusCode)
{
throw new Exception("Call to get key phrases failed with HTTP status code: " +
response.StatusCode + " and contents: " + content);
}
var result = JsonConvert.DeserializeObject<RootObject>(content);
keyPhraseResult.KeyPhrases = result.documents[0].keyPhrases;
}
}
return keyPhraseResult;
}
}
public class Documents
{
public List<string> keyPhrases { get; set; }
public string id { get; set; }
}
public class RootObject
{
public List<Documents> documents { get; set; }
public List<object> errors { get; set; }
}
/// <summary>
/// Class to hold result of Key Phrases call
/// </summary>
public class KeyPhraseResult
{
public List<string> KeyPhrases { get; set; }
}
}
AzureSearch.cs:
using Microsoft.Azure.Search;
using Microsoft.Azure.Search.Models;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AzureSearchTextAnalytics
{
public class AzureSearch
{
public static void CreateIndex(SearchServiceClient serviceClient, string indexName)
{
if (serviceClient.Indexes.Exists(indexName))
{
serviceClient.Indexes.Delete(indexName);
}
var definition = new Index()
{
Name = indexName,
Fields = new[]
{
new Field("fileId", DataType.String) { IsKey = true },
new Field("fileText", DataType.String) { IsSearchable = true, IsFilterable = false, IsSortable = false, IsFacetable = false },
new Field("keyPhrases", DataType.Collection(DataType.String)) { IsSearchable = true, IsFilterable = true, IsFacetable = true }
}
};
serviceClient.Indexes.Create(definition);
}
public static void UploadDocuments(SearchIndexClient indexClient, string fileId, KeyPhraseResult keyPhraseResult)
{
List<IndexAction> indexOperations = new List<IndexAction>();
var doc = new Document();
doc.Add("fileId", fileId);
doc.Add("keyPhrases", keyPhraseResult.KeyPhrases.ToList());
indexOperations.Add(IndexAction.Upload(doc));
try
{
indexClient.Documents.Index(new IndexBatch(indexOperations));
}
catch (IndexBatchException e)
{
// Sometimes when your Search service is under load, indexing will fail for some of the documents in
// the batch. Depending on your application, you can take compensating actions like delaying and
// retrying. For this simple demo, we just log the failed document keys and continue.
Console.WriteLine(
"Failed to index some of the documents: {0}",
String.Join(", ", e.IndexingResults.Where(r => !r.Succeeded).Select(r => r.Key)));
}
}
public static void SearchDocuments(SearchIndexClient indexClient, string searchText)
{
// Search using the supplied searchText and output documents that match
try
{
var sp = new SearchParameters();
DocumentSearchResult<OCRTextIndex> response = indexClient.Documents.Search<OCRTextIndex>(searchText, sp);
foreach (SearchResult<OCRTextIndex> result in response.Results)
{
Console.WriteLine("File ID: {0}", result.Document.fileId);
Console.WriteLine("Key Phrases: {0}", string.Join(",", result.Document.keyPhrases));
}
}
catch (Exception e)
{
Console.WriteLine("Failed search: {0}", e.Message.ToString());
}
}
}
}
DataModel.cs
using Microsoft.Azure.Search.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AzureSearchTextAnalytics
{
[SerializePropertyNamesAsCamelCase]
public class OCRTextIndex
{
public string fileId { get; set; }
public string[] keyPhrases { get; set; }
}
}