使用 Accord.Net 框架的决策树
Decision tree using Accord.Net framework
我正在尝试学习如何在 C# 中实现决策树。我找到的例子对我没有帮助。我想做的是为用户报告的问题提供解决方案,我有一个包含三千个问题和答案的数据库。我正在使用 dotnet 并尝试使用 Accord.net 框架,但一直没有弄明白,我需要一个示例或有关如何实现的提示。
正如我所说,我有一个包含旧问题和答案的数据库,将用于训练算法并为未来的问题提供可能的答案。
P.S. 我正在使用 Visual Studio Community 2017。
要理解如何将这个问题映射为决策树问题,首先要明确:每个问题都应该被视为决策树中的一个分裂节点(或决策节点),而每个问题的答案集合则构成该分裂节点的各个子节点。在您的这种情况下,我们会将每个问题视为数据表的一列,并将表的每一行视为对各个问题给出的一组答案。
第二个要注意的是决策树需要预测 "something",在你的情况下它是其中一个问题的答案,我们将其视为表中的最后一列。
由于问题中没有提供示例数据,因此我假设有一个医学数据库,其中包含一组问题和一组答案,如下面的代码所示。您可以轻松地将其映射到任何其他类型的数据,只要它也是一组问题和答案即可。在下面给出的示例中,要预测的列(问题)是诊断出的疾病。
[ - 此代码的灵感来自提供的示例 @ http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_ID3Learning.htm]
[- 先决条件:您需要安装 Accord.Net 软件包:Install-Package Accord.MachineLearning
]
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.Math.Optimization.Losses;
using System;
using System.Data;
using Accord.Statistics.Filters;
using System.Linq;
using Accord.Math;
namespace DecisionTreeExamples
{
/// <summary>
/// Worked example: trains an ID3 decision tree (Accord.NET) on a small table of
/// questions and answers, then uses the tree to predict the value of the last column
/// for a new set of answers.
/// </summary>
public class QandADecisionTree
{
    /// <summary>Name of the column the decision tree is trained to predict.</summary>
    private const string DiagnosisColumn = "Diagnosed Disease";

    /// <summary>
    /// Question columns, in table order. The same strings are used as keys when the table is
    /// built, when the training inputs are selected and when the query is assembled, so they
    /// are declared exactly once here.
    /// </summary>
    private static readonly string[] QuestionColumns =
    {
        "Are you above 50 years old?",                                  // Q1
        "What is your gender?",                                         // Q2
        "Do you have headache?",                                        // Q3
        "Has your blood pressure gone above 12 in the last 30 days?",   // Q4
        "Do you have a running nose?",                                  // Q5
        // NOTE(review): "soat" looks like a typo for "sore"; kept verbatim because it is a runtime column key.
        "Do you have a soat throat?",                                   // Q6
        "Do you have fever?",                                           // Q7
    };

    /// <summary>
    /// Training data: one entry per patient. Each entry holds the answers to
    /// <see cref="QuestionColumns"/> in the same order, followed by the diagnosis.
    /// </summary>
    private static readonly string[][] PatientHistory =
    {
        //      Q1     Q2        Q3     Q4     Q5     Q6     Q7     diagnosis
        new[] { "no",  "male",   "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "yes", "yes", "no",  "no",  "no",  "High Blood Pressure" },
        new[] { "yes", "male",   "yes", "yes", "no",  "no",  "no",  "High Blood Pressure" },
        new[] { "no",  "female", "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "no",  "yes", "no",  "no",  "no",  "High Blood Pressure" },
    };

    /// <summary>
    /// Builds the sample table, trains an ID3 decision tree on it, classifies one query
    /// and writes the predicted diagnosis to the console.
    /// </summary>
    public static void QandADecisionTreeExample()
    {
        // Step 1: no sample data was supplied with the question, so construct the table here.
        DataTable data = new DataTable("QandA");

        // Step 2: add the questions. Each question is a column; the last column is the prediction column.
        PopulateQuestions(data);

        // Step 3: add the history of 6 patients and their diagnoses (the training data).
        PopulateAnswers(data);

        // Step 4: convert the string table to its integer representation, which is what is fed to ID3 below.
        Codification codification = new Codification(data);
        DataTable codifiedData = codification.Apply(data);

        // Inputs are every column except the prediction column.
        int[][] input = codifiedData.ToJagged<int>(QuestionColumns);
        int[] predictions = codifiedData.ToArray<int>(DiagnosisColumn);

        // Step 5: learn the decision tree.
        ID3Learning decisionTreeLearningAlgorithm = new ID3Learning();
        DecisionTree decisionTree = decisionTreeLearningAlgorithm.Learn(input, predictions);

        // Step 6: use the tree on a test query (answers listed in QuestionColumns order).
        string[] answers = { "no", "male", "yes", "no", "yes", "yes", "yes" };
        int[] query = codification.Transform(ToColumnValuePairs(QuestionColumns, answers));
        int result = decisionTree.Decide(query);

        // Translate the integer class label back to the original diagnosis string.
        string diagnosis = codification.Revert(DiagnosisColumn, result);
        Console.WriteLine($"Diagnosed disease: {diagnosis}"); // Prints Common Cold
    }

    /// <summary>
    /// Adds one string column per question to the table, followed by the prediction column.
    /// </summary>
    /// <param name="data">The table that holds the questions (columns) and answers (rows).</param>
    private static void PopulateQuestions(DataTable data)
    {
        foreach (string question in QuestionColumns)
        {
            data.Columns.Add(new DataColumn(question, typeof(string)));
        }

        data.Columns.Add(new DataColumn(DiagnosisColumn, typeof(string))); // Prediction column
    }

    /// <summary>
    /// Adds one row per patient in <see cref="PatientHistory"/>. Every column except the last
    /// holds the patient's response to that question; the last column holds the diagnosis.
    /// </summary>
    /// <param name="data">The table to fill; its columns must already have been added.</param>
    private static void PopulateAnswers(DataTable data)
    {
        foreach (string[] record in PatientHistory)
        {
            DataRow patient = data.NewRow();

            // Assign by column name (not position) so the row does not depend on column order.
            for (int i = 0; i < QuestionColumns.Length; i++)
            {
                patient[QuestionColumns[i]] = record[i];
            }

            // The diagnosis is stored right after the answers in each record.
            patient[DiagnosisColumn] = record[QuestionColumns.Length];
            data.Rows.Add(patient);
        }
    }

    /// <summary>
    /// Pairs each column name with its value as rows of a two-column array:
    /// element [i, 0] is the column name and element [i, 1] is the value.
    /// </summary>
    /// <param name="columns">Column names.</param>
    /// <param name="values">Values, in the same order as <paramref name="columns"/>.</param>
    /// <returns>An array with one { column, value } row per column.</returns>
    private static string[,] ToColumnValuePairs(string[] columns, string[] values)
    {
        var pairs = new string[columns.Length, 2];
        for (int i = 0; i < columns.Length; i++)
        {
            pairs[i, 0] = columns[i];
            pairs[i, 1] = values[i];
        }

        return pairs;
    }
}
}
我正在尝试学习如何在 C# 中实现决策树。我找到的例子对我没有帮助。我想做的是为用户报告的问题提供解决方案,我有一个包含三千个问题和答案的数据库。我正在使用 dotnet 并尝试使用 Accord.net 框架,但一直没有弄明白,我需要一个示例或有关如何实现的提示。
正如我所说,我有一个包含旧问题和答案的数据库,将用于训练算法并为未来的问题提供可能的答案。
P.S. 我正在使用 Visual Studio Community 2017。
要理解如何将这个问题映射为决策树问题,首先要明确:每个问题都应该被视为决策树中的一个分裂节点(或决策节点),而每个问题的答案集合则构成该分裂节点的各个子节点。在您的这种情况下,我们会将每个问题视为数据表的一列,并将表的每一行视为对各个问题给出的一组答案。 第二个要注意的是决策树需要预测 "something",在你的情况下它是其中一个问题的答案,我们将其视为表中的最后一列。 由于问题中没有提供示例数据,因此我假设有一个医学数据库,其中包含一组问题和一组答案,如下面的代码所示。您可以轻松地将其映射到任何其他类型的数据,只要它也是一组问题和答案即可。在下面给出的示例中,要预测的列(问题)是诊断出的疾病。
[ - 此代码的灵感来自提供的示例 @ http://accord-framework.net/docs/html/T_Accord_MachineLearning_DecisionTrees_Learning_ID3Learning.htm]
[- 先决条件:您需要安装 Accord.Net 软件包:Install-Package Accord.MachineLearning
]
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.Math.Optimization.Losses;
using System;
using System.Data;
using Accord.Statistics.Filters;
using System.Linq;
using Accord.Math;
namespace DecisionTreeExamples
{
/// <summary>
/// Worked example: trains an ID3 decision tree (Accord.NET) on a small table of
/// questions and answers, then uses the tree to predict the value of the last column
/// for a new set of answers.
/// </summary>
public class QandADecisionTree
{
    /// <summary>Name of the column the decision tree is trained to predict.</summary>
    private const string DiagnosisColumn = "Diagnosed Disease";

    /// <summary>
    /// Question columns, in table order. The same strings are used as keys when the table is
    /// built, when the training inputs are selected and when the query is assembled, so they
    /// are declared exactly once here.
    /// </summary>
    private static readonly string[] QuestionColumns =
    {
        "Are you above 50 years old?",                                  // Q1
        "What is your gender?",                                         // Q2
        "Do you have headache?",                                        // Q3
        "Has your blood pressure gone above 12 in the last 30 days?",   // Q4
        "Do you have a running nose?",                                  // Q5
        // NOTE(review): "soat" looks like a typo for "sore"; kept verbatim because it is a runtime column key.
        "Do you have a soat throat?",                                   // Q6
        "Do you have fever?",                                           // Q7
    };

    /// <summary>
    /// Training data: one entry per patient. Each entry holds the answers to
    /// <see cref="QuestionColumns"/> in the same order, followed by the diagnosis.
    /// </summary>
    private static readonly string[][] PatientHistory =
    {
        //      Q1     Q2        Q3     Q4     Q5     Q6     Q7     diagnosis
        new[] { "no",  "male",   "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "yes", "yes", "no",  "no",  "no",  "High Blood Pressure" },
        new[] { "yes", "male",   "yes", "yes", "no",  "no",  "no",  "High Blood Pressure" },
        new[] { "no",  "female", "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "yes", "no",  "yes", "yes", "yes", "Common Cold" },
        new[] { "yes", "female", "no",  "yes", "no",  "no",  "no",  "High Blood Pressure" },
    };

    /// <summary>
    /// Builds the sample table, trains an ID3 decision tree on it, classifies one query
    /// and writes the predicted diagnosis to the console.
    /// </summary>
    public static void QandADecisionTreeExample()
    {
        // Step 1: no sample data was supplied with the question, so construct the table here.
        DataTable data = new DataTable("QandA");

        // Step 2: add the questions. Each question is a column; the last column is the prediction column.
        PopulateQuestions(data);

        // Step 3: add the history of 6 patients and their diagnoses (the training data).
        PopulateAnswers(data);

        // Step 4: convert the string table to its integer representation, which is what is fed to ID3 below.
        Codification codification = new Codification(data);
        DataTable codifiedData = codification.Apply(data);

        // Inputs are every column except the prediction column.
        int[][] input = codifiedData.ToJagged<int>(QuestionColumns);
        int[] predictions = codifiedData.ToArray<int>(DiagnosisColumn);

        // Step 5: learn the decision tree.
        ID3Learning decisionTreeLearningAlgorithm = new ID3Learning();
        DecisionTree decisionTree = decisionTreeLearningAlgorithm.Learn(input, predictions);

        // Step 6: use the tree on a test query (answers listed in QuestionColumns order).
        string[] answers = { "no", "male", "yes", "no", "yes", "yes", "yes" };
        int[] query = codification.Transform(ToColumnValuePairs(QuestionColumns, answers));
        int result = decisionTree.Decide(query);

        // Translate the integer class label back to the original diagnosis string.
        string diagnosis = codification.Revert(DiagnosisColumn, result);
        Console.WriteLine($"Diagnosed disease: {diagnosis}"); // Prints Common Cold
    }

    /// <summary>
    /// Adds one string column per question to the table, followed by the prediction column.
    /// </summary>
    /// <param name="data">The table that holds the questions (columns) and answers (rows).</param>
    private static void PopulateQuestions(DataTable data)
    {
        foreach (string question in QuestionColumns)
        {
            data.Columns.Add(new DataColumn(question, typeof(string)));
        }

        data.Columns.Add(new DataColumn(DiagnosisColumn, typeof(string))); // Prediction column
    }

    /// <summary>
    /// Adds one row per patient in <see cref="PatientHistory"/>. Every column except the last
    /// holds the patient's response to that question; the last column holds the diagnosis.
    /// </summary>
    /// <param name="data">The table to fill; its columns must already have been added.</param>
    private static void PopulateAnswers(DataTable data)
    {
        foreach (string[] record in PatientHistory)
        {
            DataRow patient = data.NewRow();

            // Assign by column name (not position) so the row does not depend on column order.
            for (int i = 0; i < QuestionColumns.Length; i++)
            {
                patient[QuestionColumns[i]] = record[i];
            }

            // The diagnosis is stored right after the answers in each record.
            patient[DiagnosisColumn] = record[QuestionColumns.Length];
            data.Rows.Add(patient);
        }
    }

    /// <summary>
    /// Pairs each column name with its value as rows of a two-column array:
    /// element [i, 0] is the column name and element [i, 1] is the value.
    /// </summary>
    /// <param name="columns">Column names.</param>
    /// <param name="values">Values, in the same order as <paramref name="columns"/>.</param>
    /// <returns>An array with one { column, value } row per column.</returns>
    private static string[,] ToColumnValuePairs(string[] columns, string[] values)
    {
        var pairs = new string[columns.Length, 2];
        for (int i = 0; i < columns.Length; i++)
        {
            pairs[i, 0] = columns[i];
            pairs[i, 1] = values[i];
        }

        return pairs;
    }
}
}