如何从 k-d 树实现 K-NN 分类?
How to implement K-NN classification from a k-d tree?
我想在不使用任何库的情况下,用 k-d 树实现 K-NN 分类。目前我已经写出了 k-d 树的代码,但还不明白:用训练集建好树之后,该如何找出 k 个最近邻。
k-d树代码:
#include<bits/stdc++.h>
using namespace std;
// Number of coordinates per point, i.e. the dimensionality of the tree (2-D here).
const int k = 2;

/**
 * One node of the k-d tree: a point plus links to two child subtrees.
 *
 * Every member carries a default initializer so that a Node created without
 * an explicit initializer (`Node n;` or `new Node`) never exposes
 * indeterminate coordinates or garbage child pointers.
 */
struct Node
{
    int point[k] = {};      // coordinates of the stored point, zero-filled by default
    Node *left = nullptr;   // left child subtree; nullptr when absent
    Node *right = nullptr;  // right child subtree; nullptr when absent
};
/**
 * Allocates a tree node on the heap holding a copy of the given point.
 *
 * @param arr  Array supplying the coordinates; exactly the first k values
 *             are read and copied into the node.
 * @return     Pointer to the new node with both child links empty. The node
 *             is created with `new`; this function does not release it.
 */
struct Node* newNode(int arr[])
{
    Node* created = new Node;

    // A fresh node starts out as a leaf.
    created->left = nullptr;
    created->right = nullptr;

    int* coords = created->point;
    for (int axis = 0; axis < k; ++axis)
        coords[axis] = arr[axis];

    return created;
}
/**
 * Inserts a point into the subtree rooted at `root` and returns that root.
 *
 * Walks downwards from `root`. At each level the coordinate selected by
 * (level % k) is compared: a strictly smaller value descends to the left
 * child, anything else (including an equal value) descends to the right
 * child. The new node is attached at the first empty link reached.
 *
 * @param root   Root of the (sub)tree, or a null pointer for an empty tree.
 * @param point  Coordinates to insert; k values are read from it.
 * @param depth  Depth of `root` within the whole tree; it selects the axis
 *               used for the first comparison.
 * @return       `root` itself, or the newly created node when `root` was null.
 */
Node *insertRec(Node *root, int point[], unsigned depth)
{
    // `link` always refers to the pointer that would have to be overwritten
    // to attach a node at the current position (initially the root itself).
    Node **link = &root;

    for (unsigned level = depth; *link != nullptr; ++level)
    {
        Node *current = *link;
        const unsigned axis = level % k;

        if (point[axis] < current->point[axis])
            link = &current->left;
        else
            link = &current->right;
    }

    *link = newNode(point);
    return root;
}
/**
 * Inserts a point into the k-d tree, starting the descent at depth 0.
 *
 * @param root   Current root of the tree, or a null pointer if it is empty.
 * @param point  Coordinates of the point to insert.
 * @return       Whatever insertRec returns for depth 0, to be stored as the
 *               tree's root by the caller.
 */
Node* insert(Node *root, int point[])
{
    const unsigned topLevel = 0;
    return insertRec(root, point, topLevel);
}
// driver
int main()
{
struct Node *root = NULL;
int points[][k] = {{3, 6}, {17, 15}, {13, 15}, {6, 12},
{9, 1}, {2, 7}, {10, 19}};
int n = sizeof(points)/sizeof(points[0]);
for (int i=0; i<n; i++)
root = insert(root, points[i]);
return 0;
}
首先,不要使用 <bits/stdc++.h>。这种写法是错误的:它不是标准头文件,只是 GCC 的内部实现细节,不具备可移植性。
要找到 k 个最近的元素,需要按照“由近到远”的顺序遍历这棵树:先访问离查询点最近的节点;如果找到的元素还不够 k 个,再去遍历更远的节点。
这里就不写具体代码了,只给出伪代码(因为我很久以前实现过一次):
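list l; # result list: candidate nearest points, kept sorted by distance to the query point
heap p; # heap of tree nodes still to be visited, ordered by distance to the query point
p.push(root)
while (!p.empty())
{
node = p.pop(); # take the closest node that has not been visited yet
d = distance(point, node); # smallest possible distance from the query point to this node
if(l.empty() or distance(point, l.back()) > d)
{
add(node->left); # schedule both children for traversal (presumably pushed onto heap p)
add(node->right);
l.push(points); # add the point(s) stored in the current node to the result list
}
l.pop_elements(k); # trim the list so only the k closest points remain (here k is the number of neighbours, not the dimension constant k of the tree code)
}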
我想在不使用任何库的情况下,用 k-d 树实现 K-NN 分类。目前我已经写出了 k-d 树的代码,但还不明白:用训练集建好树之后,该如何找出 k 个最近邻。k-d 树代码:
#include<bits/stdc++.h>
using namespace std;
// Number of coordinates per point, i.e. the dimensionality of the tree (2-D here).
const int k = 2;

/**
 * One node of the k-d tree: a point plus links to two child subtrees.
 *
 * Every member carries a default initializer so that a Node created without
 * an explicit initializer (`Node n;` or `new Node`) never exposes
 * indeterminate coordinates or garbage child pointers.
 */
struct Node
{
    int point[k] = {};      // coordinates of the stored point, zero-filled by default
    Node *left = nullptr;   // left child subtree; nullptr when absent
    Node *right = nullptr;  // right child subtree; nullptr when absent
};
/**
 * Allocates a tree node on the heap holding a copy of the given point.
 *
 * @param arr  Array supplying the coordinates; exactly the first k values
 *             are read and copied into the node.
 * @return     Pointer to the new node with both child links empty. The node
 *             is created with `new`; this function does not release it.
 */
struct Node* newNode(int arr[])
{
    Node* created = new Node;

    // A fresh node starts out as a leaf.
    created->left = nullptr;
    created->right = nullptr;

    int* coords = created->point;
    for (int axis = 0; axis < k; ++axis)
        coords[axis] = arr[axis];

    return created;
}
/**
 * Inserts a point into the subtree rooted at `root` and returns that root.
 *
 * Walks downwards from `root`. At each level the coordinate selected by
 * (level % k) is compared: a strictly smaller value descends to the left
 * child, anything else (including an equal value) descends to the right
 * child. The new node is attached at the first empty link reached.
 *
 * @param root   Root of the (sub)tree, or a null pointer for an empty tree.
 * @param point  Coordinates to insert; k values are read from it.
 * @param depth  Depth of `root` within the whole tree; it selects the axis
 *               used for the first comparison.
 * @return       `root` itself, or the newly created node when `root` was null.
 */
Node *insertRec(Node *root, int point[], unsigned depth)
{
    // `link` always refers to the pointer that would have to be overwritten
    // to attach a node at the current position (initially the root itself).
    Node **link = &root;

    for (unsigned level = depth; *link != nullptr; ++level)
    {
        Node *current = *link;
        const unsigned axis = level % k;

        if (point[axis] < current->point[axis])
            link = &current->left;
        else
            link = &current->right;
    }

    *link = newNode(point);
    return root;
}
/**
 * Inserts a point into the k-d tree, starting the descent at depth 0.
 *
 * @param root   Current root of the tree, or a null pointer if it is empty.
 * @param point  Coordinates of the point to insert.
 * @return       Whatever insertRec returns for depth 0, to be stored as the
 *               tree's root by the caller.
 */
Node* insert(Node *root, int point[])
{
    const unsigned topLevel = 0;
    return insertRec(root, point, topLevel);
}
// driver
int main()
{
struct Node *root = NULL;
int points[][k] = {{3, 6}, {17, 15}, {13, 15}, {6, 12},
{9, 1}, {2, 7}, {10, 19}};
int n = sizeof(points)/sizeof(points[0]);
for (int i=0; i<n; i++)
root = insert(root, points[i]);
return 0;
}
首先,不要使用 <bits/stdc++.h>。这种写法是错误的:它不是标准头文件,只是 GCC 的内部实现细节,不具备可移植性。
要找到 k 个最近的元素,需要按照“由近到远”的顺序遍历这棵树:先访问离查询点最近的节点;如果找到的元素还不够 k 个,再去遍历更远的节点。
这里就不写具体代码了,只给出伪代码(因为我很久以前实现过一次):
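list l; # result list: candidate nearest points, kept sorted by distance to the query point
heap p; # heap of tree nodes still to be visited, ordered by distance to the query point
p.push(root)
while (!p.empty())
{
node = p.pop(); # take the closest node that has not been visited yet
d = distance(point, node); # smallest possible distance from the query point to this node
if(l.empty() or distance(point, l.back()) > d)
{
add(node->left); # schedule both children for traversal (presumably pushed onto heap p)
add(node->right);
l.push(points); # add the point(s) stored in the current node to the result list
}
l.pop_elements(k); # trim the list so only the k closest points remain (here k is the number of neighbours, not the dimension constant k of the tree code)
}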