比较数组的 "similarity"(相似度)?
Comparing arrays for "similarity"?
我正在尝试比较相同大小的数组。
给定以下数组:
我正在寻找一种算法,能告诉我哪个数组与输入最 "similar"(相似)。我知道 "similar" 这个词不是很具体,但我不知道如何更具体。
例如以下与输入非常相似。
下面有点类似
下面的很不一样
您可以对数组应用平滑核,然后在其上计算 L2 范数(欧氏距离)。
这通常用于比较,例如神经脉冲序列或其他连续信号。
http://www.cs.utah.edu/~suresh/papers/kerneld/kerneld.pdf
你没有指定语言...我碰巧有 C++ 代码(可能不是最有效的)。
首先,您根据所需的核宽度对向量进行平滑处理,核宽度可以按尺度或所需的 "blur"(模糊)程度等来设定。例如:
下面代码的输出(按预期运行):
riveale@rv-mba:~/tmpdir$ g++ -std=c++11 test.cpp -o test.exe
riveale@rv-mba:~/tmpdir$ ./test.exe
Distance [1] to [2]: [31.488026] (should be far)
Distance [2] to [3]: [26.591297] (should be far)
Distance [1] to [3]: [12.468342] (should be closer)
和代码(test.cpp):
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cmath>
// Gaussian kernel weight between two sample positions of a vector.
//
// sourcetime: index of the sample that contributes to the sum.
// thistime:   index of the sample currently being computed.
// Returns exp(-(d / tau)^2) where d = sourcetime - thistime and tau = 5.0.
// The weight is 1.0 when both indices coincide and is symmetric in its
// two arguments.
double gauss_kernel_funct(const size_t& sourcetime, const size_t& thistime)
{
    const double tauval = 5.0; // width of kernel

    // Convert to double BEFORE subtracting. Both indices are unsigned, so a
    // direct `sourcetime - thistime` wraps around to a huge positive value
    // whenever sourcetime < thistime; that forced the weight to 0 and made
    // the kernel one-sided instead of symmetric.
    const double dist =
        (static_cast<double>(sourcetime) - static_cast<double>(thistime)) / tauval;

    // Squared-exponential decay away from the centre point.
    return std::exp(-(dist * dist));
}
// Smooths a vector by weighting every sample against every sample.
//
// Output element t is the sum over all u of v1[u] * gauss_kernel_funct(u, t).
// The returned vector has the same length as the input.
std::vector<double> convolvegauss( const std::vector<double>& v1)
{
    const size_t count = v1.size();
    std::vector<double> smoothed(count, 0.0);

    for (size_t target = 0; target < count; ++target)
    {
        // Accumulate directly into the output slot for this position.
        double& slot = smoothed[target];
        for (size_t source = 0; source < count; ++source)
        {
            const double weight = gauss_kernel_funct(source, target);
            slot += v1[source] * weight;
        }
    }

    return smoothed;
}
// Euclidean (L2) distance between two vectors of equal length.
//
// If the lengths differ, an error message is written to stderr and the
// process terminates with exit status 1.
double eucliddist( const std::vector<double>& v1, const std::vector<double>& v2 )
{
    if (v1.size() != v2.size())
    {
        fprintf(stderr, "ERROR v1!=v2 sizes\n");
        exit(1);
    }

    // Walk both vectors in lock-step, summing the squared per-element gaps.
    double squared_total = 0.0;
    auto rhs = v2.begin();
    for (auto lhs = v1.begin(); lhs != v1.end(); ++lhs, ++rhs)
    {
        const double delta = *lhs - *rhs;
        squared_total += delta * delta;
    }

    return sqrt(squared_total);
}
double vectdist( const std::vector<double>& v1, const std::vector<double>& v2 )
{
std::vector<double> convolved1 = convolvegauss( v1 );
std::vector<double> convolved2 = convolvegauss( v2 );
return (eucliddist( convolved1, convolved2 ));
}
// Demo driver: prints the vectdist value for each pair of three sample vectors.
int main()
{
    // Sample inputs. Per the labels printed below, vectors 1 and 3 are
    // expected to be closer to each other than either is to vector 2.
    const std::vector<double> first{1.0, 32.0, 10.0, 5.0, 2.0};
    const std::vector<double> second{2.0, 3.0, 10.0, 22.0, 2.0};
    const std::vector<double> third{2.0, 20.0, 17.0, 1.0, 2.0};

    // vectdist smooths both vectors and then takes the Euclidean distance
    // between the smoothed vectors; report it for every pair.
    printf("Distance [%d] to [%d]: [%lf] (should be far)\n", 1, 2, vectdist(first, second));
    printf("Distance [%d] to [%d]: [%lf] (should be far)\n", 2, 3, vectdist(second, third));
    printf("Distance [%d] to [%d]: [%lf] (should be closer)\n", 1, 3, vectdist(first, third));

    return 0;
}
我正在尝试比较相同大小的数组。
给定以下数组:
我正在寻找一种算法,能告诉我哪个数组与输入最 "similar"(相似)。我知道 "similar" 这个词不是很具体,但我不知道如何更具体。
例如以下与输入非常相似。
下面有点类似
下面的很不一样
您可以对数组应用平滑核,然后在其上计算 L2 范数(欧氏距离)。
这通常用于比较,例如神经脉冲序列或其他连续信号。
http://www.cs.utah.edu/~suresh/papers/kerneld/kerneld.pdf
你没有指定语言...我碰巧有 C++ 代码(可能不是最有效的)。
首先,您根据所需的核宽度对向量进行平滑处理,核宽度可以按尺度或所需的 "blur"(模糊)程度等来设定。例如:
下面代码的输出(按预期运行):
riveale@rv-mba:~/tmpdir$ g++ -std=c++11 test.cpp -o test.exe
riveale@rv-mba:~/tmpdir$ ./test.exe
Distance [1] to [2]: [31.488026] (should be far)
Distance [2] to [3]: [26.591297] (should be far)
Distance [1] to [3]: [12.468342] (should be closer)
和代码(test.cpp):
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cmath>
// Gaussian kernel weight between two sample positions of a vector.
//
// sourcetime: index of the sample that contributes to the sum.
// thistime:   index of the sample currently being computed.
// Returns exp(-(d / tau)^2) where d = sourcetime - thistime and tau = 5.0.
// The weight is 1.0 when both indices coincide and is symmetric in its
// two arguments.
double gauss_kernel_funct(const size_t& sourcetime, const size_t& thistime)
{
    const double tauval = 5.0; // width of kernel

    // Convert to double BEFORE subtracting. Both indices are unsigned, so a
    // direct `sourcetime - thistime` wraps around to a huge positive value
    // whenever sourcetime < thistime; that forced the weight to 0 and made
    // the kernel one-sided instead of symmetric.
    const double dist =
        (static_cast<double>(sourcetime) - static_cast<double>(thistime)) / tauval;

    // Squared-exponential decay away from the centre point.
    return std::exp(-(dist * dist));
}
// Smooths a vector by weighting every sample against every sample.
//
// Output element t is the sum over all u of v1[u] * gauss_kernel_funct(u, t).
// The returned vector has the same length as the input.
std::vector<double> convolvegauss( const std::vector<double>& v1)
{
    const size_t count = v1.size();
    std::vector<double> smoothed(count, 0.0);

    for (size_t target = 0; target < count; ++target)
    {
        // Accumulate directly into the output slot for this position.
        double& slot = smoothed[target];
        for (size_t source = 0; source < count; ++source)
        {
            const double weight = gauss_kernel_funct(source, target);
            slot += v1[source] * weight;
        }
    }

    return smoothed;
}
// Euclidean (L2) distance between two vectors of equal length.
//
// If the lengths differ, an error message is written to stderr and the
// process terminates with exit status 1.
double eucliddist( const std::vector<double>& v1, const std::vector<double>& v2 )
{
    if (v1.size() != v2.size())
    {
        fprintf(stderr, "ERROR v1!=v2 sizes\n");
        exit(1);
    }

    // Walk both vectors in lock-step, summing the squared per-element gaps.
    double squared_total = 0.0;
    auto rhs = v2.begin();
    for (auto lhs = v1.begin(); lhs != v1.end(); ++lhs, ++rhs)
    {
        const double delta = *lhs - *rhs;
        squared_total += delta * delta;
    }

    return sqrt(squared_total);
}
double vectdist( const std::vector<double>& v1, const std::vector<double>& v2 )
{
std::vector<double> convolved1 = convolvegauss( v1 );
std::vector<double> convolved2 = convolvegauss( v2 );
return (eucliddist( convolved1, convolved2 ));
}
// Demo driver: prints the vectdist value for each pair of three sample vectors.
int main()
{
    // Sample inputs. Per the labels printed below, vectors 1 and 3 are
    // expected to be closer to each other than either is to vector 2.
    const std::vector<double> first{1.0, 32.0, 10.0, 5.0, 2.0};
    const std::vector<double> second{2.0, 3.0, 10.0, 22.0, 2.0};
    const std::vector<double> third{2.0, 20.0, 17.0, 1.0, 2.0};

    // vectdist smooths both vectors and then takes the Euclidean distance
    // between the smoothed vectors; report it for every pair.
    printf("Distance [%d] to [%d]: [%lf] (should be far)\n", 1, 2, vectdist(first, second));
    printf("Distance [%d] to [%d]: [%lf] (should be far)\n", 2, 3, vectdist(second, third));
    printf("Distance [%d] to [%d]: [%lf] (should be closer)\n", 1, 3, vectdist(first, third));

    return 0;
}