Java 中的余弦相似度
Cosine Similarity in Java
我想计算一个矩阵的行的相似度,比如D,但是结果不正确!!我的代码有什么问题?
在计算矩阵 U 中行的相似度时,我做了如下操作。
结果显示,各行之间的相似度只有 1.0 和 -1.0 两种取值,我认为这是错误的!!
{
/**
 * Wraps {@code array} in a {@code Matrix} and prints, for each row index,
 * the list returned by {@code cosineSimilarity} for that row.
 *
 * @param args not read by this method
 * @throws Exception declared by the signature; nothing in this body throws it explicitly
 */
public void run(String[] args) throws Exception {
    // NOTE(review): `array` is not declared in this snippet; presumably a field of the enclosing class.
    Matrix A = new Matrix(array);
    int rowIndex = 0;
    while (rowIndex < A.getRowDimension()) {
        System.out.println("similar is : " + cosineSimilarity(rowIndex, A));
        rowIndex++;
    }
}
/**
 * Builds a list holding one value per row of {@code D}, each computed from
 * row {@code rowIndex} and the row being visited.
 *
 * The method is meant to return cosine similarities, but as written the
 * inner loop overwrites its three working variables on every column instead
 * of summing them (see the note inside the loop).
 *
 * @param rowIndex index of the reference row in {@code D}
 * @param D        matrix whose rows are compared
 * @return list with one entry per row of {@code D}, in row order
 */
private ArrayList cosineSimilarity(int rowIndex, Matrix D) {
// Declared once, outside the row loop, so they are never reset between rows.
double dotProduct = 0.0, firstNorm = 0.0, secondNorm = 0.0;
double cosinSimilarity;
ArrayList<Double> similarRows = new ArrayList<>();
for(int row = 0; row < D.getRowDimension(); row++){
for (int column = 0; column < D.getColumnDimension(); column++) {
// NOTE: `= +` is a plain assignment of a unary-plus expression, not the
// compound operator `+=`. Each iteration replaces the previous value, so
// after this loop only the last column's terms remain.
dotProduct = + (D.get(rowIndex, column) * D.get(row, column));
firstNorm = + pow(D.get(rowIndex, column),2);
secondNorm = + pow(D.get(row, column), 2);
// Matrix f = D.getMatrix(row, column);
}
// With only the last column's terms left this is x*y / (|x| * |y|),
// i.e. +1.0 or -1.0 (NaN when either last-column entry is 0).
// NOTE(review): pow/sqrt are presumably static imports of java.lang.Math - confirm.
cosinSimilarity = (dotProduct / (sqrt(firstNorm) * sqrt(secondNorm)));
similarRows.add(row, cosinSimilarity);
}
return similarRows;
}
}
结果是:
A is :
0.067174 -0.862994 -0.435024 0.123151 -0.214891 0.011754
0.502582 -0.205973 0.093513 0.031561 0.821020 0.145506
0.406919 -0.032555 0.413105 0.623333 -0.246395 -0.462002
0.394209 0.218539 -0.497640 -0.386091 -0.002859 -0.632551
0.571882 0.300883 -0.279673 0.132980 -0.354327 0.600810
0.308004 -0.271047 0.552712 -0.654632 -0.305748 0.064427
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [-1.0, -1.0, 1.0, 1.0, -1.0, -1.0]
similar is : [-1.0, -1.0, 1.0, 1.0, -1.0, -1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
您想计算给定行与矩阵中每一行之间的相似度。因此,内积和范数必须计算 getRowDimension 次。
但是这些累加变量的初始化位置不对——应把初始化移到遍历所有行的循环内部,这样每比较一行都会重新清零。
另外,累加应使用 += 而不是 = +(后者只是把带正号的右值赋给变量,并不会累加)!
private ArrayList cosineSimilarity(int rowIndex, Matrix D) {
ArrayList<Double> similarRows = new ArrayList<>();
for(int row = 0; row < D.getRowDimension(); row++){
double dotProduct = 0.0, firstNorm = 0.0, secondNorm = 0.0;
for (int column = 0; column < D.getColumnDimension(); column++) {
dotProduct += (D.get(rowIndex, column) * D.get(row, column));
firstNorm += pow(D.get(rowIndex, column),2);
secondNorm += pow(D.get(row, column), 2);
// Matrix f = D.getMatrix(row, column);
}
double cosinSimilarity = (dotProduct / (sqrt(firstNorm) * sqrt(secondNorm)));
similarRows.add(row, cosinSimilarity);
}
我想计算一个矩阵的行的相似度,比如D,但是结果不正确!!我的代码有什么问题? 在计算矩阵 U 中行的相似度时,我做了如下操作。 结果显示,行的相似度在1.0和-1.0之间,我认为这是错误的!!
{
/**
 * Wraps {@code array} in a {@code Matrix} and prints, for each row index,
 * the list returned by {@code cosineSimilarity} for that row.
 *
 * @param args not read by this method
 * @throws Exception declared by the signature; nothing in this body throws it explicitly
 */
public void run(String[] args) throws Exception {
    // NOTE(review): `array` is not declared in this snippet; presumably a field of the enclosing class.
    Matrix A = new Matrix(array);
    int rowIndex = 0;
    while (rowIndex < A.getRowDimension()) {
        System.out.println("similar is : " + cosineSimilarity(rowIndex, A));
        rowIndex++;
    }
}
/**
 * Builds a list holding one value per row of {@code D}, each computed from
 * row {@code rowIndex} and the row being visited.
 *
 * The method is meant to return cosine similarities, but as written the
 * inner loop overwrites its three working variables on every column instead
 * of summing them (see the note inside the loop).
 *
 * @param rowIndex index of the reference row in {@code D}
 * @param D        matrix whose rows are compared
 * @return list with one entry per row of {@code D}, in row order
 */
private ArrayList cosineSimilarity(int rowIndex, Matrix D) {
// Declared once, outside the row loop, so they are never reset between rows.
double dotProduct = 0.0, firstNorm = 0.0, secondNorm = 0.0;
double cosinSimilarity;
ArrayList<Double> similarRows = new ArrayList<>();
for(int row = 0; row < D.getRowDimension(); row++){
for (int column = 0; column < D.getColumnDimension(); column++) {
// NOTE: `= +` is a plain assignment of a unary-plus expression, not the
// compound operator `+=`. Each iteration replaces the previous value, so
// after this loop only the last column's terms remain.
dotProduct = + (D.get(rowIndex, column) * D.get(row, column));
firstNorm = + pow(D.get(rowIndex, column),2);
secondNorm = + pow(D.get(row, column), 2);
// Matrix f = D.getMatrix(row, column);
}
// With only the last column's terms left this is x*y / (|x| * |y|),
// i.e. +1.0 or -1.0 (NaN when either last-column entry is 0).
// NOTE(review): pow/sqrt are presumably static imports of java.lang.Math - confirm.
cosinSimilarity = (dotProduct / (sqrt(firstNorm) * sqrt(secondNorm)));
similarRows.add(row, cosinSimilarity);
}
return similarRows;
}
}
结果是:
A is :
0.067174 -0.862994 -0.435024 0.123151 -0.214891 0.011754
0.502582 -0.205973 0.093513 0.031561 0.821020 0.145506
0.406919 -0.032555 0.413105 0.623333 -0.246395 -0.462002
0.394209 0.218539 -0.497640 -0.386091 -0.002859 -0.632551
0.571882 0.300883 -0.279673 0.132980 -0.354327 0.600810
0.308004 -0.271047 0.552712 -0.654632 -0.305748 0.064427
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [-1.0, -1.0, 1.0, 1.0, -1.0, -1.0]
similar is : [-1.0, -1.0, 1.0, 1.0, -1.0, -1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
similar is : [1.0, 1.0, -1.0, -1.0, 1.0, 1.0]
您想计算给定行与矩阵中每一行之间的相似度。因此,内积和范数必须计算 getRowDimension 次。
但是这些累加变量的初始化位置不对——应把初始化移到遍历所有行的循环内部,这样每比较一行都会重新清零。
另外,累加应使用 += 而不是 = +(后者只是把带正号的右值赋给变量,并不会累加)!
private ArrayList cosineSimilarity(int rowIndex, Matrix D) {
ArrayList<Double> similarRows = new ArrayList<>();
for(int row = 0; row < D.getRowDimension(); row++){
double dotProduct = 0.0, firstNorm = 0.0, secondNorm = 0.0;
for (int column = 0; column < D.getColumnDimension(); column++) {
dotProduct += (D.get(rowIndex, column) * D.get(row, column));
firstNorm += pow(D.get(rowIndex, column),2);
secondNorm += pow(D.get(row, column), 2);
// Matrix f = D.getMatrix(row, column);
}
double cosinSimilarity = (dotProduct / (sqrt(firstNorm) * sqrt(secondNorm)));
similarRows.add(row, cosinSimilarity);
}