什么是 C# 全模拟 PHP 函数 "similar_text()"?
What is C# full analog PHP function "similar_text()"?
什么是C#完全模拟PHP函数similar_text()
?
我试过这个代码
int ComputeLevenshteinDistance(string source, string target)
{
if ((source == null) || (target == null)) return 0;
if ((source.Length == 0) || (target.Length == 0)) return 0;
if (source == target) return source.Length;
int sourceWordCount = source.Length;
int targetWordCount = target.Length;
// Step 1
if (sourceWordCount == 0)
return targetWordCount;
if (targetWordCount == 0)
return sourceWordCount;
int[,] distance = new int[sourceWordCount + 1, targetWordCount + 1];
// Step 2
for (int i = 0; i <= sourceWordCount; distance[i, 0] = i++) ;
for (int j = 0; j <= targetWordCount; distance[0, j] = j++) ;
for (int i = 1; i <= sourceWordCount; i++)
{
for (int j = 1; j <= targetWordCount; j++)
{
// Step 3
int cost = (target[j - 1] == source[i - 1]) ? 0 : 1;
// Step 4
distance[i, j] = Math.Min(Math.Min(distance[i - 1, j] + 1, distance[i, j - 1] + 1), distance[i - 1, j - 1] + cost);
}
}
return distance[sourceWordCount, targetWordCount];
}
double CalculateSimilarity(string source, string target)
{
if ((source == null) || (target == null)) return 0.0;
if ((source.Length == 0) || (target.Length == 0)) return 0.0;
if (source == target) return 1.0;
int stepsToSame = ComputeLevenshteinDistance(source, target);
return (1.0 - ((double)stepsToSame / (double)Math.Max(source.Length, target.Length)));
}
结果值不相等 php。
PHP Similar_text() function:这会计算两个 string
之间的 相似度 ,如
中所述
Programming Classics: Implementing the World's Best Algorithms by
Oliver
(ISBN 0-131-00413-1).
请注意,此实现不像 Oliver 的伪代码那样使用堆栈,而是 递归调用 这可能会或可能不会加快整个过程。另请注意,此算法的复杂度为 O(N**3)
,其中 N
是最长字符串的长度。
首先,你必须实施编辑距离,比如 Levenstein 一个;让我们尽可能通用:
见
用于编辑序列和更多详细信息。
代码:
using System.Linq;
...
private static double EditDistance<T>(IEnumerable<T> from,
IEnumerable<T> to,
Func<T, double> insertCost,
Func<T, double> deleteCost,
Func<T, T, double> editCost) {
T[] source = from.ToArray();
T[] target = to.ToArray();
// Minimum cost so far
double[][] D = Enumerable
.Range(0, source.Length + 1)
.Select(line => new double[target.Length + 1])
.ToArray();
// Edge: all removes
double sum = 0.0;
for (int i = 1; i <= source.Length; ++i)
D[i][0] = (sum += deleteCost(source[i - 1]));
// Edge: all inserts
sum = 0.0;
for (int i = 1; i <= target.Length; ++i)
D[0][i] = (sum += insertCost(target[i - 1]));
// Having fit N - 1, K - 1 characters let's fit N, K
for (int i = 1; i <= source.Length; ++i)
for (int j = 1; j <= target.Length; ++j) {
// here we choose the operation with the least cost
double insert = D[i][j - 1] + insertCost(target[j - 1]);
double delete = D[i - 1][j] + deleteCost(source[i - 1]);
double edit = D[i - 1][j - 1] + editCost(source[i - 1], target[j - 1]);
D[i][j] = Math.Min(Math.Min(insert, delete), edit);
}
return D[source.Length][target.Length];
}
现在好写了Similar_text
例程:
public static double Similar_text(string left, string right) {
left = left ?? "";
right = right ?? "";
return left.Equals(right)
? 1.0
: 1.0 - EditDistance(left, right,
insert => 1.0,
delete => 1.0,
(from, to) => from == to ? 0.0 : 2.0)
/ Math.Max(left.Length, right.Length);
}
时间复杂度为 O(left.Length * right.Length)
或 O(N**2)
如果两个字符串的长度大致相等 (N
)
什么是C#完全模拟PHP函数similar_text()
?
我试过这个代码
int ComputeLevenshteinDistance(string source, string target)
{
if ((source == null) || (target == null)) return 0;
if ((source.Length == 0) || (target.Length == 0)) return 0;
if (source == target) return source.Length;
int sourceWordCount = source.Length;
int targetWordCount = target.Length;
// Step 1
if (sourceWordCount == 0)
return targetWordCount;
if (targetWordCount == 0)
return sourceWordCount;
int[,] distance = new int[sourceWordCount + 1, targetWordCount + 1];
// Step 2
for (int i = 0; i <= sourceWordCount; distance[i, 0] = i++) ;
for (int j = 0; j <= targetWordCount; distance[0, j] = j++) ;
for (int i = 1; i <= sourceWordCount; i++)
{
for (int j = 1; j <= targetWordCount; j++)
{
// Step 3
int cost = (target[j - 1] == source[i - 1]) ? 0 : 1;
// Step 4
distance[i, j] = Math.Min(Math.Min(distance[i - 1, j] + 1, distance[i, j - 1] + 1), distance[i - 1, j - 1] + cost);
}
}
return distance[sourceWordCount, targetWordCount];
}
double CalculateSimilarity(string source, string target)
{
if ((source == null) || (target == null)) return 0.0;
if ((source.Length == 0) || (target.Length == 0)) return 0.0;
if (source == target) return 1.0;
int stepsToSame = ComputeLevenshteinDistance(source, target);
return (1.0 - ((double)stepsToSame / (double)Math.Max(source.Length, target.Length)));
}
结果值不相等 php。
PHP Similar_text() function:这会计算两个 string
之间的 相似度 ,如
Programming Classics: Implementing the World's Best Algorithms by Oliver
(ISBN 0-131-00413-1).
请注意,此实现不像 Oliver 的伪代码那样使用堆栈,而是 递归调用 这可能会或可能不会加快整个过程。另请注意,此算法的复杂度为 O(N**3)
,其中 N
是最长字符串的长度。
首先,你必须实施编辑距离,比如 Levenstein 一个;让我们尽可能通用:
见
用于编辑序列和更多详细信息。
代码:
using System.Linq;
...
private static double EditDistance<T>(IEnumerable<T> from,
IEnumerable<T> to,
Func<T, double> insertCost,
Func<T, double> deleteCost,
Func<T, T, double> editCost) {
T[] source = from.ToArray();
T[] target = to.ToArray();
// Minimum cost so far
double[][] D = Enumerable
.Range(0, source.Length + 1)
.Select(line => new double[target.Length + 1])
.ToArray();
// Edge: all removes
double sum = 0.0;
for (int i = 1; i <= source.Length; ++i)
D[i][0] = (sum += deleteCost(source[i - 1]));
// Edge: all inserts
sum = 0.0;
for (int i = 1; i <= target.Length; ++i)
D[0][i] = (sum += insertCost(target[i - 1]));
// Having fit N - 1, K - 1 characters let's fit N, K
for (int i = 1; i <= source.Length; ++i)
for (int j = 1; j <= target.Length; ++j) {
// here we choose the operation with the least cost
double insert = D[i][j - 1] + insertCost(target[j - 1]);
double delete = D[i - 1][j] + deleteCost(source[i - 1]);
double edit = D[i - 1][j - 1] + editCost(source[i - 1], target[j - 1]);
D[i][j] = Math.Min(Math.Min(insert, delete), edit);
}
return D[source.Length][target.Length];
}
现在好写了Similar_text
例程:
public static double Similar_text(string left, string right) {
left = left ?? "";
right = right ?? "";
return left.Equals(right)
? 1.0
: 1.0 - EditDistance(left, right,
insert => 1.0,
delete => 1.0,
(from, to) => from == to ? 0.0 : 2.0)
/ Math.Max(left.Length, right.Length);
}
时间复杂度为 O(left.Length * right.Length)
或 O(N**2)
如果两个字符串的长度大致相等 (N
)