如何为字符串相似度算法实现数组列表输入?
How can I implement array list input for a string similarity algorithm?
我实现了 Jaro-Winkler 算法。该算法接受两个字符串:源字符串和目标字符串。目标字符串来自用户输入,源字符串取自一个数组(如 source1[0])。如何让 Jaro-Winkler 算法接受数组列表(ArrayList)作为输入?请看下面的代码:
/// <summary>
/// Jaro-Winkler similarity between two strings, compared character by
/// character (case-sensitive, ordinal).
/// </summary>
public static class JaroWinklerDistance
{
    // Jaro scores at or below this value are returned as-is, without the
    // common-prefix boost.
    private static readonly double mWeightThreshold = 0.7;

    // Maximum number of leading characters that can contribute to the
    // common-prefix boost.
    private static readonly int mNumChars = 4;

    /// <summary>
    /// Returns the Jaro-Winkler distance, defined as
    /// <c>1.0 - proximity(source, target)</c>.
    /// </summary>
    /// <param name="source">First string to compare.</param>
    /// <param name="target">Second string to compare.</param>
    /// <returns>0.0 when the proximity is 1.0, growing as the strings diverge.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static double distance(string source, string target)
    {
        // Validate here as well so the exception names this method's parameters.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (target == null)
        {
            throw new ArgumentNullException("target");
        }

        return 1.0 - proximity(source, target);
    }

    /// <summary>
    /// Returns the Jaro-Winkler proximity of the two strings.
    /// </summary>
    /// <param name="aString1">First string to compare.</param>
    /// <param name="aString2">Second string to compare.</param>
    /// <returns>
    /// A score from 0.0 (no characters in common) to 1.0. Two empty strings
    /// score 1.0; an empty string against a non-empty one scores 0.0.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static double proximity(string aString1, string aString2)
    {
        if (aString1 == null)
        {
            throw new ArgumentNullException("aString1");
        }
        if (aString2 == null)
        {
            throw new ArgumentNullException("aString2");
        }

        int lLen1 = aString1.Length;
        int lLen2 = aString2.Length;
        if (lLen1 == 0)
        {
            return lLen2 == 0 ? 1.0 : 0.0;
        }

        // Two equal characters only count as a match when their positions
        // are no further apart than this range.
        int lSearchRange = Math.Max(0, Math.Max(lLen1, lLen2) / 2 - 1);

        bool[] lMatched1 = new bool[lLen1];
        bool[] lMatched2 = new bool[lLen2];

        // Pass 1: greedily pair each character of aString1 with the first
        // unused equal character of aString2 inside the search window.
        int lNumCommon = 0;
        for (int i = 0; i < lLen1; ++i)
        {
            int lStart = Math.Max(0, i - lSearchRange);
            int lEnd = Math.Min(i + lSearchRange + 1, lLen2);
            for (int j = lStart; j < lEnd; ++j)
            {
                if (lMatched2[j] || aString1[i] != aString2[j])
                {
                    continue;
                }

                lMatched1[i] = true;
                lMatched2[j] = true;
                ++lNumCommon;
                break;
            }
        }

        if (lNumCommon == 0)
        {
            return 0.0;
        }

        // Pass 2: walk the matched characters of both strings in order and
        // count the positions where they disagree. Both strings have exactly
        // lNumCommon matched flags, so k never runs past the end of lMatched2.
        int lNumHalfTransposed = 0;
        int k = 0;
        for (int i = 0; i < lLen1; ++i)
        {
            if (!lMatched1[i])
            {
                continue;
            }

            while (!lMatched2[k])
            {
                ++k;
            }

            if (aString1[i] != aString2[k])
            {
                ++lNumHalfTransposed;
            }

            ++k;
        }

        // Each transposition is seen twice (once from each side), hence the halving.
        int lNumTransposed = lNumHalfTransposed / 2;

        double lNumCommonD = lNumCommon;
        double lWeight = (lNumCommonD / lLen1
                          + lNumCommonD / lLen2
                          + (lNumCommon - lNumTransposed) / lNumCommonD) / 3.0;

        if (lWeight <= mWeightThreshold)
        {
            return lWeight;
        }

        // Winkler adjustment: reward a shared prefix of up to mNumChars characters.
        int lMax = Math.Min(mNumChars, Math.Min(aString1.Length, aString2.Length));
        int lPos = 0;
        while (lPos < lMax && aString1[lPos] == aString2[lPos])
        {
            ++lPos;
        }

        if (lPos == 0)
        {
            return lWeight;
        }

        return lWeight + 0.1 * lPos * (1.0 - lWeight);
    }
}
上面的代码是一个静态类,我需要让它支持数组列表输入,并按 Jaro-Winkler 算法对数组列表中的元素进行比较。下面是我的主类(Program):
/// <summary>
/// Console driver: repeatedly reads a target string and prints its
/// Jaro-Winkler proximity to a fixed source string as a percentage.
/// </summary>
class Program
{
    /// <summary>
    /// Prompts for target strings until standard input is exhausted.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string[] source1 = new string[]
        {
            "xyz technology solutions"
        };

        // Every target entered so far. Fully qualified so the snippet does not
        // depend on a using directive for System.Collections.Generic.
        var numbersInput = new System.Collections.Generic.List<string>();

        while (true)
        {
            Console.Write("Please enter target string: ");
            string target = Console.ReadLine();

            // ReadLine returns null once input is closed (EOF / redirected
            // input exhausted); stop instead of dereferencing null.
            if (target == null)
            {
                break;
            }

            numbersInput.Add(target);
            Console.WriteLine("jarowinkler::{0}%", JaroWinklerDistance.proximity(source1[0].ToLower(), target.ToLower()) * 100);
        }
    }
}
所以我需要让这段代码从数组列表中获取输入,把列表中的内容拆分为单词,再将源单词与目标单词逐一比较,并给出匹配百分比。
您最好给出期望的用法示例。按我的理解,您想要的是类似下面这样的调用:
// Proposed call shape only: meanProximity is a suggested helper, not a member
// of the JaroWinklerDistance class as posted.
ArrayList targets = new ArrayList { "word1", "word2", "word3" };
double distPercentage = JaroWinklerDistance.meanProximity("sourceWord", targets);
在这种情况下,您只需遍历 ArrayList 中的元素,将每一项转换为字符串后分别调用 proximity 即可。
顺便说一句,最好使用
List<string>
而不是 ArrayList,因为您处理的全部是字符串。
我实现了 Jaro-Winkler 算法。该算法接受两个字符串:源字符串和目标字符串。目标字符串来自用户输入,源字符串取自一个数组(如 source1[0])。如何让 Jaro-Winkler 算法接受数组列表(ArrayList)作为输入?请看下面的代码:
/// <summary>
/// Jaro-Winkler similarity between two strings, compared character by
/// character (case-sensitive, ordinal).
/// </summary>
public static class JaroWinklerDistance
{
    // Jaro scores at or below this value are returned as-is, without the
    // common-prefix boost.
    private static readonly double mWeightThreshold = 0.7;

    // Maximum number of leading characters that can contribute to the
    // common-prefix boost.
    private static readonly int mNumChars = 4;

    /// <summary>
    /// Returns the Jaro-Winkler distance, defined as
    /// <c>1.0 - proximity(source, target)</c>.
    /// </summary>
    /// <param name="source">First string to compare.</param>
    /// <param name="target">Second string to compare.</param>
    /// <returns>0.0 when the proximity is 1.0, growing as the strings diverge.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static double distance(string source, string target)
    {
        // Validate here as well so the exception names this method's parameters.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (target == null)
        {
            throw new ArgumentNullException("target");
        }

        return 1.0 - proximity(source, target);
    }

    /// <summary>
    /// Returns the Jaro-Winkler proximity of the two strings.
    /// </summary>
    /// <param name="aString1">First string to compare.</param>
    /// <param name="aString2">Second string to compare.</param>
    /// <returns>
    /// A score from 0.0 (no characters in common) to 1.0. Two empty strings
    /// score 1.0; an empty string against a non-empty one scores 0.0.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static double proximity(string aString1, string aString2)
    {
        if (aString1 == null)
        {
            throw new ArgumentNullException("aString1");
        }
        if (aString2 == null)
        {
            throw new ArgumentNullException("aString2");
        }

        int lLen1 = aString1.Length;
        int lLen2 = aString2.Length;
        if (lLen1 == 0)
        {
            return lLen2 == 0 ? 1.0 : 0.0;
        }

        // Two equal characters only count as a match when their positions
        // are no further apart than this range.
        int lSearchRange = Math.Max(0, Math.Max(lLen1, lLen2) / 2 - 1);

        bool[] lMatched1 = new bool[lLen1];
        bool[] lMatched2 = new bool[lLen2];

        // Pass 1: greedily pair each character of aString1 with the first
        // unused equal character of aString2 inside the search window.
        int lNumCommon = 0;
        for (int i = 0; i < lLen1; ++i)
        {
            int lStart = Math.Max(0, i - lSearchRange);
            int lEnd = Math.Min(i + lSearchRange + 1, lLen2);
            for (int j = lStart; j < lEnd; ++j)
            {
                if (lMatched2[j] || aString1[i] != aString2[j])
                {
                    continue;
                }

                lMatched1[i] = true;
                lMatched2[j] = true;
                ++lNumCommon;
                break;
            }
        }

        if (lNumCommon == 0)
        {
            return 0.0;
        }

        // Pass 2: walk the matched characters of both strings in order and
        // count the positions where they disagree. Both strings have exactly
        // lNumCommon matched flags, so k never runs past the end of lMatched2.
        int lNumHalfTransposed = 0;
        int k = 0;
        for (int i = 0; i < lLen1; ++i)
        {
            if (!lMatched1[i])
            {
                continue;
            }

            while (!lMatched2[k])
            {
                ++k;
            }

            if (aString1[i] != aString2[k])
            {
                ++lNumHalfTransposed;
            }

            ++k;
        }

        // Each transposition is seen twice (once from each side), hence the halving.
        int lNumTransposed = lNumHalfTransposed / 2;

        double lNumCommonD = lNumCommon;
        double lWeight = (lNumCommonD / lLen1
                          + lNumCommonD / lLen2
                          + (lNumCommon - lNumTransposed) / lNumCommonD) / 3.0;

        if (lWeight <= mWeightThreshold)
        {
            return lWeight;
        }

        // Winkler adjustment: reward a shared prefix of up to mNumChars characters.
        int lMax = Math.Min(mNumChars, Math.Min(aString1.Length, aString2.Length));
        int lPos = 0;
        while (lPos < lMax && aString1[lPos] == aString2[lPos])
        {
            ++lPos;
        }

        if (lPos == 0)
        {
            return lWeight;
        }

        return lWeight + 0.1 * lPos * (1.0 - lWeight);
    }
}
上面的代码是一个静态类,我需要让它支持数组列表输入,并按 Jaro-Winkler 算法对数组列表中的元素进行比较。下面是我的主类(Program):
/// <summary>
/// Console driver: repeatedly reads a target string and prints its
/// Jaro-Winkler proximity to a fixed source string as a percentage.
/// </summary>
class Program
{
    /// <summary>
    /// Prompts for target strings until standard input is exhausted.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string[] source1 = new string[]
        {
            "xyz technology solutions"
        };

        // Every target entered so far. Fully qualified so the snippet does not
        // depend on a using directive for System.Collections.Generic.
        var numbersInput = new System.Collections.Generic.List<string>();

        while (true)
        {
            Console.Write("Please enter target string: ");
            string target = Console.ReadLine();

            // ReadLine returns null once input is closed (EOF / redirected
            // input exhausted); stop instead of dereferencing null.
            if (target == null)
            {
                break;
            }

            numbersInput.Add(target);
            Console.WriteLine("jarowinkler::{0}%", JaroWinklerDistance.proximity(source1[0].ToLower(), target.ToLower()) * 100);
        }
    }
}
所以我需要让这段代码从数组列表中获取输入,把列表中的内容拆分为单词,再将源单词与目标单词逐一比较,并给出匹配百分比。
您最好给出期望的用法示例。按我的理解,您想要的是类似下面这样的调用:
// Proposed call shape only: meanProximity is a suggested helper, not a member
// of the JaroWinklerDistance class as posted.
ArrayList targets = new ArrayList { "word1", "word2", "word3" };
double distPercentage = JaroWinklerDistance.meanProximity("sourceWord", targets);
在这种情况下,您只需遍历 ArrayList 中的元素,将每一项转换为字符串后分别调用 proximity 即可。
顺便说一句,最好使用
List<string>
而不是 ArrayList,因为您处理的全部是字符串。