如何找到从string1到string2的最大子串
How to find biggest substring from string1 into string2
假设我有两个字符串 string1
和 string2
.
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
我需要找到匹配 string2
的 string1
的最大子串。
这里最大的子串将是 "canadian geese"
匹配 string2
.
我怎样才能找到它?我尝试将 string1
分解为 char[]
并找到单词然后合并匹配的单词但是我的 objective.
失败了
经典循环方法 - 结果包括 te space 在 geese "canadian geese "
之后
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
string result = "";
for (int i = 0; i < string1.Length; i++)
{
for (int j = 0; j < string1.Length - i; j++)
{
//add .Trim() here if you want to ignore space characters
string searchpattern = string1.Substring(i, j);
if (string2.IndexOf(searchpattern, StringComparison.OrdinalIgnoreCase) > -1 && searchpattern.Length > result.Length)
{
result = searchpattern;
}
}
}
https://dotnetfiddle.net/q3rHjI
旁注:
canadian
和 Canadian
不相等,因此如果要搜索不区分大小写,则必须使用 StringComparison.OrdinalIgnoreCase
看看下面的代码https://dotnetfiddle.net/aPyw3o
public class Program {
static IEnumerable<string> substrings(string s, int length) {
for (int i = 0 ; i + length <= s.Length; i++) {
var ss = s.Substring(i, length);
if (!(ss.StartsWith(" ") || ss.EndsWith(" ")))
yield return ss;
}
}
public static void Main()
{
int count = 0;
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
string result = null;
for (int i = string1.Length; i>0 && string.IsNullOrEmpty(result); i--) {
foreach (string s in substrings(string1, i)) {
count++;
if (string2.IndexOf(s, StringComparison.CurrentCultureIgnoreCase) >= 0) {
result = s;
break;
}
}
}
if (string.IsNullOrEmpty(result))
Console.WriteLine("no common substrings found");
else
Console.WriteLine("'" + result + "'");
Console.WriteLine(count);
}
}
substrings
方法 returns 字符串 s
的所有子字符串,长度为 length
(对于 yield
请查看文档https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/yield) 我们跳过以 space 开头或结尾的子字符串,因为我们不希望 whitespaces 使子字符串比实际长度长)
外层循环遍历子字符串的所有可能长度值,从最长的(即string1.Length
)到最短的(即1
)。然后检查每个找到的长度为 i
的子字符串,如果它也是 string2
的子字符串。如果是这种情况,我们可以停止,因为不再有公共子串,因为我们在之前的迭代中检查了所有较长的子串。但当然可能还有其他长度为 i
的公共子串
我将再添加一个,使用 span/readonlymemory 这样您就可以避免分配当前答案创建的所有字符串。请注意,我没有对开始 space 或结束 space 进行任何检查,因为这似乎不是问题的必要条件。这确实会进行不区分大小写的搜索,如果您不想这样做,可以通过使用内置的 indexof 并删除不区分大小写的比较来提高效率。
static void Main(string[] _)
{
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
var longest = FindLongestMatchingSubstring(string1, string2);
Console.WriteLine(longest);
}
static string FindLongestMatchingSubstring(string lhs, string rhs)
{
var left = lhs.AsMemory();
var right = rhs.AsMemory();
ReadOnlyMemory<char> longest = ReadOnlyMemory<char>.Empty;
for (int i = 0; i < left.Length; ++i)
{
foreach (var block in FindMatchingSubSpans(left, i, right))
{
if (block.Length > longest.Length)
longest = block;
}
}
if (longest.IsEmpty)
return string.Empty;
return longest.ToString();
}
static IEnumerable<ReadOnlyMemory<char>> FindMatchingSubSpans(ReadOnlyMemory<char> source, int pos, ReadOnlyMemory<char> matchFrom)
{
int lastMatch = 0;
for (int i = pos; i < source.Length; ++i)
{
var ch = source.Span[i];
int match = IndexOfChar(matchFrom, lastMatch, ch);
if (-1 != match)
{
lastMatch = match + 1;
int end = i;
while (++end < source.Length && ++match < matchFrom.Length)
{
char lhs = source.Span[end];
char rhs = matchFrom.Span[match];
if (lhs != rhs && lhs != (char.IsUpper(rhs) ? char.ToLower(rhs) : char.ToUpper(rhs)))
{
break;
}
}
yield return source.Slice(i, end - i);
}
}
}
static int IndexOfChar(ReadOnlyMemory<char> source, int pos, char ch)
{
char alt = char.IsUpper(ch) ? char.ToLower(ch) : char.ToUpper(ch);
for (int i = pos; i < source.Length; ++i)
{
char m = source.Span[i];
if (m == ch || m == alt)
return i;
}
return -1;
}
假设我有两个字符串 string1
和 string2
.
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
我需要找到匹配 string2
的 string1
的最大子串。
这里最大的子串将是 "canadian geese"
匹配 string2
.
我怎样才能找到它?我尝试将 string1
分解为 char[]
并找到单词然后合并匹配的单词但是我的 objective.
经典循环方法 - 结果包括 te space 在 geese "canadian geese "
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
string result = "";
for (int i = 0; i < string1.Length; i++)
{
for (int j = 0; j < string1.Length - i; j++)
{
//add .Trim() here if you want to ignore space characters
string searchpattern = string1.Substring(i, j);
if (string2.IndexOf(searchpattern, StringComparison.OrdinalIgnoreCase) > -1 && searchpattern.Length > result.Length)
{
result = searchpattern;
}
}
}
https://dotnetfiddle.net/q3rHjI
旁注:
canadian
和 Canadian
不相等,因此如果要搜索不区分大小写,则必须使用 StringComparison.OrdinalIgnoreCase
看看下面的代码https://dotnetfiddle.net/aPyw3o
public class Program {
static IEnumerable<string> substrings(string s, int length) {
for (int i = 0 ; i + length <= s.Length; i++) {
var ss = s.Substring(i, length);
if (!(ss.StartsWith(" ") || ss.EndsWith(" ")))
yield return ss;
}
}
public static void Main()
{
int count = 0;
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
string result = null;
for (int i = string1.Length; i>0 && string.IsNullOrEmpty(result); i--) {
foreach (string s in substrings(string1, i)) {
count++;
if (string2.IndexOf(s, StringComparison.CurrentCultureIgnoreCase) >= 0) {
result = s;
break;
}
}
}
if (string.IsNullOrEmpty(result))
Console.WriteLine("no common substrings found");
else
Console.WriteLine("'" + result + "'");
Console.WriteLine(count);
}
}
substrings
方法 returns 字符串 s
的所有子字符串,长度为 length
(对于 yield
请查看文档https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/yield) 我们跳过以 space 开头或结尾的子字符串,因为我们不希望 whitespaces 使子字符串比实际长度长)
外层循环遍历子字符串的所有可能长度值,从最长的(即string1.Length
)到最短的(即1
)。然后检查每个找到的长度为 i
的子字符串,如果它也是 string2
的子字符串。如果是这种情况,我们可以停止,因为不再有公共子串,因为我们在之前的迭代中检查了所有较长的子串。但当然可能还有其他长度为 i
我将再添加一个,使用 span/readonlymemory 这样您就可以避免分配当前答案创建的所有字符串。请注意,我没有对开始 space 或结束 space 进行任何检查,因为这似乎不是问题的必要条件。这确实会进行不区分大小写的搜索,如果您不想这样做,可以通过使用内置的 indexof 并删除不区分大小写的比较来提高效率。
static void Main(string[] _)
{
var string1 = "images of canadian geese goslings";
var string2 = "Canadian geese with goslings pictures to choose from, with no signup needed";
var longest = FindLongestMatchingSubstring(string1, string2);
Console.WriteLine(longest);
}
static string FindLongestMatchingSubstring(string lhs, string rhs)
{
var left = lhs.AsMemory();
var right = rhs.AsMemory();
ReadOnlyMemory<char> longest = ReadOnlyMemory<char>.Empty;
for (int i = 0; i < left.Length; ++i)
{
foreach (var block in FindMatchingSubSpans(left, i, right))
{
if (block.Length > longest.Length)
longest = block;
}
}
if (longest.IsEmpty)
return string.Empty;
return longest.ToString();
}
static IEnumerable<ReadOnlyMemory<char>> FindMatchingSubSpans(ReadOnlyMemory<char> source, int pos, ReadOnlyMemory<char> matchFrom)
{
int lastMatch = 0;
for (int i = pos; i < source.Length; ++i)
{
var ch = source.Span[i];
int match = IndexOfChar(matchFrom, lastMatch, ch);
if (-1 != match)
{
lastMatch = match + 1;
int end = i;
while (++end < source.Length && ++match < matchFrom.Length)
{
char lhs = source.Span[end];
char rhs = matchFrom.Span[match];
if (lhs != rhs && lhs != (char.IsUpper(rhs) ? char.ToLower(rhs) : char.ToUpper(rhs)))
{
break;
}
}
yield return source.Slice(i, end - i);
}
}
}
static int IndexOfChar(ReadOnlyMemory<char> source, int pos, char ch)
{
char alt = char.IsUpper(ch) ? char.ToLower(ch) : char.ToUpper(ch);
for (int i = pos; i < source.Length; ++i)
{
char m = source.Span[i];
if (m == ch || m == alt)
return i;
}
return -1;
}