在 C# 中有效地连接两个有序序列
Join two ordered sequences efficiently in C#
假定这两个有序序列:
var outer = new char[] { 'a', 'b', 'b', 'c', 'd', 'd', 'e' };
var inner = new char[] { 'a', 'b', 'c', 'c', 'd', 'd' };
知道两个序列中的元素都是有序的,如何才能比 Enumerable.Join
更有效地进行内部连接以生成以下元组序列?
{ 'a', 'a' }
{ 'b', 'b' }
{ 'b', 'b' }
{ 'c', 'c' }
{ 'c', 'c' }
{ 'd', 'd' }
{ 'd', 'd' }
{ 'd', 'd' }
{ 'd', 'd' }
请注意,与仅从两个序列中生成不同元素的 Enumerable.Intersect
不同,此处的输出序列 returns 代表一对一、一对一的元素的每个组合对多或多对多关系。
语义与 SQL 服务器中的 INNER JOIN
非常相似。但是,更具体地说,我正在寻找具有 merge join 算法 (INNER MERGE JOIN
) 性能特征的 C# 实现,即 returns 和 IEnumerable
延迟执行。
所需的方法签名可能如下所示:
IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(
this IEnumerable<TOuter> outer,
IEnumerable<TInner> inner,
Func<TOuter, TKey> outerKeySelector,
Func<TInner, TKey> innerKeySelector,
Func<TOuter, TInner, TResult> resultSelector)
我不知道有任何现有的 Enumerable
扩展方法可以实现这一点,没有人应该比我花更多的时间来想出这个。
public static IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector)
{
if (outer == null)
throw new ArgumentNullException(nameof(outer));
if (inner == null)
throw new ArgumentNullException(nameof(inner));
if (outerKeySelector == null)
throw new ArgumentNullException(nameof(outerKeySelector));
if (innerKeySelector == null)
throw new ArgumentNullException(nameof(innerKeySelector));
if (resultSelector == null)
throw new ArgumentNullException(nameof(resultSelector));
return MergeJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector, Comparer<TKey>.Default);
}
public static IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector, IComparer<TKey> comparer)
{
if (outer == null)
throw new ArgumentNullException(nameof(outer));
if (inner == null)
throw new ArgumentNullException(nameof(inner));
if (outerKeySelector == null)
throw new ArgumentNullException(nameof(outerKeySelector));
if (innerKeySelector == null)
throw new ArgumentNullException(nameof(innerKeySelector));
if (resultSelector == null)
throw new ArgumentNullException(nameof(resultSelector));
if (comparer == null)
throw new ArgumentNullException(nameof(comparer));
return MergeJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector, comparer);
}
private static IEnumerable<TResult> MergeJoinIterator<TOuter, TInner, TKey, TResult>(IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector, IComparer<TKey> comparer)
{
IEnumerator<TOuter> outerEnumerator = outer.GetEnumerator();
if (!outerEnumerator.MoveNext())
yield break;
IEnumerator<TInner> innerEnumerator = inner.GetEnumerator();
if (!innerEnumerator.MoveNext())
yield break;
TOuter outerElement = outerEnumerator.Current;
TKey outerKey = outerKeySelector(outerElement);
TInner innerElement = innerEnumerator.Current;
TKey innerKey = innerKeySelector(innerElement);
int comp = comparer.Compare(innerKey, outerKey);
while (true)
{
if (comp < 0)
{
if (!innerEnumerator.MoveNext())
break;
innerElement = innerEnumerator.Current;
innerKey = innerKeySelector(innerElement);
comp = comparer.Compare(innerKey, outerKey);
continue;
}
if (comp > 0)
{
if (!outerEnumerator.MoveNext())
break;
outerElement = outerEnumerator.Current;
outerKey = outerKeySelector(outerElement);
comp = comparer.Compare(innerKey, outerKey);
continue;
}
yield return resultSelector(outerElement, innerElement);
if (!outerEnumerator.MoveNext())
{
while (true)
{
if (!innerEnumerator.MoveNext())
break;
innerElement = innerEnumerator.Current;
innerKey = innerKeySelector(innerElement);
comp = comparer.Compare(innerKey, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElement);
}
break;
}
if (!innerEnumerator.MoveNext())
{
while (true)
{
outerElement = outerEnumerator.Current;
outerKey = outerKeySelector(outerElement);
comp = comparer.Compare(innerKey, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElement);
if (!outerEnumerator.MoveNext())
break;
}
break;
}
TOuter outerElementNext = outerEnumerator.Current;
TKey outerKeyNext = outerKeySelector(outerElementNext);
TInner innerElementNext = innerEnumerator.Current;
TKey innerKeyNext = innerKeySelector(innerElementNext);
comp = comparer.Compare(outerKeyNext, outerKey);
bool stop = false;
if (comp != 0)
{
comp = comparer.Compare(innerKeyNext, innerKey);
if (comp == 0)
{
yield return resultSelector(outerElement, innerElementNext);
while (true)
{
if (!innerEnumerator.MoveNext())
{
stop = true;
break;
}
innerElementNext = innerEnumerator.Current;
innerKeyNext = innerKeySelector(innerElementNext);
comp = comparer.Compare(innerKeyNext, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElementNext);
}
if (stop)
break;
}
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementNext;
innerKey = innerKeyNext;
comp = comparer.Compare(innerKey, outerKey);
continue;
}
comp = comparer.Compare(innerKeyNext, innerKey);
if (comp != 0)
{
yield return resultSelector(outerElementNext, innerElement);
while (true)
{
if (!outerEnumerator.MoveNext())
{
stop = true;
break;
}
outerElementNext = outerEnumerator.Current;
outerKeyNext = outerKeySelector(outerElementNext);
comp = comparer.Compare(innerKey, outerKeyNext);
if (comp != 0)
break;
yield return resultSelector(outerElementNext, innerElement);
}
if (stop)
break;
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementNext;
innerKey = innerKeyNext;
comp = comparer.Compare(innerKey, outerKey);
continue;
}
yield return resultSelector(outerElement, innerElementNext);
var innerRest = new List<TInner>();
TInner innerElementFollowing = default(TInner);
TKey innerKeyFollowing = default(TKey);
while (true)
{
if (!innerEnumerator.MoveNext())
{
stop = true;
break;
}
innerElementFollowing = innerEnumerator.Current;
innerKeyFollowing = innerKeySelector(innerElementFollowing);
comp = comparer.Compare(innerKeyFollowing, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElementFollowing);
innerRest.Add(innerElementFollowing);
}
yield return resultSelector(outerElementNext, innerElement);
yield return resultSelector(outerElementNext, innerElementNext);
for (int i = 0; i < innerRest.Count; i++)
yield return resultSelector(outerElementNext, innerRest[i]);
while (true)
{
if (!outerEnumerator.MoveNext())
{
stop = true;
break;
}
outerElementNext = outerEnumerator.Current;
outerKeyNext = outerKeySelector(outerElementNext);
comp = comparer.Compare(innerKey, outerKeyNext);
if (comp != 0)
break;
yield return resultSelector(outerElementNext, innerElement);
yield return resultSelector(outerElementNext, innerElementNext);
for (int i = 0; i < innerRest.Count; i++)
yield return resultSelector(outerElementNext, innerRest[i]);
}
if (stop)
break;
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementFollowing;
innerKey = innerKeyFollowing;
comp = comparer.Compare(innerKey, outerKey);
}
}
如果两个序列都已排序,MoreEnumerable.OrderedMerge
来自 MoreLinq 库。
https://github.com/morelinq/MoreLINQ
using MoreLinq;
IEnumerable<char> result = outer.OrderedMerge(innner);
与内连接相比,效率不错。
当N和M为每个序列的长度时,
内部连接生成笛卡尔积,因此时间与 NxM 成正比
OrderedMerge遍历每个集合一次所以时间正比于N+M
如果序列未排序,标准 Linq Enumerable.OrderBy
将完成这项工作。
还有一些重载:
// to provide a custom comparison criteria
public static IEnumerable<T> OrderedMerge<T>(this IEnumerable<T> first, IEnumerable<T> second, IComparer<T> comparer);
// to provide the key for comparisons
IEnumerable<T> OrderedMerge<T, TKey>(this IEnumerable<T> first, IEnumerable<T> second, Func<T, TKey> keySelector);
// + other overloads to select element to be merged when first element is less than second,
// when second element is less than first
// when first and second element are equal
假定这两个有序序列:
var outer = new char[] { 'a', 'b', 'b', 'c', 'd', 'd', 'e' };
var inner = new char[] { 'a', 'b', 'c', 'c', 'd', 'd' };
知道两个序列中的元素都是有序的,如何才能比 Enumerable.Join
更有效地进行内部连接以生成以下元组序列?
{ 'a', 'a' }
{ 'b', 'b' }
{ 'b', 'b' }
{ 'c', 'c' }
{ 'c', 'c' }
{ 'd', 'd' }
{ 'd', 'd' }
{ 'd', 'd' }
{ 'd', 'd' }
请注意,与仅从两个序列中生成不同元素的 Enumerable.Intersect
不同,此处的输出序列 returns 代表一对一、一对一的元素的每个组合对多或多对多关系。
语义与 SQL 服务器中的 INNER JOIN
非常相似。但是,更具体地说,我正在寻找具有 merge join 算法 (INNER MERGE JOIN
) 性能特征的 C# 实现,即 returns 和 IEnumerable
延迟执行。
所需的方法签名可能如下所示:
IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(
this IEnumerable<TOuter> outer,
IEnumerable<TInner> inner,
Func<TOuter, TKey> outerKeySelector,
Func<TInner, TKey> innerKeySelector,
Func<TOuter, TInner, TResult> resultSelector)
我不知道有任何现有的 Enumerable
扩展方法可以实现这一点,没有人应该比我花更多的时间来想出这个。
public static IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector)
{
if (outer == null)
throw new ArgumentNullException(nameof(outer));
if (inner == null)
throw new ArgumentNullException(nameof(inner));
if (outerKeySelector == null)
throw new ArgumentNullException(nameof(outerKeySelector));
if (innerKeySelector == null)
throw new ArgumentNullException(nameof(innerKeySelector));
if (resultSelector == null)
throw new ArgumentNullException(nameof(resultSelector));
return MergeJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector, Comparer<TKey>.Default);
}
public static IEnumerable<TResult> MergeJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector, IComparer<TKey> comparer)
{
if (outer == null)
throw new ArgumentNullException(nameof(outer));
if (inner == null)
throw new ArgumentNullException(nameof(inner));
if (outerKeySelector == null)
throw new ArgumentNullException(nameof(outerKeySelector));
if (innerKeySelector == null)
throw new ArgumentNullException(nameof(innerKeySelector));
if (resultSelector == null)
throw new ArgumentNullException(nameof(resultSelector));
if (comparer == null)
throw new ArgumentNullException(nameof(comparer));
return MergeJoinIterator(outer, inner, outerKeySelector, innerKeySelector, resultSelector, comparer);
}
private static IEnumerable<TResult> MergeJoinIterator<TOuter, TInner, TKey, TResult>(IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector, IComparer<TKey> comparer)
{
IEnumerator<TOuter> outerEnumerator = outer.GetEnumerator();
if (!outerEnumerator.MoveNext())
yield break;
IEnumerator<TInner> innerEnumerator = inner.GetEnumerator();
if (!innerEnumerator.MoveNext())
yield break;
TOuter outerElement = outerEnumerator.Current;
TKey outerKey = outerKeySelector(outerElement);
TInner innerElement = innerEnumerator.Current;
TKey innerKey = innerKeySelector(innerElement);
int comp = comparer.Compare(innerKey, outerKey);
while (true)
{
if (comp < 0)
{
if (!innerEnumerator.MoveNext())
break;
innerElement = innerEnumerator.Current;
innerKey = innerKeySelector(innerElement);
comp = comparer.Compare(innerKey, outerKey);
continue;
}
if (comp > 0)
{
if (!outerEnumerator.MoveNext())
break;
outerElement = outerEnumerator.Current;
outerKey = outerKeySelector(outerElement);
comp = comparer.Compare(innerKey, outerKey);
continue;
}
yield return resultSelector(outerElement, innerElement);
if (!outerEnumerator.MoveNext())
{
while (true)
{
if (!innerEnumerator.MoveNext())
break;
innerElement = innerEnumerator.Current;
innerKey = innerKeySelector(innerElement);
comp = comparer.Compare(innerKey, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElement);
}
break;
}
if (!innerEnumerator.MoveNext())
{
while (true)
{
outerElement = outerEnumerator.Current;
outerKey = outerKeySelector(outerElement);
comp = comparer.Compare(innerKey, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElement);
if (!outerEnumerator.MoveNext())
break;
}
break;
}
TOuter outerElementNext = outerEnumerator.Current;
TKey outerKeyNext = outerKeySelector(outerElementNext);
TInner innerElementNext = innerEnumerator.Current;
TKey innerKeyNext = innerKeySelector(innerElementNext);
comp = comparer.Compare(outerKeyNext, outerKey);
bool stop = false;
if (comp != 0)
{
comp = comparer.Compare(innerKeyNext, innerKey);
if (comp == 0)
{
yield return resultSelector(outerElement, innerElementNext);
while (true)
{
if (!innerEnumerator.MoveNext())
{
stop = true;
break;
}
innerElementNext = innerEnumerator.Current;
innerKeyNext = innerKeySelector(innerElementNext);
comp = comparer.Compare(innerKeyNext, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElementNext);
}
if (stop)
break;
}
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementNext;
innerKey = innerKeyNext;
comp = comparer.Compare(innerKey, outerKey);
continue;
}
comp = comparer.Compare(innerKeyNext, innerKey);
if (comp != 0)
{
yield return resultSelector(outerElementNext, innerElement);
while (true)
{
if (!outerEnumerator.MoveNext())
{
stop = true;
break;
}
outerElementNext = outerEnumerator.Current;
outerKeyNext = outerKeySelector(outerElementNext);
comp = comparer.Compare(innerKey, outerKeyNext);
if (comp != 0)
break;
yield return resultSelector(outerElementNext, innerElement);
}
if (stop)
break;
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementNext;
innerKey = innerKeyNext;
comp = comparer.Compare(innerKey, outerKey);
continue;
}
yield return resultSelector(outerElement, innerElementNext);
var innerRest = new List<TInner>();
TInner innerElementFollowing = default(TInner);
TKey innerKeyFollowing = default(TKey);
while (true)
{
if (!innerEnumerator.MoveNext())
{
stop = true;
break;
}
innerElementFollowing = innerEnumerator.Current;
innerKeyFollowing = innerKeySelector(innerElementFollowing);
comp = comparer.Compare(innerKeyFollowing, outerKey);
if (comp != 0)
break;
yield return resultSelector(outerElement, innerElementFollowing);
innerRest.Add(innerElementFollowing);
}
yield return resultSelector(outerElementNext, innerElement);
yield return resultSelector(outerElementNext, innerElementNext);
for (int i = 0; i < innerRest.Count; i++)
yield return resultSelector(outerElementNext, innerRest[i]);
while (true)
{
if (!outerEnumerator.MoveNext())
{
stop = true;
break;
}
outerElementNext = outerEnumerator.Current;
outerKeyNext = outerKeySelector(outerElementNext);
comp = comparer.Compare(innerKey, outerKeyNext);
if (comp != 0)
break;
yield return resultSelector(outerElementNext, innerElement);
yield return resultSelector(outerElementNext, innerElementNext);
for (int i = 0; i < innerRest.Count; i++)
yield return resultSelector(outerElementNext, innerRest[i]);
}
if (stop)
break;
outerElement = outerElementNext;
outerKey = outerKeyNext;
innerElement = innerElementFollowing;
innerKey = innerKeyFollowing;
comp = comparer.Compare(innerKey, outerKey);
}
}
MoreEnumerable.OrderedMerge
来自 MoreLinq 库。
https://github.com/morelinq/MoreLINQ
using MoreLinq;
IEnumerable<char> result = outer.OrderedMerge(innner);
与内连接相比,效率不错。 当N和M为每个序列的长度时, 内部连接生成笛卡尔积,因此时间与 NxM 成正比 OrderedMerge遍历每个集合一次所以时间正比于N+M
如果序列未排序,标准 Linq Enumerable.OrderBy
将完成这项工作。
还有一些重载:
// to provide a custom comparison criteria
public static IEnumerable<T> OrderedMerge<T>(this IEnumerable<T> first, IEnumerable<T> second, IComparer<T> comparer);
// to provide the key for comparisons
IEnumerable<T> OrderedMerge<T, TKey>(this IEnumerable<T> first, IEnumerable<T> second, Func<T, TKey> keySelector);
// + other overloads to select element to be merged when first element is less than second,
// when second element is less than first
// when first and second element are equal