单链表C++的Quickselect算法
Quickselect algorithm for singly linked list C++
我需要一种算法,它可以在线性时间复杂度 O(n) 和常数 space 复杂度 O(1) 中找到单个 linked 列表的中值。
编辑:单 linked 列表是 C 风格的单 linked 列表。不允许 stl(没有容器,没有函数,所有 stl 都被禁止,例如没有 std::forward_list)。不允许移动任何其他容器(如数组)中的数字。
O(logn) 的 space 复杂度是可以接受的,因为对于我的列表,这实际上什至低于 100。此外,我不允许使用像 nth_element
这样的 STL 函数
基本上我有 linked 列表,其中包含 3 * 10^6 个元素,我需要在 3 秒内获得中位数,所以我不能使用排序算法对列表进行排序(这将是 O(nlogn) 并且可能需要 10-14 秒。
我在网上做了一些搜索,发现可以在 O(n) 和 O(1) space 中找到 std::vector 的中位数 quickselect(最坏情况在O(n^2),但很少见),例子:https://www.geeksforgeeks.org/quickselect-a-simple-iterative-implementation/
但是我找不到任何针对 linked 列表执行此操作的算法。问题是我可以使用数组索引随机访问向量如果我想修改该算法,复杂性会大得多,因为。例如,当我将 pivotindex 更改为左侧时,我实际上需要遍历列表以获取该新元素并走得更远(这将使我至少得到 O(kn),我的列表有一个大的 k,甚至是 O(n^ 2)...).
编辑 2:
我知道我有太多变量,但我一直在测试不同的东西,而且我仍在编写我的代码...
我当前的代码:
#include <bits/stdc++.h>
using namespace std;
template <class T> class Node {
public:
T data;
Node<T> *next;
};
template <class T> class List {
public:
Node<T> *first;
};
template <class T> T getMedianValue(List<T> & l) {
Node<T> *crt,*pivot,*incpivot;
int left, right, lung, idx, lungrel,lungrel2, left2, right2, aux, offset;
pivot = l.first;
crt = pivot->next;
lung = 1;
//lung is the lenght of the linked list (yeah it's lenght in romanian...)
//lungrel and lungrel2 are the relative lenghts of the part of
//the list I am processing, e.g: 2 3 4 in a list with 1 2 3 4 5
right = left = 0;
while (crt != NULL) {
if(crt->data < pivot->data){
aux = pivot->data;
pivot->data = crt->data;
crt->data = pivot->next->data;
pivot->next->data = aux;
pivot = pivot->next;
left++;
}
else right++;
// cout<<crt->data<<endl;
crt = crt->next;
lung++;
}
if(right > left) offset = left;
// cout<<endl;
// cout<<pivot->data<<" "<<left<<" "<<right<<endl;
// printList(l);
// cout<<endl;
lungrel = lung;
incpivot = l.first;
// offset = 0;
while(left != right){
//cout<<"parcurgere"<<endl;
if(left > right){
//cout<<endl;
//printList(l);
//cout<<endl;
//cout<<"testleft "<<incpivot->data<<" "<<left<<" "<<right<<endl;
crt = incpivot->next;
pivot = incpivot;
idx = offset;left2 = right2 = lungrel = 0;
//cout<<idx<<endl;
while(idx < left && crt!=NULL){
if(pivot->data > crt->data){
// cout<<"1crt "<<crt->data<<endl;
aux = pivot->data;
pivot->data = crt->data;
crt->data = pivot->next->data;
pivot->next->data = aux;
pivot = pivot->next;
left2++;lungrel++;
}
else {
right2++;lungrel++;
//cout<<crt->data<<" "<<right2<<endl;
}
//cout<<crt->data<<endl;
crt = crt->next;
idx++;
}
left = left2 + offset;
right = lung - left - 1;
if(right > left) offset = left;
//if(pivot->data == 18) return 18;
//cout<<endl;
//cout<<"l "<<pivot->data<<" "<<left<<" "<<right<<" "<<right2<<endl;
// printList(l);
}
else if(left < right && pivot->next!=NULL){
idx = left;left2 = right2 = 0;
incpivot = pivot->next;offset++;left++;
//cout<<endl;
//printList(l);
//cout<<endl;
//cout<<"testright "<<incpivot->data<<" "<<left<<" "<<right<<endl;
pivot = pivot->next;
crt = pivot->next;
lungrel2 = lungrel;
lungrel = 0;
// cout<<"p right"<<pivot->data<<" "<<left<<" "<<right<<endl;
while((idx < lungrel2 + offset - 1) && crt!=NULL){
if(crt->data < pivot->data){
// cout<<"crt "<<crt->data<<endl;
aux = pivot->data;
pivot->data = crt->data;
crt->data = (pivot->next)->data;
(pivot->next)->data = aux;
pivot = pivot->next;
// cout<<"crt2 "<<crt->data<<endl;
left2++;lungrel++;
}
else right2++;lungrel++;
//cout<<crt->data<<endl;
crt = crt->next;
idx++;
}
left = left2 + left;
right = lung - left - 1;
if(right > left) offset = left;
// cout<<"r "<<pivot->data<<" "<<left<<" "<<right<<endl;
// printList(l);
}
else{
//cout<<cmx<<endl;
return pivot->data;
}
}
//cout<<cmx<<endl;
return pivot->data;
}
template <class T> void printList(List<T> const & l) {
Node<T> *tmp;
if(l.first != NULL){
tmp = l.first;
while(tmp != NULL){
cout<<tmp->data<<" ";
tmp = tmp->next;
}
}
}
template <class T> void push_front(List<T> & l, int x)
{
Node<T>* tmp = new Node<T>;
tmp->data = x;
tmp->next = l.first;
l.first = tmp;
}
int main(){
List<int> l;
int n = 0;
push_front(l, 19);
push_front(l, 12);
push_front(l, 11);
push_front(l, 101);
push_front(l, 91);
push_front(l, 21);
push_front(l, 9);
push_front(l, 6);
push_front(l, 25);
push_front(l, 4);
push_front(l, 18);
push_front(l, 2);
push_front(l, 8);
push_front(l, 10);
push_front(l, 200);
push_front(l, 225);
push_front(l, 170);
printList(l);
n=getMedianValue(l);
cout<<endl;
cout<<n;
return 0;
}
关于如何使快速选择适应单独列出的 link 或其他适用于我的问题的算法,您有什么建议吗?
所以你可以做的就是使用迭代器来保持位置。我已经编写了上面的算法来处理 std::forward_list。我知道这并不完美,但很快就写了下来,希望对您有所帮助。
int partition(int leftPos, int rightPos, std::forward_list<int>::iterator& currIter,
std::forward_list<int>::iterator lowIter, std::forward_list<int>::iterator highIter) {
auto iter = lowIter;
int i = leftPos - 1;
for(int j = leftPos; j < rightPos - 1; j++) {
if(*iter <= *highIter) {
++currIter;
++i;
std::iter_swap(currIter, iter);
}
iter++;
}
std::forward_list<int>::iterator newIter = currIter;
std::iter_swap(++newIter, highIter);
return i + 1;
}
std::forward_list<int>::iterator kthSmallest(std::forward_list<int>& list,
std::forward_list<int>::iterator left, std::forward_list<int>::iterator right, int size, int k) {
int leftPos {0};
int rightPos {size};
int pivotPos {0};
std::forward_list<int>::iterator resetIter = left;
std::forward_list<int>::iterator currIter = left;
++left;
while(leftPos <= rightPos) {
pivotPos = partition(leftPos, rightPos, currIter, left, right);
if(pivotPos == (k-1)) {
return currIter;
} else if(pivotPos > (k-1)) {
right = currIter;
rightPos = pivotPos - 1;
} else {
left = currIter;
++left;
resetIter = left;
++left;
leftPos = pivotPos + 1;
}
currIter = resetIter;
}
return list.end();
}
调用第 k 个迭代器时,左边的迭代器应该比您打算开始的迭代器少一个。这使我们在 partition()
中落后 low
一个位置。这是一个执行它的例子:
int main() {
std::forward_list<int> list {10, 12, 12, 13, 4, 5, 8, 11, 6, 26, 15, 21};
auto startIter = list.before_begin();
int k = 6;
int size = getSize(list);
auto kthIter = kthSmallest(list, startIter, getEnd(list), size - 1, k);
std::cout << k << "th smallest: " << *kthIter << std::endl;
return 0;
}
6th smallest: 10
在您的问题中,您提到您在 select 设置不在列表开头的主元时遇到问题,因为这需要遍历列表。如果你做对了,你只需要遍历整个列表两次:
- 一次用于找到列表的中间和末尾以便 select 一个好的主元(例如使用 "median-of-three" 规则)
- 实际排序一次
如果您不太关心 select 一个好的主元并且您很乐意 select 将列表的第一个元素作为主元 (如果数据已经排序,这会导致最坏情况 O(n^2) time complexity。
如果您通过维护指向末尾的指针在第一次遍历列表时记住了列表的末尾,那么您永远不必再次遍历它来找到末尾。此外,如果您使用的是标准 Lomuto partition scheme(由于下述原因我没有使用它),那么您还必须维护指向列表的两个指针,它们代表 i
和 j
标准 Lomuto 分区方案的索引。通过使用这些指针,永远不必遍历列表来访问单个元素。
另外,如果你维护一个指向每个分区的中间和末尾的指针,那么,当你以后必须对这些分区之一进行排序时,你将不必再次遍历该分区来找到中间和末尾。
我现在已经为链表创建了自己的 QuickSelect 算法实现,我已在下面发布。
既然你说链表是单链的,不能升级为双向链表,那我就不能用霍尔分区方案,因为迭代一个向后的单链表非常昂贵。因此,我改用通常效率较低的 Lomuto partition scheme。
当使用 Lomuto 分区方案时,第一个元素或最后一个元素通常 selected 作为主元。但是,selecting 中的任何一个都有一个缺点,即排序数据将导致算法具有 O(n^2) 的最坏情况时间复杂度。这可以通过 select 根据 "median-of-three" rule 设置一个枢轴来防止,该枢轴是从第一个元素、中间元素和最后一个元素的中值 select 一个枢轴。因此,在我的实施中,我使用了这个“三中位数”规则。
此外,Lomuto 分区方案通常会创建两个分区,一个用于小于主元的值,一个用于大于或等于主元的值。但是,如果所有值都相同,这将导致 O(n^2) 的最坏情况时间复杂度。因此,在我的实现中,我创建了三个分区,一个用于小于主元的值,一个用于大于主元的值,一个用于等于主元的值。
虽然这些措施并没有完全消除 O(n^2) 的最坏情况时间复杂度的可能性,但它们至少使它不太可能发生(除非输入是由恶意攻击者提供的)。为了保证 O(n) 的时间复杂度,必须使用更复杂的枢轴 selection 算法,例如 median of medians.
我遇到的一个重要问题是,对于偶数个元素,median is defined as the arithmetic mean of the two "middle" or "median" elements. For this reason, I can't simply write a function similar to std::nth_element
,因为如果,例如,元素总数是 14,那么我将寻找第 7 和第 8 个最大的元素。这意味着我必须调用这样的函数两次,这样效率很低。因此,我编写了一个可以同时搜索两个“中值”元素的函数。虽然这会使代码更加复杂,但与不必两次调用相同函数的优势相比,由于额外的代码复杂性而导致的性能损失应该是最小的。
请注意,尽管我的实现可以在 C++ 编译器上完美编译,但我不会将其称为教科书 C++ 代码,因为问题表明我不允许使用 C++ 标准模板库中的任何内容。因此,我的代码是 C 代码和 C++ 代码的混合体。
在下面的代码中,我只使用标准模板库(特别是函数std::nth_element
)来测试我的算法和验证结果。我在实际算法中没有使用任何这些函数。
#include <iostream>
#include <iomanip>
#include <cassert>
// The following two headers are only required for testing the algorithm and verifying
// the correctness of its results. They are not used in the algorithm itself.
#include <random>
#include <algorithm>
// The following setting can be changed to print extra debugging information
// possible settings:
// 0: no extra debugging information
// 1: print the state and length of all partitions in every loop iteraton
// 2: additionally print the contents of all partitions (if they are not too big)
#define PRINT_DEBUG_LEVEL 0
template <typename T>
struct Node
{
T data;
Node<T> *next;
};
// NOTE:
// The return type is not necessarily the same as the data type. The reason for this is
// that, for example, the data type "int" requires a "double" as a return type, so that
// the arithmetic mean of "3" and "6" returns "4.5".
// This function may require template specializations to handle overflow or wrapping.
template<typename T, typename U>
U arithmetic_mean( const T &first, const T &second )
{
return ( static_cast<U>(first) + static_cast<U>(second) ) / 2;
}
//the main loop of the function find_median can be in one of the following three states
enum LoopState
{
//we are looking for one median value
LOOPSTATE_LOOKINGFORONE,
//we are looking for two median values, and the returned median
//will be the arithmetic mean of the two
LOOPSTATE_LOOKINGFORTWO,
//one of the median values has been found, but we are still searching for
//the second one
LOOPSTATE_FOUNDONE
};
template <
typename T, //type of the data
typename U //type of the return value
>
U find_median( Node<T> *list )
{
//This variable points to the pointer to the first element of the current partition.
//During the partition phase, the linked list will be broken and reassembled afterwards, so
//the pointer this pointer points to will be nullptr until it is reassembled.
Node<T> **pp_start = &list;
//This pointer represents nothing more than the cached value of *pp_start and it is
//not always valid
Node<T> *p_start = *pp_start;
//These pointers are maintained for accessing the middle of the list for selecting a pivot
//using the "median-of-three" rule.
Node<T> *p_middle;
Node<T> *p_end;
//result is not defined if list is empty
assert( p_start != nullptr );
//in the main loop, this variable always holds the number of elements in the current partition
int num_total = 1;
// First, we must traverse the entire linked list in order to determine the number of elements,
// in order to calculate k1 and k2. If it is odd, then the median is defined as the k'th smallest
// element where k = n / 2. If the number of elements is even, then the median is defined as the
// arithmetic mean of the k'th element and the (k+1)'th element.
// We also set a pointer to the nodes in the middle and at the end, which will be required later
// for selecting a pivot according to the "median-of-three" rule.
p_middle = p_start;
for ( p_end = p_start; p_end->next != nullptr; p_end = p_end->next )
{
num_total++;
if ( num_total % 2 == 0 ) p_middle = p_middle->next;
}
// find out whether we are looking for only one or two median values
enum LoopState loop_state = num_total % 2 == 0 ? LOOPSTATE_LOOKINGFORTWO : LOOPSTATE_LOOKINGFORONE;
//set k to the index of the middle element, or if there are two middle elements, to the left one
int k = ( num_total - 1 ) / 2;
// If we are looking for two median values, but we have only found one, then this variable will
// hold the value of the one we found. Whether we have found one can be determined by the state of
// the variable loop_state.
T val_found;
for (;;)
{
//make p_start cache the value of *pp_start again, because a previous iteration of the loop
//may have changed the value of pp_start
p_start = *pp_start;
assert( p_start != nullptr );
assert( p_middle != nullptr );
assert( p_end != nullptr );
assert( num_total != 0 );
if ( num_total == 1 )
{
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
return p_start->data;
case LOOPSTATE_FOUNDONE:
return arithmetic_mean<T,U>( val_found, p_start->data );
default:
assert( false ); //this should be unreachable
}
}
//select the pivot according to the "median-of-three" rule
T pivot;
if ( p_start->data < p_middle->data )
{
if ( p_middle->data < p_end->data )
pivot = p_middle->data;
else if ( p_start->data < p_end->data )
pivot = p_end->data;
else
pivot = p_start->data;
}
else
{
if ( p_start->data < p_end->data )
pivot = p_start->data;
else if ( p_middle->data < p_end->data )
pivot = p_end->data;
else
pivot = p_middle->data;
}
#if PRINT_DEBUG_LEVEL >= 1
//this line is conditionally compiled for extra debugging information
std::cout << "\nmedian of three: " << (*pp_start)->data << " " << p_middle->data << " " << p_end->data << " ->" << pivot << std::endl;
#endif
// We will be dividing the current partition into 3 new partitions (less-than,
// equal-to and greater-than) each represented as a linked list. Each list
// requires a pointer to the start of the list and a pointer to the pointer at
// the end of the list to write the address of new elements to. Also, when
// traversing the lists, we need to keep a pointer to the middle of the list,
// as this information will be required for selecting a new pivot in the next
// iteration of the loop. The latter is not required for the equal-to partition,
// as it would never be used.
Node<T> *p_less = nullptr, **pp_less_end = &p_less, **pp_less_middle = &p_less;
Node<T> *p_equal = nullptr, **pp_equal_end = &p_equal;
Node<T> *p_greater = nullptr, **pp_greater_end = &p_greater, **pp_greater_middle = &p_greater;
// These pointers are only used as a cache to the location of the end node.
// Despite their similar name, their function is quite different to pp_less_end
// and pp_greater_end.
Node<T> *p_less_end = nullptr;
Node<T> *p_greater_end = nullptr;
// counter for the number of elements in each partition
int num_less = 0;
int num_equal = 0;
int num_greater = 0;
// NOTE:
// The following loop will temporarily split the linked list. It will be merged later.
Node<T> *p_next_node = p_start;
//the following line isn't necessary; it is only used to clarify that the pointers no
//longer point to anything meaningful
*pp_start = p_start = nullptr;
for ( int i = 0; i < num_total; i++ )
{
assert( p_next_node != nullptr );
Node<T> *p_current_node = p_next_node;
p_next_node = p_next_node->next;
if ( p_current_node->data < pivot )
{
//link node to pp_less
assert( *pp_less_end == nullptr );
*pp_less_end = p_less_end = p_current_node;
pp_less_end = &p_current_node->next;
p_current_node->next = nullptr;
num_less++;
if ( num_less % 2 == 0 )
{
pp_less_middle = &(*pp_less_middle)->next;
}
}
else if ( p_current_node->data == pivot )
{
//link node to pp_equal
assert( *pp_equal_end == nullptr );
*pp_equal_end = p_current_node;
pp_equal_end = &p_current_node->next;
p_current_node->next = nullptr;
num_equal++;
}
else
{
//link node to pp_greater
assert( *pp_greater_end == nullptr );
*pp_greater_end = p_greater_end = p_current_node;
pp_greater_end = &p_current_node->next;
p_current_node->next = nullptr;
num_greater++;
if ( num_greater % 2 == 0 )
{
pp_greater_middle = &(*pp_greater_middle)->next;
}
}
}
assert( num_total == num_less + num_equal + num_greater );
assert( num_equal >= 1 );
#if PRINT_DEBUG_LEVEL >= 1
//this section is conditionally compiled for extra debugging information
{
std::cout << std::setfill( '0' );
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
std::cout << "LOOPSTATE_LOOKINGFORONE k = " << k << "\n";
break;
case LOOPSTATE_LOOKINGFORTWO:
std::cout << "LOOPSTATE_LOOKINGFORTWO k = " << k << "\n";
break;
case LOOPSTATE_FOUNDONE:
std::cout << "LOOPSTATE_FOUNDONE k = " << k << " val_found = " << val_found << "\n";
}
std::cout << "partition lengths: ";
std::cout <<
std::setw( 2 ) << num_less << " " <<
std::setw( 2 ) << num_equal << " " <<
std::setw( 2 ) << num_greater << " " <<
std::setw( 2 ) << num_total << "\n";
#if PRINT_DEBUG_LEVEL >= 2
Node<T> *p;
std::cout << "less: ";
if ( num_less > 10 )
std::cout << "too many to print";
else
for ( p = p_less; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\nequal: ";
if ( num_equal > 10 )
std::cout << "too many to print";
else
for ( p = p_equal; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\ngreater: ";
if ( num_greater > 10 )
std::cout << "too many to print";
else
for ( p = p_greater; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\n\n" << std::flush;
#endif
std::cout << std::flush;
}
#endif
//insert less-than partition into list
assert( *pp_start == nullptr );
*pp_start = p_less;
//insert equal-to partition into list
assert( *pp_less_end == nullptr );
*pp_less_end = p_equal;
//insert greater-than partition into list
assert( *pp_equal_end == nullptr );
*pp_equal_end = p_greater;
//link list to previously cut off part
assert( *pp_greater_end == nullptr );
*pp_greater_end = p_next_node;
//if less-than partition is large enough to hold both possible median values
if ( k + 2 <= num_less )
{
//set the next iteration of the loop to process the less-than partition
//pp_start is already set to the desired value
p_middle = *pp_less_middle;
p_end = p_less_end;
num_total = num_less;
}
//else if less-than partition holds one of both possible median values
else if ( k + 1 == num_less )
{
if ( loop_state == LOOPSTATE_LOOKINGFORTWO )
{
//the equal_to partition never needs sorting, because all members are already equal
val_found = p_equal->data;
loop_state = LOOPSTATE_FOUNDONE;
}
//set the next iteration of the loop to process the less-than partition
//pp_start is already set to the desired value
p_middle = *pp_less_middle;
p_end = p_less_end;
num_total = num_less;
}
//else if equal-to partition holds both possible median values
else if ( k + 2 <= num_less + num_equal )
{
//the equal_to partition never needs sorting, because all members are already equal
if ( loop_state == LOOPSTATE_FOUNDONE )
return arithmetic_mean<T,U>( val_found, p_equal->data );
return p_equal->data;
}
//else if equal-to partition holds one of both possible median values
else if ( k + 1 == num_less + num_equal )
{
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
return p_equal->data;
case LOOPSTATE_LOOKINGFORTWO:
val_found = p_equal->data;
loop_state = LOOPSTATE_FOUNDONE;
k = 0;
//set the next iteration of the loop to process the greater-than partition
pp_start = pp_equal_end;
p_middle = *pp_greater_middle;
p_end = p_greater_end;
num_total = num_greater;
break;
case LOOPSTATE_FOUNDONE:
return arithmetic_mean<T,U>( val_found, p_equal->data );
}
}
//else both possible median values must be in the greater-than partition
else
{
k = k - num_less - num_equal;
//set the next iteration of the loop to process the greater-than partition
pp_start = pp_equal_end;
p_middle = *pp_greater_middle;
p_end = p_greater_end;
num_total = num_greater;
}
}
}
// NOTE:
// The following code is not part of the algorithm, but is only intended to test the algorithm
// This simple class is designed to contain a singly-linked list
template <typename T>
class List
{
public:
List() : first( nullptr ) {}
// the following is required to abide by the rule of three/five/zero
// see: https://en.cppreference.com/w/cpp/language/rule_of_three
List( const List<T> & ) = delete;
List( const List<T> && ) = delete;
List<T>& operator=( List<T> & ) = delete;
List<T>& operator=( List<T> && ) = delete;
~List()
{
Node<T> *p = first;
while ( p != nullptr )
{
Node<T> *temp = p;
p = p->next;
delete temp;
}
}
void push_front( int data )
{
Node<T> *temp = new Node<T>;
temp->data = data;
temp->next = first;
first = temp;
}
//member variables
Node<T> *first;
};
int main()
{
//generated random numbers will be between 0 and 2 billion (fits in 32-bit signed int)
constexpr int min_val = 0;
constexpr int max_val = 2*1000*1000*1000;
//will allocate array for 1 million ints and fill with random numbers
constexpr int num_values = 1*1000*1000;
//this class contains the singly-linked list and is empty for now
List<int> l;
double result;
//These variables are used for random number generation
std::random_device rd;
std::mt19937 gen( rd() );
std::uniform_int_distribution<> dis( min_val, max_val );
try
{
//fill array with random data
std::cout << "Filling array with random data..." << std::flush;
auto unsorted_data = std::make_unique<int[]>( num_values );
for ( int i = 0; i < num_values; i++ ) unsorted_data[i] = dis( gen );
//fill the singly-linked list
std::cout << "done\nFilling linked list..." << std::flush;
for ( int i = 0; i < num_values; i++ ) l.push_front( unsorted_data[i] );
std::cout << "done\nCalculating median using STL function..." << std::flush;
//calculate the median using the functions provided by the C++ standard template library.
//Note: this is only done to compare the results with the algorithm provided in this file
if ( num_values % 2 == 0 )
{
int median1, median2;
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 1) / 2], &unsorted_data[num_values] );
median1 = unsorted_data[(num_values - 1) / 2];
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 0) / 2], &unsorted_data[num_values] );
median2 = unsorted_data[(num_values - 0) / 2];
result = arithmetic_mean<int,double>( median1, median2 );
}
else
{
int median;
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 0) / 2], &unsorted_data[num_values] );
median = unsorted_data[(num_values - 0) / 2];
result = static_cast<int>(median);
}
std::cout << "done\nMedian according to STL function: " << std::setprecision( 12 ) << result << std::endl;
// NOTE: Since the STL functions only sorted the array, but not the linked list, the
// order of the linked list is still random and not pre-sorted.
//calculate the median using the algorithm provided in this file
std::cout << "Starting algorithm" << std::endl;
result = find_median<int,double>( l.first );
std::cout << "The calculated median is: " << std::setprecision( 12 ) << result << std::endl;
std::cout << "Cleaning up\n\n" << std::flush;
}
catch ( std::bad_alloc )
{
std::cerr << "Error: Unable to allocate sufficient memory!" << std::endl;
return -1;
}
return 0;
}
我已经用一百万个随机生成的元素成功地测试了我的代码,它几乎瞬间就找到了正确的中位数。
我需要一种算法,它可以在线性时间复杂度 O(n) 和常数 space 复杂度 O(1) 中找到单个 linked 列表的中值。
编辑:单 linked 列表是 C 风格的单 linked 列表。不允许 stl(没有容器,没有函数,所有 stl 都被禁止,例如没有 std::forward_list)。不允许移动任何其他容器(如数组)中的数字。 O(logn) 的 space 复杂度是可以接受的,因为对于我的列表,这实际上什至低于 100。此外,我不允许使用像 nth_element
这样的 STL 函数基本上我有 linked 列表,其中包含 3 * 10^6 个元素,我需要在 3 秒内获得中位数,所以我不能使用排序算法对列表进行排序(这将是 O(nlogn) 并且可能需要 10-14 秒。
我在网上做了一些搜索,发现可以在 O(n) 和 O(1) space 中找到 std::vector 的中位数 quickselect(最坏情况在O(n^2),但很少见),例子:https://www.geeksforgeeks.org/quickselect-a-simple-iterative-implementation/
但是我找不到任何针对 linked 列表执行此操作的算法。问题是我可以使用数组索引随机访问向量如果我想修改该算法,复杂性会大得多,因为。例如,当我将 pivotindex 更改为左侧时,我实际上需要遍历列表以获取该新元素并走得更远(这将使我至少得到 O(kn),我的列表有一个大的 k,甚至是 O(n^ 2)...).
编辑 2:
我知道我有太多变量,但我一直在测试不同的东西,而且我仍在编写我的代码... 我当前的代码:
#include <bits/stdc++.h>
using namespace std;
template <class T> class Node {
public:
T data;
Node<T> *next;
};
template <class T> class List {
public:
Node<T> *first;
};
template <class T> T getMedianValue(List<T> & l) {
Node<T> *crt,*pivot,*incpivot;
int left, right, lung, idx, lungrel,lungrel2, left2, right2, aux, offset;
pivot = l.first;
crt = pivot->next;
lung = 1;
//lung is the lenght of the linked list (yeah it's lenght in romanian...)
//lungrel and lungrel2 are the relative lenghts of the part of
//the list I am processing, e.g: 2 3 4 in a list with 1 2 3 4 5
right = left = 0;
while (crt != NULL) {
if(crt->data < pivot->data){
aux = pivot->data;
pivot->data = crt->data;
crt->data = pivot->next->data;
pivot->next->data = aux;
pivot = pivot->next;
left++;
}
else right++;
// cout<<crt->data<<endl;
crt = crt->next;
lung++;
}
if(right > left) offset = left;
// cout<<endl;
// cout<<pivot->data<<" "<<left<<" "<<right<<endl;
// printList(l);
// cout<<endl;
lungrel = lung;
incpivot = l.first;
// offset = 0;
while(left != right){
//cout<<"parcurgere"<<endl;
if(left > right){
//cout<<endl;
//printList(l);
//cout<<endl;
//cout<<"testleft "<<incpivot->data<<" "<<left<<" "<<right<<endl;
crt = incpivot->next;
pivot = incpivot;
idx = offset;left2 = right2 = lungrel = 0;
//cout<<idx<<endl;
while(idx < left && crt!=NULL){
if(pivot->data > crt->data){
// cout<<"1crt "<<crt->data<<endl;
aux = pivot->data;
pivot->data = crt->data;
crt->data = pivot->next->data;
pivot->next->data = aux;
pivot = pivot->next;
left2++;lungrel++;
}
else {
right2++;lungrel++;
//cout<<crt->data<<" "<<right2<<endl;
}
//cout<<crt->data<<endl;
crt = crt->next;
idx++;
}
left = left2 + offset;
right = lung - left - 1;
if(right > left) offset = left;
//if(pivot->data == 18) return 18;
//cout<<endl;
//cout<<"l "<<pivot->data<<" "<<left<<" "<<right<<" "<<right2<<endl;
// printList(l);
}
else if(left < right && pivot->next!=NULL){
idx = left;left2 = right2 = 0;
incpivot = pivot->next;offset++;left++;
//cout<<endl;
//printList(l);
//cout<<endl;
//cout<<"testright "<<incpivot->data<<" "<<left<<" "<<right<<endl;
pivot = pivot->next;
crt = pivot->next;
lungrel2 = lungrel;
lungrel = 0;
// cout<<"p right"<<pivot->data<<" "<<left<<" "<<right<<endl;
while((idx < lungrel2 + offset - 1) && crt!=NULL){
if(crt->data < pivot->data){
// cout<<"crt "<<crt->data<<endl;
aux = pivot->data;
pivot->data = crt->data;
crt->data = (pivot->next)->data;
(pivot->next)->data = aux;
pivot = pivot->next;
// cout<<"crt2 "<<crt->data<<endl;
left2++;lungrel++;
}
else right2++;lungrel++;
//cout<<crt->data<<endl;
crt = crt->next;
idx++;
}
left = left2 + left;
right = lung - left - 1;
if(right > left) offset = left;
// cout<<"r "<<pivot->data<<" "<<left<<" "<<right<<endl;
// printList(l);
}
else{
//cout<<cmx<<endl;
return pivot->data;
}
}
//cout<<cmx<<endl;
return pivot->data;
}
template <class T> void printList(List<T> const & l) {
Node<T> *tmp;
if(l.first != NULL){
tmp = l.first;
while(tmp != NULL){
cout<<tmp->data<<" ";
tmp = tmp->next;
}
}
}
template <class T> void push_front(List<T> & l, int x)
{
Node<T>* tmp = new Node<T>;
tmp->data = x;
tmp->next = l.first;
l.first = tmp;
}
int main(){
List<int> l;
int n = 0;
push_front(l, 19);
push_front(l, 12);
push_front(l, 11);
push_front(l, 101);
push_front(l, 91);
push_front(l, 21);
push_front(l, 9);
push_front(l, 6);
push_front(l, 25);
push_front(l, 4);
push_front(l, 18);
push_front(l, 2);
push_front(l, 8);
push_front(l, 10);
push_front(l, 200);
push_front(l, 225);
push_front(l, 170);
printList(l);
n=getMedianValue(l);
cout<<endl;
cout<<n;
return 0;
}
关于如何使快速选择适应单独列出的 link 或其他适用于我的问题的算法,您有什么建议吗?
所以你可以做的就是使用迭代器来保持位置。我已经编写了上面的算法来处理 std::forward_list。我知道这并不完美,但很快就写了下来,希望对您有所帮助。
int partition(int leftPos, int rightPos, std::forward_list<int>::iterator& currIter,
std::forward_list<int>::iterator lowIter, std::forward_list<int>::iterator highIter) {
auto iter = lowIter;
int i = leftPos - 1;
for(int j = leftPos; j < rightPos - 1; j++) {
if(*iter <= *highIter) {
++currIter;
++i;
std::iter_swap(currIter, iter);
}
iter++;
}
std::forward_list<int>::iterator newIter = currIter;
std::iter_swap(++newIter, highIter);
return i + 1;
}
std::forward_list<int>::iterator kthSmallest(std::forward_list<int>& list,
std::forward_list<int>::iterator left, std::forward_list<int>::iterator right, int size, int k) {
int leftPos {0};
int rightPos {size};
int pivotPos {0};
std::forward_list<int>::iterator resetIter = left;
std::forward_list<int>::iterator currIter = left;
++left;
while(leftPos <= rightPos) {
pivotPos = partition(leftPos, rightPos, currIter, left, right);
if(pivotPos == (k-1)) {
return currIter;
} else if(pivotPos > (k-1)) {
right = currIter;
rightPos = pivotPos - 1;
} else {
left = currIter;
++left;
resetIter = left;
++left;
leftPos = pivotPos + 1;
}
currIter = resetIter;
}
return list.end();
}
调用第 k 个迭代器时,左边的迭代器应该比您打算开始的迭代器少一个。这使我们在 partition()
中落后 low
一个位置。这是一个执行它的例子:
int main() {
std::forward_list<int> list {10, 12, 12, 13, 4, 5, 8, 11, 6, 26, 15, 21};
auto startIter = list.before_begin();
int k = 6;
int size = getSize(list);
auto kthIter = kthSmallest(list, startIter, getEnd(list), size - 1, k);
std::cout << k << "th smallest: " << *kthIter << std::endl;
return 0;
}
6th smallest: 10
在您的问题中,您提到您在 select 设置不在列表开头的主元时遇到问题,因为这需要遍历列表。如果你做对了,你只需要遍历整个列表两次:
- 一次用于找到列表的中间和末尾以便 select 一个好的主元(例如使用 "median-of-three" 规则)
- 实际排序一次
如果您不太关心 select 一个好的主元并且您很乐意 select 将列表的第一个元素作为主元 (如果数据已经排序,这会导致最坏情况 O(n^2) time complexity。
如果您通过维护指向末尾的指针在第一次遍历列表时记住了列表的末尾,那么您永远不必再次遍历它来找到末尾。此外,如果您使用的是标准 Lomuto partition scheme(由于下述原因我没有使用它),那么您还必须维护指向列表的两个指针,它们代表 i
和 j
标准 Lomuto 分区方案的索引。通过使用这些指针,永远不必遍历列表来访问单个元素。
另外,如果你维护一个指向每个分区的中间和末尾的指针,那么,当你以后必须对这些分区之一进行排序时,你将不必再次遍历该分区来找到中间和末尾。
我现在已经为链表创建了自己的 QuickSelect 算法实现,我已在下面发布。
既然你说链表是单链的,不能升级为双向链表,那我就不能用霍尔分区方案,因为迭代一个向后的单链表非常昂贵。因此,我改用通常效率较低的 Lomuto partition scheme。
当使用 Lomuto 分区方案时,第一个元素或最后一个元素通常 selected 作为主元。但是,selecting 中的任何一个都有一个缺点,即排序数据将导致算法具有 O(n^2) 的最坏情况时间复杂度。这可以通过 select 根据 "median-of-three" rule 设置一个枢轴来防止,该枢轴是从第一个元素、中间元素和最后一个元素的中值 select 一个枢轴。因此,在我的实施中,我使用了这个“三中位数”规则。
此外,Lomuto 分区方案通常会创建两个分区,一个用于小于主元的值,一个用于大于或等于主元的值。但是,如果所有值都相同,这将导致 O(n^2) 的最坏情况时间复杂度。因此,在我的实现中,我创建了三个分区,一个用于小于主元的值,一个用于大于主元的值,一个用于等于主元的值。
虽然这些措施并没有完全消除 O(n^2) 的最坏情况时间复杂度的可能性,但它们至少使它不太可能发生(除非输入是由恶意攻击者提供的)。为了保证 O(n) 的时间复杂度,必须使用更复杂的枢轴 selection 算法,例如 median of medians.
我遇到的一个重要问题是,对于偶数个元素,median is defined as the arithmetic mean of the two "middle" or "median" elements. For this reason, I can't simply write a function similar to std::nth_element
,因为如果,例如,元素总数是 14,那么我将寻找第 7 和第 8 个最大的元素。这意味着我必须调用这样的函数两次,这样效率很低。因此,我编写了一个可以同时搜索两个“中值”元素的函数。虽然这会使代码更加复杂,但与不必两次调用相同函数的优势相比,由于额外的代码复杂性而导致的性能损失应该是最小的。
请注意,尽管我的实现可以在 C++ 编译器上完美编译,但我不会将其称为教科书 C++ 代码,因为问题表明我不允许使用 C++ 标准模板库中的任何内容。因此,我的代码是 C 代码和 C++ 代码的混合体。
在下面的代码中,我只使用标准模板库(特别是函数std::nth_element
)来测试我的算法和验证结果。我在实际算法中没有使用任何这些函数。
#include <iostream>
#include <iomanip>
#include <cassert>
// The following two headers are only required for testing the algorithm and verifying
// the correctness of its results. They are not used in the algorithm itself.
#include <random>
#include <algorithm>
// The following setting can be changed to print extra debugging information
// possible settings:
// 0: no extra debugging information
// 1: print the state and length of all partitions in every loop iteraton
// 2: additionally print the contents of all partitions (if they are not too big)
#define PRINT_DEBUG_LEVEL 0
template <typename T>
struct Node
{
T data;
Node<T> *next;
};
// NOTE:
// The return type is not necessarily the same as the data type. The reason for this is
// that, for example, the data type "int" requires a "double" as a return type, so that
// the arithmetic mean of "3" and "6" returns "4.5".
// This function may require template specializations to handle overflow or wrapping.
template<typename T, typename U>
U arithmetic_mean( const T &first, const T &second )
{
return ( static_cast<U>(first) + static_cast<U>(second) ) / 2;
}
//the main loop of the function find_median can be in one of the following three states
enum LoopState
{
//we are looking for one median value
LOOPSTATE_LOOKINGFORONE,
//we are looking for two median values, and the returned median
//will be the arithmetic mean of the two
LOOPSTATE_LOOKINGFORTWO,
//one of the median values has been found, but we are still searching for
//the second one
LOOPSTATE_FOUNDONE
};
template <
typename T, //type of the data
typename U //type of the return value
>
U find_median( Node<T> *list )
{
//This variable points to the pointer to the first element of the current partition.
//During the partition phase, the linked list will be broken and reassembled afterwards, so
//the pointer this pointer points to will be nullptr until it is reassembled.
Node<T> **pp_start = &list;
//This pointer represents nothing more than the cached value of *pp_start and it is
//not always valid
Node<T> *p_start = *pp_start;
//These pointers are maintained for accessing the middle of the list for selecting a pivot
//using the "median-of-three" rule.
Node<T> *p_middle;
Node<T> *p_end;
//result is not defined if list is empty
assert( p_start != nullptr );
//in the main loop, this variable always holds the number of elements in the current partition
int num_total = 1;
// First, we must traverse the entire linked list in order to determine the number of elements,
// in order to calculate k1 and k2. If it is odd, then the median is defined as the k'th smallest
// element where k = n / 2. If the number of elements is even, then the median is defined as the
// arithmetic mean of the k'th element and the (k+1)'th element.
// We also set a pointer to the nodes in the middle and at the end, which will be required later
// for selecting a pivot according to the "median-of-three" rule.
p_middle = p_start;
for ( p_end = p_start; p_end->next != nullptr; p_end = p_end->next )
{
num_total++;
if ( num_total % 2 == 0 ) p_middle = p_middle->next;
}
// find out whether we are looking for only one or two median values
enum LoopState loop_state = num_total % 2 == 0 ? LOOPSTATE_LOOKINGFORTWO : LOOPSTATE_LOOKINGFORONE;
//set k to the index of the middle element, or if there are two middle elements, to the left one
int k = ( num_total - 1 ) / 2;
// If we are looking for two median values, but we have only found one, then this variable will
// hold the value of the one we found. Whether we have found one can be determined by the state of
// the variable loop_state.
T val_found;
for (;;)
{
//make p_start cache the value of *pp_start again, because a previous iteration of the loop
//may have changed the value of pp_start
p_start = *pp_start;
assert( p_start != nullptr );
assert( p_middle != nullptr );
assert( p_end != nullptr );
assert( num_total != 0 );
if ( num_total == 1 )
{
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
return p_start->data;
case LOOPSTATE_FOUNDONE:
return arithmetic_mean<T,U>( val_found, p_start->data );
default:
assert( false ); //this should be unreachable
}
}
//select the pivot according to the "median-of-three" rule
T pivot;
if ( p_start->data < p_middle->data )
{
if ( p_middle->data < p_end->data )
pivot = p_middle->data;
else if ( p_start->data < p_end->data )
pivot = p_end->data;
else
pivot = p_start->data;
}
else
{
if ( p_start->data < p_end->data )
pivot = p_start->data;
else if ( p_middle->data < p_end->data )
pivot = p_end->data;
else
pivot = p_middle->data;
}
#if PRINT_DEBUG_LEVEL >= 1
//this line is conditionally compiled for extra debugging information
std::cout << "\nmedian of three: " << (*pp_start)->data << " " << p_middle->data << " " << p_end->data << " ->" << pivot << std::endl;
#endif
// We will be dividing the current partition into 3 new partitions (less-than,
// equal-to and greater-than) each represented as a linked list. Each list
// requires a pointer to the start of the list and a pointer to the pointer at
// the end of the list to write the address of new elements to. Also, when
// traversing the lists, we need to keep a pointer to the middle of the list,
// as this information will be required for selecting a new pivot in the next
// iteration of the loop. The latter is not required for the equal-to partition,
// as it would never be used.
Node<T> *p_less = nullptr, **pp_less_end = &p_less, **pp_less_middle = &p_less;
Node<T> *p_equal = nullptr, **pp_equal_end = &p_equal;
Node<T> *p_greater = nullptr, **pp_greater_end = &p_greater, **pp_greater_middle = &p_greater;
// These pointers are only used as a cache to the location of the end node.
// Despite their similar name, their function is quite different to pp_less_end
// and pp_greater_end.
Node<T> *p_less_end = nullptr;
Node<T> *p_greater_end = nullptr;
// counter for the number of elements in each partition
int num_less = 0;
int num_equal = 0;
int num_greater = 0;
// NOTE:
// The following loop will temporarily split the linked list. It will be merged later.
Node<T> *p_next_node = p_start;
//the following line isn't necessary; it is only used to clarify that the pointers no
//longer point to anything meaningful
*pp_start = p_start = nullptr;
for ( int i = 0; i < num_total; i++ )
{
assert( p_next_node != nullptr );
Node<T> *p_current_node = p_next_node;
p_next_node = p_next_node->next;
if ( p_current_node->data < pivot )
{
//link node to pp_less
assert( *pp_less_end == nullptr );
*pp_less_end = p_less_end = p_current_node;
pp_less_end = &p_current_node->next;
p_current_node->next = nullptr;
num_less++;
if ( num_less % 2 == 0 )
{
pp_less_middle = &(*pp_less_middle)->next;
}
}
else if ( p_current_node->data == pivot )
{
//link node to pp_equal
assert( *pp_equal_end == nullptr );
*pp_equal_end = p_current_node;
pp_equal_end = &p_current_node->next;
p_current_node->next = nullptr;
num_equal++;
}
else
{
//link node to pp_greater
assert( *pp_greater_end == nullptr );
*pp_greater_end = p_greater_end = p_current_node;
pp_greater_end = &p_current_node->next;
p_current_node->next = nullptr;
num_greater++;
if ( num_greater % 2 == 0 )
{
pp_greater_middle = &(*pp_greater_middle)->next;
}
}
}
assert( num_total == num_less + num_equal + num_greater );
assert( num_equal >= 1 );
#if PRINT_DEBUG_LEVEL >= 1
//this section is conditionally compiled for extra debugging information
{
std::cout << std::setfill( '0' );
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
std::cout << "LOOPSTATE_LOOKINGFORONE k = " << k << "\n";
break;
case LOOPSTATE_LOOKINGFORTWO:
std::cout << "LOOPSTATE_LOOKINGFORTWO k = " << k << "\n";
break;
case LOOPSTATE_FOUNDONE:
std::cout << "LOOPSTATE_FOUNDONE k = " << k << " val_found = " << val_found << "\n";
}
std::cout << "partition lengths: ";
std::cout <<
std::setw( 2 ) << num_less << " " <<
std::setw( 2 ) << num_equal << " " <<
std::setw( 2 ) << num_greater << " " <<
std::setw( 2 ) << num_total << "\n";
#if PRINT_DEBUG_LEVEL >= 2
Node<T> *p;
std::cout << "less: ";
if ( num_less > 10 )
std::cout << "too many to print";
else
for ( p = p_less; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\nequal: ";
if ( num_equal > 10 )
std::cout << "too many to print";
else
for ( p = p_equal; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\ngreater: ";
if ( num_greater > 10 )
std::cout << "too many to print";
else
for ( p = p_greater; p != nullptr; p = p->next ) std::cout << p->data << " ";
std::cout << "\n\n" << std::flush;
#endif
std::cout << std::flush;
}
#endif
//insert less-than partition into list
assert( *pp_start == nullptr );
*pp_start = p_less;
//insert equal-to partition into list
assert( *pp_less_end == nullptr );
*pp_less_end = p_equal;
//insert greater-than partition into list
assert( *pp_equal_end == nullptr );
*pp_equal_end = p_greater;
//link list to previously cut off part
assert( *pp_greater_end == nullptr );
*pp_greater_end = p_next_node;
//if less-than partition is large enough to hold both possible median values
if ( k + 2 <= num_less )
{
//set the next iteration of the loop to process the less-than partition
//pp_start is already set to the desired value
p_middle = *pp_less_middle;
p_end = p_less_end;
num_total = num_less;
}
//else if less-than partition holds one of both possible median values
else if ( k + 1 == num_less )
{
if ( loop_state == LOOPSTATE_LOOKINGFORTWO )
{
//the equal_to partition never needs sorting, because all members are already equal
val_found = p_equal->data;
loop_state = LOOPSTATE_FOUNDONE;
}
//set the next iteration of the loop to process the less-than partition
//pp_start is already set to the desired value
p_middle = *pp_less_middle;
p_end = p_less_end;
num_total = num_less;
}
//else if equal-to partition holds both possible median values
else if ( k + 2 <= num_less + num_equal )
{
//the equal_to partition never needs sorting, because all members are already equal
if ( loop_state == LOOPSTATE_FOUNDONE )
return arithmetic_mean<T,U>( val_found, p_equal->data );
return p_equal->data;
}
//else if equal-to partition holds one of both possible median values
else if ( k + 1 == num_less + num_equal )
{
switch ( loop_state )
{
case LOOPSTATE_LOOKINGFORONE:
return p_equal->data;
case LOOPSTATE_LOOKINGFORTWO:
val_found = p_equal->data;
loop_state = LOOPSTATE_FOUNDONE;
k = 0;
//set the next iteration of the loop to process the greater-than partition
pp_start = pp_equal_end;
p_middle = *pp_greater_middle;
p_end = p_greater_end;
num_total = num_greater;
break;
case LOOPSTATE_FOUNDONE:
return arithmetic_mean<T,U>( val_found, p_equal->data );
}
}
//else both possible median values must be in the greater-than partition
else
{
k = k - num_less - num_equal;
//set the next iteration of the loop to process the greater-than partition
pp_start = pp_equal_end;
p_middle = *pp_greater_middle;
p_end = p_greater_end;
num_total = num_greater;
}
}
}
// NOTE:
// The following code is not part of the algorithm, but is only intended to test the algorithm
// This simple class is designed to contain a singly-linked list
template <typename T>
class List
{
public:
List() : first( nullptr ) {}
// the following is required to abide by the rule of three/five/zero
// see: https://en.cppreference.com/w/cpp/language/rule_of_three
List( const List<T> & ) = delete;
List( const List<T> && ) = delete;
List<T>& operator=( List<T> & ) = delete;
List<T>& operator=( List<T> && ) = delete;
~List()
{
Node<T> *p = first;
while ( p != nullptr )
{
Node<T> *temp = p;
p = p->next;
delete temp;
}
}
void push_front( int data )
{
Node<T> *temp = new Node<T>;
temp->data = data;
temp->next = first;
first = temp;
}
//member variables
Node<T> *first;
};
int main()
{
//generated random numbers will be between 0 and 2 billion (fits in 32-bit signed int)
constexpr int min_val = 0;
constexpr int max_val = 2*1000*1000*1000;
//will allocate array for 1 million ints and fill with random numbers
constexpr int num_values = 1*1000*1000;
//this class contains the singly-linked list and is empty for now
List<int> l;
double result;
//These variables are used for random number generation
std::random_device rd;
std::mt19937 gen( rd() );
std::uniform_int_distribution<> dis( min_val, max_val );
try
{
//fill array with random data
std::cout << "Filling array with random data..." << std::flush;
auto unsorted_data = std::make_unique<int[]>( num_values );
for ( int i = 0; i < num_values; i++ ) unsorted_data[i] = dis( gen );
//fill the singly-linked list
std::cout << "done\nFilling linked list..." << std::flush;
for ( int i = 0; i < num_values; i++ ) l.push_front( unsorted_data[i] );
std::cout << "done\nCalculating median using STL function..." << std::flush;
//calculate the median using the functions provided by the C++ standard template library.
//Note: this is only done to compare the results with the algorithm provided in this file
if ( num_values % 2 == 0 )
{
int median1, median2;
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 1) / 2], &unsorted_data[num_values] );
median1 = unsorted_data[(num_values - 1) / 2];
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 0) / 2], &unsorted_data[num_values] );
median2 = unsorted_data[(num_values - 0) / 2];
result = arithmetic_mean<int,double>( median1, median2 );
}
else
{
int median;
std::nth_element( &unsorted_data[0], &unsorted_data[(num_values - 0) / 2], &unsorted_data[num_values] );
median = unsorted_data[(num_values - 0) / 2];
result = static_cast<int>(median);
}
std::cout << "done\nMedian according to STL function: " << std::setprecision( 12 ) << result << std::endl;
// NOTE: Since the STL functions only sorted the array, but not the linked list, the
// order of the linked list is still random and not pre-sorted.
//calculate the median using the algorithm provided in this file
std::cout << "Starting algorithm" << std::endl;
result = find_median<int,double>( l.first );
std::cout << "The calculated median is: " << std::setprecision( 12 ) << result << std::endl;
std::cout << "Cleaning up\n\n" << std::flush;
}
catch ( std::bad_alloc )
{
std::cerr << "Error: Unable to allocate sufficient memory!" << std::endl;
return -1;
}
return 0;
}
我已经用一百万个随机生成的元素成功地测试了我的代码,它几乎瞬间就找到了正确的中位数。