优化排序数组中的二进制搜索查找出现次数

Optimize Binary Search in sorted array find number of occurences

我正在尝试执行尽可能少的操作来查找数组中某个元素的出现次数。如果可能,甚至节省 1 个操作。到目前为止,这是我所知道的最好的二进制搜索版本。 我不能使用向量或任何其他 std:: 函数

int modifiedbinsearch_low(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low + (high-low) /2;

    if(key >  arr[mid] ) { modifiedbinsearch_low(arr,mid + 1 , high,key);  } 
    else  { modifiedbinsearch_low(arr,low,mid,key);  }  
}

int modifiedbinsearch_high(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low + (high-low) /2;

    if(key <  arr[mid] ) { modifiedbinsearch_high(arr,low,mid,key);  } 
    else  { modifiedbinsearch_high(arr,mid+1,high,key);  } 

} 

int low = modifiedbinsearch_low( ...)
int high = modifiedbinsearch_high( ...)

此版本将两个功能合并为一个,但花费的时间几乎翻了一番。我想知道这个想法对它变得最快是有好处的,但实现是错误的。

#include<stdio.h>
int binarysearch(int a[],int n,int k,bool searchfirst){
    int result=-1;
    int low=0,high=n-1;
    while(low<=high){
        int mid=(low+high)/2;
        if(a[mid]==k)  {
              result=mid; 
           if(searchfirst)
              high=mid-1; 
            else
              low=mid+1;
    }
    else if(k<a[mid])  high=mid-1;
    else low=mid+1;
    }
    return result;
}

int main(){
    int a[]={1,1,1,2,2,3,3,3,6,6,6,6,6,7,7};
    int n=sizeof(a)/sizeof(a[0]);
    int x=6;
    int firstindex=binarysearch(a,n,x,true);
    printf("%d\n",firstindex);
    if(firstindex==-1){
        printf("elment not found in the array:\n ");
    }
    else {
        int lastindex=binarysearch(a,n,x,false);
        printf("%d\n",lastindex);
        printf("count is = %d", lastindex-firstindex+1);
    }

}

较短的版本

      int  binmin(int a[], int start, int end,int val ) {
         if(start<end) {
            int mid = (start+end)/2;
            if(a[mid]>=val) 
                binmin(a,start,mid-1,val);
            else if(a[mid]<val)
                binmin(a,mid+1,end,val);

      }
      else if(start>end)
           return start;
}

这是一个性能问题。在主 while 循环中,当您找到目标值时,您并没有跳出循环。

while(low<=high){
    int mid=(low+high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
       if(searchfirst)
          high=mid-1; 
        else
          low=mid+1;
}

但这不是唯一的问题。 searchfirst 值不应该是决定调整高低的因素。在经典的二进制搜索中,根据 a[mid] 与 k` 的比较来调整 highlow 参数。可能更接近于此:

while(low<=high) {
    int mid=(low+high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
          break;
    }
    if(a[mid] > k)
        high=mid-1; 
    else
       low=mid+1;
}

你的想法是对的。二分查找查找元素。但是让我建议在初始二进制搜索之后更简单的解决方案是“向左扫描”和“向右扫描”以计算重复元素。

告诉我你对此的看法:

int binarySearch(int* arr, int start, int end, int value) {
    while (start <= end) {
        int mid = (start + end) / 2;
        if (arr[mid] == value) {
            return mid;
        }
        start = (arr[mid] < value) ? (mid + 1) : start;
        end = (arr[mid] > value) ? (mid - 1) : end;
    }
    return -1;
}

int countSide(int* arr, int length, int index, int value, int step) {
    int count = 0;
    while (index >= 0 && index <= (length - 1) && arr[index] == value) {
        count++;
        index += step;
    }
    return count;
}

int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = binarySearch(a, 0, n - 1, x);
    printf("%d\n", firstindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = countSide(a, n, firstindex, x, -1);
        count += countSide(a, n, firstindex, x, 1);
        count--; // because we counted the middle element twice
        printf("count is = %d\n", count);
    }
}

已更新

这里有一个解决方案,它执行两次二进制搜索以找到数组中目标值的下限和上限,并简单地测量索引之间的距离以获得计数:

int bound(int* arr, int length, int value, bool isUpperBound) {

    int best = -1;
    int start = 0;
    int end = start + length - 1;

    while (start <= end) {
        int mid = (start + end) / 2;
        if (arr[mid] == value) {
            best = mid;

            if (isUpperBound) {
                start = mid + 1;
            }
            else {
                end = mid - 1;
            }
        }
        else if (arr[mid] < value) {
            start = mid + 1;
        }
        else if (arr[mid] > value) {
            end = mid - 1;
        }
    }
    return best;
}


int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = bound(a, n, x, false);
    int lastindex = bound(a, n, x, true);
    printf("%d - %d\n", firstindex, lastindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = lastindex-firstindex + 1;
        printf("count is = %d\n", count);
    }
}