在分段筛中将 int 更改为 unsigned long long 时性能大幅下降

Massive performance slowdown when changing int to unsigned long long in segmented sieve

我对实现 Segmented SieveC 程序的性能感到困惑。我最初只使用 int 作为数据类型,但为了支持更大的数字,我决定切换到 unsigned long long。我预计由于开销会导致一些性能损失,但是当我尝试使用上限为 1000 亿的分段筛时,int 方法需要 23 秒,而 unsigned long long 甚至没有完成(或者至少需要太多让我久等了)

这是只有 int 数据类型的分段筛,N(上限)预设为 100B-

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>

int size = 5;
int current_slot = 0;
int* primes_arr;

void append(int data)
{
    if ((current_slot + 1) < size)
    {
        primes_arr[current_slot++] = data;
        return;
    }
    int* newarr = realloc(primes_arr, (size += 5) * sizeof(int));
    if (newarr != NULL)
    {
        newarr[current_slot++] = data;
        primes_arr = newarr;
    }
    else
    {
        printf("\nAn error occured while re-allocating memory\n");
        exit(1);
    }
}

// The following is just a standard approach to segmented sieve, nothing interesting

void simpleSieve(int limit)
{
    int p;
    if (primes_arr == NULL)
    {
        printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
        exit(1);
    }
    bool* mark = malloc((limit + 1) * sizeof(bool));
    if (mark != NULL)
    {
        memset(mark, true, sizeof(bool) * (limit + 1));
    }
    else
    {
        printf("\nAn error occured while allocating memory for mark in simpleSieve\n");
        exit(1);
    }

    for (p = 2; p * p < limit; p++)
    {
        if (mark[p])
        {
            for (int i = p * 2; i < limit; i += p)
            {
                mark[i] = false;
            }
        }
    }

    for (p = 2; p < limit; p++)
    {
        if (mark[p])
        {
            append(p);
            // printf("%d ", p);
        }
    }
}

void segmentedSieve(int n)
{
    int limit = (int)floor(sqrt(n)) + 1;
    simpleSieve(limit);

    int low = limit;
    int high = 2 * limit;

    while (low < n)
    {
        if (high >= n)
        {
            high = n;
        }
        bool* mark = malloc((limit + 1) * sizeof(bool));
        if (mark != NULL)
        {
            memset(mark, true, sizeof(bool) * (limit + 1));
        }
        else
        {
            printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
            exit(1);
        }
        for (int i = 0; i < current_slot; i++)
        {
            int lower_lim = (int)floor(low / primes_arr[i]) * primes_arr[i];
            if (lower_lim < low)
            {
                lower_lim += primes_arr[i];
            }
            for (int j = lower_lim; j < high; j += primes_arr[i])
            {
                mark[j - low] = false;
            }
        }

        for (int i = low; i < high; i++)
        {
            if (mark[i - low] == true)
            {
                // printf("%d ", i);
            }
        }
        low = low + limit;
        high = high + limit;
        free(mark);
    }
}

int main()
{
    primes_arr = malloc(size * sizeof(int));
    clock_t t0 = clock();
    segmentedSieve(100000000000);
    clock_t t1 = clock();
    double time_taken = (double) (t1 - t0) / CLOCKS_PER_SEC;
    printf("\nDone! Time taken: %f\n", time_taken);
    return 0;
}

这里的分段筛只有 unsigned long long 数据类型,N(上限)预设为 100B-

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>

unsigned long size = 5, current_slot = 0;
unsigned long long* primes_arr;

void append(unsigned long long data)
{
    if ((current_slot + 1) < size)
    {
        primes_arr[current_slot++] = data;
        return;
    }
    unsigned long long* newarr = realloc(primes_arr, (size += 5l) * sizeof(unsigned long long));
    if (newarr != NULL)
    {
        newarr[current_slot++] = data;
        primes_arr = newarr;
    }
    else
    {
        printf("\nAn error occured while re-allocating memory\n");
        exit(1);
    }
}

void simpleSieve(unsigned long limit)
{
    unsigned long long p;
    if (primes_arr == NULL)
    {
        printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
        exit(1);
    }
    bool* mark = malloc((limit + 1) * sizeof(bool));
    if (mark == NULL)
    {
        printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
        exit(1);
    }
    memset(mark, true, sizeof(bool) * (limit + 1));

    for (p = 2; p * p < limit; p++)
    {
        if (mark[p])
        {
            for (unsigned long long i = p * 2; i < limit; i += p)
            {
                mark[i] = false;
            }
        }
    }

    for (p = 2; p < limit; p++)
    {
        if (mark[p])
        {
            append(p);
            //printf("%llu ", p);
        }
    }
}

void segmentedSieve(unsigned long long n)
{
    unsigned long limit = (unsigned long)floor(sqrt(n)) + 1l;
    simpleSieve(limit);

    unsigned long low = limit;
    unsigned long high = 2 * limit;

    while (low < n)
    {
        if (high >= n)
        {
            high = n;
        }
        bool* mark = malloc((limit + 1) * sizeof(bool));
        if (mark == NULL)
        {
            printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
            exit(1);
        }
        memset(mark, true, sizeof(bool) * (limit + 1));
        for (unsigned long long i = 0; i < current_slot; i++)
        {
            unsigned long long lower_lim = (unsigned long)floor(low / primes_arr[i]) * primes_arr[i];
            if (lower_lim < low)
            {
                lower_lim += primes_arr[i];
            }
            for (unsigned long long j = lower_lim; j < high; j += primes_arr[i])
            {
                mark[j - low] = false;
            }
        }

        for (unsigned long long i = low; i < high; i++)
        {
            if (mark[i - low])
            {
                //printf("%llu ", i);
            }
        }
        low = low + limit;
        high = high + limit;
        free(mark);
    }
}

int main()
{
    primes_arr = malloc(size * sizeof(unsigned long long));
    clock_t t0 = clock();
    segmentedSieve(100000000000);
    clock_t t1 = clock();
    double time_taken = (double)(t1 - t0) / CLOCKS_PER_SEC;
    printf("\nDone! Time taken: %f\n", time_taken);
    return 0;
}

我没看到为什么会这样,我做错了什么吗?

编辑:我也意识到 int 无论如何都不能处理 1000 亿,但程序执行时没有错误,甚至打印了最终时间报告。同时,unsigned long long 程序甚至没有完成 两倍 的时间 int

另一方面,尝试将两者的上限设置为 1B,实际上 return 非常相似的结果。为什么?

1000亿十进制是17 4876 E800H十六进制。由于不适合int,'surplus'最高位被切掉,剩下4876 4800H,即1.215.752.192D,因此您实际上将限制设置为在 main.

中调用 segmentedSieve 时实际预期的 100 分之一

实际上,你很幸运没有那样产生负数。

但是请注意,由于有符号整数溢出,您已经进入了未定义行为 的领域。任何事情都可能发生,包括您的程序崩溃或关闭太阳...

但更有趣的是,请查看以下内容:

segmentedSieve(unsigned long long n)
{
    unsigned long low = limit;
    while (low < n)

这很关键。在许多系统上,longint 具有相同的大小(例如 64 位 linux 是一个例外)。如果 你的 系统也是这种情况,那么你会产生一个无限循环,因为 low 也会溢出,(只是这次不是未定义的行为)并且永远无法达到 n...

中存储的 1000 亿

您应该始终如一地使用 unsigned long long – 或者甚至更好,stdint.h.

中的 uint64_t