在分段筛中将 int 更改为 unsigned long long 时性能大幅下降
Massive performance slowdown when changing int to unsigned long long in segmented sieve
我对实现 Segmented Sieve 的 C
程序的性能感到困惑。我最初只使用 int
作为数据类型,但为了支持更大的数字,我决定切换到 unsigned long long
。我预计由于开销会导致一些性能损失,但是当我尝试使用上限为 1000 亿的分段筛时,int
方法需要 23 秒,而 unsigned long long
甚至没有完成(或者至少需要太多让我久等了)
这是只有 int
数据类型的分段筛,N(上限)预设为 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
int size = 5;
int current_slot = 0;
int* primes_arr;
void append(int data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
int* newarr = realloc(primes_arr, (size += 5) * sizeof(int));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
// The following is just a standard approach to segmented sieve, nothing interesting
void simpleSieve(int limit)
{
int p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in simpleSieve\n");
exit(1);
}
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (int i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
// printf("%d ", p);
}
}
}
void segmentedSieve(int n)
{
int limit = (int)floor(sqrt(n)) + 1;
simpleSieve(limit);
int low = limit;
int high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
for (int i = 0; i < current_slot; i++)
{
int lower_lim = (int)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (int j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (int i = low; i < high; i++)
{
if (mark[i - low] == true)
{
// printf("%d ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(int));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double) (t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
这里的分段筛只有 unsigned long long
数据类型,N(上限)预设为 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
unsigned long size = 5, current_slot = 0;
unsigned long long* primes_arr;
void append(unsigned long long data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
unsigned long long* newarr = realloc(primes_arr, (size += 5l) * sizeof(unsigned long long));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
void simpleSieve(unsigned long limit)
{
unsigned long long p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (unsigned long long i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
//printf("%llu ", p);
}
}
}
void segmentedSieve(unsigned long long n)
{
unsigned long limit = (unsigned long)floor(sqrt(n)) + 1l;
simpleSieve(limit);
unsigned long low = limit;
unsigned long high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (unsigned long long i = 0; i < current_slot; i++)
{
unsigned long long lower_lim = (unsigned long)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (unsigned long long j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (unsigned long long i = low; i < high; i++)
{
if (mark[i - low])
{
//printf("%llu ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(unsigned long long));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double)(t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
我没看到为什么会这样,我做错了什么吗?
编辑:我也意识到 int
无论如何都不能处理 1000 亿,但程序执行时没有错误,甚至打印了最终时间报告。同时,unsigned long long
程序甚至没有完成 两倍 的时间 int
。
另一方面,尝试将两者的上限设置为 1B,实际上 return 非常相似的结果。为什么?
1000亿十进制是17 4876 E800H十六进制。由于不适合int
,'surplus'最高位被切掉,剩下4876 4800H,即1.215.752.192D,因此您实际上将限制设置为在 main
.
中调用 segmentedSieve
时实际预期的 100 分之一
实际上,你很幸运没有那样产生负数。
但是请注意,由于有符号整数溢出,您已经进入了未定义行为 的领域。任何事情都可能发生,包括您的程序崩溃或关闭太阳...
但更有趣的是,请查看以下内容:
segmentedSieve(unsigned long long n)
{
unsigned long low = limit;
while (low < n)
这很关键。在许多系统上,long
与 int
具有相同的大小(例如 64 位 linux 是一个例外)。如果 你的 系统也是这种情况,那么你会产生一个无限循环,因为 low
也会溢出,(只是这次不是未定义的行为)并且永远无法达到 n
...
中存储的 1000 亿
您应该始终如一地使用 unsigned long long
– 或者甚至更好,stdint.h
.
中的 uint64_t
我对实现 Segmented Sieve 的 C
程序的性能感到困惑。我最初只使用 int
作为数据类型,但为了支持更大的数字,我决定切换到 unsigned long long
。我预计由于开销会导致一些性能损失,但是当我尝试使用上限为 1000 亿的分段筛时,int
方法需要 23 秒,而 unsigned long long
甚至没有完成(或者至少需要太多让我久等了)
这是只有 int
数据类型的分段筛,N(上限)预设为 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
int size = 5;
int current_slot = 0;
int* primes_arr;
void append(int data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
int* newarr = realloc(primes_arr, (size += 5) * sizeof(int));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
// The following is just a standard approach to segmented sieve, nothing interesting
void simpleSieve(int limit)
{
int p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in simpleSieve\n");
exit(1);
}
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (int i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
// printf("%d ", p);
}
}
}
void segmentedSieve(int n)
{
int limit = (int)floor(sqrt(n)) + 1;
simpleSieve(limit);
int low = limit;
int high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
for (int i = 0; i < current_slot; i++)
{
int lower_lim = (int)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (int j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (int i = low; i < high; i++)
{
if (mark[i - low] == true)
{
// printf("%d ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(int));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double) (t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
这里的分段筛只有 unsigned long long
数据类型,N(上限)预设为 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
unsigned long size = 5, current_slot = 0;
unsigned long long* primes_arr;
void append(unsigned long long data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
unsigned long long* newarr = realloc(primes_arr, (size += 5l) * sizeof(unsigned long long));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
void simpleSieve(unsigned long limit)
{
unsigned long long p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (unsigned long long i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
//printf("%llu ", p);
}
}
}
void segmentedSieve(unsigned long long n)
{
unsigned long limit = (unsigned long)floor(sqrt(n)) + 1l;
simpleSieve(limit);
unsigned long low = limit;
unsigned long high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (unsigned long long i = 0; i < current_slot; i++)
{
unsigned long long lower_lim = (unsigned long)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (unsigned long long j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (unsigned long long i = low; i < high; i++)
{
if (mark[i - low])
{
//printf("%llu ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(unsigned long long));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double)(t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
我没看到为什么会这样,我做错了什么吗?
编辑:我也意识到 int
无论如何都不能处理 1000 亿,但程序执行时没有错误,甚至打印了最终时间报告。同时,unsigned long long
程序甚至没有完成 两倍 的时间 int
。
另一方面,尝试将两者的上限设置为 1B,实际上 return 非常相似的结果。为什么?
1000亿十进制是17 4876 E800H十六进制。由于不适合int
,'surplus'最高位被切掉,剩下4876 4800H,即1.215.752.192D,因此您实际上将限制设置为在 main
.
segmentedSieve
时实际预期的 100 分之一
实际上,你很幸运没有那样产生负数。
但是请注意,由于有符号整数溢出,您已经进入了未定义行为 的领域。任何事情都可能发生,包括您的程序崩溃或关闭太阳...
但更有趣的是,请查看以下内容:
segmentedSieve(unsigned long long n)
{
unsigned long low = limit;
while (low < n)
这很关键。在许多系统上,long
与 int
具有相同的大小(例如 64 位 linux 是一个例外)。如果 你的 系统也是这种情况,那么你会产生一个无限循环,因为 low
也会溢出,(只是这次不是未定义的行为)并且永远无法达到 n
...
您应该始终如一地使用 unsigned long long
– 或者甚至更好,stdint.h
.
uint64_t