查找具有一定数量 steps/count 的起始值最小的数字

Question

我正在研究 Collatz 猜想程序。我有一个遵循简单规则的序列：

如果当前项是偶数：下一项是当前项的一半。
如果当前项是奇数：下一项是当前项的三倍加1。

我们继续这样做，直到达到一个并计算每个使用的术语。

例如：对于数字 17，我们有：

17 52 26 13 40 20 10 5 16 8 4 2 1

因此，计数为 13。

我特别需要帮助的是我需要找到计数超过 1234 的最小起始值（数字）。

即：

1 的计数为 1
2 的计数为 2
3 的计数为 8
4 的计数为 3
5 的计数为 6
6 有计数 9

......

18 有计数 21

......

97 的计数为 119
等

这是我认为有效但处理时间极长的代码。我该如何优化它才能更快地找到计数？有人建议我进行二进制搜索，但计数不是线性的，因此不起作用..

#include <stdio.h>

int main (void) {


    unsigned long long int num1 = 1;
    while (num1 <= 99999999999999999) {   

    unsigned long long int num = num1;
    int count = 1;
    while (num != 1) {

        if (num % 2 == 0) {
            num = (num/2);
            count++;
        }

        else {
            num = ((num*3) + 1);   
            count++;
        }

    }

    if (count > 1234) {
        printf("%llu\n", num1);
        break;
    }

    num1++;
    }

    return 0;
}

Answer 1

当您找到从任何特定值开始的序列长度时，您也会找到从您一路上看到的每个数字开始的序列长度。你应该记住你看到的所有小于 1000000 的数字的长度，这样你下次看到它们时就可以停止追踪了。

Answer 2

你怎么知道长度为 1234 的序列不会上升到不适合 long long 的值？

你可能不知道。因此，您需要一个 bignum 实现。

bignum Collatz 检查器有一个特别干净的实现 won the IOCCC（schweikhardt 的条目，咳咳）。您可以以此为起点。

Answer 3

产生长于 1234 步的 Collatz 序列的最小根是 133561134663。它达到最大值 319497287463520，这是一个 49 位值。因此，许多 C 库可以使用 <stdint.h> 中提供的 uint64_t（通常通过 <inttypes.h>）评估此序列。

不幸的是，有许多序列的起点较低，需要最多 71 位整数才能正确解析，例如从 110243094271 开始的 573 步序列。

最烦人的是，如果您使用 64 位无符号整数实现 Collatz 序列，而不检查溢出，您将计算出这些序列的错误长度；但是因为它们恰好有无趣的长度，所以这个bug被掩盖了，你仍然可以找到上述解决方案！

从本质上讲，如果这是一个 C 练习，它就是有问题的：即使是一个可怕的错误和有限的实施也能找到正确的答案。

我碰巧在 64 位 Linux 上使用 GCC-5.4.0（运行ning 在 Core i5-7200U CPU 笔记本电脑上），所以我可以使用unsigned __int128，128位无符号整数类型，用于Collatz序列。我还有 16 GiB 的 RAM，其中我使用了大约 12 GiB 来存储序列长度高达 130 亿（13×10⁹），用于奇数索引。

每当你找到从某个奇数 i 开始的序列的长度时，从 2i 开始的序列的长度是一个步长（但其他方面相同）。事实上，有一个从 2^ki 开始的序列正好是 k 步更长。如果您只是在寻找某个最小长度序列的最小起始值，则可以忽略所有偶数起始点，直到 k 足够小（基本上包含您需要的起始值范围调查）。

仅考虑偶数起始值会带来显着的加速（20% 或更多），但我在这里绘制了 "line"，而是检查每个起始值。

为了加快计算速度，我使用了GCC内置的__builtin_ctz()（__builtin_ctzll() for 64-bit unsigned long long）来查找连续最低有效位的个数零位。这样我就可以一次处理所有连续的 "even" 步，但正确计算步数。

我还将序列步进器分成两个并行的，一个处理状态适合 64 位变量的情况，另一个处理需要完整 128 位的情况。

在仅限64位的部分，如果当前状态的序列长度是可缓存的，我会查找它。如果它不为零，我将其添加到我们已经完成的步骤数中，然后得到结果。否则，我将该缓存条目索引和从根开始的步数添加到一个小的更新缓存中。这样，当我得到结果时，我不需要重复序列，并且可以简单地在一个紧密的循环中应用更新。在乘以三加一之前，我确实检查了该值是否溢出（6148914691236517205 溢出，到 2⁶⁴）；如果是，我切换到 128 位部分，然后在那里进行乘加运算。

在 128 位部分，我假设我们没有那么多少内存（在艾字节范围内），所以我不需要担心缓存部分。在我乘以三并加一（我使用 curr128 += curr128 + curr128 + 1 做的）之前，我确实检查状态没有溢出，只是为了确定。

在搭载Intel Core i5-7200U的笔记本电脑上，使用单核，计算出所有开始的Collatz序列的长度，用了大约4253秒的CPU时间（大约一小时十分钟）从 1 到 133561134663。

下一个更长的序列从 158294678119 开始，长 1243 步；为了做到这一点，我的笔记本电脑燃烧了 CPU 一个半小时。

然而，到了那个地步，代码已经变得不像OP了，而且非常可怕。特别是，为了混合 64-bit/128-bit 循环，我不得不求助于 goto；并且整个实现充满了 GCC 内置函数。我认为它几乎是只写代码；也就是说，如果我怀疑它的任何部分，我会从头开始重写它。当优化优先于可维护性和稳健性时，这是典型的最终结果。

无论如何，为了让其他人在 x86-64 上的 Linux 上使用 GCC 验证以上内容，这是可怕的代码，collatz.c:

#define  _POSIX_C_SOURCE  200809L
#define  _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>

#define  MAX_SEQ_LEN  2000

#define  OVER64       UINT64_C(6148914691236517205)

typedef  unsigned __int128   u128;
typedef  uint64_t            u64;

static inline u64  u128_lo(const u128  u) { return (u64)u;         }
static inline u64  u128_hi(const u128  u) { return (u64)(u >> 64); }

static inline u64 highest_bit_set_64(const u64  u)
{
    if (sizeof u == sizeof (unsigned int))
        return 63 - __builtin_clz(u);
    else
    if (sizeof u == sizeof (unsigned long))
        return 63 - __builtin_clzl(u);
    else
    if (sizeof u == sizeof (unsigned long long))
        return 63 - __builtin_clzll(u);
    else
        exit(EXIT_FAILURE);
}

static unsigned int highest_bit_set_128(u128 u)
{
    u64  hi = u128_hi(u);
    if (hi)
        return 64 + highest_bit_set_64(hi);
    else
        return highest_bit_set_64(u128_lo(u));
}

static inline unsigned int  ctz64(const u64  u)
{
    if (sizeof u <= sizeof (unsigned int))
        return __builtin_ctz(u);
    else
    if (sizeof u <= sizeof (unsigned long))
        return __builtin_ctzl(u);
    else
    if (sizeof u <= sizeof (unsigned long long))
        return __builtin_ctzll(u);
    else
        exit(EXIT_FAILURE);
}

static inline unsigned int  ctz128(const u128  u)
{
    if (sizeof u == sizeof (unsigned long long))
        return __builtin_ctzll(u);
    else {
        const u64  lo = u128_lo(u);
        if (lo)
            return ctz64(u);
        else
            return 64 + ctz64(u128_hi(u));
    }
}

static const char *s128(u128 u)
{
    static char  buffer[40];
    char        *p = buffer + sizeof buffer;

    *(--p) = '[=10=]';
    do {
        *(--p) = '0' + (u % 10);
        u /= 10;
    } while (u);

    return p;
}

static struct timespec  wall_started, wall_now;
static inline void      wall_start(void)
{
    clock_gettime(CLOCK_MONOTONIC, &wall_started);
}
static inline double    wall_seconds(void)
{
    clock_gettime(CLOCK_MONOTONIC, &wall_now);
    return (double)(wall_now.tv_sec - wall_started.tv_sec)
         + (double)(wall_now.tv_nsec - wall_started.tv_nsec) / 1000000000.0;
}

static struct timespec  cpu_elapsed;
static inline double    cpu_seconds(void)
{
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_elapsed);
    return (double)cpu_elapsed.tv_sec + (double)cpu_elapsed.tv_nsec / 1000000000.0;
}

static int out_and_err = 0;
static void print(const char *fmt, ...)
{
    va_list  args;

    va_start(args, fmt);
    vdprintf(STDOUT_FILENO, fmt, args);
    va_end(args);

    if (out_and_err) {
        va_start(args, fmt);
        vdprintf(STDERR_FILENO, fmt, args);
        va_end(args);
    }
}

static size_t        cache_index[MAX_SEQ_LEN];
static unsigned int  cache_depth[MAX_SEQ_LEN];

static u64           seq_max = 0;  /* 2*seq_num */
static size_t        seq_num = 0;
static uint16_t     *seq_len = NULL;

static unsigned int  tip_bits = 0;
static u128          tip_max  = 0;

static inline unsigned int  collatz_length(const u64 root, u128 *maxto)
{
    u128          curr128, max128;
    u64           curr64, max64, lo;
    size_t        cached = 0, i;
    unsigned int  steps = 1, n;

    curr128 = max128 = root;
    curr64 = max64 = root;

any64bit:

    if (!(curr64 & 1)) {
        n = ctz64(curr64);
        curr64 >>= n;
        steps += n;
    }

    if (curr64 >= OVER64) {
        curr128 = curr64;
        goto odd128bit;
    }

odd64bit:

    if (curr64 <= 1)
        goto done;

    if (curr64 < seq_max) {
        i = curr64 >> 1;
        if (seq_len[i]) {
            steps += seq_len[i];
            goto done;
        }

        cache_index[cached] = i;
        cache_depth[cached] = steps;
        cached++;
    }

    curr64 += curr64 + curr64 + 1;
    steps++;

    if (max64 < curr64)
        max64 = curr64;

    goto any64bit;


any128bit:

    if (!(curr128 & 1)) {
        n = ctz128(curr128);
        curr128 >>= n;
        steps += n;

        if (!u128_hi(curr128)) {
            lo = u128_lo(curr128);
            if (lo <= 1)
                goto done;
            if (lo < OVER64) {
                curr64 = lo;
                goto odd64bit;
            }
        }
    }

odd128bit:

    if (u128_hi(curr128) >= OVER64) {
        print("Overflow at root %" PRIu64 ".\n", root);
        exit(EXIT_FAILURE);
    }

    curr128 += curr128 + curr128 + 1;
    steps++;

    if (max128 < curr128)
        max128 = curr128;

    goto any128bit;

done:

    if (cached >= MAX_SEQ_LEN) {
        print("Update cache overrun.\n");
        exit(EXIT_FAILURE);
    }

    while (cached-->0)
        seq_len[ cache_index[cached] ] = steps - cache_depth[cached];

    if (max128 < (u128)max64)
        max128 = max64;

    if (maxto)
        *maxto = max128;

    if (tip_max <= max128) {
        const unsigned int  maxbits = highest_bit_set_128(max128) + 1;
        tip_max = max128;
        if (tip_bits <= maxbits) {
            tip_bits = maxbits;
            print("%" PRIu64 " length %u (reaches %s - %u bits).\n", root, steps, s128(max128), maxbits);
        }
    }

    return steps;
}

int main(void)
{
    unsigned int  n, nmax = 0;
    u128          m;
    uint64_t      i = 1;

    wall_start();

    /* If standard output is redirected to a file, print everything to standard error also. */
    out_and_err = (isatty(STDERR_FILENO) && !isatty(STDOUT_FILENO));

    /* Try allocating up to 16 GiB of cache. */
    seq_num = (size_t)1024 * 1024 * 1024 * 16 / sizeof seq_len[0];
    while (1) {
        seq_len = malloc(seq_num * sizeof seq_len[0]);
        if (seq_len)
            break;
        seq_num = ( seq_num * 7 ) / 8;
    }
    seq_max = 2 * (uint64_t)seq_num;
    memset(seq_len,~0, seq_num * sizeof seq_len[0]);
    memset(seq_len, 0, seq_num * sizeof seq_len[0]);

    print("Allocated %zu entries (%.3f GiB)\n", seq_num, (double)(seq_num * sizeof seq_len[0]) / 1073741824.0);

    do {
        n = collatz_length(i, &m);
        if (n >= nmax) {
            const double cs = cpu_seconds();
            const double ws = wall_seconds();
            const char  *s = s128(m);
            nmax = n;
            print("%" PRIu64 " length %u (reaches %s) [%.3f seconds elapsed, %.3f seconds CPU time]\n", i, n, s, ws, cs);
        }

        i++;
    } while (nmax < MAX_SEQ_LEN);

    return EXIT_SUCCESS;
}

我使用 gcc -Wall -O2 collatz.c -lrt -o collatz && ./collatz > results.txt 编译并运行它。（优化确保例如 sizeof if 子句在编译时解析，并生成最少数量的慢速条件跳转，而不是使用条件移动。因此，上面的程序是 设计的 至少使用 -O2 进行编译。）它确实包含额外的代码，例如显示实时时间和使用的 CPU 时间。由于它们与手头的问题无关，它们确实可以帮助 TA 轻松检测是否有人尝试将此代码作为他们自己的作业提交。

虽然它正在工作，但我可以在另一个终端中使用 grep -gk 3 results.txt 查看到目前为止获得的结果，按序列长度递增排序；或 grep -gk 5 results.txt 查看根据序列中的峰（序列中的最大值）排序的结果。

查找具有一定数量 steps/count 的起始值最小的数字

Finding number with the smallest starting value that has a certain amount of steps/count

c

optimization

collatz