密码算法求解器分解的效率

Efficiency of Cryptarithmetic Algorithm solver decomposition

问题如下:给定 "ABC+DEF=GHI" 格式字符串,其中 A、B、C 等代表唯一数字,找到给出最大 GHI 的表达式。例:输入字符串为AAB+AAB=AAB,则无解。如果改为 AAA + BBB = AAA,则解为 999 + 000 = 999。另一个示例字符串:ABC + CBA = GGG,结果为 => 543 + 345 = 888.

我很容易排除不可能的情况。我想到的算法是一种蛮力,它只是先尝试​​最大化 rhs。然而,我的问题是这样做得很快,还要注意独特的数字。解决这个问题的有效方法是什么?

注意:我希望以单线程方法解决这个问题,我当前的问题是检测 "assign_value" 函数中是否使用了唯一数字。也许有更好的赋值方法?

编辑:根据 smci 的建议,这就是我最终想要实现的目标:ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828 -- 一个系统不仅可以处理我在开头提供的形式的字符串(3 位数字 + 3 位数字 = 3 位数字),还可以处理那些。然而,从简单开始并使用我给出的格式解决方案并没有什么坏处:)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_EXPRESSION_SIZE 11 + 1
#define MAX_VARIABLES 9

int variables_read[MAX_VARIABLES] = { 0 };

struct variable {
    int coefficient;
    int* ptr;
    int side;
    int canhavezero;
    unsigned value_max;
};

typedef struct variable Variable;

struct equation {
    Variable* variables[9]; // max
    unsigned distinct_on_rhs;
    unsigned var_count;
};

typedef struct equation Equation;

int int_pow(int n, int k) {
    int res = 1;
    for(int i = 0; i < k; ++i)
        res *= n;
    return res;
}

void AddVariable(Equation* E, Variable* V) {
    E->variables[E->var_count++] = V;
}

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int assign_value(Equation* E, int pos, int* values) {
    if(!E->variables[pos]->value_count) {
        if(pos < 0)
            return 2;
        // if no possible values left, reset this, but take one value count from the closest variable
        E->variables[pos - 1]->value_count--;
        E->variables[pos]->value_count = E->variables[pos]->value_max;
        return 0;
    }
    int i;
    for(i = 9; i >= 0 && values[i] == -1; --i)
    printf("Assigning %d to %c\n", E->variables[pos]->value_set[E->variables[pos]->value_count - 1], 'A' + (E->variables[pos]->ptr - E->variables[0]->ptr));
    *(E->variables[pos]->ptr) = values[i];
    values[i] = -1; // we have unique numbers
    return 0;
}

int isSolved(Equation E) {
    int sum = 0, coeff = 0;
    printf("Trying...\n");
    for(int i = 0; i < E.var_count; ++i) {
        coeff = E.variables[i]->coefficient * (*E.variables[i]->ptr);
        printf("%d ", *E.variables[i]->ptr);
        if(E.variables[i]->side)
            coeff *= -1;
        sum += coeff;
    }
    printf("\nSum was %d\n", sum);
    return !sum;
}

char* evaluate(char* expression) {
    char* res;
    // check for impossible cases first
    if(IsImpossible(expression)) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
        return res;
    }
    res = (char *) malloc(sizeof(char) * MAX_EXPRESSION_SIZE);
    // now try to find solutions, first describe the given characters as equations
    Equation E;
    E.var_count = 0;
    E.distinct_on_rhs = 0;
    int side_mode = 0, powcounter = 0;
    int a = -1, b = -1, c = -1, d = -1, e = -1, f = -1, g = -1, h = -1, i = -1;
    int* max_variables[MAX_VARIABLES] = { &a, &b, &c, &d, &e, &f, &g, &h, &i };
    for(int j = 0; j < MAX_EXPRESSION_SIZE - 1; ++j) {
        if(expression[j] == '+')
            continue;
        if(expression[j] == '=') {
            side_mode = 1;
            continue;
        }
        Variable* V = (Variable *) malloc(sizeof(Variable));
        // we know we always get 3 digit numbers but we can easily change if we need to
        V->coefficient = int_pow(10, 2 - (powcounter % 3));
        V->ptr = max_variables[expression[j] - 'A'];
        V->side = side_mode;
        E.distinct_on_rhs += side_mode && !variables_read[expression[j] - 'A'];
        if(!(powcounter % 3)) { // beginning of a number
            V->value_count = 9;
            V->value_max = 9;
            V->canhavezero = 0;
        }
        else {
            V->value_count = 10;
            V->value_max = 10;
            V->canhavezero = 1;
        }
        AddVariable(&E, V);
        variables_read[expression[j] - 'A'] = 1;
        ++powcounter;
    }
    for(int j = 0; j < E.var_count; ++j)
        printf("%d %c %d\n", E.variables[j]->coefficient, 'A' + (E.variables[j]->ptr - max_variables[0]), E.variables[j]->side);
    // we got a representaion of the equation, now try to solve it
    int solved = 0;
    // O(9^N), where N is number of distinct variables.
    // An optimization we can do is, we first assign possible max values to rhs number, then go down. We need max number.
    printf("Distincts: %d\n", E.distinct_on_rhs);
    do {
        // try to assign values to all variables and try if it solves the equation
        // but first try to assign rhs as max as possible
        int values[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        int temp = E.var_count - E.distinct_on_rhs;
        while(temp < E.var_count) {
            solved = assign_value(&E, temp, values);
            ++temp;
        }
        for(int j = E.var_count - 1 - E.distinct_on_rhs; j >= 0; --j)
            solved = assign_value(&E, j, values);
        if(solved) // can return no solution
            break;
        printf("Solving...\n");
        solved = isSolved(E);
        system("PAUSE");
    } while(!solved);
    if(solved == 2) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
    }
    else {

    }
    return res;
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    do {
        printf("Enter the formula: ");
        scanf("%s", expression);
        char* res = evaluate(expression);
        printf("%s\n", res);
        free(res);
    } while(expression[0] != '-');
    return 0;
}

我将从结果开始。没有那么多不同的案例:

  • AAA
  • AABABABAA
  • ABC

所有其他情况都可以通过重命名变量来简化为这些情况。 ABC + CBA = GGG 会变成 DBC + CBD = AAA.

那么你有

  • 单变量情况的 10 种可能的解决方案 AAA
  • 90 (10*9) 对于两个变量案例
  • 720 (10*9*8) 对于三变量情况

假设任何地方都允许零。如果没有,您可以过滤掉那些不允许的。

这会设置等式右侧的变量。每个仅出现在左侧的变量都会添加可能的解决方案。 B 增加 9 倍,C 增加 8 倍,D 增加 7 等等。

最 "efficient" 的解决方案将获取任务的所有知识并简单地打印结果。所以问题是可以对多少条件进行编码,在哪里以及需要什么样的灵活性。

另一种方法是分别查看测试用例的生成和评估。

一个简单的递归函数就可以生成10! (362880) 个唯一数字的测试用例。

unsigned long long count = 0;
unsigned long long sol = 0;

void evaluate(int object[]) {
  count++;
  int ABC = object[0] * 100 + object[1] * 10 + object[2];
  int DEF = object[3] * 100 + object[4] * 10 + object[5];
  int GHI = object[6] * 100 + object[7] * 10 + object[8];
  if (ABC + DEF == GHI) {
    printf("%4llu %03d + %03d = %03d\n", ++sol, ABC,DEF,GHI);
  }
}

void form_combos(int pool[], size_t pool_count, int object[],
    size_t object_count, size_t object_count_max) {
  if (object_count >= object_count_max) {
    evaluate(object);
    return;
  }
  assert(pool_count > 0);
  int *pool_end = pool + pool_count - 1;
  for (size_t p = 0; p < pool_count; p++) {
    int sample = pool[p];  // take one out
    pool[p] = *pool_end;   // replace it with the end
    object[object_count] = sample;
    form_combos(pool, pool_count - 1, object, object_count + 1,
        object_count_max);
    pool[p] = sample;      // restore pool item
  }
}

int main() {
  int pool[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
  size_t pool_size = sizeof pool / sizeof pool[0];
  #define object_count 9
  int object[object_count];
  form_combos(pool, pool_size, object, 0, object_count);
  printf("Evaluate() iterations %llu\n", count);
}

输出

   1 091 + 762 = 853
   2 091 + 763 = 854
   3 091 + 735 = 826
...
1726 874 + 061 = 935
1727 875 + 046 = 921
1728 876 + 045 = 921
Evaluate() iterations 3628800

这种方法的优点在于,如果现在找到任务

ABC*ABC + DEF*DEF == GHI*GHI

仅更改 2 行代码:

  if (ABC*ABC + DEF*DEF == GHI*GHI) {
    printf("%4llu sqr(%03d) + sqr(%03d) = sqr(%03d)\n", ++sol, ABC,DEF,GHI);
  }

结果

   1 sqr(534) + sqr(712) = sqr(890)
   2 sqr(546) + sqr(728) = sqr(910)
   3 sqr(712) + sqr(534) = sqr(890)
   4 sqr(728) + sqr(546) = sqr(910)
Evaluate() iterations 3628800

好的,所以对于一个简单的解决方案(建立泛化的基础,到目前为止它只适用于格式 <3 digit number> + <3 digit number> = <3 digit number>)灵感来自@ chux 和@alain 的建议是以下代码。它真正在 O(10^N) 上运行,其中 N 是存在的不同数字数,或者如果您想这样称呼它们,则为变量。我会看看我是否可以进一步概括这一点。

请注意,这是针对寻找最大 rhs 的初始问题。也要考虑到这一点。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_DIGITS 10
#define MAX_VARIABLES 9
#define MAX_EXPRESSION_SIZE 11

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int ArePointersAssigned(int*** pointers) {
    for(int i = 0; i < MAX_VARIABLES; ++i) {
        if(**pointers[i] == -1)
            return 0;
    }
    return 1;
}

int evaluate(int*** pointers) {
    int ABC = *(*pointers[0]) * 100 + *(*pointers[1]) * 10 + *(*pointers[2]);
    int DEF = *(*pointers[3]) * 100 + *(*pointers[4]) * 10 + *(*pointers[5]);
    int GHI = *(*pointers[6]) * 100 + *(*pointers[7]) * 10 + *(*pointers[8]);
    if (ABC + DEF == GHI) { // since we use dfs, if this is a solution simply return it
        //printf("%d + %d = %d\n", ABC, DEF, GHI);
        return 1;
    }
    return 0;
}

// use the solved pointer to escape recursion early
// check_end checks if we reached 6 for the 2nd time, if it's first time we ignore (because it's start state)
void form_combos(int pool[], int pool_count, int object_count, int*** pointers, int* solved) {
    if(object_count == MAX_DIGITS - 1)
        object_count = 0;
    if(*solved) // if a branch solved this, escape recursion
        return;
    if (ArePointersAssigned(pointers)) { // that means we got a full equation set
        *solved = evaluate(pointers);
        if(*solved)
            return;
    }
    int *pool_end = pool + pool_count - 1;
    for (int p = pool_count - 1; p >= 0 && !*solved; p--) {
        int sample = pool[p];  // take one out
        pool[p] = *pool_end;   // replace it with the end
        int temp = **pointers[object_count];
        if(**pointers[object_count] == -1)
            **pointers[object_count] = sample;
        form_combos(pool, pool_count - 1, object_count + 1, pointers, solved);
        pool[p] = sample;      // restore pool item
        if(!*solved)
            **pointers[object_count] = temp;
    }
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    printf("Enter the formula: ");
    scanf("%s", expression);
    while(expression[0] != '-') {
        if(IsImpossible(expression))
            printf("No solution!\n");
        else {
            int digits[MAX_DIGITS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            int object[MAX_VARIABLES] = { -1, -1, -1, -1, -1, -1, -1, -1, -1 }; // stack for dfs
            int *A = &object[0], *B = &object[1], *C = &object[2], 
                *D = &object[3], *E = &object[4], *F = &object[5], 
                *G = &object[6], *H = &object[7], *I = &object[8];
            // set same pointers
            int** pointers[MAX_VARIABLES] = { &A, &B, &C, &D, &E, &F, &G, &H, &I };

            // analyze the equation
            int var = 0;
            for(int p = 0; p < MAX_EXPRESSION_SIZE; ++p) {
                if(expression[p] >= 'A' && expression[p] <= 'I') {
                    *pointers[var++] = &object[expression[p] - 'A']; // link same pointers
                }
            }
            int solved = 0, check_end = 0;
            form_combos(digits, MAX_DIGITS, MAX_DIGITS - 4, pointers, &solved);
            if(!solved) // it can be unsolvable still
                printf("No solution!\n");
            else
                printf("%d%d%d + %d%d%d = %d%d%d\n", *A, *B, *C, *D, *E, *F, *G, *H, *I);
        }
        printf("Enter the formula: ");
        scanf("%s", expression);
    }
    return 0;
}