求最长公共子串问题的最小 space 复杂度是多少？

Question

const char *_longest_common_substr(const char *s1, const char *s2, int n, int m) {
   // assert n >= m
   int max = 0; // keep track of max length found
   int tmp = 0; // value is incremented till letters match
   int begin = 0;
   int try_begin = 0; // possible new begin index
   int end = 0; // rv = s2[begin : end - 1]
   
   for (int i = 0; i < n; i++) {
       tmp = 0;
       try_begin = 0;
       // s1 is scanned circularly
       for (int j = 0; j < m; j++) {
           int index = (j + i) % n; // current s1 index
           if (index < n && s1[index] == s2[j]) {
               if (tmp == 0)
                   try_begin = j;
               tmp++;
           } else {
               tmp = 0;
           }
           if (tmp > max) {
               max = tmp;
               begin = try_begin;
               end = j + 1;
           }
       }
   }
       
   int size = begin >= end ? 0 : end - begin;
   char *rv = malloc((size + 1) * sizeof(*rv));
   int i;
   for (i = 0; i < size; i++)
       rv[i] = s2[begin + i];
   rv[i] = '[=11=]';
   return rv;
}

const char *longest_common_substr(const char *s1, const char *s2, int n, int m) {
   if (n < m)
       return _longest_common_substr(s2, s1, m, n);
   return _longest_common_substr(s1, s2, n, m);
}

此代码是否正确查找最长公共子串？我不明白为什么在很多地方，比如wikipedia，他们用矩阵来解决问题，而在这个看似简单的解决方案中却不需要它，时间复杂度仍然是O (n*m) 而 space 复杂度为 O(1)。一个可能的测试是

int main() {
    const char *str1 = "identification";
    const char *str2 = "administration";
    
    printf("%s\n", longest_common_substr(str1, str2, strlen(str1), strlen(str2)));
    
}

输出为

ation

substring 以循环方式使用，因此输入

action
tionac

输出将是

tionac

无论如何我可以在字符串末尾添加两个不同的无效字符来删除这个属性

Answer 1

你的算法很奇怪。您只计算最长字符串上的循环子字符串。如果字符串长度相等，则只计算第一个字符串的循环子字符串。

例如：

对于 "bxa" 和 "ab" 你找到了解决方案 "ab" 因为你计算了 bxa
上的循环子串
但是对于 "bxa" 和 "zaby" 你不考虑 "bxa" 中的循环子串并且你没有找到 "ab" 作为解决方案
("abx", "bya") 和 ("bya", "abx") 有不同的解决方案，即使只是参数的位置发生变化。（如果两个参数的长度相等，则仅在第一个参数上计算循环子串）。

您必须：

根本不计算循环子字符串（wiki link 中描述的经典算法）或：
计算两个字符串的循环子串（这是与最长公共子串）
不同的算法

所以你的算法的复杂度无法与维基算法相提并论，因为它是解决不同问题的不同算法。

Answer 2

你的算法是循环匹配，与经典的LCS算法不同。然而，它可以简化为删除这个额外的匹配，也不需要使 s1 成为更长的字符串：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *longest_common_substr(const char *s1, const char *s2, int n1, int n2) {
    int begin = 0;  // offset of lcs in s2
    int max = 0;    // length of lcs

    for (int i = 0; i < n1; i++) {
        int tmp = 0;        // count of matched letters
        int index = i;      // current s1 index
        for (int j = 0; j < n2; j++, index++) {
            if (index == n1) {
                index = 0;
                tmp = 0;    // do not match circularly
            }
            if (s1[index] == s2[j]) {
                tmp++;
                if (tmp > max) {
                    max = tmp;
                    begin = j - (tmp - 1);
                }
            } else {
                tmp = 0;
            }
        }
    }

    char *rv = malloc(sizeof(*rv) * (max + 1));
    memcpy(rv, s2 + begin, max);
    rv[max] = '[=10=]';
    return rv;
}

int main(int argc, char *argv[]) {
    if (argc == 3) {
        const char *s1 = argv[1];
        const char *s2 = argv[2];
        char *p = longest_common_substr(s1, s2, strlen(s1), strlen(s2));
        if (p) {
            printf("lcs('%s','%s') -> '%s'\n", s1, s2, p);
        }
    }
    return 0;
}

上面确实有 O(n1*n2) 的时间复杂度和 O(1)[=18 的 space 复杂度=].实现更好的时间复杂度通常是一个理想的目标，这可以解释为什么维基百科文章有一个针对更复杂情况的次优示例。

求最长公共子串问题的最小 space 复杂度是多少？

What is the minimum space complexity in problem of find longest common substring?

c

algorithm

time-complexity