C语言求两个字符串中最长的公共词?

Finding the longest common word in two strings in C Language?

一段时间以来,我一直无法找到两个字符串中最长的常用词。首先我想到用 "isspace" 函数来做,但不知道如何找到一个常用词。然后 "strcmp" 出现在我的脑海中,但到目前为止我只能比较两个字符串。我在想一些方法来合并 strcmp 和 isspace 以找到不同的单词,然后使用临时值找到最长的单词,但我想不出这样做的正确代码。

   #include <stdio.h>

   int strcmp(char s[],char t[]);


 void main()
 {

    char s[20],t[20];
   printf("Type in a string s.\n");
   gets(s);
    printf("Type in a string t.\n");
    gets( t );
   printf("The result of comparison=%d\n",strcmp(s,t));

    return 0;
      }


   int strcmp(char s[],char t[])
  {
   int i;
   for(i=0;s[i]==t[i];i++)
   if(s[i]=='[=10=]')
   return( 0 );
   return(s[i]-t[i]);
   }

请帮我解决这个问题。欢迎和赞赏所有想法(和代码)。提前致谢!

编辑::

我已经和这个问题斗争了一段时间,我想我有解决方案,但这是一种非常严格的方法。该程序有一个错误,可能与数组 "ptrArray1" 有关,但我无法修复它。

     #include <stdio.h>
     #include <string.h>
     #include <stdlib.h>


     int returnArrayOfWords (char* str4Parsing, char* arrayParsed[])
       {
// returns the length of array
char seps[]   = " \t\n"; // separators
char *token = NULL;
char *next_token = NULL;
int i = 0;

// Establish string and get the first token:
token = strtok( str4Parsing, seps);

// While there are tokens in "str4Parsing"
while ((token != NULL))
{
    // Get next token:
    arrayParsed[i] = token;
    //printf( " %s\n", arrayParsed[i] );//to be commented
    token = strtok( NULL, seps);
    i++;
}
return i;
      }

      void printArr(char *arr[], int n)
     {
int i;
for ( i = 0; i < n; i++)
{
    printf("Element %d is %s \n", i, arr[i]);
}
   }


     void findLargestWord(char *ptrArray1[], int sizeArr1, char              *ptrArray2[], int sizeArr2)
{
int maxLength = 0;
char *wordMaxLength = NULL ;
int i = 0, j = 0;
char *w1 = NULL, *w2 = NULL; /*pointers*/
int currLength1 = 0, currLength2 = 0 ;

//printArr(&ptrArray1[0], sizeArr1);
//printArr(&ptrArray2[0], sizeArr2);

for (i = 0; i < sizeArr1; i++)
    {
    // to find the largest word in the array
    w1 = (ptrArray1[i]); // value of address (ptrArray1 + i)
    currLength1 = strlen(w1);
    //printf("The word from the first string is: %s and its length is : %d \n", w1, currLength1); // check point

    for (j = 0; j < sizeArr2; j++)
        {
        w2 = (ptrArray2[j]); // value of address (ptrArray2 + j)
        currLength2 = strlen(w2);
        //printf("The word from the second string is : %s and its length is : %d \n", w2, currLength2); // check point

        if (strcoll(w1, w2) == 0 && currLength1 == currLength2)
            // compares the strings
            {
            if (currLength2 >= maxLength)
                // in the variable maxLength -> the length of the longest word
                {
                    maxLength = currLength2;
                    wordMaxLength = w2;
                    printf("The largest word for now is : %s and its length is : %d \n", wordMaxLength, maxLength); // check point
                }
            }
        }
    }
printf("The largest word is: %s \n", wordMaxLength);
printf("Its length is: %d \n", maxLength);
    }



     int main ()
      {
int n = 80; /*max number of words in string*/
char arrS1[80], arrS2[80];
char *ptrArray1 = NULL, *ptrArray2 = NULL;
int sizeArr1 = 0, sizeArr2 = 0;

// to allocate memory:
ptrArray1 = (char*)calloc(80, sizeof(char));
if(ptrArray1 == NULL)
{
    printf("Error! Memory for Pointer 1 is not allocated.");
    exit(0);
}

ptrArray2 = (char*)calloc(80, sizeof(char));
if(ptrArray2 == NULL)
{
    printf("Error! Memory for Pointer 2 is not allocated.");
    exit(0);
}

printf("Type your first string: ");
fgets(arrS1, 80, stdin);
sizeArr1 = returnArrayOfWords (arrS1, &ptrArray1); // sizeArr1 = number of elements in array 1

printf("Type your second string: ");
fgets(arrS2, 80, stdin);
sizeArr2 = returnArrayOfWords (arrS2, &ptrArray2); // sizeArr2 = number of elements in array 2

findLargestWord(&ptrArray1, sizeArr1, &ptrArray2, sizeArr2);

free(ptrArray1);
free(ptrArray2);
return 0;
          }

我也尝试使用后两个已发布的解决方案,但我在使用它们时遇到了问题,如下所述。

欢迎对我的代码提供任何帮助,用后一种解决方案解决我的问题或提出新的解决方案。提前谢谢大家!

PS。如果我的代码放置不当,我很抱歉。我还是不太会使用展示位置。

此示例打印给定两个输入字符串的最长子字符串。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int *lcommon(char *str1, char *str2) {
    int strlen1 = (unsigned) strlen(str1);
    int strlen2 = (unsigned) strlen(str2);
    int i, j, k;
    int longest = 0;
    int **ptr = malloc(2 * sizeof(int *));
    static int *ret;
    ret = calloc((unsigned) strlen1 + 1, sizeof(int));
    for (i = 0; i < 2; i++)
        ptr[i] = calloc((unsigned) strlen2, sizeof(int));

    k = 0;
    for (i = 0; i < strlen1; i++) {
        memcpy(ptr[0], ptr[1], strlen2 * sizeof(int));
        for (j = 0; j < strlen2; j++) {
            if (str1[i] == str2[j]) {
                if (i == 0 || j == 0) {
                    ptr[1][j] = 1;
                } else {
                    ptr[1][j] = ptr[0][j-1] + 1;
                }
                if (ptr[1][j] > longest) {
                    longest = ptr[1][j];
                    k = 0;
                    ret[k++] = longest;
                }
                if (ptr[1][j] == longest) {
                    ret[k++] = i;
                    ret[k] = -1;
                }
            } else {
                ptr[1][j] = 0;
            }
        }
    }
    for (i = 0; i < 2; i++)
        free(ptr[i]);
    free(ptr);
    ret[0] = longest;
    return ret;
}

int main(int argc, char *argv[]) {
    int i, longest, *ret;

    if (argc != 3) {
        printf("usage: longest-common-substring string1 string2\n");
        exit(1);
    }

    ret = lcommon(argv[1], argv[2]);
    if ((longest = ret[0]) == 0) {
        printf("There is no common substring\n");
        exit(2);
    }

    i = 0;
    while (ret[++i] != -1) {
        printf("%.*s\n", longest, &argv[1][ret[i]-longest+1]);
    }

    exit(0);
}

测试

$ ./a.out computerprogramming javaprogrammer
programm

您可以阅读有关该问题的更多信息 here

您还可以使用交互式程序,在控制台中写入字符串:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int *lcommon(char *str1, char *str2) {
    int strlen1 = (unsigned) strlen(str1);
    int strlen2 = (unsigned) strlen(str2);
    int i, j, k;
    int longest = 0;
    int **ptr = malloc(2 * sizeof(int *));
    static int *ret;
    ret = calloc((unsigned) strlen1 + 1, sizeof(int));
    for (i = 0; i < 2; i++)
        ptr[i] = calloc((unsigned) strlen2, sizeof(int));

    k = 0;
    for (i = 0; i < strlen1; i++) {
        memcpy(ptr[0], ptr[1], strlen2 * sizeof(int));
        for (j = 0; j < strlen2; j++) {
            if (str1[i] == str2[j]) {
                if (i == 0 || j == 0) {
                    ptr[1][j] = 1;
                } else {
                    ptr[1][j] = ptr[0][j - 1] + 1;
                }
                if (ptr[1][j] > longest) {
                    longest = ptr[1][j];
                    k = 0;
                    ret[k++] = longest;
                }
                if (ptr[1][j] == longest) {
                    ret[k++] = i;
                    ret[k] = -1;
                }
            } else {
                ptr[1][j] = 0;
            }
        }
    }
    for (i = 0; i < 2; i++)
        free(ptr[i]);
    free(ptr);
    ret[0] = longest;
    return ret;
}

int main(int argc, char *argv[]) {
    int i, longest, *ret;

    if (argc != 3) {
        //printf("usage: longest-common-substring string1 string2\n");

        char s[20], t[20];
        printf("Type in a string s.\n");
        fgets(s, 20, stdin);
        printf("Type in a string t.\n");
        fgets(t, 20, stdin);

        ret = lcommon(s, t);
        if ((longest = ret[0]) == 0) {
            printf("There is no common substring\n");
            exit(2);
        }

        i = 0;
        while (ret[++i] != -1) {
            printf("%.*s\n", longest, &s[ret[i] - longest + 1]);
        }

        //printf("The result of comparison=%d\n", strcmp(s, t));


        exit(0);
    } else { }
    ret = lcommon(argv[1], argv[2]);
    if ((longest = ret[0]) == 0) {
        printf("There is no common substring\n");
        exit(2);
    }

    i = 0;
    while (ret[++i] != -1) {
        printf("%.*s\n", longest, &argv[1][ret[i] - longest + 1]);
    }
    exit(0);
}

测试

Type in a string s.
string1
Type in a string t.
string2
string

有很多方法可以解决这个问题。下面,一个指向其中一个字符串中每个字符的指针用于使用 strchr 搜索另一个字符串中的匹配字符。找到匹配字符后,比较循环会运行推进每个指针,以设计存在的公共子字符串的长度(如果有的话)。

例程,匹配字符,检查子字符串长度,重复,只要 strchr return 是一个有效的 pointer.Each 时间,就会继续找到更长的子字符串,max 长度更新为 return 并且存在的子字符串被复制到 rstrncpynul-terminated 这样最长的文本字符串可用于调用函数,main 这里。

这是一种相当蛮力的方法,可能会有一些额外的调整来提高效率。函数本身是:

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp; /* search ptr initialization */
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

长度 max 被 return 编辑,然后在函数执行期间更新为 r 导致 r 保存与最长子字符串匹配关联的字符串。

其他改进是删除 gets,由于存在安全风险,它在 C11 中被删除而没有弃用。它不应再被任何理智的编码人员使用(应该涵盖我们大约 40% 的人)。将剩余的位放在一起,一小部分测试代码可以是:

#include <stdio.h>
#include <string.h>

#define MAXC 128

size_t maxspn (const char *a, const char *b, char *r);
void rmlf (char *s);

int main (void) {

    char res[MAXC] = "", s[MAXC] = "", t[MAXC] = "";

    printf ("Type in a string 's': ");
    if (!fgets (s, MAXC, stdin)) {  /* validate 's' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (s);   /* remove trailing newline */
    printf ("Type in a string 't': ");
    if (!fgets (t, MAXC, stdin)) {  /* validate 't' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (t);   /* remove trailing newline */

    /* obtain longest commons substring between 's' and 't' */
    printf ("\nThe longest common string is : %zu ('%s')\n",
            maxspn (s, t, res), res);

    return 0;
}

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp;
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

/** remove trailing newline from 's'. */
void rmlf (char *s)
{
    if (!s || !*s) return;
    for (; *s && *s != '\n'; s++) {}
    *s = 0;
}

例子Use/Output

$ ./bin/strspn
Type in a string 's': a string with colors123456789 all blue
Type in a string 't': a string without colors1234567890 all red

The longest common string is : 16 (' colors123456789')

或者,另一个可能更容易形象化的:

$ ./bin/strspn
Type in a string 's': green eel
Type in a string 't': cold blue steel

The longest common string is : 3 ('eel')

查看代码并与其他答案进行比较。如果您有任何其他问题,请告诉我。还应添加一些其他验证以确保文本不会超出缓冲区等的末端。希望这会提供一些帮助或替代方法。


附加子字符串

为了确保您和我看到的是同一件事,我在下面提供了更多使用示例。没有错误,代码按预期执行。如果您在修改代码时遇到问题,请告诉我您正在尝试做什么,我可以提供帮助。我上面代码中的每个指针增量都经过验证。如果您更改有关指针增量或 nul-termination 的任何内容,除非您也考虑了验证中的更改,否则代码将无法工作。

$ ./bin/strspn
Type in a string 's': 1
Type in a string 't':

The longest common string is : 0 ('')

$ ./bin/strspn
Type in a string 's': A man a plan a canal panama
Type in a string 't': a man a plan a river panama

The longest common string is : 14 (' man a plan a ')

$ ./bin/strspn
Type in a string 's': this is my favorite string
Type in a string 't': this is my favoritist string

The longest common string is : 18 ('this is my favorit')

$ ./bin/strspn
Type in a string 's': not the same until here
Type in a string 't': cant be equal till here

The longest common string is : 6 ('l here')

$ ./bin/strspn
Type in a string 's': some str with ten in the middle
Type in a string 't': a string often ignorded

The longest common string is : 5 ('ten i')

最长的常用词

OK,在我终于明白你要完成的事情后,你可以select两个字符串's'和[之间的最长公共词 =29=] 通过 标记化 每个字符串 strtok,将指向每个字符串中每个单词的指针保存在单独的指针数组中,然后简单地将指针数组迭代到 select 最长的常用词(如果有多个相同长度的常用词,则为第一个)。您只需要像下面这样简单的东西。

注意 strtok 修改字符串 's''t',因此如果需要保留原件,请复制一份。

/** return length of longest common word in 'a' and 'b'.
 *  by tokenizing each word in 'a' & 'b' and iterating over
 *  each, returning the length of the logest match, and updating
 *  'r' to contain the longest common word.
 */
size_t maxspnwhole (char *a, char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *arra[MAXC] = {NULL}, *arrb[MAXC] = {NULL};
    char *ap = a, *bp = b;          /* pointers to a & b */
    char *delim = " .,-;\t\n";      /* word delimiters   */
    size_t i, j, len, max, na, nb;  /* len, max, n-words */
    len = max = na = nb = 0;

    /* tokenize both strings into pointer arrays */
    for (ap = strtok (a, delim); ap; ap = strtok (NULL, delim))
        arra[na++] = ap;

    for (bp = strtok (b, delim); bp; bp = strtok (NULL, delim))
        arrb[nb++] = bp;

    for (i = 0; i < na; i++)           /* select longest common word */
        for (j = 0; j < nb; j++)
            if (*arra[i] == *arrb[j])             /* 1st chars match */
                if (!strcmp (arra[i], arrb[j])) { /* check word      */
                    len = strlen (arra[i]);
                    if (len > max) {              /* if longest */
                        max = len;                /* update max */
                        strcpy (r, arra[i]);      /* copy to r  */
                    }
                }

    return max;
}

将它与其他代码集成,你可以比较这样的结果:

#include <stdio.h>
#include <string.h>

#define MAXC 128

size_t maxspn (const char *a, const char *b, char *r);
size_t maxspnwhole (char *a, char *b, char *r);
void rmlf (char *s);

int main (void) {

    char res[MAXC] = "", s[MAXC] = "", t[MAXC] = "";

    printf ("Type in a string 's': ");
    if (!fgets (s, MAXC, stdin)) {  /* validate 's' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (s);   /* remove trailing newline */
    printf ("Type in a string 't': ");
    if (!fgets (t, MAXC, stdin)) {  /* validate 't' */
        fprintf (stderr, "error: invalid input for 's'.\n");
        return 1;
    }
    rmlf (t);   /* remove trailing newline */

    /* obtain longest commons substring between 's' and 't' */
    printf ("\nThe longest common string is : %zu ('%s')\n",
            maxspn (s, t, res), res);

    /* obtain longest commons word between 's' and 't' */
    printf ("\nThe longest common word is : %zu ('%s')\n",
            maxspnwhole (s, t, res), res);

    return 0;
}

/** return length of longest common word in 'a' and 'b'.
 *  by tokenizing each word in 'a' & 'b' and iterating over
 *  each, returning the length of the logest match, and updating
 *  'r' to contain the longest common word.
 */
size_t maxspnwhole (char *a, char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *arra[MAXC] = {NULL}, *arrb[MAXC] = {NULL};
    char *ap = a, *bp = b;          /* pointers to a & b */
    char *delim = " .,-;\t\n";      /* word delimiters   */
    size_t i, j, len, max, na, nb;  /* len, max, n-words */
    len = max = na = nb = 0;

    /* tokenize both strings into pointer arrays */
    for (ap = strtok (a, delim); ap; ap = strtok (NULL, delim))
        arra[na++] = ap;

    for (bp = strtok (b, delim); bp; bp = strtok (NULL, delim))
        arrb[nb++] = bp;

    for (i = 0; i < na; i++)
        for (j = 0; j < nb; j++)
            if (*arra[i] == *arrb[j])
                if (!strcmp (arra[i], arrb[j])) {
                    len = strlen (arra[i]);
                    if (len > max) {
                        max = len;
                        strcpy (r, arra[i]);
                    }
                }

    return max;
}

/** return length of longest common substring in 'a' and 'b'.
 *  by searching through each character in 'a' for each match
 *  in 'b' and comparing substrings present at each match. the
 *  size of the longest substring is returned, the test of the
 *  longest common substring is copied to 'r' and made available
 *  in the calling function. (the lengths should also be passed
 *  for validation, but that is left as an exercise)
 */
size_t maxspn (const char *a, const char *b, char *r)
{
    if (!a||!b||!*a||!*b) return 0; /* valdate parameters */

    char *ap = (char *)a;           /* pointer to a       */
    size_t max = 0;                 /* max substring char */

    for (; *ap; ap++) {             /* for each char in a */
        char *bp = (char *)b;       /* find match in b with strchr */
        for (; *bp && (bp = strchr (bp, *ap)); bp++) {
            char *spa = ap, *spb = bp;
            size_t len = 0;         /* find substring len */
            for (; *spa && *spb && *spa == *spb; spa++, spb++) len++;
            if (len > max) {        /* if max, copy to r  */
                strncpy (r, ap, (max = len));
                r[max] = 0;         /* nul-terminate r    */
            }
        }
    }

    return max;
}

/** remove trailing newline from 's'. */
void rmlf (char *s)
{
    if (!s || !*s) return;
    for (; *s && *s != '\n'; s++) {}
    *s = 0;
}

例子Use/Output

$ ./bin/strlongestcmn
Type in a string 's': I have a huge boat.
Type in a string 't': I have a small boat.

The longest common string is : 9 ('I have a ')

The longest common word is : 4 ('have')

仔细阅读,如果您还有其他问题,请告诉我。