在 C 中使用 POSIX 正则表达式
Using POSIX Regex in C
我实际上正在尝试创建自己的服务器文本用户界面(以便管理 FTP、SSH 连接、任务管理器等)。我的问题出在任务管理器上
为了保存我的任务,我决定将它们全部写在一个文件中。我希望每一行(对应于一个任务)看起来像:
Year Month Day Week-Day Hour Min Second ; Command
为了简单起见,我采用了与 cron 相同的做法,其中 * 表示相应类别的任意时刻
* * * * 00 00 00 ; reboot //allow me to run reboot everyday at midnight
为此,我决定使用 POSIX 正则表达式 。
我想要它的格式:
YEAR [0-9]{1,9}
MONTH [0-9]{2}
DAY [0-9]{2}
WEEK-DAY [A-Za-z]{3}
HOUR [0-9]{2}
MINUTE [0-9]{2}
SECOND [0-9]{2}
COMMAND can be any printable character
这让我想到了一个问题。我已经能够创建这个正则表达式:
/* Task-line pattern (question version, with nested capture groups).
 * "\\*" in the C literal yields the regex token \* , i.e. a literal asterisk;
 * a single backslash would be an invalid C escape and drop the escaping. */
char *regexString = "^(\\*|([[:digit:]]){1,9})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:alpha:]]){3})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]];[[:blank:]]([[:print:]])*";
它看起来可以正常工作,但是当我尝试使用在这里找到的代码来获取每个组成部分时,得到的却是:
Output :
Match 0, Group 0: [ 0-25]: * * * * 00 00 00 ; reboot
Match 0, Group 1: [ 0- 1]: *
你能帮我理解一下吗?谢谢 (:
PS : 这是一些例子 :
* * * * * * * ; command //Match
0 00 00 Mon 00 00 00 ; command //Match
123456789 00 00 Mon 00 00 00 ; command //Match
01234556789 00 00 Mon 00 00 00 ; command //Don't Match
0 00 00 0 00 00 00 ; command //Don't Match
0 0 0 Mon 0 0 0 ; command //Don't Match
EDIT :这是我使用的代码
#include <stdio.h>
#include <string.h>
#include <regex.h>
/*
 * Demo program: compiles the task-line pattern, runs it against the sample
 * line in `source`, and prints every capture group regexec() reports.
 *
 * Returns 1 when the pattern cannot be compiled, 0 otherwise (including the
 * case where the line does not match).
 */
int main(void)
{
    const char *source = "* * * * 00 00 00 ; reboot";
    /* "\\*" in the C literal yields the regex token \* (a literal asterisk). */
    const char *regexString = "^(\\*|([[:digit:]]){1,9})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:alpha:]]){3})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]];[[:blank:]]([[:print:]])*";
    size_t maxMatches = 3; //I've tried for several values, 2, 3 ... same Output
    /* regexec() fills at most maxGroups entries (entry 0 is the whole match),
     * so with 3 only groups 0..2 can ever be reported. */
    size_t maxGroups = 3; //I've tried for several values, 2, 3 ... same Output
    regex_t regexCompiled;
    regmatch_t groupArray[maxGroups];
    unsigned int m;
    const char *cursor;

    if (regcomp(&regexCompiled, regexString, REG_EXTENDED))
    {
        printf("Could not compile regular expression.\n");
        return 1;
    }

    cursor = source;
    for (m = 0; m < maxMatches; m++)
    {
        if (regexec(&regexCompiled, cursor, maxGroups, groupArray, 0))
            break; // No more matches

        regoff_t offset = 0;
        for (unsigned int g = 0; g < maxGroups; g++)
        {
            regoff_t start = groupArray[g].rm_so;
            regoff_t end = groupArray[g].rm_eo;

            /* rm_so is a signed regoff_t; -1 marks a group that took no part
             * in the match. */
            if (start == -1)
                break; // No more groups

            /* Group 0 is the whole match: resume the search right after it. */
            if (g == 0)
                offset = end;

            /* Print the slice directly instead of copying the input per group. */
            printf("Match %u, Group %u: [%2d-%2d]: %.*s\n",
                   m, g, (int)start, (int)end,
                   (int)(end - start), cursor + start);
        }
        cursor += offset;
    }

    regfree(&regexCompiled);
    return 0;
}
示例输出:
//Case of a match :
Output :
Match 0, Group 0: [ 0-25]: * * * * 00 00 00 ; reboot
Match 0, Group 1: [ 0- 1]: * // YEAR
Match 0, Group 2: [ 2- 3]: * // MONTH
Match 0, Group 3: [ 4- 5]: * // DAY
Match 0, Group 4: [ 6- 7]: * // WEEK-DAY
Match 0, Group 5: [ 8- 10]: 00 //HOUR
Match 0, Group 6: [ 11- 13]: 00 //MINUTE
Match 0, Group 7: [ 14- 16]: 00 // SECOND
Match 0, Group 8: [ 20- 25]: reboot //COMMAND
$> echo $?
0
//Case of a match :
Output :
Match 0, Group 0: [ 0-38]: 123456789 00 00 Mon 00 00 00 ; Command
Match 0, Group 1: [ 0- 9]: 123456789 //YEAR
Match 0, Group 2: [ 10- 12]: 00 //MONTH
Match 0, Group 3: [ 13- 15]: 00 //DAY
Match 0, Group 4: [ 16- 19]: Mon //WEEK-DAY
Match 0, Group 5: [ 20- 22]: 00 //HOUR
Match 0, Group 6: [ 23- 25]: 00 //MINUTE
Match 0, Group 7: [ 26- 28]: 00 //SECOND
Match 0, Group 8: [ 31- 38]: Command //COMMAND
$> echo $?
0
//Case of Not Match
$> echo $?
0
设置 maxGroups 变量时要小心。它的值应为模式中捕获组的总数 + 1(第一项用于保存整个匹配)。
你应该摆脱所有多余的捕获组并使用
/* Task-line pattern with exactly 8 capture groups (one per field).
 * "\\*" in the C literal yields the regex token \* , i.e. a literal asterisk;
 * a single backslash would be an invalid C escape and drop the escaping. */
char *regexString = "^(\\*|[[:digit:]]{1,9})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:alpha:]]{3})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]];[[:blank:]]([[:print:]]*)";
该正则表达式(参见其演示)现在有 8 个捕获组,因此将 maxGroups 的值设置为 9:
size_t maxGroups = 9; // 8 groups + 1 for whole match
您的代码应该可以工作,请参阅 online demo。
将 maxMatches 增加到接近或略高于预期匹配数的值可能会很有用。
我实际上正在尝试创建自己的服务器文本用户界面(以便管理 FTP、SSH 连接、任务管理器等)。我的问题出在任务管理器上
为了保存我的任务,我决定将它们全部写在一个文件中。我希望每一行(对应于一个任务)看起来像:
Year Month Day Week-Day Hour Min Second ; Command
为了简单起见,我采用了与 cron 相同的做法,其中 * 表示相应类别的任意时刻
* * * * 00 00 00 ; reboot //allow me to run reboot everyday at midnight
为此,我决定使用 POSIX 正则表达式 。 我想要它的格式:
YEAR [0-9]{1,9}
MONTH [0-9]{2}
DAY [0-9]{2}
WEEK-DAY [A-Za-z]{3}
HOUR [0-9]{2}
MINUTE [0-9]{2}
SECOND [0-9]{2}
COMMAND can be any printable character
这让我想到了一个问题。我已经能够创建这个正则表达式:
/* Task-line pattern (question version, with nested capture groups).
 * "\\*" in the C literal yields the regex token \* , i.e. a literal asterisk;
 * a single backslash would be an invalid C escape and drop the escaping. */
char *regexString = "^(\\*|([[:digit:]]){1,9})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:alpha:]]){3})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]];[[:blank:]]([[:print:]])*";
它看起来可以正常工作,但是当我尝试使用在这里找到的代码来获取每个组成部分时,得到的却是:
Output :
Match 0, Group 0: [ 0-25]: * * * * 00 00 00 ; reboot
Match 0, Group 1: [ 0- 1]: *
你能帮我理解一下吗?谢谢 (:
PS : 这是一些例子 :
* * * * * * * ; command //Match
0 00 00 Mon 00 00 00 ; command //Match
123456789 00 00 Mon 00 00 00 ; command //Match
01234556789 00 00 Mon 00 00 00 ; command //Don't Match
0 00 00 0 00 00 00 ; command //Don't Match
0 0 0 Mon 0 0 0 ; command //Don't Match
EDIT :这是我使用的代码
#include <stdio.h>
#include <string.h>
#include <regex.h>
/*
 * Demo program: compiles the task-line pattern, runs it against the sample
 * line in `source`, and prints every capture group regexec() reports.
 *
 * Returns 1 when the pattern cannot be compiled, 0 otherwise (including the
 * case where the line does not match).
 */
int main(void)
{
    const char *source = "* * * * 00 00 00 ; reboot";
    /* "\\*" in the C literal yields the regex token \* (a literal asterisk). */
    const char *regexString = "^(\\*|([[:digit:]]){1,9})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:alpha:]]){3})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]](\\*|([[:digit:]]){2})[[:blank:]];[[:blank:]]([[:print:]])*";
    size_t maxMatches = 3; //I've tried for several values, 2, 3 ... same Output
    /* regexec() fills at most maxGroups entries (entry 0 is the whole match),
     * so with 3 only groups 0..2 can ever be reported. */
    size_t maxGroups = 3; //I've tried for several values, 2, 3 ... same Output
    regex_t regexCompiled;
    regmatch_t groupArray[maxGroups];
    unsigned int m;
    const char *cursor;

    if (regcomp(&regexCompiled, regexString, REG_EXTENDED))
    {
        printf("Could not compile regular expression.\n");
        return 1;
    }

    cursor = source;
    for (m = 0; m < maxMatches; m++)
    {
        if (regexec(&regexCompiled, cursor, maxGroups, groupArray, 0))
            break; // No more matches

        regoff_t offset = 0;
        for (unsigned int g = 0; g < maxGroups; g++)
        {
            regoff_t start = groupArray[g].rm_so;
            regoff_t end = groupArray[g].rm_eo;

            /* rm_so is a signed regoff_t; -1 marks a group that took no part
             * in the match. */
            if (start == -1)
                break; // No more groups

            /* Group 0 is the whole match: resume the search right after it. */
            if (g == 0)
                offset = end;

            /* Print the slice directly instead of copying the input per group. */
            printf("Match %u, Group %u: [%2d-%2d]: %.*s\n",
                   m, g, (int)start, (int)end,
                   (int)(end - start), cursor + start);
        }
        cursor += offset;
    }

    regfree(&regexCompiled);
    return 0;
}
示例输出:
//Case of a match :
Output :
Match 0, Group 0: [ 0-25]: * * * * 00 00 00 ; reboot
Match 0, Group 1: [ 0- 1]: * // YEAR
Match 0, Group 2: [ 2- 3]: * // MONTH
Match 0, Group 3: [ 4- 5]: * // DAY
Match 0, Group 4: [ 6- 7]: * // WEEK-DAY
Match 0, Group 5: [ 8- 10]: 00 //HOUR
Match 0, Group 6: [ 11- 13]: 00 //MINUTE
Match 0, Group 7: [ 14- 16]: 00 // SECOND
Match 0, Group 8: [ 20- 25]: reboot //COMMAND
$> echo $?
0
//Case of a match :
Output :
Match 0, Group 0: [ 0-38]: 123456789 00 00 Mon 00 00 00 ; Command
Match 0, Group 1: [ 0- 9]: 123456789 //YEAR
Match 0, Group 2: [ 10- 12]: 00 //MONTH
Match 0, Group 3: [ 13- 15]: 00 //DAY
Match 0, Group 4: [ 16- 19]: Mon //WEEK-DAY
Match 0, Group 5: [ 20- 22]: 00 //HOUR
Match 0, Group 6: [ 23- 25]: 00 //MINUTE
Match 0, Group 7: [ 26- 28]: 00 //SECOND
Match 0, Group 8: [ 31- 38]: Command //COMMAND
$> echo $?
0
//Case of Not Match
$> echo $?
0
设置 maxGroups 变量时要小心。它的值应为模式中捕获组的总数 + 1(第一项用于保存整个匹配)。
你应该摆脱所有多余的捕获组并使用
/* Task-line pattern with exactly 8 capture groups (one per field).
 * "\\*" in the C literal yields the regex token \* , i.e. a literal asterisk;
 * a single backslash would be an invalid C escape and drop the escaping. */
char *regexString = "^(\\*|[[:digit:]]{1,9})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:alpha:]]{3})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]](\\*|[[:digit:]]{2})[[:blank:]];[[:blank:]]([[:print:]]*)";
该正则表达式(参见其演示)现在有 8 个捕获组,因此将 maxGroups 的值设置为 9:
size_t maxGroups = 9; // 8 groups + 1 for whole match
您的代码应该可以工作,请参阅 online demo。
将 maxMatches 增加到接近或略高于预期匹配数的值可能会很有用。