为什么这个扫描器不吃空格?
Why does this scanner not eat whitespaces?
这些是我的词法分析器定义,有很多词法分析器定义,但这个是我的。我有几个正则表达式试图从这个示例中捕获并忽略 whitespace。我得到的错误是在第 1 行:14 中找到一个 $undefined Symbold
- asci 值 32。
也称为space。
OIL_VERSION = "314159OS";
CPU AT91SAM7S256
{
//Test Coment
OS HTOSEK
{
STATUS = EXTENDED;
STARTUPHOOK = TRUE;
ERRORHOOK = FALSE;
SHUTDOWNHOOK = FALSE;
PRETASKHOOK = FALSE;
POSTTASKHOOK = FALSE;
USEGETSERVICEID = FALSE;
USEPARAMETERACCESS = FALSE;
USERESSCHEDULER = FALSE;
USR_STACK_SIZE=3000;
};
/* Definition of application mode */
APPMODE appmode1{};
/* Definition of resource */
RESOURCE resource1
{
RESOURCEPROPERTY = STANDARD;
};
/* Definition of event */
EVENT event1
{
MASK = AUTO;
};
...
Lex 捕获定义:
%{ /*** C/C++ Declarations ***/
#define MAX_INCLUDE_DEPTH 16
#include <string>
#include <sstream>
#define SSTR( x ) dynamic_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
#include "scanner.h"
/* import the parser's token type into a local typedef */
typedef implementation::Parser::token token;
typedef implementation::Parser::token_type token_type;
/* By default yylex returns int, we use token_type. Unfortunately yyterminate
* by default returns 0, which is not of token_type. */
#define yyterminate() return token::END
/* This disables inclusion of unistd.h, which is not available under Visual C++
* on Win32. The C++ scanner uses STL streams instead. */
#define YY_NO_UNISTD_H
static int once = 0;
static int lineno = 1;
static void nextLine()
{
lineno++;
}
//convert a str to int
int fromInt(char *s)
{
int i;
int m;
m = 1;
i = 0;
if (s[0]=='-'){
m = -1;
i = 1;
}
else if(s[0]=='+')
i = 1;
return((atoi(s+i))*m);
}
int fromHex(char *s)
{
return((int)strtol(s, NULL, 16));
}
int LineCounter=0;
%}
/*** Flex Declarations and Options ***/
/* enable c++ scanner class generation */
%option c++
/* change the name of the scanner class. results in "ExampleFlexLexer" */
%option prefix="Example"
/* the manual says "somewhat more optimized" */
%option batch
/* enable scanner to generate debug output. disable this for release
* versions. */
%option debug
/* no support for include files is planned */
%option yywrap nounput
/* enables the use of start condition stacks */
%option stack
%x C_COMMENT
%x incl
/* The following paragraph suffices to track locations accurately. Each time
* yylex is invoked, the begin position is moved onto the end position. */
%{
#define YY_USER_ACTION yylloc->columns(yyleng);
%}
%% /*** Regular Expressions Part ***/
/* code to place at the beginning of yylex() */
%{
// reset location
yylloc->step();
%}
"/*" { BEGIN(C_COMMENT); }
<C_COMMENT>"*/" { BEGIN(INITIAL); }
<C_COMMENT>. { }
"=" { return(token::EQ);
}
"[" { return(token::LBRACK);
}
"]" { return(token::RBRACK);
}
"OS" { return(token::OSEK);
}
"EVENT" { return(token::EVENT);
}
"TASK" { return(token::TASK);
}
"ALARM" { return(token::ALARM);
}
"COUNTER" { return(token::COUNTER);
}
"OIL_VERSION" { return(token::OIL_VERSION);
}
"APPMODE" { return(token::APPMODE);
}
"CPU" { return (token::CPU);
}
"true"|"TRUE" { yylval->integerVal =1; return(token::VAL_BOOL);
}
"false"|"FALSE" { yylval->integerVal =0; return(token::VAL_BOOL);
}
"BOOLEAN" { return(token::BOOLEAN);
}
"INT" { return(token::INT);
}
"{" { return(token::LBRACE);
}
"}" { return(token::RBRACE);
}
":" { return(token::COLON);
}
"," { return(token::COMMA);
}
";" { return(token::SEMI);
}
([_A-Za-z])([a-zA-Z0-9!^_])* {yylval->stringVal = new std::string(yytext, yyleng);
return(token::STRING);
}
(([+-])?([0-9])*) {yylval->integerVal = fromInt( yytext );
return(token::NUMERAL);
}
(("0x")([0-9ABCDEFabcdef])*) {yylval->integerVal = fromHex( yytext );
return(token::NUMERAL);
}
(([-+]?[1-9][0-9]+\.[0-9]*)|([-+]?[0-9]*\.[0-9]+)|([-+]?[1-9]+))([eE][-+]?[0-9]+)?(f)? { yylval->doubleVal=atof(yytext);
return (token::VAL_FLOAT);
}
[\n\r]+ {
//yylloc->lines(yyleng);
yylloc->step();
LineCounter++;
//return token::EOL;
}
[\r\n]+ {
//yylloc->lines(yyleng);
yylloc->step();LineCounter++;
//return token::EOL;
}
[\t\r]+ { /* gobble up white-spaces */ yylloc->step(); }
[\s]+ { yylloc->step(); }
\"([^\"])*\" {
yytext[yyleng-1]= 0;
yylval->stringVal = new std::string( yytext, yyleng);
return(token::STRING);
}
. {
unsigned int temp;
temp= (unsigned int)(*yytext);
std::stringstream str2;
str2<<temp;
std::cout<<"Unknown character"<<*yytext<<" as Asci-value : "<<str2.str()<<std::endl;
return static_cast<token_type>(*yytext);
}
%% /*** Additional Code ***/
namespace implementation {
Scanner::Scanner(std::istream* in,
std::ostream* out)
: ExampleFlexLexer(in, out)
{
}
Scanner::~Scanner()
{
}
void Scanner::set_debug(bool b)
{
yy_flex_debug = b;
}
}
/* This implementation of ExampleFlexLexer::yylex() is required to fill the
* vtable of the class ExampleFlexLexer. We define the scanner's main yylex
* function via YY_DECL to reside in the Scanner class instead. */
#ifdef yylex
#undef yylex
#endif
int ExampleFlexLexer::yylex()
{
std::cerr << "in ExampleFlexLexer::yylex() !" << std::endl;
return 0;
}
/* When the scanner receives an end-of-file indication from YY_INPUT, it then
* checks the yywrap() function. If yywrap() returns false (zero), then it is
* assumed that the function has gone ahead and set up `yyin' to point to
* another input file, and scanning continues. If it returns true (non-zero),
* then the scanner terminates, returning 0 to its caller. */
int ExampleFlexLexer::yywrap()
{
return 1;
}
我修改了最后一条规则,所以它只是简单地尝试转换任何未知文本并打印出它捕获的 ascisymbols.. 结果是 32 47 47 32 " // "。
将尝试打印出流..
flex 没有实现诸如 \s
之类的 perlism。它识别的唯一反斜杠转义序列是标准的 C 转义序列,例如 \n
。如果要识别 space 字符,请使用 " "
.
顺便说一下,[\n\r]+
和 [\r\n]+
识别完全相同的东西:单个字符的一次或多次重复,可以是换行符或 return。所以第二个这样的规则永远不会匹配。我想 flex 会警告你。
这些是我的词法分析器定义,有很多词法分析器定义,但这个是我的。我有几个正则表达式试图从这个示例中捕获并忽略 whitespace。我得到的错误是在第 1 行:14 中找到一个 $undefined Symbold
- asci 值 32。
也称为space。
OIL_VERSION = "314159OS";
CPU AT91SAM7S256
{
//Test Coment
OS HTOSEK
{
STATUS = EXTENDED;
STARTUPHOOK = TRUE;
ERRORHOOK = FALSE;
SHUTDOWNHOOK = FALSE;
PRETASKHOOK = FALSE;
POSTTASKHOOK = FALSE;
USEGETSERVICEID = FALSE;
USEPARAMETERACCESS = FALSE;
USERESSCHEDULER = FALSE;
USR_STACK_SIZE=3000;
};
/* Definition of application mode */
APPMODE appmode1{};
/* Definition of resource */
RESOURCE resource1
{
RESOURCEPROPERTY = STANDARD;
};
/* Definition of event */
EVENT event1
{
MASK = AUTO;
};
...
Lex 捕获定义:
%{ /*** C/C++ Declarations ***/
#define MAX_INCLUDE_DEPTH 16
#include <string>
#include <sstream>
#define SSTR( x ) dynamic_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
#include "scanner.h"
/* import the parser's token type into a local typedef */
typedef implementation::Parser::token token;
typedef implementation::Parser::token_type token_type;
/* By default yylex returns int, we use token_type. Unfortunately yyterminate
* by default returns 0, which is not of token_type. */
#define yyterminate() return token::END
/* This disables inclusion of unistd.h, which is not available under Visual C++
* on Win32. The C++ scanner uses STL streams instead. */
#define YY_NO_UNISTD_H
static int once = 0;
static int lineno = 1;
static void nextLine()
{
lineno++;
}
//convert a str to int
int fromInt(char *s)
{
int i;
int m;
m = 1;
i = 0;
if (s[0]=='-'){
m = -1;
i = 1;
}
else if(s[0]=='+')
i = 1;
return((atoi(s+i))*m);
}
int fromHex(char *s)
{
return((int)strtol(s, NULL, 16));
}
int LineCounter=0;
%}
/*** Flex Declarations and Options ***/
/* enable c++ scanner class generation */
%option c++
/* change the name of the scanner class. results in "ExampleFlexLexer" */
%option prefix="Example"
/* the manual says "somewhat more optimized" */
%option batch
/* enable scanner to generate debug output. disable this for release
* versions. */
%option debug
/* no support for include files is planned */
%option yywrap nounput
/* enables the use of start condition stacks */
%option stack
%x C_COMMENT
%x incl
/* The following paragraph suffices to track locations accurately. Each time
* yylex is invoked, the begin position is moved onto the end position. */
%{
#define YY_USER_ACTION yylloc->columns(yyleng);
%}
%% /*** Regular Expressions Part ***/
/* code to place at the beginning of yylex() */
%{
// reset location
yylloc->step();
%}
"/*" { BEGIN(C_COMMENT); }
<C_COMMENT>"*/" { BEGIN(INITIAL); }
<C_COMMENT>. { }
"=" { return(token::EQ);
}
"[" { return(token::LBRACK);
}
"]" { return(token::RBRACK);
}
"OS" { return(token::OSEK);
}
"EVENT" { return(token::EVENT);
}
"TASK" { return(token::TASK);
}
"ALARM" { return(token::ALARM);
}
"COUNTER" { return(token::COUNTER);
}
"OIL_VERSION" { return(token::OIL_VERSION);
}
"APPMODE" { return(token::APPMODE);
}
"CPU" { return (token::CPU);
}
"true"|"TRUE" { yylval->integerVal =1; return(token::VAL_BOOL);
}
"false"|"FALSE" { yylval->integerVal =0; return(token::VAL_BOOL);
}
"BOOLEAN" { return(token::BOOLEAN);
}
"INT" { return(token::INT);
}
"{" { return(token::LBRACE);
}
"}" { return(token::RBRACE);
}
":" { return(token::COLON);
}
"," { return(token::COMMA);
}
";" { return(token::SEMI);
}
([_A-Za-z])([a-zA-Z0-9!^_])* {yylval->stringVal = new std::string(yytext, yyleng);
return(token::STRING);
}
(([+-])?([0-9])*) {yylval->integerVal = fromInt( yytext );
return(token::NUMERAL);
}
(("0x")([0-9ABCDEFabcdef])*) {yylval->integerVal = fromHex( yytext );
return(token::NUMERAL);
}
(([-+]?[1-9][0-9]+\.[0-9]*)|([-+]?[0-9]*\.[0-9]+)|([-+]?[1-9]+))([eE][-+]?[0-9]+)?(f)? { yylval->doubleVal=atof(yytext);
return (token::VAL_FLOAT);
}
[\n\r]+ {
//yylloc->lines(yyleng);
yylloc->step();
LineCounter++;
//return token::EOL;
}
[\r\n]+ {
//yylloc->lines(yyleng);
yylloc->step();LineCounter++;
//return token::EOL;
}
[\t\r]+ { /* gobble up white-spaces */ yylloc->step(); }
[\s]+ { yylloc->step(); }
\"([^\"])*\" {
yytext[yyleng-1]= 0;
yylval->stringVal = new std::string( yytext, yyleng);
return(token::STRING);
}
. {
unsigned int temp;
temp= (unsigned int)(*yytext);
std::stringstream str2;
str2<<temp;
std::cout<<"Unknown character"<<*yytext<<" as Asci-value : "<<str2.str()<<std::endl;
return static_cast<token_type>(*yytext);
}
%% /*** Additional Code ***/
namespace implementation {
Scanner::Scanner(std::istream* in,
std::ostream* out)
: ExampleFlexLexer(in, out)
{
}
Scanner::~Scanner()
{
}
void Scanner::set_debug(bool b)
{
yy_flex_debug = b;
}
}
/* This implementation of ExampleFlexLexer::yylex() is required to fill the
* vtable of the class ExampleFlexLexer. We define the scanner's main yylex
* function via YY_DECL to reside in the Scanner class instead. */
#ifdef yylex
#undef yylex
#endif
int ExampleFlexLexer::yylex()
{
std::cerr << "in ExampleFlexLexer::yylex() !" << std::endl;
return 0;
}
/* When the scanner receives an end-of-file indication from YY_INPUT, it then
* checks the yywrap() function. If yywrap() returns false (zero), then it is
* assumed that the function has gone ahead and set up `yyin' to point to
* another input file, and scanning continues. If it returns true (non-zero),
* then the scanner terminates, returning 0 to its caller. */
int ExampleFlexLexer::yywrap()
{
return 1;
}
我修改了最后一条规则,所以它只是简单地尝试转换任何未知文本并打印出它捕获的 ascisymbols.. 结果是 32 47 47 32 " // "。 将尝试打印出流..
flex 没有实现诸如 \s
之类的 perlism。它识别的唯一反斜杠转义序列是标准的 C 转义序列,例如 \n
。如果要识别 space 字符,请使用 " "
.
顺便说一下,[\n\r]+
和 [\r\n]+
识别完全相同的东西:单个字符的一次或多次重复,可以是换行符或 return。所以第二个这样的规则永远不会匹配。我想 flex 会警告你。