如何识别 Python 的作用域以便进行解析
how to recognize python scopes for parsing
我必须使用 JavaCC 制作一个 Python 编译器,但我遇到了 Python 作用域的问题。我该如何统计不同作用域中各有多少行代码?
// Generator options: produce a parser whose productions are static methods.
options
{
  STATIC = true;
}
PARSER_BEGIN(MyNewGrammar)
package test;
/**
 * Interactive driver for the grammar: repeatedly reads one input line from
 * standard input, parses it with {@code one_line()}, and reports the outcome.
 */
public class MyNewGrammar
{
  /**
   * Runs the read-parse-report loop until the user enters a lone ";" or an
   * {@link Error} is raised while parsing.
   *
   * @param args command-line arguments (unused)
   * @throws ParseException declared for compatibility; parse failures raised
   *         inside the loop are caught and reported there
   */
  public static void main(String args []) throws ParseException
  {
    // The instance itself is not used below: productions are invoked through
    // the class because the grammar is generated with static = true.
    MyNewGrammar parser = new MyNewGrammar(System.in);
    while (true)
    {
      System.out.println("Reading from standard input...");
      System.out.print("Enter an expression like \"1+(2+3)*4;\" :");
      try
      {
        switch (MyNewGrammar.one_line())
        {
          case 0 :
            System.out.println("OK.");
            break;
          case 1 :
            System.out.println("Goodbye.");
            // A lone ";" is the quit request: leave the loop instead of
            // prompting again after saying goodbye.
            return;
          default :
            break;
        }
      }
      catch (Exception e)
      {
        // Recoverable failure: report it and reset the parser on the stream.
        System.out.println("NOK.");
        System.out.println(e.getMessage());
        MyNewGrammar.ReInit(System.in);
      }
      catch (Error e)
      {
        // Errors are treated as fatal for the session.
        System.out.println("Oops.");
        System.out.println(e.getMessage());
        break;
      }
    }
  }
}
PARSER_END(MyNewGrammar)
// Characters matched here are discarded between tokens.
// Note that "\n" is skipped as well, so this specification as written
// never produces a token for line ends.
SKIP :
{
" "
| "\r"
| "\t"
| "\n"
}
// Arithmetic operators and reserved words. These are declared before the
// identifier token so that a reserved word wins over <id> when both match
// the same text.
// The token names EXEPT and RAIS are kept as they were so existing references
// keep working; only their misspelled literals ("exept", "rais") are
// corrected to the real Python keywords.
TOKEN : /* OPERATORS AND KEYWORDS */
{
  < PLUS : "+" >
| < MINUS : "-" >
| < MULTIPLY : "*" >
| < DIVIDE : "/" >
| < IF : "if" >
| < AND : "and" >
| < BREAK : "break" >
| < CLASS : "class" >
| < CONTINUE : "continue" >
| < OR : "or" >
| < PASS : "pass" >
| < PRINT : "print" >
| < ELIF : "elif" >
| < ELSE : "else" >
| < EXEPT : "except" >
| < EXEC : "exec" >
| < FINALLY : "finally" >
| < FOR : "for" >
| < IN : "in" >
| < DEF : "def" >
| < DEL : "del" >
| < IS : "is" >
| < NOT : "not" >
| < RAIS : "raise" >
| < RETURN : "return" >
| < TRY : "try" >
| < WHILE : "while" >
| < WITH : "with" >
| < YIELD : "yield" >
| < FROM : "from" >
| < GLOBAL : "global" >
| < IMPORT : "import" >
| < RANGE : "range" >
| < XRANGE : "xrange" >
}
// Numeric constants, identifiers and double-quoted alphanumeric literals.
// LETTER is now a private (#) helper like DIGIT, LOWER and UPPER: as a public
// token it could never be produced, because <id> is declared earlier and
// matches every single letter with the same length.
TOKEN :
{
  < CONSTANT : (< DIGIT >)+ >
| < id : < LETTER > (< LETTER > | < DIGIT >)* >
| < #LETTER : (< LOWER > | < UPPER >) >
| < literal : "\"" (< LETTER > | < DIGIT >)+ "\"" >
| < #DIGIT : [ "0"-"9" ] >
| < #LOWER : [ "a"-"z" ] >
| < #UPPER : [ "A"-"Z" ] >
}
/**
 * Parses one input unit terminated by ";".
 *
 * Returns 0 after an arithmetic expression or a for-header followed by ";",
 * and 1 when the input is a lone ";".
 *
 * The two statement forms are grouped in parentheses so that both require the
 * terminating ";" and both reach the "return 0" action. Without the grouping,
 * "|" binds looser than sequencing, leaving sum() as an alternative of its own
 * with no terminator and no return value.
 */
int one_line() :
{}
{
  ( sum() | forp() ) ";"
  {
    return 0;
  }
| ";"
  {
    return 1;
  }
}
/** Additive level: one term followed by any number of "+ term" or "- term". */
void sum() :
{}
{
  term() ( < PLUS > term() | < MINUS > term() )*
}
/** Multiplicative level: one unary followed by any number of "* unary" or "/ unary". */
void term() :
{}
{
  unary() ( < MULTIPLY > unary() | < DIVIDE > unary() )*
}
/** An element, optionally preceded by a single minus sign. */
void unary() :
{}
{
  [ < MINUS > ] element()
}
/** Innermost operand: a parenthesised sum or a numeric constant. */
void element() :
{}
{
  "(" sum() ")"
| < CONSTANT >
}
/** Header of a for-loop: "for" identifier "in" followed by a range call. */
void forp() :
{}
{
  < FOR > < id > < IN > range()
}
/**
 * A call to range or xrange with exactly two arguments, each an identifier
 * or a numeric constant.
 */
void range() :
{}
{
  ( < RANGE > | < XRANGE > )
  "("
    ( < id > | < CONSTANT > )
    ","
    ( < id > | < CONSTANT > )
  ")"
}
我该如何解析其作用域内的所有语句?
解析 Python 的有趣之处在于缩进。Python 语言规范给出了插入 INDENT 和 DEDENT 记号(token)的规则。我们可以在 JavaCC 中实现这些规则,但下面采用另一种方法,即使用语义前瞻(semantic lookahead)。
/**
 * Parses a for statement with an optional else clause.
 *
 * The column of the leading "for" token is remembered so that the body must
 * be indented relative to it and the else clause must line up with it.
 */
void for_stmt() : {
  int col = getToken(1).beginColumn ;
} {
  "for" exprlist() "in" testlist() ":" suite(col)
  [
    // Take the else clause only when the next token is "else" at exactly the
    // column of this "for". An "else" at another column is left for an
    // enclosing statement, rather than being consumed here and rejected.
    // Token kind is tested explicitly because a purely semantic LOOKAHEAD
    // performs no syntactic check of its own.
    LOOKAHEAD( { getToken(1).kind == ELSE && getToken(1).beginColumn == col } )
    "else" ":" suite(col)
  ]
}
/**
 * Parses the body that follows the ":" of a compound statement.
 *
 * col is the column of the statement that owns this body. The body is either
 * an indented block on the following lines, or a simple statement on the same
 * line as the ":".
 */
void suite(int col) : {
  int newCol ;
} {
  <NEWLINE>
  // The first token of the block fixes the block's indentation, which must be
  // deeper than the owning statement's column.
  { newCol = checkIndent(col) ; }
  stmtsAndDedent(newCol)
|
  // Inline body: the statement sits after the ":" on the header line, so it
  // can never start at the owner's column. Pass the statement's own column so
  // the column check inside simple_stmt accepts it.
  simple_stmt(getToken(1).beginColumn)
}
/**
 * One or more statements at column col, followed by a dedent.
 *
 * After each statement, dedenting(col) decides whether the block has ended;
 * if not, the production recurses for the next statement.
 */
void stmtsAndDedent(int col) : {
} {
  stmt(col)
  (
    // Semantic lookahead: the Java condition goes inside braces. With the
    // bare form, dedenting would be read as a grammar production.
    LOOKAHEAD( { dedenting(col) } ) {}
  |
    stmtsAndDedent(col)
  )
}
// Parses a single statement expected at column col: either a simple
// statement or a compound statement.
void stmt(int col) : {
} {
// The simple-statement alternative is handed col as an argument.
simple_stmt(col)
|
// For a compound statement the column check is done here, as an action
// executed before compound_stmt() is parsed.
{checkColumn(col) ;}
compound_stmt()
}
/**
 * Parses one line of small statements separated by ";", with an optional
 * trailing ";" and a terminating NEWLINE.
 *
 * col is the column at which the first token must begin. The parameter was
 * missing from the declaration although the body reads it and the callers
 * already pass it.
 */
void simple_stmt(int col) : {
} {
  {checkColumn(col) ;}
  small_stmt() (";" small_stmt())* [";"] <NEWLINE>
}
现在还需要编写几个 Java 辅助方法:
/**
 * Verifies that the next token is indented more deeply than {@code col}.
 *
 * @param col column of the statement that owns the block
 * @return the column of the next token, i.e. the indentation of the new block
 * @throws ParseException if the next token begins at or before {@code col}
 */
int checkIndent(int col) throws ParseException {
  // ParseException is thrown below, hence the throws clause on this method.
  Token tk = getToken(1) ;
  int newCol = tk.beginColumn ;
  if( newCol <= col ) {
    throw new ParseException( "Expected token at line " +tk.beginLine+
                              " column " +tk.beginColumn+
                              " was expected to be indented by more than "
                              +col+ " characters.") ;
  }
  return newCol ;
}
/**
 * Tells whether the block indented at {@code col} ends before the next token.
 *
 * @param col indentation column of the current block
 * @return true if the next token begins left of {@code col} or is the
 *         end-of-file token
 */
boolean dedenting(int col) {
  // JavaCC assigns kind 0 to the end-of-file token.
  final int eofKind = 0 ;
  Token tk = getToken(1) ;
  // End of input closes every open block regardless of the column recorded on
  // the EOF token; otherwise a block at the end of the file may never close.
  return tk.kind == eofKind || tk.beginColumn < col ;
}
/**
 * Verifies that the next token begins exactly at {@code col}.
 *
 * @param col required starting column of the next token
 * @throws ParseException if the next token begins at any other column
 */
void checkColumn(int col) throws ParseException {
  // ParseException is thrown below, hence the throws clause on this method.
  Token tk = getToken(1) ;
  int newCol = tk.beginColumn ;
  if( newCol != col ) {
    throw new ParseException( "Expected token at line " +tk.beginLine+
                              " column " +tk.beginColumn+
                              " was expected to be indented by exactly "
                              +col+ " characters.") ;
  }
}
这一切都未经测试,但我认为一旦修复了小错误,它就会起作用。
一旦你可以解析,计算行数就很简单了。
我必须使用 JavaCC 制作一个 Python 编译器,但我遇到了 Python 作用域的问题。我该如何统计不同作用域中各有多少行代码?
// Generator options: produce a parser whose productions are static methods.
options
{
  STATIC = true;
}
PARSER_BEGIN(MyNewGrammar)
package test;
/**
 * Interactive driver for the grammar: repeatedly reads one input line from
 * standard input, parses it with {@code one_line()}, and reports the outcome.
 */
public class MyNewGrammar
{
  /**
   * Runs the read-parse-report loop until the user enters a lone ";" or an
   * {@link Error} is raised while parsing.
   *
   * @param args command-line arguments (unused)
   * @throws ParseException declared for compatibility; parse failures raised
   *         inside the loop are caught and reported there
   */
  public static void main(String args []) throws ParseException
  {
    // The instance itself is not used below: productions are invoked through
    // the class because the grammar is generated with static = true.
    MyNewGrammar parser = new MyNewGrammar(System.in);
    while (true)
    {
      System.out.println("Reading from standard input...");
      System.out.print("Enter an expression like \"1+(2+3)*4;\" :");
      try
      {
        switch (MyNewGrammar.one_line())
        {
          case 0 :
            System.out.println("OK.");
            break;
          case 1 :
            System.out.println("Goodbye.");
            // A lone ";" is the quit request: leave the loop instead of
            // prompting again after saying goodbye.
            return;
          default :
            break;
        }
      }
      catch (Exception e)
      {
        // Recoverable failure: report it and reset the parser on the stream.
        System.out.println("NOK.");
        System.out.println(e.getMessage());
        MyNewGrammar.ReInit(System.in);
      }
      catch (Error e)
      {
        // Errors are treated as fatal for the session.
        System.out.println("Oops.");
        System.out.println(e.getMessage());
        break;
      }
    }
  }
}
PARSER_END(MyNewGrammar)
// Characters matched here are discarded between tokens.
// Note that "\n" is skipped as well, so this specification as written
// never produces a token for line ends.
SKIP :
{
" "
| "\r"
| "\t"
| "\n"
}
// Arithmetic operators and reserved words. These are declared before the
// identifier token so that a reserved word wins over <id> when both match
// the same text.
// The token names EXEPT and RAIS are kept as they were so existing references
// keep working; only their misspelled literals ("exept", "rais") are
// corrected to the real Python keywords.
TOKEN : /* OPERATORS AND KEYWORDS */
{
  < PLUS : "+" >
| < MINUS : "-" >
| < MULTIPLY : "*" >
| < DIVIDE : "/" >
| < IF : "if" >
| < AND : "and" >
| < BREAK : "break" >
| < CLASS : "class" >
| < CONTINUE : "continue" >
| < OR : "or" >
| < PASS : "pass" >
| < PRINT : "print" >
| < ELIF : "elif" >
| < ELSE : "else" >
| < EXEPT : "except" >
| < EXEC : "exec" >
| < FINALLY : "finally" >
| < FOR : "for" >
| < IN : "in" >
| < DEF : "def" >
| < DEL : "del" >
| < IS : "is" >
| < NOT : "not" >
| < RAIS : "raise" >
| < RETURN : "return" >
| < TRY : "try" >
| < WHILE : "while" >
| < WITH : "with" >
| < YIELD : "yield" >
| < FROM : "from" >
| < GLOBAL : "global" >
| < IMPORT : "import" >
| < RANGE : "range" >
| < XRANGE : "xrange" >
}
// Numeric constants, identifiers and double-quoted alphanumeric literals.
// LETTER is now a private (#) helper like DIGIT, LOWER and UPPER: as a public
// token it could never be produced, because <id> is declared earlier and
// matches every single letter with the same length.
TOKEN :
{
  < CONSTANT : (< DIGIT >)+ >
| < id : < LETTER > (< LETTER > | < DIGIT >)* >
| < #LETTER : (< LOWER > | < UPPER >) >
| < literal : "\"" (< LETTER > | < DIGIT >)+ "\"" >
| < #DIGIT : [ "0"-"9" ] >
| < #LOWER : [ "a"-"z" ] >
| < #UPPER : [ "A"-"Z" ] >
}
/**
 * Parses one input unit terminated by ";".
 *
 * Returns 0 after an arithmetic expression or a for-header followed by ";",
 * and 1 when the input is a lone ";".
 *
 * The two statement forms are grouped in parentheses so that both require the
 * terminating ";" and both reach the "return 0" action. Without the grouping,
 * "|" binds looser than sequencing, leaving sum() as an alternative of its own
 * with no terminator and no return value.
 */
int one_line() :
{}
{
  ( sum() | forp() ) ";"
  {
    return 0;
  }
| ";"
  {
    return 1;
  }
}
/** Additive level: one term followed by any number of "+ term" or "- term". */
void sum() :
{}
{
  term() ( < PLUS > term() | < MINUS > term() )*
}
/** Multiplicative level: one unary followed by any number of "* unary" or "/ unary". */
void term() :
{}
{
  unary() ( < MULTIPLY > unary() | < DIVIDE > unary() )*
}
/** An element, optionally preceded by a single minus sign. */
void unary() :
{}
{
  [ < MINUS > ] element()
}
/** Innermost operand: a parenthesised sum or a numeric constant. */
void element() :
{}
{
  "(" sum() ")"
| < CONSTANT >
}
/** Header of a for-loop: "for" identifier "in" followed by a range call. */
void forp() :
{}
{
  < FOR > < id > < IN > range()
}
/**
 * A call to range or xrange with exactly two arguments, each an identifier
 * or a numeric constant.
 */
void range() :
{}
{
  ( < RANGE > | < XRANGE > )
  "("
    ( < id > | < CONSTANT > )
    ","
    ( < id > | < CONSTANT > )
  ")"
}
我该如何解析其作用域内的所有语句?
解析 Python 的有趣之处在于缩进。Python 语言规范给出了插入 INDENT 和 DEDENT 记号(token)的规则。我们可以在 JavaCC 中实现这些规则,但下面采用另一种方法,即使用语义前瞻(semantic lookahead)。
/**
 * Parses a for statement with an optional else clause.
 *
 * The column of the leading "for" token is remembered so that the body must
 * be indented relative to it and the else clause must line up with it.
 */
void for_stmt() : {
  int col = getToken(1).beginColumn ;
} {
  "for" exprlist() "in" testlist() ":" suite(col)
  [
    // Take the else clause only when the next token is "else" at exactly the
    // column of this "for". An "else" at another column is left for an
    // enclosing statement, rather than being consumed here and rejected.
    // Token kind is tested explicitly because a purely semantic LOOKAHEAD
    // performs no syntactic check of its own.
    LOOKAHEAD( { getToken(1).kind == ELSE && getToken(1).beginColumn == col } )
    "else" ":" suite(col)
  ]
}
/**
 * Parses the body that follows the ":" of a compound statement.
 *
 * col is the column of the statement that owns this body. The body is either
 * an indented block on the following lines, or a simple statement on the same
 * line as the ":".
 */
void suite(int col) : {
  int newCol ;
} {
  <NEWLINE>
  // The first token of the block fixes the block's indentation, which must be
  // deeper than the owning statement's column.
  { newCol = checkIndent(col) ; }
  stmtsAndDedent(newCol)
|
  // Inline body: the statement sits after the ":" on the header line, so it
  // can never start at the owner's column. Pass the statement's own column so
  // the column check inside simple_stmt accepts it.
  simple_stmt(getToken(1).beginColumn)
}
/**
 * One or more statements at column col, followed by a dedent.
 *
 * After each statement, dedenting(col) decides whether the block has ended;
 * if not, the production recurses for the next statement.
 */
void stmtsAndDedent(int col) : {
} {
  stmt(col)
  (
    // Semantic lookahead: the Java condition goes inside braces. With the
    // bare form, dedenting would be read as a grammar production.
    LOOKAHEAD( { dedenting(col) } ) {}
  |
    stmtsAndDedent(col)
  )
}
// Parses a single statement expected at column col: either a simple
// statement or a compound statement.
void stmt(int col) : {
} {
// The simple-statement alternative is handed col as an argument.
simple_stmt(col)
|
// For a compound statement the column check is done here, as an action
// executed before compound_stmt() is parsed.
{checkColumn(col) ;}
compound_stmt()
}
/**
 * Parses one line of small statements separated by ";", with an optional
 * trailing ";" and a terminating NEWLINE.
 *
 * col is the column at which the first token must begin. The parameter was
 * missing from the declaration although the body reads it and the callers
 * already pass it.
 */
void simple_stmt(int col) : {
} {
  {checkColumn(col) ;}
  small_stmt() (";" small_stmt())* [";"] <NEWLINE>
}
现在还需要编写几个 Java 辅助方法:
/**
 * Verifies that the next token is indented more deeply than {@code col}.
 *
 * @param col column of the statement that owns the block
 * @return the column of the next token, i.e. the indentation of the new block
 * @throws ParseException if the next token begins at or before {@code col}
 */
int checkIndent(int col) throws ParseException {
  // ParseException is thrown below, hence the throws clause on this method.
  Token tk = getToken(1) ;
  int newCol = tk.beginColumn ;
  if( newCol <= col ) {
    throw new ParseException( "Expected token at line " +tk.beginLine+
                              " column " +tk.beginColumn+
                              " was expected to be indented by more than "
                              +col+ " characters.") ;
  }
  return newCol ;
}
/**
 * Tells whether the block indented at {@code col} ends before the next token.
 *
 * @param col indentation column of the current block
 * @return true if the next token begins left of {@code col} or is the
 *         end-of-file token
 */
boolean dedenting(int col) {
  // JavaCC assigns kind 0 to the end-of-file token.
  final int eofKind = 0 ;
  Token tk = getToken(1) ;
  // End of input closes every open block regardless of the column recorded on
  // the EOF token; otherwise a block at the end of the file may never close.
  return tk.kind == eofKind || tk.beginColumn < col ;
}
/**
 * Verifies that the next token begins exactly at {@code col}.
 *
 * @param col required starting column of the next token
 * @throws ParseException if the next token begins at any other column
 */
void checkColumn(int col) throws ParseException {
  // ParseException is thrown below, hence the throws clause on this method.
  Token tk = getToken(1) ;
  int newCol = tk.beginColumn ;
  if( newCol != col ) {
    throw new ParseException( "Expected token at line " +tk.beginLine+
                              " column " +tk.beginColumn+
                              " was expected to be indented by exactly "
                              +col+ " characters.") ;
  }
}
这一切都未经测试,但我认为一旦修复了小错误,它就会起作用。
一旦你可以解析,计算行数就很简单了。