如何识别 Python 的作用域以便进行解析

how to recognize python scopes for parsing

我必须使用 JavaCC 编写一个 Python 编译器,但我遇到了 Python 作用域的问题。我该如何检查不同作用域中各有多少行代码?

// Generate a static parser; the driver below calls the parser through the
// class name (MyNewGrammar.one_line(), MyNewGrammar.ReInit(...)).
options { static = true; }

PARSER_BEGIN(MyNewGrammar)
package test;

/**
 * Interactive driver for the grammar: repeatedly prompts on standard output,
 * parses one line from standard input and reports the outcome.
 */
public class MyNewGrammar
{
  /**
   * Runs the read-parse-report loop.
   *
   * The loop ends when the input consists of a lone ";" ("Goodbye."), when
   * standard input is exhausted, or when an Error is raised while parsing.
   * Exceptions raised while parsing are reported and the parser is
   * re-initialised so that the next line can be tried.
   *
   * @param args command-line arguments (unused)
   * @throws ParseException declared for compatibility; parse failures are
   *         handled inside the loop
   */
  public static void main(String args []) throws ParseException
  {
    // The instance itself is not used afterwards; constructing it attaches
    // the (static) parser to standard input.
    MyNewGrammar parser = new MyNewGrammar(System.in);
    while (true)
    {
      System.out.println("Reading from standard input...");
      System.out.print("Enter an expression like \"1+(2+3)*4;\" :");
      try
      {
        // Without this check an exhausted input stream makes one_line() fail
        // on every iteration, so the loop would print "NOK." forever.
        if (getToken(1).kind == EOF)
        {
          return;
        }
        switch (MyNewGrammar.one_line())
        {
          case 0 :
          System.out.println("OK.");
          break;
          case 1 :
          System.out.println("Goodbye.");
          // Leave main(); a plain break would only exit the switch and the
          // prompt would be shown again.
          return;
          default :
          break;
        }
      }
      catch (Exception e)
      {
        System.out.println("NOK.");
        System.out.println(e.getMessage());
        MyNewGrammar.ReInit(System.in);
      }
      catch (Error e)
      {
        System.out.println("Oops.");
        System.out.println(e.getMessage());
        break;
      }
    }
  }
}

PARSER_END(MyNewGrammar)

/* Characters that are discarded between tokens: blank, CR, tab and LF. */
SKIP : { " " | "\r" | "\t" | "\n" }

TOKEN : /* OPERATORS AND KEYWORDS */
{
    /* Arithmetic operators. */
    < PLUS : "+" >
|   < MINUS : "-" >
|   < MULTIPLY : "*" >
|   < DIVIDE : "/" >
    /* Reserved words. They are listed before <id> so that they win when both match. */
|   <IF: "if">
|   <AND: "and">
|   <BREAK: "break">
|   <CLASS: "class">
|   <CONTINUE: "continue">
|   <OR: "or">
|   <PASS: "pass">
|   <PRINT: "print">
|   <ELIF: "elif">
|   <ELSE: "else">
    /* Label spelling kept as-is for compatibility; the image is the real keyword "except". */
|   <EXEPT: "except">
|   <EXEC: "exec">
|   <FINALLY: "finally">
|   <FOR: "for">
|   <IN: "in">
|   <DEF: "def">
|   <DEL: "del">
|   <IS: "is">
|   <NOT: "not">
    /* Label spelling kept as-is for compatibility; the image is the real keyword "raise". */
|   <RAIS: "raise">
|   <RETURN: "return">
|   <TRY: "try">
|   <WHILE: "while">
|   <WITH: "with">
|   <YIELD: "yield">
|   <FROM: "from">
|   <GLOBAL: "global">
|   <IMPORT: "import">
    /* Not Python keywords, but the range() production of this grammar matches them as tokens. */
|   <RANGE: "range">
|   <XRANGE: "xrange">
}

/* Numbers, identifiers and double-quoted literals. */
TOKEN :
{
  /* One or more decimal digits. */
  < CONSTANT : (< DIGIT >)+ >
  /* A letter followed by any number of letters or digits. */
| <id: (<LETTER>)(<LETTER>|<DIGIT>)* >
  /* Double-quoted run of one or more letters/digits. */
| <literal:"\""((< LETTER >)|(< DIGIT >))+ "\"" >
  /* Private helpers (marked with '#'): usable inside other regular
     expressions but never produced as tokens. LETTER is private because a
     single letter is always matched as <id>, so a public LETTER token could
     never be returned. */
| < #LETTER: (<LOWER>|<UPPER>) >
| < #DIGIT : [ "0"-"9" ] >
| < #LOWER: ["a" - "z"]>
| < #UPPER: ["A" - "Z"]>
}

/**
 * Parses one input line.
 *
 * Returns 0 after an arithmetic expression or a "for ... in range(...)" header
 * terminated by ";", and 1 when the line is a lone ";".
 */
int one_line() :
{}
{
  // Both statement forms share the ";" terminator and the return value.
  // Without the grouping, the sum() alternative would neither consume ";"
  // nor return a value.
  (
    sum()
  | forp()
  )
  ";"
  {
    return 0;
  }
| ";"
  {
    return 1;
  }
}

/* sum ::= term ( ("+" | "-") term )* */
void sum() :
{}
{
  term() ( ( < PLUS > | < MINUS > ) term() )*
}

/* term ::= unary ( ("*" | "/") unary )* */
void term() :
{}
{
  unary() ( ( < MULTIPLY > | < DIVIDE > ) unary() )*
}
/* unary ::= [ "-" ] element */
void unary() :
{}
{
  [ < MINUS > ] element()
}

/* element ::= CONSTANT | "(" sum ")" */
void element() :
{}
{
  "(" sum() ")"
| < CONSTANT >
}
/* forp ::= "for" id "in" range   (header of a for loop; the body is not parsed here) */
void forp():
{}
{
  < FOR > < id > < IN > range()
}
/* range ::= ("range" | "xrange") "(" (id | CONSTANT) "," (id | CONSTANT) ")" */
void range():
{}
{
    ( < RANGE > | < XRANGE > )
    "("
        ( < id > | < CONSTANT > )
        ","
        ( < id > | < CONSTANT > )
    ")"
}

我该如何解析其作用域内的所有语句?

解析 Python 的有趣之处在于缩进。语言规范给出了插入 INDENT 和 DEDENT 记号(token)的规则。我们可以在 JavaCC 中实现这一点,但下面采用另一种方法,即使用语义前瞻(semantic lookahead)。

/*
 * for_stmt ::= "for" exprlist "in" testlist ":" suite [ "else" ":" suite ]
 *
 * The column of the "for" keyword is recorded before anything is consumed;
 * the body must be indented relative to it and an "else" clause must start
 * in exactly that column.
 */
void for_stmt() : {
    int col = getToken(1).beginColumn ;
} {
    "for" exprlist() "in" testlist() ":" suite(col)
    // String literals in a JavaCC grammar are written with double quotes.
    [ {checkColumn( col ) ;} "else" ":" suite(col) ]
}

/*
 * suite ::= NEWLINE <indented statements> | simple_stmt
 *
 * col is the column of the compound statement that owns this suite.
 */
void suite(int col) : {
    int newCol ;
} {
    <NEWLINE>
    // The first token of the body fixes the block's column; it must lie to
    // the right of the owning statement.
    { newCol = checkIndent(col) ; }
    stmtsAndDedent(newCol)
|
    // Inline suite ("for x in y: pass"): the statement follows the ":" on the
    // same line, so it cannot start in column col. Passing the statement's own
    // column makes the exact-column check in simple_stmt succeed.
    simple_stmt( getToken(1).beginColumn )
}

// One or more stmt followed by a dedent.
//
// col is the column shared by all statements of the block. After each
// statement the next token is inspected: the block ends as soon as that token
// starts to the left of col. The decision is purely semantic, which is why the
// condition is written inside braces: LOOKAHEAD( { ... } ).
// NOTE(review): what happens at end of input depends on the column reported
// for the EOF token -- confirm when testing.
void stmtsAndDedent(int col) : {
} {
    stmt(col)
    (
        LOOKAHEAD( { !dedenting(col) } )
        stmt(col)
    )*
}

/*
 * stmt ::= simple_stmt | compound_stmt
 *
 * col is the column the statement is required to start in. simple_stmt is
 * handed col directly; for a compound statement checkColumn(col) is called
 * before compound_stmt.
 */
void stmt(int col) : {}
{
    simple_stmt(col)
  | { checkColumn(col) ; } compound_stmt()
}

/*
 * simple_stmt ::= small_stmt ( ";" small_stmt )* [ ";" ] NEWLINE
 *
 * col is the column in which the first token of the statement must start.
 * The parameter is required: the body uses col and every caller passes it.
 */
void simple_stmt(int col) : {
} {
    {checkColumn(col) ;}
    small_stmt() (";" small_stmt())* [";"] <NEWLINE>
}

现在还需要编写几个 Java 辅助方法:

/**
 * Verifies that the next token starts to the right of the given column.
 *
 * @param col column of the enclosing statement
 * @return the column of the next token, i.e. the column of the new block
 * @throws ParseException if the next token starts at or to the left of col
 */
int checkIndent(int col) throws ParseException {
    // Peek at the next token without consuming it.
    Token tk = getToken(1) ;
    int newCol = tk.beginColumn ;
    if( newCol <= col ) {
        throw new ParseException( "Expected token at line " +tk.beginLine+
                                  " column " +tk.beginColumn+
                                  " was expected to be indented by more than "
                                  +col+ " characters.") ;
    }
    return newCol ;
}

/**
 * Tells whether the next token (not consumed) starts to the left of a column.
 *
 * @param col column of the current block
 * @return true if the next token's begin column is smaller than col
 */
boolean dedenting(int col) {
    final int nextColumn = getToken(1).beginColumn ;
    return col > nextColumn ;
}

/**
 * Verifies that the next token starts exactly in the given column.
 *
 * @param col required column
 * @throws ParseException if the next token starts in any other column
 */
void checkColumn(int col) throws ParseException {
    // Peek at the next token without consuming it.
    Token tk = getToken(1) ;
    int newCol = tk.beginColumn ;
    if( newCol != col ) {
        throw new ParseException( "Expected token at line " +tk.beginLine+
                                  " column " +tk.beginColumn+
                                  " was expected to be indented by exactly "
                                  +col+ " characters.") ;
    }
}

这一切都未经测试,但我认为一旦修复了小错误,它就会起作用。

一旦你可以解析,计算行数就很简单了。