将 lambda 添加到我的编程语言中
Adding lambdas to my programming language
我正在尝试使用 Rex 和 Racc 创建自己的语言,但遇到了困难。我不确定如何添加函数,或者任何像 lambda 那样不会立即执行的代码。我已经在语言中添加了 blocks/lambdas,但块中的所有内容都会立即执行。怎样才能让 block/lambda 可以在任意时刻被多次运行,并且拥有自己的作用域?甚至像 if 语句那样,其中的 "block" 仅在条件为真时才执行?
这是我的代码:
lexer.rex:
# Rexical lexer specification for MyLang.
#
# Turns source text into [TOKEN_TYPE, value] pairs for the Racc parser.
# Whitespace (BLANK) has a rule without an action, so it yields no token.
#
# STRING matches a single- or double-quoted literal. Inside the quotes a
# backslash always consumes the following character, so an escaped quote
# does not terminate the literal. The token value is the raw text, quotes
# included.
class MyLang
macro
  BLANK             [\s]+
  VAR               [a-zA-Z_]\w*
  NUMBER            \d+
  MULTIPLY          \*
  DIVIDE            \/
  ADD               \+
  SUBTRACT          \-
  EQUALS            =
  LEFT_PARENTHESIS  \(
  RIGHT_PARENTHESIS \)
  STRING            ("(\\.|[^"\\])*")|('(\\.|[^'\\])*')
  CURLY_BRACKET_L   \{
  CURLY_BRACKET_R   \}

rule
  {BLANK}
  {VAR}               { [:VAR, text.to_sym] }
  {NUMBER}            { [:NUMBER, text.to_i] }
  {MULTIPLY}          { [:MULTIPLY, text.to_sym] }
  {DIVIDE}            { [:DIVIDE, text.to_sym] }
  {ADD}               { [:ADD, text.to_sym] }
  {SUBTRACT}          { [:SUBTRACT, text.to_sym] }
  {EQUALS}            { [:EQUALS, text.to_sym] }
  {LEFT_PARENTHESIS}  { [:LEFT_PARENTHESIS, text.to_sym] }
  {RIGHT_PARENTHESIS} { [:RIGHT_PARENTHESIS, text.to_sym] }
  {STRING}            { [:STRING, text] }
  {CURLY_BRACKET_L}   { [:CURLY_BRACKET_L, text.to_sym] }
  {CURLY_BRACKET_R}   { [:CURLY_BRACKET_R, text.to_sym] }

inner
  # Scans +code+ to the end and returns every token as an array.
  #
  # code - the source text to tokenize.
  #
  # Returns an Array of whatever next_token yields, in source order.
  def tokenize(code)
    scan_setup(code)
    tokens = []
    # next_token returns a falsy value once the input is exhausted.
    while (token = next_token)
      tokens << token
    end
    tokens
  end
end
parser.y:
class MyLang
  # Operator precedence, highest first. Operators of equal precedence share
  # one line so they associate left-to-right with each other: with MULTIPLY
  # above DIVIDE (or ADD above SUBTRACT) "10 - 2 + 3" would be grouped as
  # "10 - (2 + 3)".
  prechigh
    left LEFT_PARENTHESIS
    left RIGHT_PARENTHESIS
    left MULTIPLY DIVIDE
    left ADD SUBTRACT
    right EQUALS
  preclow
rule
  # NOTE: every action below evaluates its operands while parsing, so the
  # body of a block has already been computed by the time the closing
  # bracket is reduced. Deferred execution (lambdas, conditionals) needs the
  # actions to build an intermediate representation instead.
  expression : value
             | block

  value : NUMBER                                  { return Value.new(val[0], "Number") }
        | STRING                                  { return Value.new(MyLangCore.str_escape(val[0]), "String") }
        | assignment
        | value MULTIPLY value                    { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value DIVIDE value                      { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value ADD value                         { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value SUBTRACT value                    { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | LEFT_PARENTHESIS value RIGHT_PARENTHESIS { return val[1] }
        | VAR                                     { return MyLangCore.get_var(val[0]) }

  assignment : VAR EQUALS value { return MyLangCore.new_variable(val[0], val[2]) }

  # No action here yet, so this rule gets Racc's default semantic value.
  block : CURLY_BRACKET_L expression CURLY_BRACKET_R
end

---- header
require_relative "lexer"
require_relative "my_lang_core"

---- inner
  # Parses (and, with the current actions, evaluates) one source string.
  #
  # input - the MyLang source text.
  #
  # Returns the semantic value of the top-level expression.
  def parse(input)
    scan_str(input)
  end
我是这样测试和运行我的语言的:
#!/usr/bin/env ruby
require_relative "parser.rb"
require "minitest/autorun"
# check for errors once they exist
# Specs for the MyLang parser. Every example gets a fresh MyLang instance
# from the `before` hook and checks the `value` of what `parse` returns.
describe MyLang do
  before do
    @parser = MyLang.new
  end

  describe "variables" do
    it "assigns usable variables" do
      @parser.parse("a = 4")
      @parser.parse("a").value.must_equal 4
    end

    it "does complex assignments" do
      @parser.parse("a = (4 + 8) * 2")
      @parser.parse("b = 2 * (c = a + 1) + 1")
      @parser.parse("a").value.must_equal 24
      @parser.parse("b").value.must_equal 51
      @parser.parse("c").value.must_equal 25
    end

    it "allows cool variable names" do
      @parser.parse("_123 = 74")
      @parser.parse("_123").value.must_equal 74
    end
  end

  describe "PEMDAS" do
    it "does math" do
      @parser.parse("10 + 12 * 3 + 2").value.must_equal 48
    end

    it "does simple parentheses" do
      @parser.parse("(1)").value.must_equal 1
    end

    it "uses parentheses" do
      @parser.parse("(10 + 12) * 3 + 2").value.must_equal 68
    end

    it "does multi-level parentheses" do
      @parser.parse("(3 - (2 - 1)) * 4").value.must_equal 8
    end
  end

  describe "strings" do
    it "parses strings" do
      @parser.parse(%{'hello world.'}).value.must_equal "hello world."
      @parser.parse(%{"hello world."}).value.must_equal "hello world."
      @parser.parse(%{''}).value.must_equal ""
    end

    it "assigns strings" do
      @parser.parse("a = 'hey'")
      @parser.parse("a").value.must_equal "hey"
    end

    # TODO: fix
    # it "handles escape characters" do
    #   @parser.parse(%{"hey\"\n"}).value.must_equal "hey\"\n"
    # end

    it "adds strings" do
      @parser.parse(%{"Hello, " + "world" + "!"}).value.must_equal "Hello, world!"
    end

    it "multiplies strings" do
      @parser.parse(%{"1" * 3}).value.must_equal "111"
    end

    it "adds and multiplies" do
      @parser.parse(%{("Na" + "N ") * 3 + "!"}).value.must_equal "NaN NaN NaN !"
    end
  end

  # This is what still needs to be implemented.
  describe "blocks" do
    # The call must live inside an example: the `before` hook only runs for
    # `it` blocks, so @parser is nil in the describe body itself and calling
    # it there raises while the file is still loading.
    it "calls a lambda with arguments" do
      skip "lambdas are not implemented yet"
      @parser.parse("{ a, b in a + b }(5, 4)")
    end
  end
end

# Ad-hoc check outside the test suite: print what a bare block parses to.
l = MyLang.new
p l.parse("{ 1 + 2 }")
Racc 的 wiki 中的示例使用的方法(在解析期间直接计算表达式)仅适用于简单的表达式计算器。一旦您添加任何类型的控制流,它就会停止工作 - 正如您所注意到的。
实现解释器的常用方法是让解析器构造源代码的某种中间表示形式——通常是抽象语法树或某种字节码。解析后,此表示将作为单独的步骤执行。
我正在尝试使用 Rex 和 Racc 创建自己的语言,但遇到了困难。我不确定如何添加函数,或者任何像 lambda 那样不会立即执行的代码。我已经在语言中添加了 blocks/lambdas,但块中的所有内容都会立即执行。怎样才能让 block/lambda 可以在任意时刻被多次运行,并且拥有自己的作用域?甚至像 if 语句那样,其中的 "block" 仅在条件为真时才执行?
这是我的代码:
lexer.rex:
# Rexical lexer specification for MyLang.
#
# Turns source text into [TOKEN_TYPE, value] pairs for the Racc parser.
# Whitespace (BLANK) has a rule without an action, so it yields no token.
#
# STRING matches a single- or double-quoted literal. Inside the quotes a
# backslash always consumes the following character, so an escaped quote
# does not terminate the literal. The token value is the raw text, quotes
# included.
class MyLang
macro
  BLANK             [\s]+
  VAR               [a-zA-Z_]\w*
  NUMBER            \d+
  MULTIPLY          \*
  DIVIDE            \/
  ADD               \+
  SUBTRACT          \-
  EQUALS            =
  LEFT_PARENTHESIS  \(
  RIGHT_PARENTHESIS \)
  STRING            ("(\\.|[^"\\])*")|('(\\.|[^'\\])*')
  CURLY_BRACKET_L   \{
  CURLY_BRACKET_R   \}

rule
  {BLANK}
  {VAR}               { [:VAR, text.to_sym] }
  {NUMBER}            { [:NUMBER, text.to_i] }
  {MULTIPLY}          { [:MULTIPLY, text.to_sym] }
  {DIVIDE}            { [:DIVIDE, text.to_sym] }
  {ADD}               { [:ADD, text.to_sym] }
  {SUBTRACT}          { [:SUBTRACT, text.to_sym] }
  {EQUALS}            { [:EQUALS, text.to_sym] }
  {LEFT_PARENTHESIS}  { [:LEFT_PARENTHESIS, text.to_sym] }
  {RIGHT_PARENTHESIS} { [:RIGHT_PARENTHESIS, text.to_sym] }
  {STRING}            { [:STRING, text] }
  {CURLY_BRACKET_L}   { [:CURLY_BRACKET_L, text.to_sym] }
  {CURLY_BRACKET_R}   { [:CURLY_BRACKET_R, text.to_sym] }

inner
  # Scans +code+ to the end and returns every token as an array.
  #
  # code - the source text to tokenize.
  #
  # Returns an Array of whatever next_token yields, in source order.
  def tokenize(code)
    scan_setup(code)
    tokens = []
    # next_token returns a falsy value once the input is exhausted.
    while (token = next_token)
      tokens << token
    end
    tokens
  end
end
parser.y:
class MyLang
  # Operator precedence, highest first. Operators of equal precedence share
  # one line so they associate left-to-right with each other: with MULTIPLY
  # above DIVIDE (or ADD above SUBTRACT) "10 - 2 + 3" would be grouped as
  # "10 - (2 + 3)".
  prechigh
    left LEFT_PARENTHESIS
    left RIGHT_PARENTHESIS
    left MULTIPLY DIVIDE
    left ADD SUBTRACT
    right EQUALS
  preclow
rule
  # NOTE: every action below evaluates its operands while parsing, so the
  # body of a block has already been computed by the time the closing
  # bracket is reduced. Deferred execution (lambdas, conditionals) needs the
  # actions to build an intermediate representation instead.
  expression : value
             | block

  value : NUMBER                                  { return Value.new(val[0], "Number") }
        | STRING                                  { return Value.new(MyLangCore.str_escape(val[0]), "String") }
        | assignment
        | value MULTIPLY value                    { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value DIVIDE value                      { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value ADD value                         { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | value SUBTRACT value                    { return MyLangCore.binary_operator(val[0], val[2], val[1]) }
        | LEFT_PARENTHESIS value RIGHT_PARENTHESIS { return val[1] }
        | VAR                                     { return MyLangCore.get_var(val[0]) }

  assignment : VAR EQUALS value { return MyLangCore.new_variable(val[0], val[2]) }

  # No action here yet, so this rule gets Racc's default semantic value.
  block : CURLY_BRACKET_L expression CURLY_BRACKET_R
end

---- header
require_relative "lexer"
require_relative "my_lang_core"

---- inner
  # Parses (and, with the current actions, evaluates) one source string.
  #
  # input - the MyLang source text.
  #
  # Returns the semantic value of the top-level expression.
  def parse(input)
    scan_str(input)
  end
我是这样测试和运行我的语言的:
#!/usr/bin/env ruby
require_relative "parser.rb"
require "minitest/autorun"
# check for errors once they exist
# Specs for the MyLang parser. Every example gets a fresh MyLang instance
# from the `before` hook and checks the `value` of what `parse` returns.
describe MyLang do
  before do
    @parser = MyLang.new
  end

  describe "variables" do
    it "assigns usable variables" do
      @parser.parse("a = 4")
      @parser.parse("a").value.must_equal 4
    end

    it "does complex assignments" do
      @parser.parse("a = (4 + 8) * 2")
      @parser.parse("b = 2 * (c = a + 1) + 1")
      @parser.parse("a").value.must_equal 24
      @parser.parse("b").value.must_equal 51
      @parser.parse("c").value.must_equal 25
    end

    it "allows cool variable names" do
      @parser.parse("_123 = 74")
      @parser.parse("_123").value.must_equal 74
    end
  end

  describe "PEMDAS" do
    it "does math" do
      @parser.parse("10 + 12 * 3 + 2").value.must_equal 48
    end

    it "does simple parentheses" do
      @parser.parse("(1)").value.must_equal 1
    end

    it "uses parentheses" do
      @parser.parse("(10 + 12) * 3 + 2").value.must_equal 68
    end

    it "does multi-level parentheses" do
      @parser.parse("(3 - (2 - 1)) * 4").value.must_equal 8
    end
  end

  describe "strings" do
    it "parses strings" do
      @parser.parse(%{'hello world.'}).value.must_equal "hello world."
      @parser.parse(%{"hello world."}).value.must_equal "hello world."
      @parser.parse(%{''}).value.must_equal ""
    end

    it "assigns strings" do
      @parser.parse("a = 'hey'")
      @parser.parse("a").value.must_equal "hey"
    end

    # TODO: fix
    # it "handles escape characters" do
    #   @parser.parse(%{"hey\"\n"}).value.must_equal "hey\"\n"
    # end

    it "adds strings" do
      @parser.parse(%{"Hello, " + "world" + "!"}).value.must_equal "Hello, world!"
    end

    it "multiplies strings" do
      @parser.parse(%{"1" * 3}).value.must_equal "111"
    end

    it "adds and multiplies" do
      @parser.parse(%{("Na" + "N ") * 3 + "!"}).value.must_equal "NaN NaN NaN !"
    end
  end

  # This is what still needs to be implemented.
  describe "blocks" do
    # The call must live inside an example: the `before` hook only runs for
    # `it` blocks, so @parser is nil in the describe body itself and calling
    # it there raises while the file is still loading.
    it "calls a lambda with arguments" do
      skip "lambdas are not implemented yet"
      @parser.parse("{ a, b in a + b }(5, 4)")
    end
  end
end

# Ad-hoc check outside the test suite: print what a bare block parses to.
l = MyLang.new
p l.parse("{ 1 + 2 }")
Racc 的 wiki 中的示例使用的方法(在解析期间直接计算表达式)仅适用于简单的表达式计算器。一旦您添加任何类型的控制流,它就会停止工作 - 正如您所注意到的。
实现解释器的常用方法是让解析器构造源代码的某种中间表示形式——通常是抽象语法树或某种字节码。解析后,此表示将作为单独的步骤执行。