javascript 解释器中的解析循环
Parsing loops in a javascript interpreter
我一直在探索在 javascript 中编写一个非常基本/有限的解释器作为练习。在我介绍 LOOP 的概念之前,一切都很顺利。
给定以下脚本:
LOOP 2
A
LOOP 3
B
END
LOOP 4
C
LOOP 5
D
END
E
END
F
END
算法应按以下顺序访问内部标记:
ABBBCDDDDDECDDDDDECDDDDDECDDDDDEFABBBCDDDDDECDDDDDECDDDDDECDDDDDEF
以下方法可以解决问题,但需要对标记进行大量迭代。这比我以前使用的手动扩展循环的切片方法有所改进,但远非最佳。
/**
* In practice, we'll grab each token as we read the script,
* but to keep this simple and focus on the loop algorithm,
* we can cheat and make an array of all the tokens.
*/
const getTokens = (s) => s.replace(/[\W_]+/g, " ").split(" ").filter(Boolean);
/* Temp vars - ideally, I'd like to solve this with arrays. */
const start = []; // Loop start indices
const end = []; // Loop end indices
const counts = []; // Times to loop
const completed = []; // Loops completed
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
if (token === "LOOP") {
if (start.length == 0 || i > start[start.length - 1]) {
// Add new loop index if we haven't seen it before
start.push(i); // Store the loop index
counts.push(Number(tokens[i + 1])); // The loop count is always next LOOP token
completed.push(0); // Initialize with 0 completed at index
// Find the end index for the loop
// Note: This is the slowest part.
let skip = 0;
for (let j = i + 2; j < tokens.length; j++) {
if (tokens[j] == "LOOP") {
skip++; // Increase nest depth
} else if (tokens[j] == "END") {
if (skip == 0) {
end.push(j); // Found matching loop close
break;
}
skip--;
}
}
}
i++; // Skip over the loop count
continue;
} else if (token === "END") {
let j;
for (j = 0; j < end.length; j++) {
if (end[j] == i) break; // Found matching end index
}
const isCompleted = completed[j] == counts[j] - 1;
if (!isCompleted) {
i = start[j] + 1;
completed[j]++;
for (let k = j + 1; k < start.length; k++) {
completed[k] = 0; // Reset nested loops in between
}
}
continue;
}
console.log(tokens[i]);
}
https://jsfiddle.net/5wpa8t4n/
使用一次遍历脚本或最多两次遍历而不是 N-LOOP 遍历来完成这种基于数组的方法的更好方法是什么?
开始解释循环时不需要知道匹配end
的位置。你只需要记录遇到下一个end
时跳回的位置,但在此之前继续逐个解释token。
这些位置连同各自的计数器可以存储在堆栈结构中。
const script = `
DO
A
DO
B
LOOP 3
DO
C
DO
D
LOOP 5
E
LOOP 4
F
LOOP 2
`
const parse = (script) =>
script
.replace(/[\W_]+/g, " ")
.split(" ")
.filter(Boolean);
const interpret = (code) => {
let loops = []; // Active loops: iteration count and jump target
let ip = 0; // instruction pointer
let result = "";
while (ip < code.length) {
const instruction = code[ip];
switch (instruction) {
case "DO": {
++ip;
loops.push({count: 0, start: ip});
} break;
case "LOOP": {
const limit = Number(code[++ip]);
const {count, start} = loops.pop();
if (count < limit) {
loops.push({count: count+1, start});
ip = start; // jump back
} else {
++ip;
}
} break;
default: {
++ip;
result += instruction; // a print statement
} break;
}
}
return result;
};
console.log(interpret(parse(script)));
我已经稍微简化了结构以使用 do
-while
循环,因此我永远不必跳过循环体。在由解析器发出的真正字节码中,跳转目标(来回)将是指令本身的一部分,并且只有计数“变量”需要存储在堆栈中。跳跃目标永远不会改变,所以你只需要在 parse
函数中生成一次。
我一直在探索在 javascript 中编写一个非常基本/有限的解释器作为练习。在我介绍 LOOP 的概念之前,一切都很顺利。
给定以下脚本:
LOOP 2
A
LOOP 3
B
END
LOOP 4
C
LOOP 5
D
END
E
END
F
END
算法应按以下顺序访问内部标记:
ABBBCDDDDDECDDDDDECDDDDDECDDDDDEFABBBCDDDDDECDDDDDECDDDDDECDDDDDEF
以下方法可以解决问题,但需要对标记进行大量迭代。这比我以前使用的手动扩展循环的切片方法有所改进,但远非最佳。
/**
* In practice, we'll grab each token as we read the script,
* but to keep this simple and focus on the loop algorithm,
* we can cheat and make an array of all the tokens.
*/
const getTokens = (s) => s.replace(/[\W_]+/g, " ").split(" ").filter(Boolean);
/* Temp vars - ideally, I'd like to solve this with arrays. */
const start = []; // Loop start indices
const end = []; // Loop end indices
const counts = []; // Times to loop
const completed = []; // Loops completed
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
if (token === "LOOP") {
if (start.length == 0 || i > start[start.length - 1]) {
// Add new loop index if we haven't seen it before
start.push(i); // Store the loop index
counts.push(Number(tokens[i + 1])); // The loop count is always next LOOP token
completed.push(0); // Initialize with 0 completed at index
// Find the end index for the loop
// Note: This is the slowest part.
let skip = 0;
for (let j = i + 2; j < tokens.length; j++) {
if (tokens[j] == "LOOP") {
skip++; // Increase nest depth
} else if (tokens[j] == "END") {
if (skip == 0) {
end.push(j); // Found matching loop close
break;
}
skip--;
}
}
}
i++; // Skip over the loop count
continue;
} else if (token === "END") {
let j;
for (j = 0; j < end.length; j++) {
if (end[j] == i) break; // Found matching end index
}
const isCompleted = completed[j] == counts[j] - 1;
if (!isCompleted) {
i = start[j] + 1;
completed[j]++;
for (let k = j + 1; k < start.length; k++) {
completed[k] = 0; // Reset nested loops in between
}
}
continue;
}
console.log(tokens[i]);
}
https://jsfiddle.net/5wpa8t4n/
使用一次遍历脚本或最多两次遍历而不是 N-LOOP 遍历来完成这种基于数组的方法的更好方法是什么?
开始解释循环时不需要知道匹配end
的位置。你只需要记录遇到下一个end
时跳回的位置,但在此之前继续逐个解释token。
这些位置连同各自的计数器可以存储在堆栈结构中。
const script = `
DO
A
DO
B
LOOP 3
DO
C
DO
D
LOOP 5
E
LOOP 4
F
LOOP 2
`
const parse = (script) =>
script
.replace(/[\W_]+/g, " ")
.split(" ")
.filter(Boolean);
const interpret = (code) => {
let loops = []; // Active loops: iteration count and jump target
let ip = 0; // instruction pointer
let result = "";
while (ip < code.length) {
const instruction = code[ip];
switch (instruction) {
case "DO": {
++ip;
loops.push({count: 0, start: ip});
} break;
case "LOOP": {
const limit = Number(code[++ip]);
const {count, start} = loops.pop();
if (count < limit) {
loops.push({count: count+1, start});
ip = start; // jump back
} else {
++ip;
}
} break;
default: {
++ip;
result += instruction; // a print statement
} break;
}
}
return result;
};
console.log(interpret(parse(script)));
我已经稍微简化了结构以使用 do
-while
循环,因此我永远不必跳过循环体。在由解析器发出的真正字节码中,跳转目标(来回)将是指令本身的一部分,并且只有计数“变量”需要存储在堆栈中。跳跃目标永远不会改变,所以你只需要在 parse
函数中生成一次。