PHP 解释器实现与 Yaegi 核心思想
版本一:委托宿主语言(AST 直译器)
核心思想:每个 AST 节点通过 eval() 方法直接委托给 PHP 原生运算符和函数执行,不自己管理栈和操作细节。
<?php
// ==================== AST 节点定义 ====================
/**
 * Contract implemented by every AST node of the tree-walking interpreter.
 */
interface Node {
// Evaluates this node against the variable store $ctx.
// No return type is declared here; the implementations in this snippet
// narrow it to int (expression nodes) or void (statement nodes).
public function eval(Context $ctx);
}
/**
 * Integer literal: a leaf of the AST that carries a fixed value.
 */
class NumberNode implements Node
{
    /** The literal value captured at parse time. */
    public int $value;

    public function __construct(int $value)
    {
        $this->value = $value;
    }

    /**
     * Yields the stored literal; the context is not consulted.
     */
    public function eval(Context $ctx): int
    {
        return $this->value;
    }
}
/**
 * Variable reference: reads a named slot from the context.
 */
class VariableNode implements Node
{
    /** Variable name used as the lookup key in the context. */
    public string $name;

    public function __construct(string $name)
    {
        $this->name = $name;
    }

    /**
     * Looks the variable up in $ctx and returns whatever the context reports.
     */
    public function eval(Context $ctx): int
    {
        $current = $ctx->get($this->name);

        return $current;
    }
}
/**
 * Binary arithmetic (+, -, *, /) delegated to PHP's native operators.
 */
class BinaryOpNode implements Node
{
    /** Operator symbol: '+', '-', '*' or '/'. */
    public string $op;
    public Node $left;
    public Node $right;

    public function __construct(string $op, Node $left, Node $right)
    {
        $this->op = $op;
        $this->left = $left;
        $this->right = $right;
    }

    /**
     * Evaluates the left operand, then the right one, then applies the operator.
     *
     * '/' is integer division via intdiv(). The match has no default arm, so an
     * operator outside the four listed ones makes PHP raise UnhandledMatchError
     * after both operands have already been evaluated.
     */
    public function eval(Context $ctx): int
    {
        $lhs = $this->left->eval($ctx);
        $rhs = $this->right->eval($ctx);

        return match ($this->op) {
            '+' => $lhs + $rhs,
            '-' => $lhs - $rhs,
            '*' => $lhs * $rhs,
            '/' => intdiv($lhs, $rhs),
        };
    }
}
/**
 * Assignment statement: evaluates an expression and stores it under a name.
 */
class AssignNode implements Node
{
    /** Name of the variable being written. */
    public string $varName;
    /** Expression whose result is stored. */
    public Node $value;

    public function __construct(string $varName, Node $value)
    {
        $this->varName = $varName;
        $this->value = $value;
    }

    /**
     * Evaluates the right-hand side first, then writes the result into $ctx.
     */
    public function eval(Context $ctx): void
    {
        $result = $this->value->eval($ctx);
        $ctx->set($this->varName, $result);
    }
}
/**
 * print statement: writes the value of an expression followed by a newline.
 */
class PrintNode implements Node
{
    /** Expression to be printed. */
    public Node $value;

    public function __construct(Node $value)
    {
        $this->value = $value;
    }

    /**
     * Evaluates the expression and echoes its string form plus "\n".
     */
    public function eval(Context $ctx): void
    {
        $line = $this->value->eval($ctx) . "\n";
        echo $line;
    }
}
/**
 * Program root: an ordered sequence of statements.
 */
class ProgramNode implements Node
{
    /** Statement nodes in source order. */
    public array $statements;

    public function __construct(array $statements)
    {
        $this->statements = $statements;
    }

    /**
     * Runs every statement in order against the same context.
     */
    public function eval(Context $ctx): void
    {
        foreach ($this->statements as $statement) {
            $statement->eval($ctx);
        }
    }
}
// ==================== 上下文(变量存储) ====================
/**
 * Variable store for the interpreter: a flat name => int map.
 */
class Context
{
    /** @var array<string, int> current variable bindings */
    private array $variables = [];

    /**
     * Returns the value bound to $name, or 0 when the name was never set.
     */
    public function get(string $name): int
    {
        if (isset($this->variables[$name])) {
            return $this->variables[$name];
        }

        return 0;
    }

    /**
     * Binds $name to $value, replacing any previous binding.
     */
    public function set(string $name, int $value): void
    {
        $this->variables[$name] = $value;
    }
}
// ==================== 简易解析器 ====================
/**
 * Recursive-descent parser that turns toy-language source into an AST,
 * using PHP's own tokenizer (token_get_all) for lexing.
 *
 * Grammar:
 *   program   := statement*
 *   statement := 'print' expr ';' | variable '=' expr ';'
 *   expr      := term (('+' | '-') term)*
 *   term      := primary (('*' | '/') primary)*
 *   primary   := integer | variable | '(' expr ')'
 *   variable  := '$name' | 'name'   (both spellings denote the same variable)
 */
class Parser
{
    /** Token ids that carry no meaning for the grammar and are dropped up front. */
    private const IGNORED_TOKENS = [T_OPEN_TAG, T_WHITESPACE, T_COMMENT, T_DOC_COMMENT];

    /** @var array<int, array|string> significant tokens, re-indexed from 0 */
    private array $tokens;
    private int $pos = 0;

    /**
     * @param string $source Program text, with or without a leading "<?php" tag.
     */
    public function __construct(string $source)
    {
        // token_get_all() returns everything outside an opening tag as a single
        // T_INLINE_HTML token, so a tag-less snippet would never be lexed as code.
        if (!str_contains($source, '<?php')) {
            $source = '<?php ' . $source;
        }

        // Whitespace and comments may appear between any two tokens; filtering
        // them once here keeps every grammar rule free of skip logic.
        $this->tokens = array_values(array_filter(
            token_get_all($source),
            static fn ($token): bool => !(is_array($token) && in_array($token[0], self::IGNORED_TOKENS, true)),
        ));
    }

    /** Returns the token at the cursor without advancing, or null at end of input. */
    private function current(): array|string|null
    {
        return $this->tokens[$this->pos] ?? null;
    }

    /** Returns the token at the cursor and advances past it (null at end of input). */
    private function consume(): array|string|null
    {
        return $this->tokens[$this->pos++] ?? null;
    }

    /**
     * Consumes one token and verifies it is the expected one.
     *
     * @param int|string $type Token id (for array tokens) or literal character.
     * @throws Exception When the consumed token does not match.
     */
    private function expect(int|string $type): array|string
    {
        $token = $this->consume();
        if (is_array($token) && $token[0] === $type) {
            return $token;
        }
        if ($token === $type) {
            return $token;
        }
        throw new Exception("Expected $type, got " . print_r($token, true));
    }

    /** True when $token is an array token of the given kind. */
    private function isKind(array|string|null $token, int $kind): bool
    {
        return is_array($token) && $token[0] === $kind;
    }

    /**
     * True when $token names a variable: either PHP style ($a, T_VARIABLE)
     * or a bare identifier (a, T_STRING) as used by the toy language.
     */
    private function isVariable(array|string|null $token): bool
    {
        return $this->isKind($token, T_VARIABLE) || $this->isKind($token, T_STRING);
    }

    /**
     * Parses the whole token stream into a program node.
     *
     * @throws Exception On a malformed statement or expression.
     */
    public function parse(): ProgramNode
    {
        $statements = [];
        while ($this->pos < count($this->tokens)) {
            $stmt = $this->parseStatement();
            if ($stmt !== null) {
                $statements[] = $stmt;
            }
        }
        return new ProgramNode($statements);
    }

    /**
     * Parses one statement, or skips one token that cannot start a statement.
     *
     * @return Node|null Null when a token was skipped instead of parsed.
     */
    private function parseStatement(): ?Node
    {
        $token = $this->current();

        if ($this->isKind($token, T_PRINT)) {
            $this->consume();
            $expr = $this->parseExpression();
            $this->expect(';');
            return new PrintNode($expr);
        }

        if ($this->isVariable($token)) {
            $varName = ltrim($this->consume()[1], '$');
            $this->expect('=');
            $value = $this->parseExpression();
            $this->expect(';');
            return new AssignNode($varName, $value);
        }

        // Best-effort recovery kept from the original design: tokens that
        // cannot start a statement (e.g. inline HTML) are silently skipped.
        $this->consume();
        return null;
    }

    private function parseExpression(): Node
    {
        return $this->parseAdditive();
    }

    /** Parses a left-associative chain of '+' / '-' over multiplicative terms. */
    private function parseAdditive(): Node
    {
        $left = $this->parseMultiplicative();
        while (in_array($this->current(), ['+', '-'], true)) {
            $op = $this->consume();
            $left = new BinaryOpNode($op, $left, $this->parseMultiplicative());
        }
        return $left;
    }

    /** Parses a left-associative chain of '*' / '/' over primaries. */
    private function parseMultiplicative(): Node
    {
        $left = $this->parsePrimary();
        while (in_array($this->current(), ['*', '/'], true)) {
            $op = $this->consume();
            $left = new BinaryOpNode($op, $left, $this->parsePrimary());
        }
        return $left;
    }

    /**
     * Parses an integer literal, a variable reference or a parenthesised expression.
     *
     * @throws Exception When the current token cannot start a primary expression.
     */
    private function parsePrimary(): Node
    {
        $token = $this->current();

        if ($this->isKind($token, T_LNUMBER)) {
            $this->consume();
            return new NumberNode((int) $token[1]);
        }

        if ($this->isVariable($token)) {
            $this->consume();
            return new VariableNode(ltrim($token[1], '$'));
        }

        if ($token === '(') {
            $this->consume();
            $expr = $this->parseExpression();
            $this->expect(')');
            return $expr;
        }

        throw new Exception("Unexpected token: " . print_r($token, true));
    }
}
// ==================== 测试 ====================
// Demo program in the toy language. A nowdoc is used so PHP performs no
// interpolation on the program text.
$source = <<<'PHP'
a = 10;
b = 20;
c = a + b * 2;
print c;
d = c - 5;
print d;
PHP;

// Fresh variable store for this run.
$ctx = new Context();

// Parse the text into an AST, then walk it.
$parser = new Parser($source);
$ast = $parser->parse();
$ast->eval($ctx);
// Intended output (assuming the Parser tokenizes this tag-less, `$`-less
// snippet as written): 50, then 45, each on its own line.
特点:代码简洁,开发快,利用宿主语言的动态特性。但无法精确控制底层行为(如类型转换规则、溢出处理等)。
版本二:自己管理栈(虚拟机风格)
核心思想:自己维护操作数栈,自己实现每个运算操作,模拟真实虚拟机(如 JVM、CPython)的工作方式。注意:为了保持示例简短,这里没有生成字节码——各节点的 compile() 在遍历 AST 时直接调用 VM 的栈操作(loadConst、opAdd 等)。
<?php
// ==================== 值类型(模拟 zval) ====================
/**
 * Tagged runtime value used by the VM (loosely modelled on a zval):
 * a payload plus a string tag naming its type.
 */
class Value
{
    /** Raw payload. */
    public mixed $data;

    /** Type tag: "int", "float", "string", "bool" or "null". */
    public string $type;

    public function __construct(mixed $data, string $type)
    {
        $this->data = $data;
        $this->type = $type;
    }

    /** Wraps an integer with the "int" tag. */
    public static function int(int $v): self
    {
        $wrapped = new self($v, "int");
        return $wrapped;
    }

    /** Wraps a string with the "string" tag. */
    public static function string(string $v): self
    {
        $wrapped = new self($v, "string");
        return $wrapped;
    }

    /** Wraps a boolean with the "bool" tag. */
    public static function bool(bool $v): self
    {
        $wrapped = new self($v, "bool");
        return $wrapped;
    }

    /** Builds the null value (null payload, "null" tag). */
    public static function null(): self
    {
        $wrapped = new self(null, "null");
        return $wrapped;
    }
}
// ==================== 操作数栈 ====================
/**
 * Operand stack of the VM (last in, first out).
 */
class Stack
{
    /** Stored values, bottom of the stack first. */
    private array $items = [];

    /** Places $v on top of the stack. */
    public function push(Value $v): void
    {
        array_push($this->items, $v);
    }

    /**
     * Removes and returns the top value.
     *
     * @throws Exception When the stack is empty.
     */
    public function pop(): Value
    {
        if ($this->isEmpty()) {
            throw new Exception("Stack underflow");
        }

        return array_pop($this->items);
    }

    /**
     * Returns a value without removing it.
     *
     * @param int $offset Distance from the top; 0 is the top element.
     * @throws Exception When $offset reaches below the bottom of the stack.
     */
    public function peek(int $offset = 0): Value
    {
        $depth = count($this->items);
        if ($offset >= $depth) {
            throw new Exception("Stack underflow");
        }

        return $this->items[$depth - 1 - $offset];
    }

    /** Reports whether the stack holds no values. */
    public function isEmpty(): bool
    {
        return count($this->items) === 0;
    }
}
// ==================== 虚拟机(自己实现所有操作) ====================
/**
 * Stack-based virtual machine that implements every operation itself
 * instead of delegating to host-language operators on raw values.
 *
 * Binary operations pop the right operand first, then the left one, and push
 * a single result. Integer results that do not fit in a PHP int are reported
 * as an Exception rather than silently becoming a float.
 */
class VM
{
    private Stack $stack;

    /** @var array<string, Value> variable bindings */
    private array $variables = [];

    public function __construct()
    {
        $this->stack = new Stack();
    }

    /**
     * Addition: pops two operands, pushes their sum (or concatenation).
     *
     * @throws Exception On stack underflow or integer overflow.
     */
    public function opAdd(): void
    {
        [$left, $right] = $this->popOperands();
        $this->stack->push($this->addValues($left, $right));
    }

    /**
     * Subtraction: pops two operands, pushes left - right.
     *
     * @throws Exception On stack underflow or integer overflow.
     */
    public function opSub(): void
    {
        [$left, $right] = $this->popOperands();
        $this->stack->push($this->subValues($left, $right));
    }

    /**
     * Multiplication: pops two operands, pushes left * right.
     *
     * @throws Exception On stack underflow or integer overflow.
     */
    public function opMul(): void
    {
        [$left, $right] = $this->popOperands();
        $this->stack->push($this->mulValues($left, $right));
    }

    /**
     * Integer division: pops two operands, pushes intdiv(left, right).
     *
     * @throws Exception On stack underflow, division by zero or overflow.
     */
    public function opDiv(): void
    {
        [$left, $right] = $this->popOperands();
        $this->stack->push($this->divValues($left, $right));
    }

    /**
     * Pops the two operands of a binary operation.
     *
     * @return array{0: Value, 1: Value} [left, right]
     */
    private function popOperands(): array
    {
        // The right operand was pushed last, so it comes off first.
        $right = $this->stack->pop();
        $left = $this->stack->pop();

        return [$left, $right];
    }

    /**
     * Wraps an arithmetic result, rejecting overflow.
     *
     * PHP turns an int result that exceeds the int range into a float, so a
     * non-int result here means the operation overflowed.
     *
     * @throws Exception When $result is not an int.
     */
    private function intResult(int|float $result, string $operation): Value
    {
        if (!is_int($result)) {
            throw new Exception("Integer overflow in $operation");
        }

        return Value::int($result);
    }

    /**
     * Addition with the VM's own coercion rule: if either operand is tagged
     * "string" the operands are concatenated, otherwise both are cast to int
     * and summed.
     */
    private function addValues(Value $a, Value $b): Value
    {
        if ($a->type === "string" || $b->type === "string") {
            return Value::string((string)$a->data . (string)$b->data);
        }

        return $this->intResult((int)$a->data + (int)$b->data, "addition");
    }

    /** Subtraction over int-cast operands. */
    private function subValues(Value $a, Value $b): Value
    {
        return $this->intResult((int)$a->data - (int)$b->data, "subtraction");
    }

    /** Multiplication over int-cast operands. */
    private function mulValues(Value $a, Value $b): Value
    {
        return $this->intResult((int)$a->data * (int)$b->data, "multiplication");
    }

    /**
     * Integer division over int-cast operands.
     *
     * @throws Exception When the divisor is zero or the quotient overflows.
     */
    private function divValues(Value $a, Value $b): Value
    {
        $divisor = (int)$b->data;
        if ($divisor === 0) {
            throw new Exception("Division by zero");
        }

        $dividend = (int)$a->data;
        // PHP_INT_MIN / -1 is the one quotient that does not fit in an int;
        // intdiv() would raise ArithmeticError for it.
        if ($dividend === PHP_INT_MIN && $divisor === -1) {
            throw new Exception("Integer overflow in division");
        }

        return Value::int(intdiv($dividend, $divisor));
    }

    /** Pushes the value of variable $name; an unset variable reads as int 0. */
    public function loadVar(string $name): void
    {
        $value = $this->variables[$name] ?? Value::int(0);
        $this->stack->push($value);
    }

    /**
     * Pops the top of the stack and binds it to variable $name.
     *
     * @throws Exception On stack underflow.
     */
    public function storeVar(string $name): void
    {
        $value = $this->stack->pop();
        $this->variables[$name] = $value;
    }

    /** Pushes an integer constant. */
    public function loadConst(int $value): void
    {
        $this->stack->push(Value::int($value));
    }

    /**
     * Pops the top of the stack and echoes its payload followed by a newline.
     *
     * @throws Exception On stack underflow.
     */
    public function opPrint(): void
    {
        $value = $this->stack->pop();
        echo $value->data . "\n";
    }

    /** Returns the value bound to $name without touching the stack (int 0 if unset). */
    public function getVar(string $name): Value
    {
        return $this->variables[$name] ?? Value::int(0);
    }
}
// ==================== AST 节点(执行时操作虚拟机) ====================
/**
 * Contract implemented by every AST node of the stack-VM variant.
 */
interface Node {
// Drives $vm directly: the implementations in this snippet call VM
// operations (loadConst, loadVar, opAdd, ...) while the tree is walked,
// rather than emitting bytecode for later execution.
public function compile(VM $vm): void;
}
/**
 * Integer literal: pushes its value onto the VM stack.
 */
class NumberNode implements Node
{
    /** The literal value captured at parse time. */
    public int $value;

    public function __construct(int $value)
    {
        $this->value = $value;
    }

    /** Loads the literal as a constant into the VM. */
    public function compile(VM $vm): void
    {
        $vm->loadConst($this->value);
    }
}
/**
 * Variable reference: pushes the variable's current value onto the VM stack.
 */
class VariableNode implements Node
{
    /** Name of the variable to read. */
    public string $name;

    public function __construct(string $name)
    {
        $this->name = $name;
    }

    /** Asks the VM to load the named variable. */
    public function compile(VM $vm): void
    {
        $vm->loadVar($this->name);
    }
}
/**
 * Binary arithmetic (+, -, *, /) carried out by the VM.
 */
class BinaryOpNode implements Node
{
    /** Operator symbol: '+', '-', '*' or '/'. */
    public string $op;
    public Node $left;
    public Node $right;

    public function __construct(string $op, Node $left, Node $right)
    {
        $this->op = $op;
        $this->left = $left;
        $this->right = $right;
    }

    /**
     * Runs the left subtree, then the right one (each leaves its result on
     * the stack), then triggers the VM operation that pops both operands and
     * pushes one result.
     *
     * The match has no default arm, so an operator outside the four listed
     * ones makes PHP raise UnhandledMatchError after both subtrees have run.
     */
    public function compile(VM $vm): void
    {
        $this->left->compile($vm);
        $this->right->compile($vm);

        match ($this->op) {
            '+' => $vm->opAdd(),
            '-' => $vm->opSub(),
            '*' => $vm->opMul(),
            '/' => $vm->opDiv(),
        };
    }
}
/**
 * Assignment statement: computes an expression and stores it in a variable.
 */
class AssignNode implements Node
{
    /** Name of the variable being written. */
    public string $varName;
    /** Expression whose result is stored. */
    public Node $value;

    public function __construct(string $varName, Node $value)
    {
        $this->varName = $varName;
        $this->value = $value;
    }

    /**
     * Runs the expression so its result sits on top of the stack, then has
     * the VM pop that result into the variable.
     */
    public function compile(VM $vm): void
    {
        $this->value->compile($vm);
        $vm->storeVar($this->varName);
    }
}
/**
 * print statement: computes an expression and has the VM print it.
 */
class PrintNode implements Node
{
    /** Expression to be printed. */
    public Node $value;

    public function __construct(Node $value)
    {
        $this->value = $value;
    }

    /**
     * Runs the expression so its result sits on top of the stack, then asks
     * the VM to pop and print it.
     */
    public function compile(VM $vm): void
    {
        $this->value->compile($vm);
        $vm->opPrint();
    }
}
/**
 * Program root: an ordered sequence of statements.
 */
class ProgramNode implements Node
{
    /** Statement nodes in source order. */
    public array $statements;

    public function __construct(array $statements)
    {
        $this->statements = $statements;
    }

    /** Runs every statement in order against the same VM. */
    public function compile(VM $vm): void
    {
        foreach ($this->statements as $statement) {
            $statement->compile($vm);
        }
    }
}
// ==================== 解析器(与版本一相同) ====================
/**
 * Recursive-descent parser that turns toy-language source into an AST,
 * using PHP's own tokenizer (token_get_all) for lexing.
 *
 * Grammar:
 *   program   := statement*
 *   statement := 'print' expr ';' | variable '=' expr ';'
 *   expr      := term (('+' | '-') term)*
 *   term      := primary (('*' | '/') primary)*
 *   primary   := integer | variable | '(' expr ')'
 *   variable  := '$name' | 'name'   (both spellings denote the same variable)
 */
class Parser
{
    /** Token ids that carry no meaning for the grammar and are dropped up front. */
    private const IGNORED_TOKENS = [T_OPEN_TAG, T_WHITESPACE, T_COMMENT, T_DOC_COMMENT];

    /** @var array<int, array|string> significant tokens, re-indexed from 0 */
    private array $tokens;
    private int $pos = 0;

    /**
     * @param string $source Program text, with or without a leading "<?php" tag.
     */
    public function __construct(string $source)
    {
        // token_get_all() returns everything outside an opening tag as a single
        // T_INLINE_HTML token, so a tag-less snippet would never be lexed as code.
        if (!str_contains($source, '<?php')) {
            $source = '<?php ' . $source;
        }

        // Whitespace and comments may appear between any two tokens; filtering
        // them once here keeps every grammar rule free of skip logic.
        $this->tokens = array_values(array_filter(
            token_get_all($source),
            static fn ($token): bool => !(is_array($token) && in_array($token[0], self::IGNORED_TOKENS, true)),
        ));
    }

    /** Returns the token at the cursor without advancing, or null at end of input. */
    private function current(): array|string|null
    {
        return $this->tokens[$this->pos] ?? null;
    }

    /** Returns the token at the cursor and advances past it (null at end of input). */
    private function consume(): array|string|null
    {
        return $this->tokens[$this->pos++] ?? null;
    }

    /**
     * Consumes one token and verifies it is the expected one.
     *
     * @param int|string $type Token id (for array tokens) or literal character.
     * @throws Exception When the consumed token does not match.
     */
    private function expect(int|string $type): array|string
    {
        $token = $this->consume();
        if (is_array($token) && $token[0] === $type) {
            return $token;
        }
        if ($token === $type) {
            return $token;
        }
        throw new Exception("Expected $type, got " . print_r($token, true));
    }

    /** True when $token is an array token of the given kind. */
    private function isKind(array|string|null $token, int $kind): bool
    {
        return is_array($token) && $token[0] === $kind;
    }

    /**
     * True when $token names a variable: either PHP style ($a, T_VARIABLE)
     * or a bare identifier (a, T_STRING) as used by the toy language.
     */
    private function isVariable(array|string|null $token): bool
    {
        return $this->isKind($token, T_VARIABLE) || $this->isKind($token, T_STRING);
    }

    /**
     * Parses the whole token stream into a program node.
     *
     * @throws Exception On a malformed statement or expression.
     */
    public function parse(): ProgramNode
    {
        $statements = [];
        while ($this->pos < count($this->tokens)) {
            $stmt = $this->parseStatement();
            if ($stmt !== null) {
                $statements[] = $stmt;
            }
        }
        return new ProgramNode($statements);
    }

    /**
     * Parses one statement, or skips one token that cannot start a statement.
     *
     * @return Node|null Null when a token was skipped instead of parsed.
     */
    private function parseStatement(): ?Node
    {
        $token = $this->current();

        if ($this->isKind($token, T_PRINT)) {
            $this->consume();
            $expr = $this->parseExpression();
            $this->expect(';');
            return new PrintNode($expr);
        }

        if ($this->isVariable($token)) {
            $varName = ltrim($this->consume()[1], '$');
            $this->expect('=');
            $value = $this->parseExpression();
            $this->expect(';');
            return new AssignNode($varName, $value);
        }

        // Best-effort recovery kept from the original design: tokens that
        // cannot start a statement (e.g. inline HTML) are silently skipped.
        $this->consume();
        return null;
    }

    private function parseExpression(): Node
    {
        return $this->parseAdditive();
    }

    /** Parses a left-associative chain of '+' / '-' over multiplicative terms. */
    private function parseAdditive(): Node
    {
        $left = $this->parseMultiplicative();
        while (in_array($this->current(), ['+', '-'], true)) {
            $op = $this->consume();
            $left = new BinaryOpNode($op, $left, $this->parseMultiplicative());
        }
        return $left;
    }

    /** Parses a left-associative chain of '*' / '/' over primaries. */
    private function parseMultiplicative(): Node
    {
        $left = $this->parsePrimary();
        while (in_array($this->current(), ['*', '/'], true)) {
            $op = $this->consume();
            $left = new BinaryOpNode($op, $left, $this->parsePrimary());
        }
        return $left;
    }

    /**
     * Parses an integer literal, a variable reference or a parenthesised expression.
     *
     * @throws Exception When the current token cannot start a primary expression.
     */
    private function parsePrimary(): Node
    {
        $token = $this->current();

        if ($this->isKind($token, T_LNUMBER)) {
            $this->consume();
            return new NumberNode((int) $token[1]);
        }

        if ($this->isVariable($token)) {
            $this->consume();
            return new VariableNode(ltrim($token[1], '$'));
        }

        if ($token === '(') {
            $this->consume();
            $expr = $this->parseExpression();
            $this->expect(')');
            return $expr;
        }

        throw new Exception("Unexpected token: " . print_r($token, true));
    }
}
// ==================== 测试 ====================
// Demo program in the toy language. A nowdoc is used so PHP performs no
// interpolation on the program text.
$source = <<<'PHP'
a = 10;
b = 20;
c = a + b * 2;
print c;
d = c - 5;
print d;
PHP;

// Fresh virtual machine for this run.
$vm = new VM();

// Parse the text into an AST, then let each node drive the VM.
$parser = new Parser($source);
$ast = $parser->parse();
$ast->compile($vm);
// Intended output (assuming the Parser tokenizes this tag-less, `$`-less
// snippet as written): 50, then 45, each on its own line.
特点:完全自己管理栈和运算逻辑,可以精确控制类型转换、溢出处理等行为。代码量更大,但更接近真实虚拟机的实现方式。
Yaegi 核心思想总结
一句话概括
Yaegi 是一个 AST 直译器:它把 Go 源码解析成 AST,在 CFG 分析阶段给每个节点绑定一个预定义的 action 函数,执行时遍历 AST 逐节点调用 action,通过 Go 的 reflect 包操作原生 Go 值。
执行流水线
源码 → scanner → parser → AST → [GTA + CFG分析] → 带action的AST → 执行器运行
核心组件
| 组件 | 作用 |
|---|---|
| Scanner | 词法分析,把源码拆成 token |
| Parser | 语法分析,把 token 组装成 AST |
| GTA (Global Types Analysis) | 全局类型分析,收集所有类型定义 |
| CFG (Control Flow Graph) | 控制流分析,给每个节点绑定 action 函数 |
| Interpreter | 遍历 AST,逐节点调用 action 执行 |
action 函数的本质
action 不是运行时生成的,而是在 CFG 分析阶段静态绑定的预定义函数:
// Illustrative fragment: chooses the action assigned to node n from its kind.
// Only three kinds are shown; the remaining cases are elided below.
switch n.kind {
case addAssign:
n.action = addAssignAction
case callExpr:
n.action = callExprAction
case ident:
n.action = identAction
// ...
}
每个 action 函数内部使用 reflect 来操作值(取值、设值、调用函数等)。
与真实虚拟机的区别
| 特征 | 传统虚拟机(JVM/CPython) | Yaegi |
|---|---|---|
| 指令集 | 有自己的字节码指令集 | 没有,直接操作 AST |
| 类型系统 | 自己实现 | 复用 Go 原生类型系统(reflect.Type) |
| 内存管理 | 自己实现(或依赖宿主) | 复用 Go 的 GC |
| 值存储 | 操作数栈 + 局部变量表 | 符号表 + reflect.Value |
| 函数调用 | 虚拟机指令 CALL | reflect.Value.Call() |
性能瓶颈
AST 遍历开销:每次执行都要遍历 AST 并逐节点调用 action,而原生 Go 直接执行编译好的机器码
reflect 调用开销:每次取值、设值、调用函数都要走 reflect,有动态类型检查和值拷贝
没有 JIT:每次调用都重新解释,没有编译成机器码的优化路径
与 PHP 解释器的类比
| Yaegi(Go 写 Go) | PHP 解释器(PHP 写 PHP) |
|---|---|
| 用 reflect 操作 Go 值 | 直接用 PHP 变量,天然动态 |
| 需要模拟 Go 的类型系统 | PHP 天然动态类型,无需模拟 |
| 符号表存 reflect.Value | 符号表存 PHP 原生值 |
| 函数调用用 reflect.Value.Call() | 函数调用用 call_user_func() |
核心思想迁移:不管用什么语言写解释器,核心模式都是一样的——AST 节点挂执行逻辑,解释器遍历执行。区别只在于执行逻辑是委托给宿主语言,还是自己从零实现。