PHP Generator的实现
对于Generator的实现,关键的数据结构是_zend_execute_data,已在这篇文章:PHP内核的一点探索——execute_data 介绍了。本文只是粗略的说明了Generator的实现~
Generator的创建
Generator的实现代码在Zend/zend_generators.c中,我们无法直接new一个Generator对象,如果直接new的话会报如下错误:
PHP Catchable fatal error: The "Generator" class is reserved for internal use and cannot be manually instantiated
在源码中可看到原因:
1
2
3
4
5
6
7
static zend_function *zend_generator_get_constructor(zval *object TSRMLS_DC) /* {{{ */
{
zend_error(E_RECOVERABLE_ERROR, "The \"Generator\" class is reserved for internal use and cannot be manually instantiated");
return NULL;
}
/* }}} */
PHP手册中也说明了:Generator objects are returned from generators. 也就是需要通过如下方式得到一个Generator对象:
1
2
3
4
5
6
<?php
function gen() {
yield;
}
$gen = gen();
var_dump($gen);//Generator对象
那么一个Generator对象是如何生成的?打印上面代码的opcode看看:
Generator的数据结构:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
typedef struct _zend_generator {
zend_object std;
zend_generator_iterator iterator;
/* The suspended execution context. */
zend_execute_data *execute_data;
/* The separate stack used by generator */
zend_vm_stack stack;
//yield 能够产生 key => value 形式的值
/* Current value */
zval *value;
/* Current key */
zval *key;
/* Variable to put sent value into */
zval **send_target;//通过send方法传入的变量
/* Largest used integer key for auto-incrementing keys */
long largest_used_integer_key;
/* ZEND_GENERATOR_* flags */
zend_uchar flags;
} zend_generator;
在初始化gen函数调用时(即上图中的DO_FCALL),发现gen函数的op_array->fn_flags有ZEND_ACC_GENERATOR标识,说明需要生成一个Generator对象返回。
在Zend/zend_vm_execute.h中:
1
2
3
4
5
6
7
8
……
if (UNEXPECTED((EG(active_op_array)->fn_flags & ZEND_ACC_GENERATOR) != 0)) {
if (RETURN_VALUE_USED(opline)) {
ret->var.ptr = zend_generator_create_zval(EG(active_op_array) TSRMLS_CC);//创建Generator对象
ret->var.fcall_returned_reference = 0;
}
}
……
在Zend/zend_generators.c中,zend_generator_create_zval函数主要做的事情就是备份当前EG中的一些与执行上下文相关的信息,然后创建新的execute_data,同时也分配新的一份_zend_vm_stack,注意该堆栈与原本的EG(argument_stack)是分开的,也即每个Generator对象都会拥有自己的_zend_vm_stack:
1
2
3
4
5
6
7
ZEND_API zval *zend_generator_create_zval(zend_op_array *op_array TSRMLS_DC) /* {{{ */
{
……//备份EG中的一些与执行上下文相关信息
execute_data = zend_create_execute_data_from_op_array(op_array, 0 TSRMLS_CC);
……//恢复EG中的一些与执行上下文相关信息,使得原来的上下文环境可以继续正常执行
//创建一个Generator对象后返回
}
在zend_create_execute_data_from_op_array中实际调用的是Zend/zend_execute.c中的:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
static zend_always_inline zend_execute_data *i_create_execute_data_from_op_array(zend_op_array *op_array, zend_bool nested TSRMLS_DC) /* {{{ */
{
……
if (UNEXPECTED((op_array->fn_flags & ZEND_ACC_GENERATOR) != 0)) {
……
EG(argument_stack) = zend_vm_stack_new_page((total_size + (sizeof(void*) - 1)) / sizeof(void*));//新的堆栈,与先前的堆栈是分离的
EG(argument_stack)->prev = NULL;//这里其实不需要,zend_vm_stack_new_page已经做了= =
execute_data = (zend_execute_data*)((char*)ZEND_VM_STACK_ELEMETS(EG(argument_stack)) + args_size + execute_data_size + Ts_size);
//之所以这里需要生成一个prev_execute_data,是因为要能够传递函数参数进来,这就需要了解函数参数是如何传递的,下文会提到
//在上文的例子中gen函数并没有参数
/* copy prev_execute_data */
EX(prev_execute_data) = (zend_execute_data*)((char*)ZEND_VM_STACK_ELEMETS(EG(argument_stack)) + args_size);
memset(EX(prev_execute_data), 0, sizeof(zend_execute_data));
EX(prev_execute_data)->function_state.function = (zend_function*)op_array;
EX(prev_execute_data)->function_state.arguments = (void**)((char*)ZEND_VM_STACK_ELEMETS(EG(argument_stack)) + ZEND_MM_ALIGNED_SIZE(sizeof(zval*)) * args_count);
/* copy arguments */
*EX(prev_execute_data)->function_state.arguments = (void*)(zend_uintptr_t)args_count;
if (args_count > 0) {
zval **arg_src = (zval**)zend_vm_stack_get_arg_ex(EG(current_execute_data), 1);
zval **arg_dst = (zval**)zend_vm_stack_get_arg_ex(EX(prev_execute_data), 1);
int i;
for (i = 0; i < args_count; i++) {
arg_dst[i] = arg_src[i];
Z_ADDREF_P(arg_dst[i]);
}
}
} else {
……
}
……
}
上面新建好的上下文执行堆栈如下图:
Generator的执行
通过调用send、next、throw方法能够使Generator执行到下一个yield或结束。
在Generator的方法的实现中,基本都会调用如下一个函数:
1
2
3
4
5
6
7
8
static void zend_generator_ensure_initialized(zend_generator *generator TSRMLS_DC) /* {{{ */
{
if (generator->execute_data && !generator->value) { //如果还未执行过
zend_generator_resume(generator TSRMLS_CC); //首次执行
generator->flags |= ZEND_GENERATOR_AT_FIRST_YIELD; //设置标识,表明现在已经执行到第一个yield处了
}
}
/* }}} */
在Zend/zend_generators.c中,以send方法为例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* {{{ proto mixed Generator::send(mixed $value)
* Sends a value to the generator */
ZEND_METHOD(Generator, send)
{
zval *value;
zend_generator *generator;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &value) == FAILURE) {//接收传递过来的参数
return;
}
generator = (zend_generator *) zend_object_store_get_object(getThis() TSRMLS_CC);//获取当前对象对应的generator
zend_generator_ensure_initialized(generator TSRMLS_CC);//确保首次执行过了
/* The generator is already closed, thus can't send anything */
if (!generator->execute_data) {
return;
}
/* Put sent value in the target VAR slot, if it is used */
if (generator->send_target) {
Z_DELREF_PP(generator->send_target);
Z_ADDREF_P(value);
*generator->send_target = value;
}
zend_generator_resume(generator TSRMLS_CC);//执行一次
if (generator->value) {//如果有返回值
RETURN_ZVAL_FAST(generator->value);
}
}
/* }}} */
Generator的执行关键在zend_generator_resume函数中:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
ZEND_API void zend_generator_resume(zend_generator *generator TSRMLS_DC) /* {{{ */
{
//这个函数中的注释已经很详细了~
……
/* Backup executor globals 备份EG中与上下文相关的信息 */
zval **original_return_value_ptr_ptr = EG(return_value_ptr_ptr);
zend_execute_data *original_execute_data = EG(current_execute_data);
zend_op **original_opline_ptr = EG(opline_ptr);
zend_op_array *original_active_op_array = EG(active_op_array);
HashTable *original_active_symbol_table = EG(active_symbol_table);
zval *original_This = EG(This);
zend_class_entry *original_scope = EG(scope);
zend_class_entry *original_called_scope = EG(called_scope);
zend_vm_stack original_stack = EG(argument_stack);
/* We (mis)use the return_value_ptr_ptr to provide the generator object
* to the executor, so YIELD will be able to set the yielded value */
EG(return_value_ptr_ptr) = (zval **) generator;//作为对generator的引用,为了能够在执行yield opcode时设置generator->value
/* Set executor globals 恢复Generator的执行上下文信息 */
EG(current_execute_data) = generator->execute_data;
EG(opline_ptr) = &generator->execute_data->opline;
EG(active_op_array) = generator->execute_data->op_array;
EG(active_symbol_table) = generator->execute_data->symbol_table;
EG(This) = generator->execute_data->current_this;
EG(scope) = generator->execute_data->current_scope;
EG(called_scope) = generator->execute_data->current_called_scope;
EG(argument_stack) = generator->stack;
//这里解释了为何在创建generator的execute_data时生成一个prev_execute_data的原因
/* We want the backtrace to look as if the generator function was
* called from whatever method we are current running (e.g. next()).
* The first prev_execute_data contains an additional stack frame,
* which makes the generator function show up in the backtrace and
* makes the arguments available to func_get_args(). So we have to
* set the prev_execute_data of that prev_execute_data :) */
generator->execute_data->prev_execute_data->prev_execute_data = original_execute_data;
/* Resume execution 执行Generator,从上次停止的地方继续执行opcode*/
generator->flags |= ZEND_GENERATOR_CURRENTLY_RUNNING;
zend_execute_ex(generator->execute_data TSRMLS_CC);
generator->flags &= ~ZEND_GENERATOR_CURRENTLY_RUNNING;
/* Restore executor globals 恢复原来调用Generator执行的上下文*/
EG(return_value_ptr_ptr) = original_return_value_ptr_ptr;
EG(current_execute_data) = original_execute_data;
EG(opline_ptr) = original_opline_ptr;
EG(active_op_array) = original_active_op_array;
EG(active_symbol_table) = original_active_symbol_table;
EG(This) = original_This;
EG(scope) = original_scope;
EG(called_scope) = original_called_scope;
EG(argument_stack) = original_stack;
/* If an exception was thrown in the generator we have to internally
* rethrow it in the parent scope. */
if (UNEXPECTED(EG(exception) != NULL)) {
zend_throw_exception_internal(NULL TSRMLS_CC);
}
}
/* }}} */
Generator的结束
每个Generator的op_array中都会有GENERATOR_RETURN opcode,不是普通函数的RETURN,看看其handler:
1
2
3
4
5
6
7
8
9
10
11
static int ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
{
/* The generator object is stored in return_value_ptr_ptr */
zend_generator *generator = (zend_generator *) EG(return_value_ptr_ptr);
/* Close the generator to free up resources */
zend_generator_close(generator, 1 TSRMLS_CC);//当前generator已经执行完
/* Pass execution back to handling code */
ZEND_VM_RETURN();//返回到zend_generator_resume中
}
关于php中用户自定义函数参数的传递
带有参数的函数,其op_array中存在一个RECV opcode。因为函数的参数值是存在于其调用域的execute_data中的,在函数对应的execute_data中其实没有。
查看RECV的handler函数:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
static int ZEND_FASTCALL ZEND_RECV_INIT_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
{
……
zval **param = zend_vm_stack_get_arg(arg_num TSRMLS_CC);
……
}
static zend_always_inline int zend_vm_stack_get_args_count(TSRMLS_D)
{
return zend_vm_stack_get_args_count_ex(EG(current_execute_data)->prev_execute_data);
}
static zend_always_inline zval** zend_vm_stack_get_arg(int requested_arg TSRMLS_DC)
{
//从上一个execute_data中获取参数的值
return zend_vm_stack_get_arg_ex(EG(current_execute_data)->prev_execute_data, requested_arg);
}