Size: 6188
Comment:
|
Size: 6347
Comment:
|
Deletions are marked like this. | Additions are marked like this. |
Line 8: | Line 8: |
#!cplusplus |
|
Line 10: | Line 12: |
static long collect(PyGC_Head *young, PyGC_Head *old) { /*young是当前收集的一代,还有用的对象放到old这一代*/ . long n = 0; long m = 0; PyGC_Head reachable; PyGC_Head unreachable; PyGC_Head finalizers; PyGC_Head *gc; |
static long collect(PyGC_Head *young, PyGC_Head *old) { /*young是当前收集的一代,还有用的对象放到old这一代*/ long n = 0; long m = 0; PyGC_Head reachable; PyGC_Head unreachable; PyGC_Head finalizers; PyGC_Head *gc; |
Line 14: | Line 24: |
. PySys_WriteStderr( . "gc: collecting generation %d...\n" "gc: objects in each generation: %ld %ld %ld\n", generation, gc_list_size(&_PyGC_generation0), gc_list_size(&generation1), gc_list_size(&generation2)); } /* 使用 ob_refcnt 和 gc_refs, 计算容器集合中哪些对象 |
PySys_WriteStderr( "gc: collecting generation %d...\n" "gc: objects in each generation: %ld %ld %ld\n", generation, gc_list_size(&_PyGC_generation0), gc_list_size(&generation1), gc_list_size(&generation2)); } /* 使用 ob_refcnt 和 gc_refs, 计算容器集合中哪些对象 |
Line 20: | Line 36: |
update_refs(young); subtract_refs(young); /* 将所有从外部可达的对象移动到 * reachable 集合(ie. gc_refs > 0). 然后, |
update_refs(young); subtract_refs(young); /* 将所有从外部可达的对象移动到 * reachable 集合(ie. gc_refs > 0). 然后, |
Line 23: | Line 42: |
* / gc_list_init(&reachable); move_roots(young, &reachable); move_root_reachable(&reachable); /* 将不可达的对象移动到临时列表, |
*/ gc_list_init(&reachable); move_roots(young, &reachable); move_root_reachable(&reachable); /* 将不可达的对象移动到临时列表, |
Line 26: | Line 49: |
gc_list_init(&unreachable); gc_list_move(young, &unreachable); /* 将可达的对象移动到下一代 */ gc_list_merge(&reachable, old); /* 移动从finalizers可达的对象, 我们还不能安全的删除他们 |
gc_list_init(&unreachable); gc_list_move(young, &unreachable); /* 将可达的对象移动到下一代 */ gc_list_merge(&reachable, old); /* 移动从finalizers可达的对象, 我们还不能安全的删除他们 |
Line 30: | Line 58: |
gc_list_init(&finalizers); move_finalizers(&unreachable, &finalizers); move_finalizer_reachable(&finalizers); /* 收集已发现的可回收的对象的统计数据, | gc_list_init(&finalizers); move_finalizers(&unreachable, &finalizers); move_finalizer_reachable(&finalizers); /* 收集已发现的可回收的对象的统计数据, |
Line 33: | Line 65: |
. gc = gc->gc.gc_next) { . m++; |
gc = gc->gc.gc_next) { m++; |
Line 36: | Line 68: |
. debug_cycle("collectable", FROM_GC(gc)); } } /* 对每个可回收的对象调用tp_clear. 它可以打破引用循环 |
debug_cycle("collectable", FROM_GC(gc)); } } /* 对每个可回收的对象调用tp_clear. 它可以打破引用循环 |
Line 40: | Line 73: |
delete_garbage(&unreachable, old); /* 收集不可回收对象的统计数据。 | delete_garbage(&unreachable, old); /* 收集不可回收对象的统计数据。 |
Line 43: | Line 78: |
. gc = gc->gc.gc_next) { . n++; |
gc = gc->gc.gc_next) { n++; |
Line 46: | Line 81: |
. debug_cycle("uncollectable", FROM_GC(gc)); | debug_cycle("uncollectable", FROM_GC(gc)); |
Line 50: | Line 85: |
. if (m == 0 && n == 0) { . PySys_WriteStderr("gc: done.\n"); } else { . PySys_WriteStderr( . "gc: done, %ld unreachable, %ld uncollectable.\n", n+m, n); } } /* 将finalizers加到一个可达的 Python 垃圾列表 |
if (m == 0 && n == 0) { PySys_WriteStderr("gc: done.\n"); } else { PySys_WriteStderr( "gc: done, %ld unreachable, %ld uncollectable.\n", n+m, n); } } /* 将finalizers加到一个可达的 Python 垃圾列表 |
Line 59: | Line 98: |
Line 60: | Line 100: |
. if (gc_str == NULL) { . gc_str = PyString_FromString("garbage collection"); } PyErr_WriteUnraisable(gc_str); Py_FatalError("unexpected exception during garbage collection"); } allocated = 0; return n+m; } }}} 注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧) 下面一段是分代机制: {{{ static long collect_generations(void) { |
if (gc_str == NULL) { gc_str = PyString_FromString("garbage collection"); } PyErr_WriteUnraisable(gc_str); Py_FatalError("unexpected exception during garbage collection"); } allocated = 0; return n+m; } }}} 注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧) 下面一段是分代机制: {{{ #!cplusplus static long collect_generations(void) { |
Line 78: | Line 125: |
* / static long collections0 = 0; static long collections1 = 0; long n = 0; | */ static long collections0 = 0; static long collections1 = 0; long n = 0; |
Line 80: | Line 132: |
. generation = 2; gc_list_merge(&_PyGC_generation0, &generation2); gc_list_merge(&generation1, &generation2); if (generation2.gc.gc_next != &generation2) { . n = collect(&generation2, &generation2); } collections1 = 0; |
generation = 2; gc_list_merge(&_PyGC_generation0, &generation2); gc_list_merge(&generation1, &generation2); if (generation2.gc.gc_next != &generation2) { n = collect(&generation2, &generation2); } collections1 = 0; |
Line 86: | Line 141: |
. generation = 1; collections1++; gc_list_merge(&_PyGC_generation0, &generation1); if (generation1.gc.gc_next != &generation1) { . n = collect(&generation1, &generation2); } collections0 = 0; } else { . generation = 0; collections0++; |
generation = 1; collections1++; gc_list_merge(&_PyGC_generation0, &generation1); if (generation1.gc.gc_next != &generation1) { n = collect(&generation1, &generation2); } collections0 = 0; } else { generation = 0; collections0++; |
Line 93: | Line 153: |
. n = collect(&_PyGC_generation0, &generation1); } } return n; |
n = collect(&_PyGC_generation0, &generation1); } } return n; |
Line 102: | Line 163: |
PyObject * _PyObject_GC_Malloc(PyTypeObject *tp, int nitems) { . PyObject *op; const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems); |
#!cplusplus PyObject * _PyObject_GC_Malloc(PyTypeObject *tp, int nitems) { PyObject *op; const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems); |
Line 106: | Line 170: |
. const size_t nbytes = sizeof(PyGC_Head) + basicsize; PyGC_Head *g = PyObject_MALLOC(nbytes); if (g == NULL) . return (PyObject *)PyErr_NoMemory(); g->gc.gc_next = NULL; allocated++; |
const size_t nbytes = sizeof(PyGC_Head) + basicsize; PyGC_Head *g = PyObject_MALLOC(nbytes); if (g == NULL) return (PyObject *)PyErr_NoMemory(); g->gc.gc_next = NULL; allocated++; |
Line 112: | Line 177: |
* allocated是个全局变量,记录从上次回收以来 * 分配的对象的数目,而 threshold0=700 * / |
*allocated是个全局变量,记录从上次回收以来 *分配的对象的数目,而 threshold0=700 */ |
Line 116: | Line 181: |
. enabled && threshold0 && !collecting && !PyErr_Occurred()) { | enabled && threshold0 && !collecting && !PyErr_Occurred()) { |
Line 118: | Line 186: |
. collect_generations(); | collect_generations(); |
Line 120: | Line 188: |
} op = FROM_GC(g); | } op = FROM_GC(g); |
Line 122: | Line 191: |
. op = PyObject_MALLOC(basicsize); if (op == NULL) . return (PyObject *)PyErr_NoMemory(); |
op = PyObject_MALLOC(basicsize); if (op == NULL) return (PyObject *)PyErr_NoMemory(); |
Line 126: | Line 196: |
. return op; |
return op; |
Line 134: | Line 203: |
PyObject * _PyObject_GC_New(PyTypeObject *tp) { . PyObject *op = _PyObject_GC_Malloc(tp, 0); if (op != NULL) . op = PyObject_INIT(op, tp); |
#!cplusplus PyObject * _PyObject_GC_New(PyTypeObject *tp) { PyObject *op = _PyObject_GC_Malloc(tp, 0); if (op != NULL) op = PyObject_INIT(op, tp); |
我是个懒人啊,老早就研究过一小点gc,但是一看c语言代码就郁闷了。
今天下午把一直想试试的leo弄下来玩,顺便下下来pythonGC.leo,一会就把整个脉络弄清楚了,leo在阅读复杂代码方面确实不错,其他的好处,挖掘中...,呵呵
如果对gc的一些基本概念还不了解,建议先看我以前写的一篇blog。下面开始:
1 // This is the key function: reading it shows roughly how a collection pass proceeds.
2
3 static long
4 collect(PyGC_Head *young, PyGC_Head *old)
5 {
6 /* young is the generation being collected; objects still in use are moved into old */
7 long n = 0;
8 long m = 0;
9 PyGC_Head reachable;
10 PyGC_Head unreachable;
11 PyGC_Head finalizers;
12 PyGC_Head *gc;
13
14 if (debug & DEBUG_STATS) {
15 PySys_WriteStderr(
16 "gc: collecting generation %d...\n"
17 "gc: objects in each generation: %ld %ld %ld\n",
18 generation,
19 gc_list_size(&_PyGC_generation0),
20 gc_list_size(&generation1),
21 gc_list_size(&generation2));
22 }
23
24 /* Using ob_refcnt and gc_refs, work out which objects in the container set
25 * are reachable from outside that set (i.e. the refcount is still greater
26 * than 0 after all references from inside the containers are accounted for) */
27 update_refs(young);
28 subtract_refs(young);
29
30 /* Move every object reachable from outside into the
31 * reachable set (ie. gc_refs > 0). Then
32 * move every object reachable from objects already in the reachable set
33 */
34 gc_list_init(&reachable);
35 move_roots(young, &reachable);
36 move_root_reachable(&reachable);
37
38 /* Move the unreachable objects to a temporary list;
39 * after this point new objects can be allocated again */
40 gc_list_init(&unreachable);
41 gc_list_move(young, &unreachable);
42
43 /* Move the reachable objects into the next generation */
44 gc_list_merge(&reachable, old);
45
46 /* Move objects reachable from finalizers; we cannot safely delete them yet.
47 * Python programmers should take care not to create such things.
48 * In Python terms, finalizers are instances of objects that have a __del__ method. */
49 gc_list_init(&finalizers);
50 move_finalizers(&unreachable, &finalizers);
51 move_finalizer_reachable(&finalizers);
52
53 /* Gather statistics on the collectable objects that were found,
54 * then print debugging information */
55 for (gc = unreachable.gc.gc_next; gc != &unreachable;
56 gc = gc->gc.gc_next) {
57 m++;
58 if (debug & DEBUG_COLLECTABLE) {
59 debug_cycle("collectable", FROM_GC(gc));
60 }
61 }
62 /* Call tp_clear on each collectable object. This can break reference cycles
63 * and may also cause some finalizer objects to be freed */
64 delete_garbage(&unreachable, old);
65
66 /* Gather statistics on the uncollectable objects.
67 * Print debugging information. */
68 for (gc = finalizers.gc.gc_next; gc != &finalizers;
69 gc = gc->gc.gc_next) {
70 n++;
71 if (debug & DEBUG_UNCOLLECTABLE) {
72 debug_cycle("uncollectable", FROM_GC(gc));
73 }
74 }
75 if (debug & DEBUG_STATS) {
76 if (m == 0 && n == 0) {
77 PySys_WriteStderr("gc: done.\n");
78 }
79 else {
80 PySys_WriteStderr(
81 "gc: done, %ld unreachable, %ld uncollectable.\n",
82 n+m, n);
83 }
84 }
85
86 /* Append the finalizers to a reachable Python garbage list;
87 * programmers must deal with this themselves if they insist on creating such structures. */
88 handle_finalizers(&finalizers, old);
89
90 if (PyErr_Occurred()) {
91 if (gc_str == NULL) {
92 gc_str = PyString_FromString("garbage collection");
93 }
94 PyErr_WriteUnraisable(gc_str);
95 Py_FatalError("unexpected exception during garbage collection");
96 }
97 allocated = 0;
98 return n+m;
99 }
注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧)。下面一段是分代机制:
1 static long
2 collect_generations(void)
3 {
4 /* Being static, these counters persist across calls, much like globals.
5 * collections0 counts how many times generation 0 has been collected.
6 * threshold1 is how many generation-0 collections happen before generation 1 is collected.
7 * The article states that threshold1 = threshold2 = 10 (defined outside this listing).
8 * Core idea: start with generation 0; once its count exceeds threshold1 (tested with >), collect generation 1;
9 * once that count exceeds threshold2, collect generation 2 once, then go back to generation 0.
10 */
11 static long collections0 = 0;
12 static long collections1 = 0;
13 long n = 0;
14
15
16 if (collections1 > threshold2) {
17 generation = 2;
18 gc_list_merge(&_PyGC_generation0, &generation2);
19 gc_list_merge(&generation1, &generation2);
20 if (generation2.gc.gc_next != &generation2) {
21 n = collect(&generation2, &generation2);
22 }
23 collections1 = 0;
24 }
25 else if (collections0 > threshold1) {
26 generation = 1;
27 collections1++;
28 gc_list_merge(&_PyGC_generation0, &generation1);
29 if (generation1.gc.gc_next != &generation1) {
30 n = collect(&generation1, &generation2);
31 }
32 collections0 = 0;
33 }
34 else {
35 generation = 0;
36 collections0++;
37 if (_PyGC_generation0.gc.gc_next != &_PyGC_generation0) {
38 n = collect(&_PyGC_generation0, &generation1);
39 }
40 }
41 return n;
42 }
再往上走就是_PyObject_GC_Malloc了,这里大家可以看到垃圾回收是什么条件下触发的。
1 PyObject *
2 _PyObject_GC_Malloc(PyTypeObject *tp, int nitems)
3 {
4 PyObject *op;
5 const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems);
6 #ifdef WITH_CYCLE_GC
7 const size_t nbytes = sizeof(PyGC_Head) + basicsize;
8 PyGC_Head *g = PyObject_MALLOC(nbytes);
9 if (g == NULL)
10 return (PyObject *)PyErr_NoMemory();
11 g->gc.gc_next = NULL;
12 allocated++;
13 /*
14 * allocated is a global variable counting the objects allocated
15 * since the last collection; the article states that threshold0 = 700
16 */
17 if (allocated > threshold0 &&
18 enabled &&
19 threshold0 &&
20 !collecting &&
21 !PyErr_Occurred()) {
22 collecting = 1;
23 collect_generations();
24 collecting = 0;
25 }
26 op = FROM_GC(g);
27 #else
28 op = PyObject_MALLOC(basicsize);
29 if (op == NULL)
30 return (PyObject *)PyErr_NoMemory();
31
32 #endif
33 return op;
34 }
如果还要往上走,就是 _PyObject_GC_New 了:它先调用 _PyObject_GC_Malloc 分配内存,然后用 PyObject_INIT 初始化,所有的对象模型都是一个样。
当然内部还有许多细节了,比如确定一个对象是否可达,处理引用循环,处理finalizers (想了半天还是不知道该怎么翻译这个词) 等等。大家自己用leo看吧,保证越看越爽,哈哈。