Differences between revisions 2 and 3
Revision 2 as of 2006-04-21 18:43:58
Size: 6188
Editor: HuangYi
Comment:
Revision 3 as of 2006-04-21 18:50:29
Size: 6347
Editor: HuangYi
Comment:
Deletions are marked like this. Additions are marked like this.
Line 8: Line 8:
#!cplusplus
Line 10: Line 12:
static long collect(PyGC_Head *young, PyGC_Head *old) { /*young是当前收集的一代,还有用的对象放到old这一代*/

 .
long n = 0; long m = 0; PyGC_Head reachable; PyGC_Head unreachable; PyGC_Head finalizers; PyGC_Head *gc;
static long
collect(PyGC_Head *young, PyGC_Head *old)
{
/*young是当前收集的一代,还有用的对象放到old这一代*/
 long n = 0;
long m = 0;
PyGC_Head reachable;
PyGC_Head unreachable;
PyGC_Head finalizers;
 PyGC_Head *gc;
Line 14: Line 24:
  . PySys_WriteStderr(
   . "gc: collecting generation %d...\n" "gc: objects in each generation: %ld %ld %ld\n", generation,
   gc_list_size(&_PyGC_generation0), gc_list_size(&generation1), gc_list_size(&generation2));
 } /* 使用 ob_refcnt 和 gc_refs, 计算容器集合中哪些对象
  PySys_WriteStderr(
   "gc: collecting generation %d...\n"
  
"gc: objects in each generation: %ld %ld %ld\n",
  
generation,
   gc_list_size(&_PyGC_generation0),
  
gc_list_size(&generation1),
  
gc_list_size(&generation2));
 }

/* 使用 ob_refcnt 和 gc_refs, 计算容器集合中哪些对象
Line 20: Line 36:
 update_refs(young); subtract_refs(young); /* 将所有从外部可达的对象移动到
  * reachable 集合(ie. gc_refs > 0). 然后,
 update_refs(young);
subtract_refs(young);

/* 将所有从外部可达的对象移动到
  * reachable 集合(ie. gc_refs > 0). 然后, 
Line 23: Line 42:
  * /
 gc_list_init(&reachable); move_roots(young, &reachable); move_root_reachable(&reachable); /* 将不可达的对象移动到临时列表,
  */
 gc_list_init(&reachable);
move_roots(young, &reachable);
move_root_reachable(&reachable);

/* 将不可达的对象移动到临时列表, 
Line 26: Line 49:
 gc_list_init(&unreachable); gc_list_move(young, &unreachable); /* 将可达的对象移动到下一代 */
 gc_list_merge(&reachable, old); /* 移动从finalizers可达的对象, 我们还不能安全的删除他们
 gc_list_init(&unreachable);
gc_list_move(young, &unreachable);

/* 将可达的对象移动到下一代 */
 gc_list_merge(&reachable, old);

/* 移动从finalizers可达的对象, 我们还不能安全的删除他们
Line 30: Line 58:
 gc_list_init(&finalizers); move_finalizers(&unreachable, &finalizers); move_finalizer_reachable(&finalizers); /* 收集已发现的可回收的对象的统计数据,  gc_list_init(&finalizers);
move_finalizers(&unreachable, &finalizers);
move_finalizer_reachable(&finalizers);

/* 收集已发现的可回收的对象的统计数据,
Line 33: Line 65:
  . gc = gc->gc.gc_next) {
  . m++;
   gc = gc->gc.gc_next) {
  m++;
Line 36: Line 68:
   . debug_cycle("collectable", FROM_GC(gc));
  }
 } /* 对每个可回收的对象调用tp_clear. 它可以打破引用循环
   debug_cycle("collectable", FROM_GC(gc));
  }
 }
/* 对每个可回收的对象调用tp_clear. 它可以打破引用循环
Line 40: Line 73:
 delete_garbage(&unreachable, old); /* 收集不可回收对象的统计数据。  delete_garbage(&unreachable, old);

/* 收集不可回收对象的统计数据。
Line 43: Line 78:
  . gc = gc->gc.gc_next) {
  . n++;
   gc = gc->gc.gc_next) {
  n++;
Line 46: Line 81:
   . debug_cycle("uncollectable", FROM_GC(gc));    debug_cycle("uncollectable", FROM_GC(gc));
Line 50: Line 85:
  . if (m == 0 && n == 0) {
   . PySys_WriteStderr("gc: done.\n");
  } else {
   . PySys_WriteStderr(
    . "gc: done, %ld unreachable, %ld uncollectable.\n", n+m, n);
  }
 } /* 将finalizers加到一个可达的 Python 垃圾列表
  if (m == 0 && n == 0) {
   PySys_WriteStderr("gc: done.\n");
  }
 
else {
   PySys_WriteStderr(
       "gc: done, %ld unreachable, %ld uncollectable.\n",
      
n+m, n);
  }
 }

/* 将finalizers加到一个可达的 Python 垃圾列表
Line 59: Line 98:
Line 60: Line 100:
  . if (gc_str == NULL) {
   . gc_str = PyString_FromString("garbage collection");
  }
  PyErr_WriteUnraisable(gc_str); Py_FatalError("unexpected exception during garbage collection");
 } allocated = 0; return n+m;
}
}}}

注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧) 下面一段是分代机制:   {{{
static long
collect_generations(void) {
  if (gc_str == NULL) {
      gc_str = PyString_FromString("garbage collection");
  }
  PyErr_WriteUnraisable(gc_str);
 
Py_FatalError("unexpected exception during garbage collection");
 }
allocated = 0;
return n+m;
}
}}}

注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧)
下面一段是分代机制:

{{{
#!cplusplus
static long
collect_generations(void)
{
Line 78: Line 125:
 * / static long collections0 = 0; static long collections1 = 0; long n = 0;  */
static long collections0 = 0;
static long collections1 = 0;
long n = 0;

Line 80: Line 132:
  . generation = 2;
  gc_list_merge(&_PyGC_generation0, &generation2); gc_list_merge(&generation1, &generation2); if (generation2.gc.gc_next != &generation2) {
   . n = collect(&generation2, &generation2);
  } collections1 = 0;
  generation = 2;
  gc_list_merge(&_PyGC_generation0, &generation2);
 
gc_list_merge(&generation1, &generation2);
 
if (generation2.gc.gc_next != &generation2) {
   n = collect(&generation2, &generation2);
  }
 
collections1 = 0;
Line 86: Line 141:
  . generation = 1; collections1++;
  gc_list_merge(&_PyGC_generation0, &generation1); if (generation1.gc.gc_next != &generation1) {
   . n = collect(&generation1, &generation2);
  } collections0 = 0;
 } else {
  . generation = 0; collections0++;
  generation = 1;
 
collections1++;
  gc_list_merge(&_PyGC_generation0, &generation1);
 
if (generation1.gc.gc_next != &generation1) {
   n = collect(&generation1, &generation2);
  }
 
collections0 = 0;
 }
else {
  generation = 0;
  collections0++;
Line 93: Line 153:
   . n = collect(&_PyGC_generation0, &generation1);
  }
 } return n;
   n = collect(&_PyGC_generation0, &generation1);
  }
 }
return n;
Line 102: Line 163:
PyObject * _PyObject_GC_Malloc(PyTypeObject *tp, int nitems) {

 .
PyObject *op; const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems);
#!cplusplus
PyObject *
_PyObject_GC_Malloc(PyTypeObject *tp, int nitems)
{
 PyObject *op;
const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems);
Line 106: Line 170:

 .
const size_t nbytes = sizeof(PyGC_Head) + basicsize;
 PyGC_Head *g = PyObject_MALLOC(nbytes); if (g == NULL)
  . return (PyObject *)PyErr_NoMemory();
 g->gc.gc_next = NULL; allocated++; 
 const size_t nbytes = sizeof(PyGC_Head) + basicsize;
 PyGC_Head *g = PyObject_MALLOC(nbytes);
if (g == NULL)
  return (PyObject *)PyErr_NoMemory();
 g->gc.gc_next = NULL;
allocated++;
Line 112: Line 177:
  * allocated是个全局变量,记录从上次回收以来
  * 分配的对象的数目,而 threshold0=700
  * /
  *allocated是个全局变量,记录从上次回收以来
  *分配的对象的数目,而 threshold0=700
  */
Line 116: Line 181:
   . enabled && threshold0 && !collecting && !PyErr_Occurred()) {       enabled &&
     
threshold0 &&
     
!collecting &&
     
!PyErr_Occurred()) {
Line 118: Line 186:
   . collect_generations();    collect_generations();
Line 120: Line 188:
 } op = FROM_GC(g);  }
op = FROM_GC(g);
Line 122: Line 191:

 .
op = PyObject_MALLOC(basicsize); if (op == NULL)
  . return (PyObject *)PyErr_NoMemory();
 op = PyObject_MALLOC(basicsize);
if (op == NULL)
  return (PyObject *)PyErr_NoMemory();
Line 126: Line 196:

 .
return op;
 return op;
Line 134: Line 203:
PyObject * _PyObject_GC_New(PyTypeObject *tp) {

 .
PyObject *op = _PyObject_GC_Malloc(tp, 0); if (op != NULL)
  . op = PyObject_INIT(op, tp);
#!cplusplus
PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
 PyObject *op = _PyObject_GC_Malloc(tp, 0);
if (op != NULL)
  op = PyObject_INIT(op, tp);

我是个懒人啊,老早就研究过一小点gc,但是一看c语言代码就郁闷了。

今天下午把一直想试试的leo弄下来玩,顺便下载了pythonGC.leo,一会儿就把整个脉络弄清楚了。leo在阅读复杂代码方面确实不错,其他的好处还在挖掘中,呵呵。

如果对一些gc基本不了解,建议先看我以前写的一篇blog,开始了:

   1 // This function is the key one; reading it shows roughly how a collection pass proceeds
   2 
   3 static long
   4 collect(PyGC_Head *young, PyGC_Head *old)
   5 {
   6 /* young is the generation being collected; objects still in use are moved into old */
   7  long n = 0;  /* counts entries on the finalizers list (reported as uncollectable) */
   8  long m = 0;  /* counts entries on the unreachable list (reported as collectable) */
   9  PyGC_Head reachable;
  10  PyGC_Head unreachable;
  11  PyGC_Head finalizers;
  12  PyGC_Head *gc;
  13 
  14  if (debug & DEBUG_STATS) {
  15   PySys_WriteStderr(
  16    "gc: collecting generation %d...\n"
  17    "gc: objects in each generation: %ld %ld %ld\n",
  18    generation,
  19    gc_list_size(&_PyGC_generation0),
  20    gc_list_size(&generation1),
  21    gc_list_size(&generation2));
  22  }
  23 
  24  /* Using ob_refcnt and gc_refs, work out which objects in the container
  25   * set are reachable from outside the set (i.e. the refcount is still
  26   * greater than 0 once all references inside the set are accounted for) */
  27  update_refs(young);
  28  subtract_refs(young);
  29 
  30  /* Move every object reachable from outside into the
  31   * reachable set (ie. gc_refs > 0).  Then
  32   * move every object reachable from objects already in the reachable set
  33   */
  34  gc_list_init(&reachable);
  35  move_roots(young, &reachable);
  36  move_root_reachable(&reachable);
  37 
  38  /* Move the unreachable objects to a temporary list;
  39   * after this point new objects may be allocated */
  40  gc_list_init(&unreachable);
  41  gc_list_move(young, &unreachable);
  42 
  43  /* Move the reachable objects into the next generation */
  44  gc_list_merge(&reachable, old);
  45 
  46  /* Move objects reachable from finalizers; we cannot safely delete them
  47   * yet.  Python programmers should take care not to create such things.
  48   * In Python terms, finalizers are instances that have a __del__ method. */
  49  gc_list_init(&finalizers);
  50  move_finalizers(&unreachable, &finalizers);
  51  move_finalizer_reachable(&finalizers);
  52 
  53  /* Gather statistics on the collectable objects found so far,
  54   * then print debugging information */
  55  for (gc = unreachable.gc.gc_next; gc != &unreachable;
  56    gc = gc->gc.gc_next) {
  57   m++;
  58   if (debug & DEBUG_COLLECTABLE) {
  59    debug_cycle("collectable", FROM_GC(gc));
  60   }
  61  }
  62  /* Call tp_clear on every collectable object.  This can break reference
  63   * cycles, and may also cause some finalizer objects to be freed */
  64  delete_garbage(&unreachable, old);
  65 
  66  /* Gather statistics on the uncollectable objects.
  67   * Print debugging information. */
  68  for (gc = finalizers.gc.gc_next; gc != &finalizers;
  69    gc = gc->gc.gc_next) {
  70   n++;
  71   if (debug & DEBUG_UNCOLLECTABLE) {
  72    debug_cycle("uncollectable", FROM_GC(gc));
  73   }
  74  }
  75  if (debug & DEBUG_STATS) {
  76   if (m == 0 && n == 0) {
  77    PySys_WriteStderr("gc: done.\n");
  78   }
  79   else {
  80    PySys_WriteStderr(
  81        "gc: done, %ld unreachable, %ld uncollectable.\n",
  82        n+m, n);
  83   }
  84  }
  85 
  86  /* Append the finalizers to a reachable Python garbage list.
  87   * Programmers must deal with these themselves if they insist on creating such structures. */
  88  handle_finalizers(&finalizers, old);
  89 
  90  if (PyErr_Occurred()) {
  91   if (gc_str == NULL) {
  92       gc_str = PyString_FromString("garbage collection");
  93   }
  94   PyErr_WriteUnraisable(gc_str);
  95   Py_FatalError("unexpected exception during garbage collection");
  96  }
  97  allocated = 0;  /* restart the allocation counter now that a collection has run */
  98  return n+m;  /* total of both counts: collectable plus uncollectable */
  99 }

注释够清晰吧(可能翻译有点烂,受不了的话就赶紧去下个leo玩吧)。下面一段是分代机制:

   1 static long
   2 collect_generations(void)
   3 {
   4 /* The statics below behave like globals: they persist across calls.
   5  * collections0 counts how many times generation 0 has been collected.
   6  * threshold1 is how many generation-0 collections trigger a generation-1 collection.
   7  * Per the author's note, threshold1 = threshold2 = 10 (not visible in this excerpt).
   8  * Core idea: start by collecting generation 0; after about 10 of those collect generation 1;
   9  * after about 10 of those collect generation 2; then go back to generation 0.
  10  */
  11  static long collections0 = 0;
  12  static long collections1 = 0;
  13  long n = 0;  /* result of collect(); stays 0 when the chosen generation's list is empty */
  14 
  15 
  16  if (collections1 > threshold2) {
  17   generation = 2;
  18   gc_list_merge(&_PyGC_generation0, &generation2);
  19   gc_list_merge(&generation1, &generation2);
  20   if (generation2.gc.gc_next != &generation2) {
  21    n = collect(&generation2, &generation2);
  22   }
  23   collections1 = 0;
  24  }
  25  else if (collections0 > threshold1) {
  26   generation = 1;
  27   collections1++;
  28   gc_list_merge(&_PyGC_generation0, &generation1);
  29   if (generation1.gc.gc_next != &generation1) {
  30    n = collect(&generation1, &generation2);
  31   }
  32   collections0 = 0;
  33  }
  34  else {
  35   generation = 0;
  36   collections0++;
  37   if (_PyGC_generation0.gc.gc_next != &_PyGC_generation0) {
  38    n = collect(&_PyGC_generation0, &generation1);
  39   }
  40  }
  41  return n;
  42 }

再往上走就是_PyObject_GC_Malloc了,这里大家可以看到垃圾回收是什么条件下触发的。

   1 PyObject *
   2 _PyObject_GC_Malloc(PyTypeObject *tp, int nitems)
   3 {
   4  PyObject *op;
   5  const size_t basicsize = _PyObject_VAR_SIZE(tp, nitems);
   6 #ifdef WITH_CYCLE_GC
   7  const size_t nbytes = sizeof(PyGC_Head) + basicsize;  /* room for the GC header plus the object */
   8  PyGC_Head *g = PyObject_MALLOC(nbytes);
   9  if (g == NULL)
  10   return (PyObject *)PyErr_NoMemory();
  11  g->gc.gc_next = NULL;
  12  allocated++;
  13  /*
  14   * allocated is a global variable counting the objects allocated
  15   * since the last collection; threshold0 = 700 (per the author's note)
  16   */
  17   if (allocated > threshold0 &&
  18       enabled &&
  19       threshold0 &&
  20       !collecting &&
  21       !PyErr_Occurred()) {
  22   collecting = 1;  /* set for the duration of the call; the !collecting test above skips collection while it is set */
  23    collect_generations();
  24   collecting = 0;
  25  }
  26  op = FROM_GC(g);
  27 #else
  28  op = PyObject_MALLOC(basicsize);  /* without WITH_CYCLE_GC: plain allocation, no GC header */
  29  if (op == NULL)
  30   return (PyObject *)PyErr_NoMemory();
  31 
  32 #endif
  33  return op;
  34 }

如果还要往上走,就是

   1 PyObject *
   2 _PyObject_GC_New(PyTypeObject *tp)
   3 {
   4  PyObject *op = _PyObject_GC_Malloc(tp, 0);  /* allocate through _PyObject_GC_Malloc with nitems = 0 */
   5  if (op != NULL)
   6   op = PyObject_INIT(op, tp);  /* initialise only when the allocation succeeded */
   7  return op;  /* NULL is passed through unchanged when the allocation failed */
   8 }

了,分配内存然后初始化,所有的对象模型都是一个样。

当然内部还有许多细节了,比如确定一个对象是否可达,处理引用循环,处理finalizers (想了半天还是不知道该怎么翻译这个词) 等等。大家自己用leo看吧,保证越看越爽,哈哈。

python_ref_circle_gc (last edited 2009-12-25 07:12:49 by localhost)