::-- ZoomQuiet [2007-11-10 07:39:01]

CPUG联盟::

CPUG::门户plone

BPUG

SPUG

ZPUG

SpreadPython Python宣传

1. zshelve 对象持久模块

{{{Jiahua Huang <[email protected]> reply-to [email protected], to "python. cn" <[email protected]>, date Nov 8, 2007 5:41 PM subject [CPyUG:34726] 贴个 zlib 压缩的 zshelve 对象持久模块 }}}

这个模块给 Python 标准库的 shelve.py 添加了 zlib 压缩,减小数据库文件体积,以改善磁盘 I/O 性能。

1.1. 发布

http://zshelve.googlecode.com/svn/trunk/

加了个命令行工具:

huahua@huahua:tmp$ zshelve
commandline tool for zshelve databases

Usage: zshelve  FILE  dump                    Dump the data tree
      zshelve  FILE  keys                    List of keys
      zshelve  FILE  get          KEY        Dump value for key
      zshelve  FILE  set          KEY VALUE  Set db[key] = value
      zshelve  FILE  has_key      KEY        True if database has the key
      zshelve  FILE  search_key   KEY        Search key
      zshelve  FILE  search_value VALUE      Search value

huahua@huahua:tmp$ zshelve set tes.db a 1
huahua@huahua:tmp$ zshelve dump tes.db
   |- a
   |    |- 1
huahua@huahua:tmp$ zshelve set tes.db b "dict(a=1,b=2,c=3,d={'s':'4'})"
huahua@huahua:tmp$ zshelve dump tes.db
   |- a
   |    |- 1
   |- b
   |    |- a
   |    |    |- 1
   |    |- c
   |    |    |- 3
   |    |- b
   |    |    |- 2
   |    |- d
   |    |    |- s
   |    |    |    |- 4

对比::

>>> import zshelve
>>> import shelve
>>> zdb = zshelve.open('/tmp/zshelve.db')
>>> db  = shelve.open('/tmp/shelve.db')
>>> zdb['1'] = dict(a='0123456789'*10000000)
>>> db['1']  = dict(a='0123456789'*10000000)
>>> zdb.sync()
>>> db.sync()

看看文件大小差异::

huahua@huahua:zshelve$ ll /tmp/*shelve.db
-rw-r--r-- 1 huahua huahua  96M 2007-11-08 17:36 /tmp/shelve.db
-rw-r--r-- 1 huahua huahua 204K 2007-11-08 17:36 /tmp/zshelve.db

1.2. 补丁::

--- shelve.py   2007-05-03 00:56:36.000000000 +0800
+++ zshelve.py  2007-11-08 17:25:59.000000000 +0800
@@ -70,6 +70,7 @@ except ImportError:

 import UserDict
 import warnings
+import zlib        ## use zlib to compress dbfile

 __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]

@@ -80,13 +81,14 @@ class Shelf(UserDict.DictMixin):
    See the module's __doc__ string for an overview of the interface.
    """

-    def __init__(self, dict, protocol=None, writeback=False):
+    def __init__(self, dict, protocol=None, writeback=False, compresslevel=2):
        self.dict = dict
        if protocol is None:
             protocol = 0
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
+        self.compresslevel = compresslevel

    def keys(self):
        return self.dict.keys()
@@ -109,7 +111,7 @@ class Shelf(UserDict.DictMixin):
        try:
            value = self.cache[key]
        except KeyError:
-            f = StringIO(self.dict[key])
+            f = StringIO(zlib.decompress(self.dict[key]))
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
@@ -121,7 +123,7 @@ class Shelf(UserDict.DictMixin):
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
-        self.dict[key] = f.getvalue()
+        self.dict[key] = zlib.compress(f.getvalue(), self.compresslevel)

    def __delitem__(self, key):
        del self.dict[key]
@@ -168,32 +170,32 @@ class BsdDbShelf(Shelf):
    See the module's __doc__ string for an overview of the interface.
    """

-    def __init__(self, dict, protocol=None, writeback=False):
-        Shelf.__init__(self, dict, protocol, writeback)
+    def __init__(self, dict, protocol=None, writeback=False, compresslevel=2):
+        Shelf.__init__(self, dict, protocol, writeback, compresslevel)

    def set_location(self, key):
        (key, value) = self.dict.set_location(key)
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
        return (key, Unpickler(f).load())

    def next(self):
        (key, value) = self.dict.next()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
        return (key, Unpickler(f).load())

    def previous(self):
        (key, value) = self.dict.previous()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
        return (key, Unpickler(f).load())

    def first(self):
        (key, value) = self.dict.first()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
        return (key, Unpickler(f).load())

    def last(self):
        (key, value) = self.dict.last()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
        return (key, Unpickler(f).load())


@@ -204,12 +206,12 @@ class DbfilenameShelf(Shelf):
    See the module's __doc__ string for an overview of the interface.
    """

-    def __init__(self, filename, flag='c', protocol=None, writeback=False):
+    def __init__(self, filename, flag='c', protocol=None, writeback=False, compresslevel=2):
        import anydbm
-        Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback)
+        Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback, compresslevel)


-def open(filename, flag='c', protocol=None, writeback=False):
+def open(filename, flag='c', protocol=None, writeback=False, compresslevel=2):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
@@ -222,4 +224,4 @@ def open(filename, flag='c', protocol=No
    See the module's __doc__ string for an overview of the interface.
    """

-    return DbfilenameShelf(filename, flag, protocol, writeback)
+    return DbfilenameShelf(filename, flag, protocol, writeback, compresslevel)