::-- ZoomQuiet [2007-11-10 07:39:01]
1. zshelve 对象持久模块
{{{
Jiahua Huang <[email protected]>
reply-to [email protected],
to "python. cn" <[email protected]>,
date Nov 8, 2007 5:41 PM
subject [CPyUG:34726] 贴个 zlib 压缩的 zshelve 对象持久模块
}}}
这个模块给 Python 标准库的 shelve.py 添加了 zlib 压缩，减小数据库文件体积，以改善磁盘 I/O 性能。
1.1. 发布
http://zshelve.googlecode.com/svn/trunk/
加了个命令行工具:
huahua@huahua:tmp$ zshelve
commandline tool for zshelve databases

Usage: zshelve FILE dump                    Dump the data tree
       zshelve FILE keys                    List of keys
       zshelve FILE get          KEY        Dump value for key
       zshelve FILE set          KEY VALUE  Set db[key] = value
       zshelve FILE has_key      KEY        True if database has the key
       zshelve FILE search_key   KEY        Search key
       zshelve FILE search_value VALUE      Search value

huahua@huahua:tmp$ zshelve set tes.db a 1
huahua@huahua:tmp$ zshelve dump tes.db
|- a
| |- 1
huahua@huahua:tmp$ zshelve set tes.db b "dict(a=1,b=2,c=3,d={'s':'4'})"
huahua@huahua:tmp$ zshelve dump tes.db
|- a
| |- 1
|- b
| |- a
| | |- 1
| |- c
| | |- 3
| |- b
| | |- 2
| |- d
| | |- s
| | | |- 4
对比::
>>> import zshelve
>>> import shelve
>>> zdb = zshelve.open('/tmp/zshelve.db')
>>> db = shelve.open('/tmp/shelve.db')
>>> zdb['1'] = dict(a='0123456789'*10000000)
>>> db['1'] = dict(a='0123456789'*10000000)
>>> zdb.sync()
>>> db.sync()
看看文件大小差异::
huahua@huahua:zshelve$ ll /tmp/*shelve.db
-rw-r--r-- 1 huahua huahua  96M 2007-11-08 17:36 /tmp/shelve.db
-rw-r--r-- 1 huahua huahua 204K 2007-11-08 17:36 /tmp/zshelve.db
1.2. 补丁::
--- shelve.py 2007-05-03 00:56:36.000000000 +0800
+++ zshelve.py 2007-11-08 17:25:59.000000000 +0800
@@ -70,6 +70,7 @@ except ImportError:
 
 import UserDict
 import warnings
+import zlib ## use zlib to compress dbfile
 
 __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
 
@@ -80,13 +81,14 @@ class Shelf(UserDict.DictMixin):
     See the module's __doc__ string for an overview of the interface.
     """
 
-    def __init__(self, dict, protocol=None, writeback=False):
+    def __init__(self, dict, protocol=None, writeback=False, compresslevel=2):
         self.dict = dict
         if protocol is None:
             protocol = 0
         self._protocol = protocol
         self.writeback = writeback
         self.cache = {}
+        self.compresslevel = compresslevel
 
     def keys(self):
         return self.dict.keys()
@@ -109,7 +111,7 @@ class Shelf(UserDict.DictMixin):
         try:
             value = self.cache[key]
         except KeyError:
-            f = StringIO(self.dict[key])
+            f = StringIO(zlib.decompress(self.dict[key]))
             value = Unpickler(f).load()
             if self.writeback:
                 self.cache[key] = value
@@ -121,7 +123,7 @@ class Shelf(UserDict.DictMixin):
         f = StringIO()
         p = Pickler(f, self._protocol)
         p.dump(value)
-        self.dict[key] = f.getvalue()
+        self.dict[key] = zlib.compress(f.getvalue(), self.compresslevel)
 
     def __delitem__(self, key):
         del self.dict[key]
@@ -168,32 +170,32 @@ class BsdDbShelf(Shelf):
     See the module's __doc__ string for an overview of the interface.
     """
 
-    def __init__(self, dict, protocol=None, writeback=False):
-        Shelf.__init__(self, dict, protocol, writeback)
+    def __init__(self, dict, protocol=None, writeback=False, compresslevel=2):
+        Shelf.__init__(self, dict, protocol, writeback, compresslevel)
 
     def set_location(self, key):
         (key, value) = self.dict.set_location(key)
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
         return (key, Unpickler(f).load())
 
     def next(self):
         (key, value) = self.dict.next()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
         return (key, Unpickler(f).load())
 
     def previous(self):
         (key, value) = self.dict.previous()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
         return (key, Unpickler(f).load())
 
     def first(self):
         (key, value) = self.dict.first()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
         return (key, Unpickler(f).load())
 
     def last(self):
         (key, value) = self.dict.last()
-        f = StringIO(value)
+        f = StringIO(zlib.decompress(value))
         return (key, Unpickler(f).load())
 
 
@@ -204,12 +206,12 @@ class DbfilenameShelf(Shelf):
     See the module's __doc__ string for an overview of the interface.
     """
 
-    def __init__(self, filename, flag='c', protocol=None, writeback=False):
+    def __init__(self, filename, flag='c', protocol=None, writeback=False, compresslevel=2):
         import anydbm
-        Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback)
+        Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback, compresslevel)
 
 
-def open(filename, flag='c', protocol=None, writeback=False):
+def open(filename, flag='c', protocol=None, writeback=False, compresslevel=2):
     """Open a persistent dictionary for reading and writing.
 
     The filename parameter is the base filename for the underlying
@@ -222,4 +224,4 @@ def open(filename, flag='c', protocol=No
     See the module's __doc__ string for an overview of the interface.
     """
 
-    return DbfilenameShelf(filename, flag, protocol, writeback)
+    return DbfilenameShelf(filename, flag, protocol, writeback, compresslevel)