##language:zh #pragma section-numbers off ##含有章节索引导航的 ZPyUG 文章通用模板 <> ## 默许导航,请保留 <> = 提取文档中的图象标识信息 = {{{ Shuguang Yang reply-to python-cn@googlegroups.com to python-cn`CPyUG`华蟒用户组 date Wed, Aug 20, 2008 at 15:50 subject [CPyUG:62863] 用python提取文档中的图象,图象的标识信息 }}} ##startInc 应用 Python 解决一些实际问题( * '''http://www.ibm.com/developerworks/cn/linux/tips/l-python/''' 的提取嵌入在文档中的图像部分,原文中的程序思路正确但代码不够简洁,不够pythonic。 改进后的代码如下: {{{ #!python import sys import os import string headers = [('JFIF', 6, 'jpg'), ('GIF', 0, 'gif'), ('PNG', 1, 'png')] #不同图片格式的标识信息 marker = [] filename = '/path/to/your/file' try: fid = open(filename, 'rb') except: sys.exit(1) s = 0 for line in fid: #按行迭代 for flag, offset, ext in headers: index = string.find(line, flag) if index > 0: pos = s + index - offset marker.append((pos, ext)) s += len(line) fid.seek(0) imgnum = 0 if len(marker) == 0: print 'No images included in this document' sys.exit(1) for info in marker: thispos = info[0] thisext = info[1] index = marker.index(info) try: nextinfo = marker[index + 1] nextpos = nextinfo[0] gap = nextpos - thispos except IndexError: nextpos = s gap = nextpos - thispos fid.seek(thispos) data = fid.read(gap) imgname = 'imgname%02d.%s' % (index, thisext) fid1 = open(imgname, 'wb') fid1.write(data) fid1.close() imgnum += 1 fid.close() print '%02d images have been extracted' % imgnum }}} ##endInc ---- '''反馈''' 创建 by -- ZoomQuiet [<>]