Xapian 初体验之 hello xapian

文件夹结构:

~/helloxapian

~/helloxapian/indexfiles.py

~/helloxapian/search.py

~/helloxapian/test

~/helloxapian/test/hello.txt

~/helloxapian/test/world.txt

~/helloxapian/test/abc.txt

hello.txt文件内容:

world.txt文件内容:

abc.txt文件内容:

#!/usr/bin/env python
# coding=utf-8
"""indexfiles.py: index every ``.txt`` file of a directory into Xapian.

Usage::

    python indexfiles.py DIRECTORY

Each ``*.txt`` file directly inside DIRECTORY becomes one document in the
Xapian database at ``DBPATH`` (created if missing, opened otherwise):

* the document data is the whitespace-stripped file content;
* value slot 0 holds the file path (``DIRECTORY + '/' + name``);
* the file name without its ``.txt`` suffix is added as a plain term;
* every ASCII alphanumeric run in the content is truncated to
  ``MAX_TERM_LENGTH`` characters, lower-cased, stemmed with the English
  stemmer and added as a posting at its ordinal position.

A file that cannot be read or indexed is reported on stderr and skipped;
any other failure aborts the run with exit status 1.

The script runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import xapian
import string  # no longer used (str.strip() replaced string.strip); kept so nothing that expects it breaks
from os import listdir
import re

# One indexable word: a run of ASCII letters and digits.
rex = re.compile('[a-zA-Z0-9]+')

# Words longer than this are cut down before being lower-cased and stemmed.
MAX_TERM_LENGTH = 64
# Database location, relative to the current working directory.
DBPATH = 'indexdb'


def _build_document(path, name, stemmer):
    """Return a ``xapian.Document`` describing the text file at *path*.

    *name* is the bare file name (ending in ``.txt``); *stemmer* is the
    callable applied to each lower-cased word. Errors raised while reading
    the file or filling the document propagate to the caller.
    """
    # The context manager closes the handle even when read() raises.
    with open(path, 'r') as handle:
        content = handle.read().strip()

    doc = xapian.Document()
    doc.set_data(content)
    doc.add_value(0, path)
    doc.add_term(name[:-4])  # file name without the '.txt' suffix
    for pos, word in enumerate(rex.findall(content)):
        # Slicing is a no-op for words already within the limit.
        doc.add_posting(stemmer(word[:MAX_TERM_LENGTH].lower()), pos)
    return doc


if len(sys.argv) < 2:
    sys.stderr.write("缺少参数,请提供需要建立索引的目录\n")
    sys.exit(1)

try:
    database = xapian.WritableDatabase(DBPATH, xapian.DB_CREATE_OR_OPEN)
    stemmer = xapian.Stem("english")
    directory = sys.argv[1]
    for name in listdir(directory):
        if not name.endswith('.txt'):
            continue
        # Plain concatenation keeps the stored slot-0 value identical to
        # what earlier runs of this script wrote.
        filename = directory + '/' + name
        try:
            database.add_document(_build_document(filename, name, stemmer))
        except Exception as err:
            # Best effort: one bad file must not stop the run, but the
            # failure is reported instead of being silently discarded.
            sys.stderr.write("Skipping %s: %s\n" % (filename, err))
except Exception as e:
    sys.stderr.write("Exception: %s\n" % str(e))
    sys.exit(1)