含有章节索引的 *PUG 文章通用模板 ::-- ehu4ever [2005-08-21 01:37:08]
1. 关于Python字符编码的一些经验
这几天为unicode、cp936、gb18030这些个东东搞得身心憔悴,也总算是有了些经验。今天小结一下。
1.1. 处理windows中的unicode文件名
这里所谓的unicode文件名,是指文件名中包含了一些不在中文windows默认字符集gb18030之内的字符,比如©,它的utf-8编码\xc2\xa9占用了16个bit。
- 事情的起因是我想提取一个unicode文件名,在pygtk的GUI上显示。因为pygtk的GUI上只能显示unicode,我最初的解决方法是在model层存储gb18030字符集的文件名,在view层显示的时候转换为unicode。可是这个copyright符号总是显示有问题。
一番研究之后发现,model层里的©已经变成了\xa9少了前一半,看来是取文件名的时候出错。
对于unicode、cp936、gb18030这些理论我不是很懂,但是我知道我的目标是取文件名的时候完整地取出\xc2\xa9,也就是完整地取出一个unicode字符,这样在GUI上显示的时候就不会有问题了,而且也不需要别处的转换。
- 下面是两个示例程序:
1.1.1. 有关情况说明
>>> import os,sys
>>> sys.stdout.encoding
'cp936'
>>> sys.stdin.encoding
'cp936'
>>> dir = 'e:\\PythonSpace\\copyright'
>>> os.listdir(dir)
['John Wiley & Sons ? 2004 - John Wiley And Sons Professional Jakarta Struts.txt', 'winunichanges', 'winunichanges.zip']
>>>
>>> dir = u'e:\\PythonSpace\\copyright'
>>> print os.listdir(dir)
[u'John Wiley & Sons \xa9 2004 - John Wiley And Sons Professional Jakarta Struts.txt', u'winunichanges', u'winunichanges.zip']
>>> os.listdir(dir)
[u'John Wiley & Sons \xa9 2004 - John Wiley And Sons Professional Jakarta Struts.txt', u'winunichanges', u'winunichanges.zip']
>>> print [f.encode('utf8') for f in os.listdir(dir)]
['John Wiley & Sons \xc2\xa9 2004 - John Wiley And Sons Professional Jakarta Struts.txt', 'winunichanges', 'winunichanges.zip']
>>> print '\n'.join([f.encode('utf8') for f in os.listdir(dir)])
John Wiley & Sons 漏 2004 - John Wiley And Sons Professional Jakarta Struts.txt
winunichanges
winunichanges.zip
>>>
1.1.2. 示例1
Toggle line numbers
1 import pygtk
2 pygtk.require('2.0')
3 import gtk, gobject
4
5
# Sample data for the tree: each key becomes a top-level row and its value
# becomes the single child row underneath it.  The "copyright" entry holds
# the two UTF-8 bytes of the copyright sign.
tasks = {
    "Buy groceries": "Go to Asda after work",
    "Do some programming": "Remember to update your software",
    "Power up systems": "Turn on the client but leave the server",
    "Watch some tv": "Remember to catch ER",
    "copyright": "\xc2\xa9",
}
13
14
class GUI_Controller:
    """Top-level window that puts the tree view on screen."""

    def __init__(self):
        """Create the window, fetch the model and view, and show everything.

        Reads the module-level names ``Store`` and ``Display``, which the
        ``__main__`` block assigns before constructing this class.
        """
        window = gtk.Window()
        window.set_title('CellRenderder Example')
        # Closing the window ends the GTK main loop.
        window.connect('destroy', lambda w: gtk.main_quit())
        self.root = window

        self.mdl = Store.get_model()
        self.view = Display.make_view(self.mdl)

        window.add(self.view)
        window.show_all()

    def run(self):
        """Enter the GTK main loop; returns once gtk.main_quit() is called."""
        gtk.main()
33
class InfoModel:
    """Tree model filled from the module-level ``tasks`` dictionary.

    Column 0 is a string (the text shown in the view) and column 1 is a
    boolean (the flag driven by the toggle renderer).
    """

    def __init__(self):
        """Create the store and add one parent row plus one child per task."""
        self.tree_store = gtk.TreeStore(gobject.TYPE_STRING,
                                        gobject.TYPE_BOOLEAN)

        for title, description in tasks.items():
            parent = self.tree_store.append(None, (title, None))
            self.tree_store.append(parent, (description, None))

    def get_model(self):
        """Return the tree store built in ``__init__``."""
        # Do not test the store for truthiness here: PyGTK tree models
        # support len(), so a valid but empty store evaluates as false and
        # the caller would receive None instead of the model.
        return self.tree_store
48
49
class DisplayModel:
    """Builds the two-column tree view and handles its edit/toggle signals."""

    def make_view(self, model):
        """Create the tree view for *model*, keep it on ``self`` and return it.

        Column 'Name' shows model column 0 through an editable text
        renderer; column 'Complete' shows model column 1 through an
        activatable toggle renderer.
        """
        text_cell = gtk.CellRendererText()
        text_cell.set_property('editable', True)
        text_cell.connect('edited', self.col0_edited_cb, model)

        toggle_cell = gtk.CellRendererToggle()
        toggle_cell.set_property('activatable', True)
        toggle_cell.connect('toggled', self.col1_toggled_cb, model)

        name_column = gtk.TreeViewColumn('Name', text_cell, text=0)
        done_column = gtk.TreeViewColumn('Complete', toggle_cell)
        done_column.add_attribute(toggle_cell, 'active', 1)

        tree_view = gtk.TreeView(model)
        tree_view.append_column(name_column)
        tree_view.append_column(done_column)

        # Keep the widgets reachable under the same attribute names as before.
        self.view = tree_view
        self.renderer = text_cell
        self.renderer1 = toggle_cell
        self.col0 = name_column
        self.col1 = done_column
        return tree_view

    def col0_edited_cb(self, cell, path, new_text, model):
        """'edited' handler: log the change, then store *new_text* in column 0."""
        previous = model[path][0]
        # The new text is re-encoded to cp936 only for the console message;
        # the model receives new_text unchanged.
        print("change '%s' to '%s'" % (previous, new_text.encode('cp936')))
        model[path][0] = new_text

    def col1_toggled_cb(self, cell, path, model):
        """'toggled' handler: invert the boolean in column 1 and log it."""
        row = model[path]
        row[1] = not row[1]
        print("toggle '%s' to: '%s'" % (row[0], row[1]))
80
81
82
if __name__ == '__main__':
    # Store and Display have to exist as module globals before the controller
    # is constructed, because GUI_Controller.__init__ reads both of them.
    Store = InfoModel()
    Display = DisplayModel()

    myGUI = GUI_Controller()
    myGUI.run()
1.1.3. 示例2
Toggle line numbers
1 import os, stat, time, sys
2 import pygtk
3 pygtk.require('2.0')
4 import gtk
5
# XPM data for the folder icon.  The first string is the header
# "<width> <height> <colour count> <chars per pixel>", followed by one colour
# definition per colour and then one string per pixel row.
#
# The space character is itself one of the pixel characters, so its colour
# entry starts with two spaces (the pixel character, then the separator) and
# every pixel row must be exactly 17 characters wide, including runs of
# spaces.  Collapsing that whitespace makes the data invalid.
folderxpm = [
    "17 16 7 1",
    "  c #000000",
    ". c #808000",
    "X c yellow",
    "o c #808080",
    "O c #c0c0c0",
    "+ c white",
    "@ c None",
    "@@@@@@@@@@@@@@@@@",
    "@@@@@@@@@@@@@@@@@",
    "@@+XXXX.@@@@@@@@@",
    "@+OOOOOO.@@@@@@@@",
    "@+OXOXOXOXOXOXO. ",
    "@+XOXOXOXOXOXOX. ",
    "@+OXOXOXOXOXOXO. ",
    "@+XOXOXOXOXOXOX. ",
    "@+OXOXOXOXOXOXO. ",
    "@+XOXOXOXOXOXOX. ",
    "@+OXOXOXOXOXOXO. ",
    "@+XOXOXOXOXOXOX. ",
    "@+OOOOOOOOOOOOO. ",
    "@                ",
    "@@@@@@@@@@@@@@@@@",
    "@@@@@@@@@@@@@@@@@",
]
# Pixbuf built once at import time; used as the icon for directory rows.
folderpb = gtk.gdk.pixbuf_new_from_xpm_data(folderxpm)
33
# XPM data for the plain-file icon: header "<width> <height> <colour count>
# <chars per pixel>", three colour definitions, then twelve pixel rows.
#
# The space character is a pixel character here, so its colour entry begins
# with two spaces and every row must be exactly 12 characters wide.
# NOTE(review): the header fixes each row's width, but the position of the
# gaps inside rows such as "X .    ... X" was reconstructed after the
# whitespace had been collapsed -- compare against the upstream icon if the
# exact artwork matters.
filexpm = [
    "12 12 3 1",
    "  c #000000",
    ". c #ffff04",
    "X c #b2c0dc",
    "X        XXX",
    "X ...... XXX",
    "X ......   X",
    "X .    ... X",
    "X ........ X",
    "X .   .... X",
    "X ........ X",
    "X .     .. X",
    "X ........ X",
    "X .     .. X",
    "X ........ X",
    "X          X",
]
# Pixbuf built once at import time; used as the icon for non-directory rows.
filepb = gtk.gdk.pixbuf_new_from_xpm_data(filexpm)
53
54
55
class FileListingCellDataExample:
    """Single-window directory browser driven by cell data functions.

    The list model stores only the entry names.  Icon, size, mode and
    modification time are looked up by the cell data functions, each of
    which stats the entry under ``self.dirname``.
    """

    column_names = ['Name', 'Size', 'Mode', 'Last Changed']

    def __init__(self, dname=None):
        """Build the window and show the listing of *dname*.

        When *dname* is empty or None, make_list() uses its built-in
        default directory.
        """
        # Index-aligned with column_names.  Slot 0 is unused because the
        # Name column gets its two renderers wired up by hand below.
        cell_data_funcs = (None, self.file_size, self.file_mode,
                           self.file_last_changed)

        self.window = gtk.Window()
        self.window.set_size_request(400, 300)
        self.window.connect('destroy', lambda w: gtk.main_quit())

        # make_list() sets the window title, so the window must exist first.
        listmodel = self.make_list(dname)

        self.treeview = gtk.TreeView()

        # Name column: an icon renderer followed by a text renderer.
        icon_cell = gtk.CellRendererPixbuf()
        name_column = gtk.TreeViewColumn(self.column_names[0], icon_cell)
        name_column.set_cell_data_func(icon_cell, self.file_pixbuf)
        name_cell = gtk.CellRendererText()
        name_column.pack_start(name_cell, False)
        name_column.set_cell_data_func(name_cell, self.file_name)
        self.treeview.append_column(name_column)
        self.tvclm = [name_column]

        # Remaining columns: one text renderer each, filled by its data func.
        remaining = list(zip(self.column_names, cell_data_funcs))[1:]
        for title, data_func in remaining:
            text_cell = gtk.CellRendererText()
            column = gtk.TreeViewColumn(title, text_cell)
            if title == self.column_names[1]:
                # The Size column is right-aligned.
                text_cell.set_property('xalign', 1.0)
            column.set_cell_data_func(text_cell, data_func)
            self.treeview.append_column(column)
            self.tvclm.append(column)

        self.treeview.connect('row-activated', self.open_file)
        self.scrolledwindow = gtk.ScrolledWindow()
        self.scrolledwindow.add(self.treeview)
        self.window.add(self.scrolledwindow)
        self.treeview.set_model(listmodel)

        self.window.show_all()

    def _entry_stat(self, model, iter):
        """Return os.stat() for the entry stored in row *iter* of *model*."""
        return os.stat(os.path.join(self.dirname, model.get_value(iter, 0)))

    def make_list(self, dname=None):
        """Set ``self.dirname`` and return a new list model of its entries.

        Entries whose name starts with '.' are left out, the rest are
        sorted, and '..' is placed first.  The window title is updated to
        the directory name.
        """
        if dname:
            self.dirname = os.path.abspath(dname)
        else:
            # A unicode path makes os.listdir() return unicode names
            # (os.path.expanduser('~') was the alternative default).
            self.dirname = u'e:\\'

        self.window.set_title(self.dirname)

        visible = sorted([name for name in os.listdir(self.dirname)
                          if name[0] != '.'])
        listmodel = gtk.ListStore(object)
        for name in ['..'] + visible:
            listmodel.append([name])
        return listmodel

    def open_file(self, treeview, path, column):
        """'row-activated' handler: descend into the row if it is a directory."""
        model = treeview.get_model()
        row = model.get_iter(path)

        filename = os.path.join(self.dirname, model.get_value(row, 0))
        if stat.S_ISDIR(os.stat(filename).st_mode):
            treeview.set_model(self.make_list(filename))

    def file_pixbuf(self, column, cell, model, iter):
        """Cell data func: pick the folder or file icon for the row."""
        pb = filepb
        if stat.S_ISDIR(self._entry_stat(model, iter).st_mode):
            pb = folderpb
        cell.set_property('pixbuf', pb)

    def file_name(self, column, cell, model, iter):
        """Cell data func: show the stored name as-is.

        The value is passed straight through; an earlier variant decoded it
        from gb18030 first.
        """
        cell.set_property('text', model.get_value(iter, 0))

    def file_size(self, column, cell, model, iter):
        """Cell data func: show the entry's st_size."""
        cell.set_property('text', self._entry_stat(model, iter).st_size)

    def file_mode(self, column, cell, model, iter):
        """Cell data func: show the permission bits in octal."""
        mode_bits = stat.S_IMODE(self._entry_stat(model, iter).st_mode)
        cell.set_property('text', oct(mode_bits))

    def file_last_changed(self, column, cell, model, iter):
        """Cell data func: show st_mtime formatted by time.ctime()."""
        modified = self._entry_stat(model, iter).st_mtime
        cell.set_property('text', time.ctime(modified))
163
if __name__ == '__main__':
    # The constructor builds and shows the window; gtk.main() then runs
    # until the window's 'destroy' handler calls gtk.main_quit().
    FileListingCellDataExample()
    gtk.main()
1.2. 希望有人用理论解释这些
我这里只是提出了一种方法,希望有朋友用理论来解释这一切。这样就可以知其然而知其所以然了。
- 我个人理解版权符号并不是gb码的一个,但它是unicode的一个符号。也就是说unicode是一个全集,有时我们看到的以为是汉字符号,但其实是不存在对应的汉字的,因此这种情况下转为汉字编码就不会成功。因此最好的方法就是使用utf-8。关于是不是有可以查汉字编码表的区间,如果不在这个区间内自然就不是汉字符号。我没有查过,但猜想是这个问题。 -- Limodou