Toggle line numbers
1 #!/usr/bin/env python
2 """
3 12_to_13.py - migration from < moin--main--patch-248 to >= patch 249
4 * convert event-log from iso8859-1 to config.charset (utf-8) encoding
5
6 Steps for a successful migration to utf-8:
7 1. stop your wiki and make a backup
8 2. make a copy of the wiki's "data" directory to your working dir
9 3. clean up your working copy of the data dir:
10 a. if you use CVS or GNU arch remove stuff like CVS/, .cvsignore
11 or .arch-ids/ etc.
12 b. remove *.pickle (used by moin for caching some information,
13 will be re-created automatically), especially:
14 I. data/user/userdict.pickle
15 II. data/dicts.pickle
16 c. if you used symlinks in data/text or elsewhere, remove them
17 4. make sure that from_encoding and to_encoding match your needs (see
18 beginning of script below and config.charset in moin_config.py) and
19 run python2.3 12_to_13_mig6.py from your working dir
20 5. if there was no error, you will find:
21 data.pre-mig6 (the script renames your data directory copy to that name)
22 data (result, converted to utf-8)
23 6. verify conversion results (number of pages, size of logs, attachments,
24 number of backup copies) - everything should be reasonable before
25 you proceed. Usually the file size gets larger when converting from
26 iso8859-1 (or other non-unicode charset) to utf-8 except if your
27 content is ASCII-only, then it will keep its size.
28 7. copy additional files from data.pre-mig6 to data (maybe intermaps, logs,
29 etc.). Be aware that the file contents AND file names of wiki content
30 may have changed, so DO NOT copy the cache/ directory, but let
31 the wiki recreate it.
32 8. replace the data directory your wiki uses with the data directory
33 you created by previous steps. DO NOT simply copy the converted stuff
34 into the original or you will duplicate pages and create chaos!
35 9. test it. if something has gone wrong, you still have your backup.
36
37
38 10. if you use dictionaries for spellchecking, you have to convert them
39 to config.charset, too. Remove your dict.cache before re-starting
40 your wiki.
41
42 @copyright: 2004 Thomas Waldmann
43 @license: GPL, see COPYING for details
44 """
45
# Encoding the existing event-log is read as, and the encoding it is written
# in. to_encoding should match config.charset of the target wiki. For a log
# that is still in iso8859-1, use the commented-out from_encoding instead.
#from_encoding = 'iso8859-1'
from_encoding = 'utf-8'
to_encoding = 'utf-8'
49
50 import os.path, sys, shutil, urllib
51
52 sys.path.insert(0, '../../..')
53 from MoinMoin import wikiutil
54
55 from migutil import opj, listdir, copy_file, copy_dir
56
# Number of strings convert_string() could not re-encode during this run.
errorcount = 0


def convert_string(str, enc_from, enc_to):
    """Re-encode a byte string from one character encoding to another.

    The parameter is named ``str`` (shadowing the builtin) only to keep the
    existing signature intact.

    str      -- object providing decode(), i.e. a Python 2 byte string
    enc_from -- codec name used to decode ``str``
    enc_to   -- codec name used to encode the decoded text

    Returns the re-encoded string. If the data cannot be decoded or encoded,
    the module-level ``errorcount`` is incremented and the literal string
    "ERROR" is returned instead of raising.

    Only UnicodeError (decode/encode failures) is treated as bad data. Other
    exceptions, e.g. LookupError for an unknown codec name or
    KeyboardInterrupt, propagate so that a misconfiguration or a user abort
    is not silently counted as a bad log entry.
    """
    global errorcount
    try:
        return str.decode(enc_from).encode(enc_to)
    except UnicodeError:
        errorcount += 1
        return "ERROR"
69
def _convert_kvpairs(kvpairs, enc_from, enc_to):
    """Re-encode one '&'-joined field of URL-quoted key=value pairs.

    Each pair is split at its first '=', both halves are URL-unquoted,
    passed through convert_string() and URL-quoted again. A pair for which
    any half fails to convert is left out of the result. A token without
    '=' is converted and emitted without '=', so an empty field stays empty.
    """
    kvlist = []
    for kvpair in kvpairs.split('&'):
        # maxsplit=1: a stray extra '=' ends up in the value instead of
        # raising ValueError on tuple unpacking.
        parts = kvpair.split('=', 1)
        # convert_string() bumps the module-level errorcount on failure.
        # Comparing the counter (rather than the returned "ERROR" sentinel)
        # keeps pairs whose key or value legitimately is "ERROR".
        failures_before = errorcount
        parts = [convert_string(urllib.unquote(part), enc_from, enc_to)
                 for part in parts]
        if errorcount != failures_before:
            # Unconvertible pair: do not write it to the new log.
            continue
        kvlist.append('='.join([urllib.quote(part) for part in parts]))
    return '&'.join(kvlist)


def convert_eventlog(fname_from, fname_to, enc_from, enc_to):
    """Write a re-encoded copy of an event log.

    Every input line is split on tabs; the third field is treated as
    '&'-joined, URL-quoted key=value pairs and re-encoded from ``enc_from``
    to ``enc_to``. All other fields are copied unchanged. Lines with fewer
    than three fields (e.g. blank lines) carry no pairs and are copied as
    they are. Line endings are normalised to '\\n'.

    fname_from -- path of the existing event log
    fname_to   -- path of the log to create (overwritten if present)
    enc_from   -- codec name of the existing data
    enc_to     -- codec name for the new data

    After writing, the access and modification times of ``fname_from`` are
    applied to ``fname_to``. Returns None.
    """
    print("%s -> %s" % (fname_from, fname_to))
    # Nested try/finally instead of ``with``: the script header asks for
    # python2.3, which has no with-statement.
    file_from = open(fname_from)
    try:
        file_to = open(fname_to, "w")
        try:
            for line in file_from:
                line = line.replace('\r', '').replace('\n', '')
                fields = line.split('\t')
                if len(fields) > 2:
                    fields[2] = _convert_kvpairs(fields[2], enc_from, enc_to)
                file_to.write('\t'.join(fields) + '\n')
        finally:
            file_to.close()
    finally:
        file_from.close()

    # Carry the original timestamps over to the converted file.
    st = os.stat(fname_from)
    os.utime(fname_to, (st.st_atime, st.st_mtime))
107
108
# Name the working copy of the data directory is renamed to; it is kept
# untouched as the source of the conversion.
origdir = 'data.pre-mig6'

try:
    os.rename('data', origdir)
except OSError:
    print("You need to be in the directory where your copy of the 'data' directory is located.")
    sys.exit(1)

# Start from a full copy, then replace only the event-log with the
# re-encoded version.
copy_dir(origdir, 'data')
os.remove(opj('data', 'event-log'))  # old format
convert_eventlog(opj(origdir, 'event-log'), opj('data', 'event-log'), from_encoding, to_encoding)

# Zoomq 2005-02-05: entries that hit a Unicode error are skipped instead of
# aborting the migration; report how many conversions failed. The counter is
# incremented per failed key or value, so it can exceed the number of
# affected log lines.
print("mig6 finished but maybe there is %d lines log not transition for some unicode ERROR!!" % errorcount)