Attachment 'html2moin.py'
Download 1 #coding=cp936
2 #---------------------------------------------------
3 # NewEdit script
4 # Author :limodou
5 # Date :2005/07/16
6 # Version :0.1
7 # Description:
8 # Convert Html to Moin
9 #
10 #---------------------------------------------------
11 from sgmllib import SGMLParser
12 import htmlentitydefs
13 import tidy
14 import StringIO
15 import re
16
# Matches opening and closing <div> tags so they can be stripped before the
# markup is handed to tidy.  ``[^>]*`` (rather than ``.*?``) lets the match
# run across line breaks inside the tag, and ``\b`` keeps it from matching
# any other tag name that merely starts with "div".
re_div = re.compile(r'</?div\b[^>]*>')
# Matches double-underscore words such as __init__; handle_data() wraps these
# in backticks so they are not read as wiki markup.
re_spe = re.compile(r'(__\w+__)')
19
20 class Converter(SGMLParser):
21 def reset(self):
22 SGMLParser.reset(self)
23 self.text=[]
24 self.ul = 0
25 self.ol = 0
26 self.pre = 0
27 self.a_flag = False
28
29 def start_a(self, attrs):
30 for attr, value in attrs:
31 if attr.lower() == "href":
32 if value and value[0] != '#':
33 self.text.append('[' + value + ' ')
34 self.a_flag = True
35
36 def end_a(self):
37 if self.a_flag:
38 self.text.append(']')
39 self.a_flag = False
40
41 def start_h1(self, attrs):
42 self.text.append('= ')
43
44 def end_h1(self):
45 self.text.append(' =')
46
47 def start_h2(self, attrs):
48 self.text.append('== ')
49
50 def end_h2(self):
51 self.text.append(' ==')
52
53 def start_h3(self, attrs):
54 self.text.append('=== ')
55
56 def end_h3(self):
57 self.text.append(' ===')
58
59 def start_h4(self, attrs):
60 self.text.append('==== ')
61
62 def end_h4(self):
63 self.text.append(' ====')
64
65 def start_h5(self, attrs):
66 self.text.append('===== ')
67
68 def end_h5(self):
69 self.text.append(' =====')
70
71 def start_pre(self, attrs):
72 self.text.append('{{{#!python\n')
73 self.pre = 1
74
75 def end_pre(self):
76 self.text.append('}}}')
77 self.pre = 0
78
79 def start_img(self, attrs):
80 for attr, value in attrs:
81 if attr.lower() == "src":
82 self.text.append(value)
83
84 # def end_img(self):
85 # self.text.append("'''")
86 #
87 def handle_data(self, text):
88 if not self.pre:
89 text = re_spe.sub('`\g<1>`', text)
90 self.text.append(text)
91
92 def output(self):
93 def c(t):
94 if not isinstance(t, unicode):
95 return unicode(t)
96 else:
97 return t
98 return ''.join([c(x) for x in self.text])
99
100 def handle_entityref(self, ref):
101 if ref == 'nbsp':
102 self.text.append(' ')
103 else:
104 self.text.append(htmlentitydefs.entitydefs[ref])
105
106 def start_ul(self, attrs):
107 self.ul = 1
108
109 def start_li(self, attrs):
110 if self.ul:
111 self.text.append(" * ")
112 elif self.ol:
113 self.text.append(" 1. ")
114
115 def end_ul(self):
116 self.ul = 0
117
118 def start_ol(self, attrs):
119 self.ol = 1
120
121 def end_ol(self):
122 self.ol = 0
123
124 def start_strong(self, attrs):
125 self.text.append("'''")
126
127 def end_strong(self):
128 self.text.append("'''")
129
130 def start_em(self, attrs):
131 self.text.append("'''")
132
133 def end_em(self):
134 self.text.append("'''")
135
136 def do_hr(self, attrs):
137 self.text.append("----\n")
138
139 def parse_file(self, filename):
140 text=file(filename).read()
141 self.parse_string(text)
142
143 def parse_string(self, text):
144 self.feed(text)
145 self.close()
146
def convert(text):
    """Convert HTML markup to MoinMoin wiki text.

    The input has its <div> tags removed, is normalised by tidy into an
    XHTML body fragment, and is then run through Converter.  Unicode input
    is encoded to UTF-8 for tidy and decoded again before parsing; byte
    string input is passed through as-is.  Returns Converter.output().
    """
    was_unicode = isinstance(text, unicode)
    if was_unicode:
        text = text.encode('utf-8')
    stripped = re_div.sub('', text)
    tidy_options = {
        'output_xhtml': 1,
        'add_xml_decl': 0,
        'indent': 'auto',
        'tidy_mark': 0,
        'wrap': 0,
        'drop_empty_paras': 1,
        'logical_emphasis': 1,
        'lower_literals': 1,
        'show_body_only': 1,
        'char_encoding': 'utf8',
    }
    document = tidy.parseString(stripped, **tidy_options)
    # tidy documents serialise through write(), so capture into a buffer.
    sink = StringIO.StringIO()
    document.write(sink)
    cleaned = sink.getvalue()
    if was_unicode:
        cleaned = cleaned.decode('utf-8')
    parser = Converter()
    parser.parse_string(cleaned)
    return parser.output()
166
def run(win):
    """Convert the current document's HTML and show it in the message window.

    Reads the text from ``win.document``, converts it with convert(), then
    creates the message window, switches to the 'Message' page and puts the
    converted text into it.
    """
    converted = convert(win.document.GetText())
    win.createMessageWindow()
    win.panel.showPage(tr('Message'))
    win.messagewindow.SetText(converted)
173
# Script entry point.  `win` is not defined anywhere in this file --
# presumably the NewEdit script runner provides it as a global; verify
# against the host application.
run(win)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.