Attachment 'html2moin.py'

Download

   1 #coding=cp936
   2 #---------------------------------------------------
   3 #                   NewEdit script
   4 # Author  :limodou
   5 # Date    :2005/07/16
   6 # Version :0.1
   7 # Description:
   8 #     Convert Html to Moin
   9 #
  10 #---------------------------------------------------
  11 from sgmllib import SGMLParser
  12 import htmlentitydefs
  13 import tidy
  14 import StringIO
  15 import re
  16 
  17 re_div = re.compile('</?div.*?>')
  18 re_spe = re.compile('(__\w+__)')
  19 
  20 class Converter(SGMLParser):
  21     def reset(self):
  22         SGMLParser.reset(self)
  23         self.text=[]
  24         self.ul = 0
  25         self.ol = 0
  26         self.pre = 0
  27         self.a_flag = False
  28 
  29     def start_a(self, attrs):
  30         for attr, value in attrs:
  31             if attr.lower() == "href":
  32                 if value and value[0] != '#':
  33                     self.text.append('[' + value + ' ')
  34                     self.a_flag = True
  35     
  36     def end_a(self):
  37         if self.a_flag:
  38             self.text.append(']')
  39         self.a_flag = False
  40         
  41     def start_h1(self, attrs):
  42         self.text.append('= ')
  43         
  44     def end_h1(self):
  45         self.text.append(' =')
  46         
  47     def start_h2(self, attrs):
  48         self.text.append('== ')
  49         
  50     def end_h2(self):
  51         self.text.append(' ==')
  52         
  53     def start_h3(self, attrs):
  54         self.text.append('=== ')
  55         
  56     def end_h3(self):
  57         self.text.append(' ===')
  58         
  59     def start_h4(self, attrs):
  60         self.text.append('==== ')
  61         
  62     def end_h4(self):
  63         self.text.append(' ====')
  64         
  65     def start_h5(self, attrs):
  66         self.text.append('===== ')
  67         
  68     def end_h5(self):
  69         self.text.append(' =====')
  70         
  71     def start_pre(self, attrs):
  72         self.text.append('{{{#!python\n')
  73         self.pre = 1
  74         
  75     def end_pre(self):
  76         self.text.append('}}}')
  77         self.pre = 0
  78         
  79     def start_img(self, attrs):
  80         for attr, value in attrs:
  81             if attr.lower() == "src":
  82                 self.text.append(value)
  83                 
  84 #    def end_img(self):
  85 #        self.text.append("'''")
  86 #
  87     def handle_data(self, text):
  88         if not self.pre:
  89             text = re_spe.sub('`\g<1>`', text)
  90         self.text.append(text)
  91 
  92     def output(self):
  93         def c(t):
  94             if not isinstance(t, unicode):
  95                 return unicode(t)
  96             else:
  97                 return t
  98         return ''.join([c(x) for x in self.text])
  99     
 100     def handle_entityref(self, ref):
 101         if ref == 'nbsp':
 102             self.text.append(' ')
 103         else:
 104             self.text.append(htmlentitydefs.entitydefs[ref])
 105         
 106     def start_ul(self, attrs):
 107         self.ul = 1
 108         
 109     def start_li(self, attrs):
 110         if self.ul:
 111             self.text.append(" * ")
 112         elif self.ol:
 113             self.text.append(" 1. ")
 114             
 115     def end_ul(self):
 116         self.ul = 0
 117         
 118     def start_ol(self, attrs):
 119         self.ol = 1
 120     
 121     def end_ol(self):
 122         self.ol = 0
 123         
 124     def start_strong(self, attrs):
 125         self.text.append("'''")
 126         
 127     def end_strong(self):
 128         self.text.append("'''")
 129 
 130     def start_em(self, attrs):
 131         self.text.append("'''")
 132         
 133     def end_em(self):
 134         self.text.append("'''")
 135         
 136     def do_hr(self, attrs):
 137         self.text.append("----\n")
 138         
 139     def parse_file(self, filename):
 140         text=file(filename).read()
 141         self.parse_string(text)
 142 
 143     def parse_string(self, text):
 144         self.feed(text)
 145         self.close()
 146 
 147 def convert(text):
 148     unicodeflag = False
 149     if isinstance(text, unicode):
 150         text = text.encode('utf-8')
 151         unicodeflag = True
 152     text = re_div.sub('', text)
 153     options = dict(output_xhtml=1,add_xml_decl=0, indent='auto', tidy_mark=0,
 154         wrap=0,drop_empty_paras=1,logical_emphasis=1,lower_literals=1,
 155         show_body_only=1,char_encoding='utf8')
 156     dom = tidy.parseString(text, **options)
 157     buf = StringIO.StringIO()
 158     dom.write(buf)
 159     text = buf.getvalue()
 160     if unicodeflag:
 161         text = text.decode('utf-8')
 162     con = Converter()
 163     con.parse_string(text)
 164     text = con.output()
 165     return text
 166             
 167 def run(win):
 168     text = win.document.GetText()
 169     text = convert(text)
 170     win.createMessageWindow()
 171     win.panel.showPage(tr('Message'))
 172     win.messagewindow.SetText(text)
 173 
 174 run(win)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2021-05-11 08:52:36, 4.4 KB) [[attachment:html2moin.py]]
  • [get | view] (2021-05-11 08:52:36, 1.3 KB) [[attachment:makedot.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.