| OLD | NEW |
| (Empty) |
| 1 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
| 2 # See LICENSE for details. | |
| 3 | |
| 4 # | |
| 5 | |
| 6 from twisted.web import microdom, domhelpers | |
| 7 from twisted.python import text, procutils | |
| 8 import os, os.path, re, string | |
| 9 from cStringIO import StringIO | |
| 10 | |
| 11 import urlparse | |
| 12 | |
| 13 import tree | |
| 14 | |
# Matches, one character at a time, the characters that latexEscape()
# rewrites via _escapeMatch(): [ ] # $ % & _ { } ^ ~ and backslash.
escapingRE = re.compile(r'([\[\]#$%&_{}^~\\])')
# Matches a lower-case ASCII letter immediately followed by an upper-case
# one (a camelCase boundary); used when rendering <code> elements to insert
# an optional line break between the two letters.
lowerUpperRE = re.compile(r'([a-z])([A-Z])')
| 17 | |
| 18 def _escapeMatch(match): | |
| 19 c = match.group() | |
| 20 if c == '\\': | |
| 21 return '$\\backslash$' | |
| 22 elif c == '~': | |
| 23 return '\\~{}' | |
| 24 elif c == '^': | |
| 25 return '\\^{}' | |
| 26 elif c in '[]': | |
| 27 return '{'+c+'}' | |
| 28 else: | |
| 29 return '\\' + c | |
| 30 | |
def latexEscape(text):
    """
    Escape LaTeX special characters in text and flatten it onto one line.

    Every character matched by escapingRE is rewritten by _escapeMatch,
    then each newline is replaced by a single space.
    """
    escaped = escapingRE.sub(_escapeMatch, text)
    return ' '.join(escaped.split('\n'))
| 34 | |
# LaTeX text written in place of a named entity reference (keyed by the
# entity name without '&' and ';').  getLatexText() writes an empty string
# for names that are not listed here.
entities = {
    'amp': '\\&',
    'gt': '>',
    'lt': '<',
    'quot': '"',
    'copy': '\\copyright',
    'mdash': '---',
    # LaTeX opens a double quote with two backticks and closes it with two
    # apostrophes, so the *left* quote entity must map to ``.
    'ldquo': '``',
    'rdquo': "''",
}
| 38 | |
| 39 | |
def realpath(path):
    """
    Normalise path and express it relative to the current directory.

    path is joined onto the current working directory and normalised.  If
    the result lies below the working directory, that prefix is stripped so
    a relative path is returned; otherwise the normalised absolute path is
    returned.  Backslashes are converted to forward slashes in either case.
    """
    cwd = os.getcwd()
    path = os.path.normpath(os.path.join(cwd, path))
    # Compare against the platform separator: normpath() yields backslashes
    # on Windows, so a hard-coded '/' would never match there.  A cwd that
    # already ends with a separator (a filesystem root) must not get a
    # second one appended.
    prefix = cwd
    if not prefix.endswith(os.sep):
        prefix += os.sep
    if path.startswith(prefix):
        path = path[len(prefix):]
    return path.replace('\\', '/') # windows slashes make LaTeX blow up
| 47 | |
| 48 | |
def getLatexText(node, writer, filter=lambda x:x, entities=entities):
    """
    Pass the text found under node to writer, in document order.

    A node with an 'eref' attribute is an entity reference: its replacement
    is looked up in entities and written (an empty string when the name is
    unknown).  A node with a 'data' attribute is text: it is run through
    filter and written.  Any other node is descended into via childNodes.

    For the two leaf cases the return value of writer is returned; for a
    container node nothing is returned.
    """
    if hasattr(node, 'eref'):
        replacement = entities.get(node.eref, '')
        return writer(replacement)

    if hasattr(node, 'data'):
        filtered = filter(node.data)
        return writer(filtered)

    for childNode in node.childNodes:
        getLatexText(childNode, writer, filter, entities)
| 56 | |
class BaseLatexSpitter:
    """
    Tag-driven tree walker shared by the LaTeX emitters in this module.

    Each element is dispatched to a 'visitNode_<tagName>' method when one
    exists; otherwise its children are walked between the optional
    'start_<tagName>' and 'end_<tagName>' string attributes.  Subclasses
    supply writeNodeData() for nodes that have no tagName.
    """

    def __init__(self, writer, currDir='.', filename=''):
        """
        Remember the output callable, the directory used to resolve
        relative resources, and the name of the file being processed.
        """
        self.writer = writer
        self.currDir = currDir
        self.filename = filename

    def visitNode(self, node):
        """Dispatch node to the handler that matches its kind."""
        if isinstance(node, microdom.Comment):
            # Comments produce no output.
            return
        if hasattr(node, 'tagName'):
            handler = getattr(self, 'visitNode_' + node.tagName,
                              self.visitNodeDefault)
            handler(node)
        else:
            self.writeNodeData(node)

    def visitNodeDefault(self, node):
        """Write start_<tag>, visit every child, then write end_<tag>."""
        tag = node.tagName
        self.writer(getattr(self, 'start_' + tag, ''))
        for childNode in node.childNodes:
            self.visitNode(childNode)
        self.writer(getattr(self, 'end_' + tag, ''))

    def visitNode_a(self, node):
        """
        Route an anchor: a class ending in 'listing' wins, then an href,
        then a name; an anchor with none of these is handled by default.
        """
        isListing = (node.hasAttribute('class')
                     and node.getAttribute('class').endswith('listing'))
        if isListing:
            return self.visitNode_a_listing(node)
        if node.hasAttribute('href'):
            return self.visitNode_a_href(node)
        if node.hasAttribute('name'):
            return self.visitNode_a_name(node)
        self.visitNodeDefault(node)

    def visitNode_span(self, node):
        """
        Re-dispatch an element that has a class as '<tag>_<class>'.

        The node's tagName is extended in place, so the class-specific
        handler (or start_/end_ pair) is found by the normal lookup.
        """
        if node.hasAttribute('class'):
            node.tagName += '_' + node.getAttribute('class')
            self.visitNode(node)
            return
        return self.visitNodeDefault(node)

    # <div> elements are dispatched on their class exactly like <span>.
    visitNode_div = visitNode_span

    def visitNode_h1(self, node):
        """Produce no output for <h1>."""
        pass

    def visitNode_style(self, node):
        """Produce no output for <style>."""
        pass
| 101 | |
| 102 | |
class LatexSpitter(BaseLatexSpitter):
    """
    Emits a complete LaTeX article for a parsed document.

    Elements with special needs have a 'visitNode_<tag>' method here; all
    other elements are rendered by the inherited default handler using the
    'start_<tag>' / 'end_<tag>' strings defined at the end of the class.
    """

    # Added to the heading depth computed in visitNodeHeader(); subclasses
    # override it to shift all headings.
    baseLevel = 0
    # Evaluated once, when the class is defined, from procutils.which("dia")
    # (presumably: whether a "dia" executable can be found -- confirm
    # against procutils).
    diaHack = bool(procutils.which("dia"))

    def writeNodeData(self, node):
        """
        Write the text under node with LaTeX escaping applied and with
        '<' and '>' wrapped in math mode.
        """
        buf = StringIO()
        getLatexText(node, buf.write, latexEscape)
        self.writer(buf.getvalue().replace('<', '$<$').replace('>', '$>$'))

    def visitNode_head(self, node):
        """
        Write an \\author{} line built from the <link rel="author">
        elements under node, then render the head's children as usual.

        Each author is rendered as its 'title', followed by its 'href'
        (minus any 'mailto:' prefix) in angle brackets; links that have
        neither are skipped.
        """
        authorNodes = domhelpers.findElementsWithAttribute(node, 'rel', 'author')
        authorNodes = [n for n in authorNodes if n.tagName == 'link']

        if authorNodes:
            self.writer('\\author{')
            authors = []
            for aNode in authorNodes:
                name = aNode.getAttribute('title', '')
                href = aNode.getAttribute('href', '')
                if href.startswith('mailto:'):
                    href = href[7:]
                if href:
                    if name:
                        name += ' '
                    name += '$<$' + href + '$>$'
                if name:
                    authors.append(name)

            self.writer(' \\and '.join(authors))
            self.writer('}')

        self.visitNodeDefault(node)

    def visitNode_pre(self, node):
        """Write the unescaped text of node inside a verbatim environment."""
        self.writer('\\begin{verbatim}\n')
        buf = StringIO()
        getLatexText(node, buf.write)
        self.writer(text.removeLeadingTrailingBlanks(buf.getvalue()))
        self.writer('\\end{verbatim}\n')

    def visitNode_code(self, node):
        """
        Write the text of node in \\texttt{}, with optional line breaks
        at camelCase boundaries and after every '.' except one in the very
        first position.
        """
        fout = StringIO()
        getLatexText(node, fout.write, latexEscape)
        data = lowerUpperRE.sub(r'\1\\linebreak[1]\2', fout.getvalue())
        data = data[:1] + data[1:].replace('.', '.\\linebreak[1]')
        self.writer('\\texttt{'+data+'}')

    def visitNode_img(self, node):
        """
        Convert the image named by node's 'src' to EPS and include it.

        The converter is the 'convert_<extension>' method of this object;
        when there is none for the image's extension the image is skipped
        without any output.  If diaHack is set and a readable '.dia' file
        sits next to the image, that file is converted instead.
        """
        fileName = os.path.join(self.currDir, node.getAttribute('src'))
        target, ext = os.path.splitext(fileName)
        if self.diaHack and os.access(target + '.dia', os.R_OK):
            ext = '.dia'
            fileName = target + ext
        f = getattr(self, 'convert_'+ext[1:], None)
        if not f:
            return
        # The EPS is always written directly into currDir, whatever
        # directory the source image lives in.
        target = os.path.join(self.currDir, os.path.basename(target)+'.eps')
        f(fileName, target)
        target = os.path.basename(target)
        self._write_img(target)

    def _write_img(self, target):
        """Write LaTeX for image."""
        self.writer('\\begin{center}\\includegraphics[%%\n'
                    'width=1.0\n'
                    '\\textwidth,height=1.0\\textheight,\nkeepaspectratio]'
                    '{%s}\\end{center}\n' % target)

    def convert_png(self, src, target):
        """
        Convert the PNG file src to the PostScript file target with
        pngtopnm and pnmtops.

        Raises OSError carrying the status returned by os.system when the
        pipeline reports failure.
        """
        # XXX there's a *reason* Python comes with the pipes module -
        # someone fix this to use it.
        # NOTE(review): src and target are interpolated into a shell
        # command; the double quotes do not protect against quotes or
        # other shell metacharacters inside the names.
        r = os.system('pngtopnm "%s" | pnmtops -noturn > "%s"' % (src, target))
        if r != 0:
            raise OSError(r)

    def convert_dia(self, src, target):
        """
        Export the dia diagram src to target.

        A copy of the diagram with its "scaling" attribute changed from 1
        to 0.5 is written next to src as '<src>_hacked.dia' and that copy
        is what dia exports.  The statuses of the external commands are
        not checked.
        """
        # EVIL DISGUSTING HACK
        # NOTE(review): src and target are interpolated, unquoted, into
        # the shell commands below.
        pipe = os.popen("gunzip -dc %s" % (src))
        try:
            data = pipe.read()
        finally:
            pipe.close()
        pre = '<dia:attribute name="scaling">\n <dia:real val="1"/>'
        post = '<dia:attribute name="scaling">\n <dia:real val="0.5"/>'
        # Close explicitly so the data is on disk before gzip reads it,
        # rather than relying on the file object being finalised in time.
        hacked = open('%s_hacked.dia' % (src), 'wb')
        try:
            hacked.write(data.replace(pre, post))
        finally:
            hacked.close()
        os.system('gzip %s_hacked.dia' % (src,))
        os.system('mv %s_hacked.dia.gz %s_hacked.dia' % (src,src))
        # Let's pretend we never saw that.

        # Silly dia needs an X server, even though it doesn't display anything.
        # If this is a problem for you, try using Xvfb.
        os.system("dia %s_hacked.dia -n -e %s" % (src, target))

    def visitNodeHeader(self, node):
        """
        Write node (an <hN> element) as a (sub)*section heading.

        The number of 'sub' prefixes is N - 2 + baseLevel; the heading
        text is rendered by a HeadingLatexSpitter.
        """
        level = (int(node.tagName[1])-2)+self.baseLevel
        self.writer('\n\n\\'+level*'sub'+'section{')
        spitter = HeadingLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_listing(self, node):
        """
        Inline the file named by node's 'href' as a verbatim listing,
        followed by a centred caption.

        The first 'skipLines' lines of the file (default 0) are dropped
        and trailing whitespace is removed from every line.  The caption
        is the link text, or 'Source listing' when the link text is just
        the file's base name.
        """
        fileName = os.path.join(self.currDir, node.getAttribute('href'))
        self.writer('\\begin{verbatim}\n')
        listing = open(fileName)
        try:
            lines = [line.rstrip() for line in listing.readlines()]
        finally:
            listing.close()
        lines = lines[int(node.getAttribute('skipLines', 0)):]
        self.writer(text.removeLeadingTrailingBlanks('\n'.join(lines)))
        self.writer('\\end{verbatim}')

        # Write a caption for this source listing
        fileName = os.path.basename(fileName)
        caption = domhelpers.getNodeText(node)
        if caption == fileName:
            caption = 'Source listing'
        self.writer('\\parbox[b]{\\linewidth}{\\begin{center}%s --- '
                    '\\begin{em}%s\\end{em}\\end{center}}'
                    % (latexEscape(caption), latexEscape(fileName)))

    def visitNode_a_href(self, node):
        """
        Render a hyperlink.

        Links with an http, https, ftp or mailto scheme are rendered as
        their content, plus a footnote holding the URL unless the link
        text already is the URL.  Any other link is treated as a
        cross-reference inside the document set: the content is written in
        italics followed by a \\loreref{} to a label derived from the
        target path (relative to the current file) and fragment.
        """
        supported_schemes=['http', 'https', 'ftp', 'mailto']
        href = node.getAttribute('href')
        if urlparse.urlparse(href)[0] in supported_schemes:
            label = domhelpers.getNodeText(node)
            self.visitNodeDefault(node)
            if label != href:
                self.writer('\\footnote{%s}' % latexEscape(href))
        else:
            # Padding with None lets an href without '#' unpack too.
            path, fragid = (href.split('#', 1) + [None])[:2]
            if path == '':
                # A bare '#fragment' refers to the current file.
                path = self.filename
            else:
                path = os.path.join(os.path.dirname(self.filename), path)

            path = realpath(path)

            if fragid:
                ref = path + 'HASH' + fragid
            else:
                ref = path
            self.writer('\\textit{')
            self.visitNodeDefault(node)
            self.writer('}')
            self.writer('\\loreref{%s}' % ref)

    def visitNode_a_name(self, node):
        """
        Write a \\label{} for a named anchor, then render its content.

        The label has the same '<path>HASH<name>' shape that
        visitNode_a_href() builds for links carrying a fragment.
        """
        self.writer('\\label{%sHASH%s}' % (realpath(self.filename),
                                          node.getAttribute('name')))
        self.visitNodeDefault(node)

    def visitNode_table(self, node):
        """
        Render a table as a floating, centred tabular of left-aligned
        columns.

        Only <tr> children of node, and <th>/<td> children of those rows,
        are used.  A rule is drawn under every row that contains a <th>.
        The table's 'title' attribute, if present, becomes the caption.
        """
        rows = [[col for col in row.childNodes
                     if getattr(col, 'tagName', None) in ('th', 'td')]
                for row in node.childNodes if getattr(row, 'tagName', None)=='tr']
        # Every cell is followed by '&', so each row ends with one extra,
        # empty column.  The appended 0 keeps max() from raising on a
        # table that has no rows at all.
        numCols = 1+max([len(row) for row in rows] + [0])
        self.writer('\\begin{table}[ht]\\begin{center}')
        self.writer('\\begin{tabular}{@{}'+'l'*numCols+'@{}}')
        for row in rows:
            th = 0
            for col in row:
                self.visitNode(col)
                self.writer('&')
                if col.tagName == 'th':
                    th = 1
            self.writer('\\\\\n') #\\ ends lines
            if th:
                self.writer('\\hline\n')
        self.writer('\\end{tabular}\n')
        if node.hasAttribute('title'):
            self.writer('\\caption{%s}'
                        % latexEscape(node.getAttribute('title')))
        self.writer('\\end{center}\\end{table}\n')

    def visitNode_span_footnote(self, node):
        """Render node's content inside \\footnote{} using a FootnoteLatexSpitter."""
        self.writer('\\footnote{')
        spitter = FootnoteLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}')

    def visitNode_span_index(self, node):
        """
        Write an \\index{} entry from node's 'value' attribute (written
        as-is, without escaping), then render node's content.
        """
        self.writer('\\index{%s}\n' % node.getAttribute('value'))
        self.visitNodeDefault(node)

    visitNode_h2 = visitNode_h3 = visitNode_h4 = visitNodeHeader

    # Strings written before/after an element's children by the default
    # handler; an element with no entry contributes nothing of its own.
    start_title = '\\title{'
    end_title = '}\n'

    start_sub = '$_{'
    end_sub = '}$'

    start_sup = '$^{'
    end_sup = '}$'

    start_html = '''\\documentclass{article}
\\newcommand{\\loreref}[1]{%
\\ifthenelse{\\value{page}=\\pageref{#1}}%
{ (this page)}%
{ (page \\pageref{#1})}%
}'''

    start_body = '\\begin{document}\n\\maketitle\n'
    end_body = '\\end{document}'

    start_dl = '\\begin{description}\n'
    end_dl = '\\end{description}\n'
    start_ul = '\\begin{itemize}\n'
    end_ul = '\\end{itemize}\n'

    start_ol = '\\begin{enumerate}\n'
    end_ol = '\\end{enumerate}\n'

    start_li = '\\item '
    end_li = '\n'

    start_dt = '\\item['
    end_dt = ']'
    end_dd = '\n'

    start_p = '\n\n'

    start_strong = start_em = '\\begin{em}'
    end_strong = end_em = '\\end{em}'

    start_q = "``"
    end_q = "''"

    start_div_note = '\\begin{quotation}\\textbf{Note:}'
    end_div_note = '\\end{quotation}'

    start_th = '\\textbf{'
    end_th = '}'
| 338 | |
| 339 | |
class SectionLatexSpitter(LatexSpitter):
    """
    Renders a document as one \\section of a larger LaTeX document.

    The document preamble and the document environment are suppressed,
    headings are pushed one level down, and the title becomes a labelled
    \\section.
    """

    baseLevel = 1

    start_title = '\\section{'
    # The closing brace of the title is written by visitNode_title(), after
    # the label; nothing is written around <html> and <body>.
    end_title = ''
    start_html = ''
    start_body = ''
    end_body = ''

    def visitNode_title(self, node):
        """
        Render the title, then append a label named after the current
        file and close the heading.
        """
        self.visitNodeDefault(node)
        label = realpath(self.filename)
        self.writer('\\label{%s}}\n' % label)
| 352 | |
| 353 | |
class ChapterLatexSpitter(SectionLatexSpitter):
    """
    Renders a document as one \\chapter: the title opens a chapter and the
    headings are not shifted down.
    """

    start_title = '\\chapter{'
    baseLevel = 0
| 357 | |
| 358 | |
class HeadingLatexSpitter(BaseLatexSpitter):
    """
    Minimal emitter used for the content of section headings.

    It derives from the base walker only, so of the element strings just
    the <q> quotation marks are defined here.
    """

    # Reuse LatexSpitter's text writer without inheriting from it: the
    # plain function is taken from the class dictionary so that it binds
    # to instances of this class.
    writeNodeData = LatexSpitter.__dict__['writeNodeData']

    start_q = "``"
    end_q = "''"
| 364 | |
| 365 | |
class FootnoteLatexSpitter(LatexSpitter):
    """
    Emitter for the content of a footnote.

    The first paragraph is written without the leading paragraph break, so
    a multi-paragraph footnote does not begin with an empty paragraph.
    """

    # No paragraph break until the first <p> has been written.
    start_p = ''

    def visitNode_p(self, node):
        """
        Render the paragraph, then restore the normal paragraph break on
        this instance for the paragraphs that follow.
        """
        self.visitNodeDefault(node)
        self.start_p = LatexSpitter.start_p

    def visitNode_span_footnote(self, node):
        """
        Render a footnote found inside a footnote as plain content rather
        than as another \\footnote.
        """
        self.visitNodeDefault(node)
| 378 | |
class BookLatexSpitter(LatexSpitter):
    """
    Emits the top-level LaTeX file of a book from a book index document.

    Only the table of contents of the body is rendered: anchors with class
    'chapter' open chapters and anchors with class 'sect' pull in the
    separately generated file for that section.
    """

    def visitNode_body(self, node):
        """
        Discard everything in the body except the first node that has
        class 'toc', then render what is left.
        """
        tocs = domhelpers.locateNodes([node], 'class', 'toc')
        domhelpers.clearNode(node)
        if tocs:
            node.appendChild(tocs[0])
        self.visitNodeDefault(node)

    def visitNode_link(self, node):
        """
        Re-dispatch a <link> that has a 'rel' attribute as 'link_<rel>'.

        The node's tagName is extended in place so that the normal handler
        lookup finds the rel-specific method.
        """
        if not node.hasAttribute('rel'):
            return self.visitNodeDefault(node)
        node.tagName += '_'+node.getAttribute('rel')
        self.visitNode(node)

    def visitNode_link_author(self, node):
        """Write an \\author{} line from the link's 'text' attribute."""
        # NOTE(review): LatexSpitter.visitNode_head reads the author name
        # from 'title', not 'text' -- confirm which attribute book files use.
        self.writer('\\author{%s}\n' % node.getAttribute('text'))

    def visitNode_link_stylesheet(self, node):
        """
        Write a \\usepackage{} line for a stylesheet link whose type is
        'application/x-latex'; the package name is the href without its
        extension.  Other stylesheet links produce no output.
        """
        if node.hasAttribute('type') and node.hasAttribute('href'):
            if node.getAttribute('type')=='application/x-latex':
                packagename = node.getAttribute('href')
                packagebase = os.path.splitext(packagename)[0]
                self.writer('\\usepackage{%s}\n' % packagebase)

    start_html = r'''\documentclass[oneside]{book}
\usepackage{graphicx}
\usepackage{times,mathptmx}
'''

    start_body = r'''\begin{document}
\maketitle
\tableofcontents
'''

    # The table-of-contents list contributes no markup of its own.
    start_li=''
    end_li=''
    start_ul=''
    end_ul=''


    def visitNode_a(self, node):
        """
        Route an anchor.

        A class ending in 'listing' is handled as a listing; any other
        class is dispatched to 'visitNode_a_<class>' when such a method
        exists.  Anchors without a class, or with a class that has no
        handler, fall back to href / name / default handling as in the
        base class instead of failing with AttributeError.
        """
        if node.hasAttribute('class'):
            a_class = node.getAttribute('class')
            if a_class.endswith('listing'):
                return self.visitNode_a_listing(node)
            handler = getattr(self, 'visitNode_a_%s' % a_class, None)
            if handler is not None:
                return handler(node)
        if node.hasAttribute('href'):
            return self.visitNode_a_href(node)
        if node.hasAttribute('name'):
            return self.visitNode_a_name(node)
        self.visitNodeDefault(node)

    def visitNode_a_chapter(self, node):
        """Write the anchor's content as a \\chapter heading."""
        self.writer('\\chapter{')
        self.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_sect(self, node):
        """Write an \\input{} of the anchor's href without its extension."""
        base = os.path.splitext(node.getAttribute('href'))[0]
        self.writer('\\input{%s}\n' % base)
| 441 | |
| 442 | |
| 443 | |
def processFile(spitter, fin):
    """
    Parse the document read from fin with microdom and walk it with
    spitter, starting at the document element.
    """
    document = microdom.parse(fin)
    spitter.visitNode(document.documentElement)
| 447 | |
| 448 | |
def convertFile(filename, spitterClass):
    """
    Convert the document in filename to LaTeX.

    The output is written next to the input, under the same name with the
    extension replaced by '.tex'.  spitterClass is instantiated with the
    output file's write method, the input's directory and the input's name.

    Both files are closed even when parsing or rendering raises, and the
    input is opened first so that an unreadable input does not leave a
    freshly truncated output file behind.
    """
    outName = os.path.splitext(filename)[0]+".tex"
    fin = open(filename)
    try:
        fout = open(outName, 'w')
        try:
            spitter = spitterClass(fout.write, os.path.dirname(filename),
                                   filename)
            processFile(spitter, fin)
        finally:
            fout.close()
    finally:
        fin.close()
| OLD | NEW |