OLD | NEW |
| (Empty) |
1 # Copyright (c) 2001-2004 Twisted Matrix Laboratories. | |
2 # See LICENSE for details. | |
3 | |
4 # | |
5 | |
6 from twisted.web import microdom, domhelpers | |
7 from twisted.python import text, procutils | |
8 import os, os.path, re, string | |
9 from cStringIO import StringIO | |
10 | |
11 import urlparse | |
12 | |
13 import tree | |
14 | |
# Matches a lower-case letter immediately followed by an upper-case letter,
# capturing each one separately.
lowerUpperRE = re.compile(r'([a-z])([A-Z])')

# Matches any single character that _escapeMatch knows how to rewrite for
# LaTeX: brackets, '#', '$', '%', '&', '_', braces, '^', '~' and backslash.
escapingRE = re.compile(r'([\[\]#$%&_{}^~\\])')
17 | |
18 def _escapeMatch(match): | |
19 c = match.group() | |
20 if c == '\\': | |
21 return '$\\backslash$' | |
22 elif c == '~': | |
23 return '\\~{}' | |
24 elif c == '^': | |
25 return '\\^{}' | |
26 elif c in '[]': | |
27 return '{'+c+'}' | |
28 else: | |
29 return '\\' + c | |
30 | |
def latexEscape(text):
    """
    Escape LaTeX special characters in C{text} and flatten it to one line.

    @param text: the string to escape.
    @return: C{text} with every match of C{escapingRE} rewritten by
        L{_escapeMatch} and every newline turned into a single space.
    """
    escaped = escapingRE.sub(_escapeMatch, text)
    # Splitting on newlines and re-joining with spaces replaces each newline
    # by exactly one space.
    return ' '.join(escaped.split('\n'))
34 | |
# LaTeX text emitted for each named HTML entity reference.  Entities that
# are not listed here are rendered as the empty string by getLatexText.
entities = {
    'amp': '\\&',
    'gt': '>',
    'lt': '<',
    'quot': '"',
    # The empty group stops TeX from swallowing a following space or from
    # fusing the command name with following letters.
    'copy': '\\copyright{}',
    'mdash': '---',
    # LaTeX opens a quotation with two backticks and closes it with two
    # apostrophes; the left/right entities must map accordingly.
    'ldquo': '``',
    'rdquo': "''",
}
38 | |
39 | |
def realpath(path):
    """
    Normalise C{path} into the form used for LaTeX labels and references.

    The path is resolved against the current working directory and
    normalised.  If the result lies below the working directory it is made
    relative to it; otherwise it stays absolute.  Backslashes are always
    converted to forward slashes.

    @param path: a relative or absolute file system path.
    @return: the normalised path, using only forward slashes.
    """
    cwd = os.getcwd()
    path = os.path.normpath(os.path.join(cwd, path))
    # Use the platform separator for the prefix test: a hard-coded '/' never
    # matches on Windows, and appending a separator to a cwd that already
    # ends in one (the filesystem root) never matches either.
    prefix = cwd
    if not prefix.endswith(os.sep):
        prefix += os.sep
    if path.startswith(prefix):
        path = path[len(prefix):]
    return path.replace('\\', '/') # windows slashes make LaTeX blow up
47 | |
48 | |
def getLatexText(node, writer, filter=lambda x:x, entities=entities):
    """
    Write the text content of C{node} and its descendants through C{writer}.

    @param node: a DOM node.  A node with an C{eref} attribute is treated as
        an entity reference, one with a C{data} attribute as text, and
        anything else as a container whose C{childNodes} are walked.
    @param writer: a callable taking one string.
    @param filter: a callable applied to text data before it is written;
        entity replacements are written unfiltered.
    @param entities: a mapping from entity name to replacement text; unknown
        entities produce the empty string.
    @return: whatever C{writer} returns for an entity or text node, C{None}
        for a container node.
    """
    if hasattr(node, 'eref'):
        replacement = entities.get(node.eref, '')
        return writer(replacement)
    elif hasattr(node, 'data'):
        filtered = filter(node.data)
        return writer(filtered)
    else:
        for descendant in node.childNodes:
            getLatexText(descendant, writer, filter, entities)
56 | |
class BaseLatexSpitter:
    """
    Base visitor that walks a DOM tree and emits text through a writer.

    Dispatch is by tag name: C{visitNode_<tagName>} is called when such a
    method exists, otherwise L{visitNodeDefault} is used, which wraps the
    children in the C{start_<tagName>} / C{end_<tagName>} attributes (each
    defaulting to the empty string).

    Nodes without a C{tagName} are handed to C{self.writeNodeData}, which
    this class does not define; likewise the C{visitNode_a_*} handlers used
    by L{visitNode_a} are not defined here.
    """

    def __init__(self, writer, currDir='.', filename=''):
        """
        @param writer: a callable taking one string of output.
        @param currDir: stored as C{self.currDir}.
        @param filename: stored as C{self.filename}.
        """
        self.writer, self.currDir, self.filename = writer, currDir, filename

    def visitNode(self, node):
        """Dispatch C{node} to the appropriate handler; comments are skipped."""
        if isinstance(node, microdom.Comment):
            return
        if hasattr(node, 'tagName'):
            handler = getattr(self, 'visitNode_' + node.tagName,
                              self.visitNodeDefault)
            handler(node)
        else:
            self.writeNodeData(node)

    def visitNodeDefault(self, node):
        """Write the start text, visit every child, then write the end text."""
        opening = getattr(self, 'start_' + node.tagName, '')
        self.writer(opening)
        for child in node.childNodes:
            self.visitNode(child)
        closing = getattr(self, 'end_' + node.tagName, '')
        self.writer(closing)

    def visitNode_a(self, node):
        """
        Route an anchor: a class ending in 'listing' wins, then C{href},
        then C{name}; anything else gets the default treatment.
        """
        isListing = (node.hasAttribute('class')
                     and node.getAttribute('class').endswith('listing'))
        if isListing:
            return self.visitNode_a_listing(node)
        for attribute in ('href', 'name'):
            if node.hasAttribute(attribute):
                # Looked up lazily so that only the handler actually needed
                # has to exist.
                return getattr(self, 'visitNode_a_' + attribute)(node)
        self.visitNodeDefault(node)

    def visitNode_span(self, node):
        """
        Re-dispatch a classed element under the name C{<tag>_<class>}.

        The node's C{tagName} is modified in place so that both the
        C{visitNode_*} lookup and the C{start_*} / C{end_*} lookup see the
        combined name.
        """
        if node.hasAttribute('class'):
            node.tagName = node.tagName + '_' + node.getAttribute('class')
            self.visitNode(node)
            return None
        return self.visitNodeDefault(node)

    # div elements are dispatched on their class exactly like span elements.
    visitNode_div = visitNode_span

    def visitNode_h1(self, node):
        """Produce no output for h1 elements."""
        return None

    def visitNode_style(self, node):
        """Produce no output for style elements."""
        return None
101 | |
102 | |
class LatexSpitter(BaseLatexSpitter):
    """
    Visitor that renders a DOM tree as a stand-alone LaTeX article.

    Most elements are handled by the C{start_<tag>} / C{end_<tag>} strings
    defined at the bottom of the class; elements needing more work have a
    C{visitNode_<tag>} method.
    """

    # Offset added to the heading level computed in visitNodeHeader.
    baseLevel = 0
    # True when a 'dia' executable was found at class definition time.
    diaHack = bool(procutils.which("dia"))

    def _shellQuote(self, arg):
        """Return C{arg} quoted so that a shell treats it as a single word."""
        try:
            from shlex import quote
        except ImportError:
            # Python 2 keeps the same helper in the pipes module.
            from pipes import quote
        return quote(arg)

    def writeNodeData(self, node):
        """Write the escaped text of C{node}, with '<' and '>' in math mode."""
        buf = StringIO()
        getLatexText(node, buf.write, latexEscape)
        self.writer(buf.getvalue().replace('<', '$<$').replace('>', '$>$'))

    def visitNode_head(self, node):
        """
        Emit an \\author command built from the C{link rel="author"}
        elements below C{node}, then process the head normally.
        """
        authorNodes = domhelpers.findElementsWithAttribute(node, 'rel', 'author')
        authorNodes = [n for n in authorNodes if n.tagName == 'link']

        if authorNodes:
            self.writer('\\author{')
            authors = []
            for aNode in authorNodes:
                # Both values are free text from the document, so characters
                # such as '_' or '&' must be escaped before reaching LaTeX.
                name = latexEscape(aNode.getAttribute('title', ''))
                href = aNode.getAttribute('href', '')
                if href.startswith('mailto:'):
                    href = href[7:]
                if href:
                    if name:
                        name += ' '
                    name += '$<$' + latexEscape(href) + '$>$'
                if name:
                    authors.append(name)

            self.writer(' \\and '.join(authors))
            self.writer('}')

        self.visitNodeDefault(node)

    def visitNode_pre(self, node):
        """Write the text of C{node} inside a verbatim environment."""
        self.writer('\\begin{verbatim}\n')
        buf = StringIO()
        # Nothing is interpreted inside verbatim, so the escaped form of the
        # ampersand entity would show up as a literal backslash.
        verbatimEntities = dict(entities)
        verbatimEntities['amp'] = '&'
        getLatexText(node, buf.write, entities=verbatimEntities)
        self.writer(text.removeLeadingTrailingBlanks(buf.getvalue()))
        self.writer('\\end{verbatim}\n')

    def visitNode_code(self, node):
        """
        Write C{node} in typewriter type, allowing line breaks between a
        lower-case and an upper-case letter and after every '.' except one
        in the very first position.
        """
        fout = StringIO()
        getLatexText(node, fout.write, latexEscape)
        data = lowerUpperRE.sub(r'\1\\linebreak[1]\2', fout.getvalue())
        data = data[:1] + data[1:].replace('.', '.\\linebreak[1]')
        self.writer('\\texttt{'+data+'}')

    def visitNode_img(self, node):
        """
        Convert the referenced image to EPS and include it.

        The converter is the C{convert_<extension>} method; images with no
        C{src} or no matching converter are skipped.  When dia is available
        and a readable C{.dia} file sits next to the image, that is used
        instead.
        """
        src = node.getAttribute('src')
        if not src:
            # Nothing to convert; joining a missing value would raise.
            return
        fileName = os.path.join(self.currDir, src)
        target, ext = os.path.splitext(fileName)
        if self.diaHack and os.access(target + '.dia', os.R_OK):
            ext = '.dia'
            fileName = target + ext
        # Lower-cased so that e.g. '.PNG' finds convert_png.
        f = getattr(self, 'convert_' + ext[1:].lower(), None)
        if not f:
            return
        target = os.path.join(self.currDir, os.path.basename(target)+'.eps')
        f(fileName, target)
        target = os.path.basename(target)
        self._write_img(target)

    def _write_img(self, target):
        """Write LaTeX for image."""
        self.writer('\\begin{center}\\includegraphics[%%\n'
                    'width=1.0\n'
                    '\\textwidth,height=1.0\\textheight,\nkeepaspectratio]'
                    '{%s}\\end{center}\n' % target)

    def convert_png(self, src, target):
        """
        Convert the PNG file C{src} into the PostScript file C{target} with
        the netpbm tools.

        @raise OSError: if the shell pipeline exits with a non-zero status.
        """
        r = os.system('pngtopnm %s | pnmtops -noturn > %s'
                      % (self._shellQuote(src), self._shellQuote(target)))
        if r != 0:
            raise OSError(r)

    def convert_dia(self, src, target):
        """
        Export the dia file C{src} to C{target} at half its scaling.

        A modified, re-compressed copy named C{<src>_hacked.dia} is written
        next to C{src} and handed to dia.
        """
        quote = self._shellQuote
        hacked = src + '_hacked.dia'

        # EVIL DISGUSTING HACK
        pipe = os.popen('gunzip -dc %s' % quote(src))
        try:
            data = pipe.read()
        finally:
            pipe.close()
        # Tolerate any whitespace between the two XML elements instead of
        # depending on the exact indentation of the file.
        scalingRE = re.compile(
            r'(<dia:attribute name="scaling">\s*<dia:real val=")1("/>)')
        data = scalingRE.sub(r'\g<1>0.5\2', data)
        hackedFile = open(hacked, 'wb')
        try:
            hackedFile.write(data)
        finally:
            hackedFile.close()
        os.system('gzip %s' % quote(hacked))
        os.system('mv %s %s' % (quote(hacked + '.gz'), quote(hacked)))
        # Let's pretend we never saw that.

        # Silly dia needs an X server, even though it doesn't display anything.
        # If this is a problem for you, try using Xvfb.
        os.system('dia %s -n -e %s' % (quote(hacked), quote(target)))

    def visitNodeHeader(self, node):
        """
        Write an h2/h3/h4 element as a (sub)section heading: h2 maps to
        level C{baseLevel}, and each further level adds one 'sub'.
        """
        level = (int(node.tagName[1])-2)+self.baseLevel
        self.writer('\n\n\\'+level*'sub'+'section{')
        spitter = HeadingLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_listing(self, node):
        """
        Include the file named by C{href} verbatim, skipping the first
        C{skipLines} lines, followed by a caption.
        """
        fileName = os.path.join(self.currDir, node.getAttribute('href'))
        self.writer('\\begin{verbatim}\n')
        listing = open(fileName)
        try:
            lines = [line.rstrip() for line in listing.readlines()]
        finally:
            listing.close()
        lines = lines[int(node.getAttribute('skipLines', 0)):]
        self.writer(text.removeLeadingTrailingBlanks('\n'.join(lines)))
        self.writer('\\end{verbatim}')

        # Write a caption for this source listing
        fileName = os.path.basename(fileName)
        caption = domhelpers.getNodeText(node)
        if caption == fileName:
            caption = 'Source listing'
        self.writer('\\parbox[b]{\\linewidth}{\\begin{center}%s --- '
                    '\\begin{em}%s\\end{em}\\end{center}}'
                    % (latexEscape(caption), latexEscape(fileName)))

    def visitNode_a_href(self, node):
        """
        Write a link.

        Links with an http, https, ftp or mailto scheme get a footnote
        holding the URL unless the link text already is the URL.  All other
        links are treated as references into the converted documents and
        get a \\loreref to the matching label.
        """
        supported_schemes = ['http', 'https', 'ftp', 'mailto']
        href = node.getAttribute('href')
        if urlparse.urlparse(href)[0] in supported_schemes:
            linkText = domhelpers.getNodeText(node)
            self.visitNodeDefault(node)
            if linkText != href:
                self.writer('\\footnote{%s}' % latexEscape(href))
        else:
            # Split off an optional fragment; fragid is None without one.
            path, fragid = (href.split('#', 1) + [None])[:2]
            if path == '':
                path = self.filename
            else:
                path = os.path.join(os.path.dirname(self.filename), path)

            path = realpath(path)

            if fragid:
                ref = path + 'HASH' + fragid
            else:
                ref = path
            self.writer('\\textit{')
            self.visitNodeDefault(node)
            self.writer('}')
            self.writer('\\loreref{%s}' % ref)

    def visitNode_a_name(self, node):
        """Write a label of the form C{<file>HASH<name>} for a named anchor."""
        self.writer('\\label{%sHASH%s}' % (realpath(self.filename),
                                           node.getAttribute('name')))
        self.visitNodeDefault(node)

    def visitNode_table(self, node):
        """
        Write a table as a floating tabular environment, with a rule after
        every row that contains a header cell and an optional caption taken
        from the C{title} attribute.
        """
        rows = [[col for col in row.childNodes
                 if getattr(col, 'tagName', None) in ('th', 'td')]
                for row in node.childNodes
                if getattr(row, 'tagName', None) == 'tr']
        # One extra column because every cell, including the last one of a
        # row, is followed by '&'.  The trailing [0] keeps max() from
        # raising on a table without rows.
        numCols = 1 + max([len(row) for row in rows] + [0])
        self.writer('\\begin{table}[ht]\\begin{center}')
        self.writer('\\begin{tabular}{@{}'+'l'*numCols+'@{}}')
        for row in rows:
            th = 0
            for col in row:
                self.visitNode(col)
                self.writer('&')
                if col.tagName == 'th':
                    th = 1
            self.writer('\\\\\n') #\\ ends lines
            if th:
                self.writer('\\hline\n')
        self.writer('\\end{tabular}\n')
        if node.hasAttribute('title'):
            self.writer('\\caption{%s}'
                        % latexEscape(node.getAttribute('title')))
        self.writer('\\end{center}\\end{table}\n')

    def visitNode_span_footnote(self, node):
        """Write the contents of C{node} as a footnote."""
        self.writer('\\footnote{')
        spitter = FootnoteLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}')

    def visitNode_span_index(self, node):
        """Write an index entry for the C{value} attribute, then the content."""
        self.writer('\\index{%s}\n' % node.getAttribute('value'))
        self.visitNodeDefault(node)

    visitNode_h2 = visitNode_h3 = visitNode_h4 = visitNodeHeader

    start_title = '\\title{'
    end_title = '}\n'

    start_sub = '$_{'
    end_sub = '}$'

    start_sup = '$^{'
    end_sup = '}$'

    # NOTE(review): any leading whitespace on the continuation lines of this
    # literal could not be recovered from the reviewed copy; confirm against
    # the repository version.
    start_html = '''\\documentclass{article}
\\newcommand{\\loreref}[1]{%
\\ifthenelse{\\value{page}=\\pageref{#1}}%
{ (this page)}%
{ (page \\pageref{#1})}%
}'''

    start_body = '\\begin{document}\n\\maketitle\n'
    end_body = '\\end{document}'

    start_dl = '\\begin{description}\n'
    end_dl = '\\end{description}\n'
    start_ul = '\\begin{itemize}\n'
    end_ul = '\\end{itemize}\n'

    start_ol = '\\begin{enumerate}\n'
    end_ol = '\\end{enumerate}\n'

    start_li = '\\item '
    end_li = '\n'

    start_dt = '\\item['
    end_dt = ']'
    end_dd = '\n'

    start_p = '\n\n'

    start_strong = start_em = '\\begin{em}'
    end_strong = end_em = '\\end{em}'

    start_q = "``"
    end_q = "''"

    start_div_note = '\\begin{quotation}\\textbf{Note:}'
    end_div_note = '\\end{quotation}'

    start_th = '\\textbf{'
    end_th = '}'
338 | |
339 | |
class SectionLatexSpitter(LatexSpitter):
    """
    Render a document as one section of a larger LaTeX document: no
    document class or document environment is written, and the title
    becomes a labelled section heading.
    """

    baseLevel = 1

    # The document-level wrappers are blanked out.
    start_html = ''
    start_body = ''
    end_body = ''

    start_title = '\\section{'
    # Empty because visitNode_title writes the closing brace itself.
    end_title = ''

    def visitNode_title(self, node):
        """Write the title, then a label for this file and the closing brace."""
        self.visitNodeDefault(node)
        label = '\\label{' + realpath(self.filename) + '}}\n'
        self.writer(label)
352 | |
353 | |
class ChapterLatexSpitter(SectionLatexSpitter):
    """
    Variant of L{SectionLatexSpitter} whose title opens a chapter and whose
    headings start at level 0.
    """

    start_title = '\\chapter{'
    baseLevel = 0
357 | |
358 | |
class HeadingLatexSpitter(BaseLatexSpitter):
    """
    Spitter used for the contents of section headings.

    It derives from the base class, so of LatexSpitter's element handling it
    has only the quotation marks defined here and the text writer below.
    """

    start_q = "``"
    end_q = "''"

    # Reuse LatexSpitter's text writer without inheriting from it.  Taking
    # the plain function from the class dictionary yields the same object as
    # the Python 2-only im_func attribute and also works on Python 3.
    writeNodeData = LatexSpitter.__dict__['writeNodeData']
364 | |
365 | |
class FootnoteLatexSpitter(LatexSpitter):
    """
    Spitter for footnote bodies.

    The first paragraph is written without the leading paragraph break, so
    that a multi-paragraph footnote does not start with an empty paragraph.
    """

    # No paragraph break until the first paragraph has been written.
    start_p = ''

    def visitNode_p(self, node):
        """Write the paragraph, then switch back to normal paragraph breaks."""
        self.visitNodeDefault(node)
        # Set on the instance, so only this footnote is affected.
        self.start_p = LatexSpitter.start_p

    def visitNode_span_footnote(self, node):
        """Write a nested footnote's content inline, without a new footnote."""
        self.visitNodeDefault(node)
378 | |
class BookLatexSpitter(LatexSpitter):
    """
    Spitter for a book index document: it writes the book preamble and turns
    the table of contents into chapter headings and \\input commands.
    """

    def visitNode_body(self, node):
        """Reduce the body to its first C{class="toc"} node and render that."""
        tocs = domhelpers.locateNodes([node], 'class', 'toc')
        domhelpers.clearNode(node)
        if tocs:
            node.appendChild(tocs[0])
        self.visitNodeDefault(node)

    def visitNode_link(self, node):
        """Re-dispatch a link element under the name C{link_<rel>}."""
        if not node.hasAttribute('rel'):
            return self.visitNodeDefault(node)
        node.tagName += '_'+node.getAttribute('rel')
        self.visitNode(node)

    def visitNode_link_author(self, node):
        """
        Write an \\author command from the link's C{text} attribute, falling
        back to C{title} (the attribute LatexSpitter.visitNode_head reads)
        rather than writing a missing value into the document.
        """
        author = node.getAttribute('text') or node.getAttribute('title') or ''
        self.writer('\\author{%s}\n' % author)

    def visitNode_link_stylesheet(self, node):
        """Load a stylesheet of type application/x-latex as a LaTeX package."""
        if node.hasAttribute('type') and node.hasAttribute('href'):
            if node.getAttribute('type') == 'application/x-latex':
                packagename = node.getAttribute('href')
                packagebase, ext = os.path.splitext(packagename)
                self.writer('\\usepackage{%s}\n' % packagebase)

    start_html = r'''\documentclass[oneside]{book}
\usepackage{graphicx}
\usepackage{times,mathptmx}
'''

    start_body = r'''\begin{document}
\maketitle
\tableofcontents
'''

    # Lists only structure the table of contents, so they produce no markup.
    start_li = ''
    end_li = ''
    start_ul = ''
    end_ul = ''

    def visitNode_a(self, node):
        """
        Route an anchor.  A class ending in 'listing' selects the listing
        handler and any other class selects C{visitNode_a_<class>} when that
        method exists; otherwise the anchor is handled by its C{href} or
        C{name} attribute.
        """
        if node.hasAttribute('class'):
            a_class = node.getAttribute('class')
            if a_class.endswith('listing'):
                return self.visitNode_a_listing(node)
            # An unrecognised class must not abort the conversion; fall
            # through to the attribute-based handling below.
            handler = getattr(self, 'visitNode_a_%s' % a_class, None)
            if handler is not None:
                return handler(node)
        if node.hasAttribute('href'):
            return self.visitNode_a_href(node)
        if node.hasAttribute('name'):
            return self.visitNode_a_name(node)
        self.visitNodeDefault(node)

    def visitNode_a_chapter(self, node):
        """Write the anchor's content as a chapter heading."""
        self.writer('\\chapter{')
        self.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_sect(self, node):
        """Write an \\input of the C{href} target without its extension."""
        base, ext = os.path.splitext(node.getAttribute('href'))
        self.writer('\\input{%s}\n' % base)
441 | |
442 | |
443 | |
def processFile(spitter, fin):
    """
    Parse C{fin} with microdom and feed its document element to C{spitter}.

    @param spitter: an object with a C{visitNode} method.
    @param fin: the input passed to C{microdom.parse}.
    """
    document = microdom.parse(fin)
    spitter.visitNode(document.documentElement)
447 | |
448 | |
def convertFile(filename, spitterClass):
    """
    Convert the document C{filename} to LaTeX.

    The output is written next to the input, with the extension replaced by
    C{.tex}.

    @param filename: path of the input document.
    @param spitterClass: called as C{spitterClass(writer, currDir, filename)}
        to create the spitter that renders the document.
    """
    # Open the input first so that an unreadable input does not leave an
    # empty .tex file behind.
    fin = open(filename)
    try:
        fout = open(os.path.splitext(filename)[0]+".tex", 'w')
        try:
            spitter = spitterClass(fout.write, os.path.dirname(filename),
                                   filename)
            processFile(spitter, fin)
        finally:
            # Close the output even if the conversion fails part-way.
            fout.close()
    finally:
        fin.close()
OLD | NEW |