OLD | NEW |
| (Empty) |
1 # -*- test-case-name: twisted.lore.test.test_man2lore -*- | |
2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories. | |
3 # See LICENSE for details. | |
4 | |
5 | |
6 """ | |
7 man2lore: Converts man page source (i.e. groff) into lore-compatible html. | |
8 | |
9 This is nasty and hackish (and doesn't support lots of real groff), but is good | |
10 enough for converting fairly simple man pages. | |
11 """ | |
12 | |
13 import re, os | |
14 | |
# Matches the shortest run of text enclosed in a pair of double quotes; the
# enclosed text is captured as group 1.
quoteRE = re.compile('"(.*?)"')



def escape(text):
    """
    Make a piece of man page text safe to embed in the generated HTML.

    The markup-significant characters C{&}, C{<} and C{>} are replaced by
    their character entities, then every C{"..."} pair is turned into a
    C{<q>} element.  The entity replacement has to happen first, otherwise
    the angle brackets of the inserted C{<q>} tags would be escaped as well.

    @param text: The raw text to escape.
    @type text: C{str}

    @return: The escaped text.
    @rtype: C{str}
    """
    # '&' must be handled before '<' and '>' so that the ampersands of the
    # entities produced below are not escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;').replace('>', '&gt;')
    text = quoteRE.sub('<q>\\1</q>', text)
    return text
23 | |
24 | |
25 | |
def stripQuotes(s):
    """
    Remove one pair of surrounding double quotes from C{s}, if present.

    The string is returned unchanged when it is empty, when it is a single
    character, or when it does not both start and end with C{"}.

    @param s: The string to unquote.
    @type s: C{str}

    @return: C{s} without its enclosing double quotes.
    @rtype: C{str}
    """
    # The length check keeps the empty string from raising IndexError and
    # prevents a lone '"' from being treated as an (empty) quoted string.
    if len(s) >= 2 and s[0] == s[-1] == '"':
        return s[1:-1]
    return s
30 | |
31 | |
32 | |
class ManConverter(object):
    """
    Convert a man page to the Lore format.

    Input lines starting with C{.} are dispatched to the C{macro_XX} method
    named after the (upper-cased) one or two characters following the dot;
    lines using a macro without such a method are dropped.  Every other line
    is emitted as text by L{text}.

    @ivar state: Name of the inline HTML element currently open because of a
        font escape (C{'em'}, C{'strong'} or C{'u'}), or C{'regular'} when no
        such element is open.
    @type state: C{str}

    @ivar name: The argument of the first C{Nm} macro that carried one,
        reused by later C{Nm} macros; C{None} until then.

    @ivar tp: State variable for handling text inside a definition list.  It
        can take values from 0 to 3:
          - 0: when outside of a definition list item.
          - 1: once a C{TP}, C{Bl} or C{It} token has been encountered.  The
            next line of text is emitted as a definition term.
          - 2: when the definition term has been emitted and closed.  The
            next line of text starts the definition data.
          - 3: when the first line of definition data has been handled.
    @type tp: C{int}

    @ivar dl: 1 while a C{<dl>} element opened by L{macro_TP} is waiting to
        be closed by L{closeTags}, 0 otherwise.
    @type dl: C{int}

    @ivar para: 1 while a C{<p>} element is considered open, 0 otherwise.
    @type para: C{int}
    """
    state = 'regular'
    name = None
    tp = 0
    dl = 0
    para = 0

    # Maps the font escapes understood by text() (without their leading
    # backslash) to the HTML element that renders them.
    _fontTags = {'fI': 'em', 'fB': 'strong', 'fU': 'u'}


    def convert(self, inf, outf):
        """
        Read a man page from C{inf} and write its conversion to C{outf}.

        Lines ending with a backslash are joined with the line that follows
        them before being processed.

        @param inf: The input; an object with a C{readlines} method.
        @param outf: The output; an object with a C{write} method.
        """
        self.write = outf.write
        pending = ''
        for line in inf.readlines():
            stripped = line.rstrip()
            if stripped.endswith('\\'):
                # Continuation: keep accumulating until a line that does not
                # end with a backslash shows up.
                pending += stripped[:-1] + ' '
                continue
            if pending:
                line = pending + line
                pending = ''
            self.lineReceived(line)
        if pending:
            # The input ended on a continuation line; process what was
            # accumulated instead of silently dropping it.
            self.lineReceived(pending)
        self.closeTags()
        self.write('</body>\n</html>\n')


    def lineReceived(self, line):
        """
        Process one (already joined) input line.

        A line starting with C{.} is handed to the matching C{macro_XX}
        method with the rest of the line, stripped, as argument; it is
        ignored when no such method exists.  Any other line is text.

        @param line: The line to process.
        @type line: C{str}
        """
        if not line:
            # Nothing to dispatch; also avoids indexing an empty string.
            return
        if line[0] == '.':
            handler = getattr(
                self, 'macro_' + line[1:3].rstrip().upper(), None)
            if handler:
                handler(line[3:].strip())
        else:
            self.text(line)


    def continueReceived(self, cont):
        """
        Process the remainder of a macro line.

        If C{cont} starts with an upper case letter, its first two characters
        are taken as a macro name and dispatched like in L{lineReceived}
        (unknown names are ignored).  Otherwise C{cont} is emitted as text.

        @param cont: The remaining part of the line; may be empty.
        @type cont: C{str}
        """
        if not cont:
            return
        if cont[0].isupper():
            handler = getattr(
                self, 'macro_' + cont[:2].rstrip().upper(), None)
            if handler:
                handler(cont[2:].strip())
        else:
            self.text(cont)


    def _closeFont(self):
        """
        Close the inline element opened by the last font escape, if any, and
        go back to the C{'regular'} state.
        """
        if self.state != 'regular':
            self.write('</%s>' % self.state)
            self.state = 'regular'


    def _openFont(self, tag):
        """
        Open the inline element C{tag}, closing the previously opened one
        first so that font changes never leave an unclosed element behind.

        @param tag: Name of the HTML element to open.
        @type tag: C{str}
        """
        self._closeFont()
        self.write('<%s>' % tag)
        self.state = tag


    def closeTags(self):
        """
        Close every element that is currently open: the font element, the
        definition data, the definition list and the paragraph.

        All the related state is reset, so calling this method twice in a row
        writes nothing the second time.
        """
        self._closeFont()
        if self.tp == 3:
            self.write('</dd>\n\n')
            self.tp = 0
        if self.dl:
            self.write('</dl>\n\n')
            self.dl = 0
            # The list is gone: forget a pending term/definition so that the
            # next text line does not emit <dt>/<dd> outside of any list.
            self.tp = 0
        if self.para:
            self.write('</p>\n\n')
            self.para = 0


    def paraCheck(self):
        """
        Open a paragraph unless one is already open or a definition list item
        is being handled.
        """
        if not self.tp and not self.para:
            self.write('<p>')
            self.para = 1


    def macro_TH(self, line):
        """
        Handle the title macro: write the HTML head and the main heading,
        both built as C{title.section}.

        @param line: The macro arguments; the first two whitespace-separated
            words are the title and the manual section.  Missing ones default
            to the empty string.
        @type line: C{str}
        """
        self.write('<html><head>\n')
        # Splitting on any whitespace (rather than on single spaces) never
        # yields empty fields, whatever the spacing of the arguments is.
        parts = [stripQuotes(x) for x in line.split(None, 2)] + ['', '']
        title, manSection = parts[:2]
        self.write('<title>%s.%s</title>' % (title, manSection))
        self.write('</head>\n<body>\n\n')
        self.write('<h1>%s.%s</h1>\n\n' % (title, manSection))

    macro_DT = macro_TH


    def macro_SH(self, line):
        """
        Handle a section heading: close whatever is open and write the
        heading as an C{<h2>} element.

        @param line: The heading, optionally enclosed in double quotes.
        @type line: C{str}
        """
        self.closeTags()
        self.write('<h2>')
        # Pretend a paragraph is open so that text() does not start one
        # inside the heading.
        self.para = 1
        if line:
            line = stripQuotes(line)
        self.text(line)
        self.para = 0
        self.closeTags()
        self.write('</h2>\n\n')


    def macro_B(self, line):
        """
        Handle the bold macro: the first word of C{line} is emitted in bold,
        the remaining words as regular text.  Nothing is written when the
        macro has no argument.

        @type line: C{str}
        """
        words = line.split()
        if not words:
            return
        words[0] = '\\fB' + words[0] + '\\fR '
        self.text(' '.join(words))


    def macro_NM(self, line):
        """
        Handle the name macro: remember the first non-empty argument and emit
        the remembered name followed by a space.

        @type line: C{str}
        """
        if not self.name:
            self.name = line
        self.text(self.name + ' ')


    def macro_NS(self, line):
        """
        Handle the no-space macro: C{line} is cut at every C{' Ns '}; the
        pieces are alternately emitted as text and processed as macro
        continuations, starting with text.

        @type line: C{str}
        """
        for index, part in enumerate(line.split(' Ns ')):
            if index % 2:
                self.continueReceived(part)
            else:
                self.text(part)


    def macro_OO(self, line):
        """
        Handle the option-open macro: emit C{[} then process C{line}.
        """
        self.text('[')
        self.continueReceived(line)


    def macro_OC(self, line):
        """
        Handle the option-close macro: emit C{]} then process C{line}.
        """
        self.text(']')
        self.continueReceived(line)


    def macro_OP(self, line):
        """
        Handle the option macro: process C{line} enclosed in square brackets.
        """
        self.text('[')
        self.continueReceived(line)
        self.text(']')


    def macro_FL(self, line):
        """
        Handle the flag macro: the first word of C{line} is emitted in bold
        with a leading dash, the rest is processed as a macro continuation.
        Without any argument a lone dash is emitted.

        @type line: C{str}
        """
        parts = line.split()
        flag = ''
        if parts:
            flag = parts[0]
        self.text('\\fB-%s\\fR' % flag)
        self.continueReceived(' '.join(parts[1:]))


    def macro_AR(self, line):
        """
        Handle the argument macro: the first word of C{line} is emitted with
        emphasis, the rest is processed as a macro continuation.  Without any
        argument the mdoc default, C{file ...}, is emitted.

        @type line: C{str}
        """
        parts = line.split()
        argument = 'file ...'
        if parts:
            argument = parts[0]
        self.text('\\fI %s\\fR' % argument)
        self.continueReceived(' '.join(parts[1:]))


    def macro_PP(self, line):
        """
        Handle a paragraph break: close whatever is open.
        """
        self.closeTags()


    def macro_IC(self, line):
        """
        Handle the interactive command macro.

        The first word of C{line} is the command.  Among the following words,
        C{Ar X} emits C{X} underlined and C{Op Y X} emits C{X} underlined
        between square brackets (C{Y}, the macro name following C{Op}, is
        skipped); everything else is ignored, as are C{Ar} and C{Op} tokens
        lacking their operands.

        @type line: C{str}
        """
        cmd = line.split(' ', 1)[0]
        args = line[len(cmd):].split(' ')
        text = cmd
        while args:
            arg = args.pop(0).lower()
            if arg == 'ar' and args:
                text += " \\fU%s\\fR" % (args.pop(0),)
            elif arg == 'op' and len(args) >= 2:
                # Skip the macro name that follows Op.
                args.pop(0)
                text += " [\\fU%s\\fR]" % (args.pop(0),)

        self.text(text)


    def macro_TP(self, line):
        """
        Handle C{TP} token: close the previous definition data if there is
        one, otherwise start a definition list.  In both cases the next line
        of text becomes a definition term.
        """
        if self.tp == 3:
            self.write('</dd>\n\n')
            self.tp = 1
        else:
            self.tp = 1
            self.write('<dl>')
            self.dl = 1


    def macro_BL(self, line):
        """
        Handle the begin-list macro: start a definition list.  The list is
        closed by L{macro_EL}, not by L{closeTags}.
        """
        self.write('<dl>')
        self.tp = 1


    def macro_EL(self, line):
        """
        Handle the end-list macro: close the definition data if needed, then
        the definition list.
        """
        if self.tp == 3:
            self.write('</dd>')
        self.write('</dl>\n\n')
        self.tp = 0


    def macro_IT(self, line):
        """
        Handle the list item macro: close the previous definition data if
        there is one, then process C{line} as a macro continuation.
        """
        if self.tp == 3:
            self.write('</dd>')
            self.tp = 1
        self.continueReceived(line)


    def text(self, line):
        """
        Handle a line of text without detected token.

        Depending on L{tp} the line is emitted as a definition term, as
        definition data or as paragraph text.  The groff escapes C{\\fI},
        C{\\fB}, C{\\fR} and C{\\(co} are translated to HTML, as is the
        internal C{\\fU} escape; the backslash of any other escape is simply
        dropped.

        @type line: C{str}
        """
        if self.tp == 1:
            self.write('<dt>')
        if self.tp == 2:
            self.write('<dd>')
        self.paraCheck()

        bits = line.split('\\')
        self.write(escape(bits[0]))
        for bit in bits[1:]:
            font = bit[:2]
            if font in self._fontTags:
                # fU doesn't really exist, but it helps us to manage
                # underlined text.
                self._openFont(self._fontTags[font])
                self.write(escape(bit[2:]))
            elif font == 'fR':
                # Only closes something when a font element is open.
                self._closeFont()
                self.write(escape(bit[2:]))
            elif bit[:3] == '(co':
                # Use the entity so that the output stays pure ASCII.
                self.write('&copy;' + escape(bit[3:]))
            else:
                self.write(escape(bit))

        if self.tp == 1:
            self.write('</dt>')
            self.tp = 2
        elif self.tp == 2:
            self.tp = 3
274 | |
275 | |
276 | |
class ProcessingFunctionFactory:
    """
    Factory of functions converting man page files with L{ManConverter}.
    """

    def generate_lore(self, d, filenameGenerator=None):
        """
        Build a function converting one man page file.

        @param d: Options; the value stored under C{'ext'} is the extension
            given to the generated file, C{'.html'} when the key is missing.
        @type d: C{dict}

        @param filenameGenerator: Accepted but not used.

        @return: A function taking the path of a man page and a second,
            ignored, argument.  It writes the conversion next to the input,
            in a file named after it with its extension replaced.
        """
        ext = d.get('ext', '.html')

        def processFile(file, _):
            outputName = os.path.splitext(file)[0] + ext
            # Both files are closed as soon as the conversion is over, even
            # when it fails; the input is opened first so that no output
            # file is created for an unreadable input.
            with open(file) as inf:
                with open(outputName, 'w') as outf:
                    return ManConverter().convert(inf, outf)

        return processFile
283 | |
284 | |
285 | |
# Module-level factory instance.
factory = ProcessingFunctionFactory()


if __name__ == '__main__':
    # Script usage: convert the man page named by the first command line
    # argument and print the result on standard output.
    import sys
    with open(sys.argv[1]) as manPage:
        ManConverter().convert(manPage, sys.stdout)
OLD | NEW |