| OLD | NEW |
| (Empty) |
| 1 # -*- test-case-name: twisted.lore.test.test_man2lore -*- | |
| 2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories. | |
| 3 # See LICENSE for details. | |
| 4 | |
| 5 | |
| 6 """ | |
| 7 man2lore: Converts man page source (i.e. groff) into lore-compatible html. | |
| 8 | |
| 9 This is nasty and hackish (and doesn't support lots of real groff), but is good | |
| 10 enough for converting fairly simple man pages. | |
| 11 """ | |
| 12 | |
| 13 import re, os | |
| 14 | |
quoteRE = re.compile('"(.*?)"')



def escape(text):
    """
    Prepare a piece of man page text for inclusion in the HTML output.

    Angle brackets are replaced by the C{&lt;} and C{&gt;} entities, then
    every double-quoted run is wrapped in a C{<q>} element. Ampersands are
    left untouched.

    @param text: the text to escape.
    @type text: C{str}

    @return: the escaped text.
    @rtype: C{str}
    """
    # The brackets must be escaped before the <q> tags are inserted, otherwise
    # the markup generated here would be escaped as well.
    text = text.replace('<', '&lt;').replace('>', '&gt;')
    text = quoteRE.sub('<q>\\1</q>', text)
    return text
| 23 | |
| 24 | |
| 25 | |
def stripQuotes(s):
    """
    Remove the double quotes enclosing C{s}, if there are any.

    @param s: the text to unquote; it may be empty.
    @type s: C{str}

    @return: C{s} without its first and last characters when both of them
        are double quotes, C{s} unchanged otherwise.
    @rtype: C{str}
    """
    # An empty string has no first character to look at: return it as is
    # instead of raising IndexError.
    if s and s[0] == s[-1] == '"':
        s = s[1:-1]
    return s
| 30 | |
| 31 | |
| 32 | |
class ManConverter(object):
    """
    Convert a man page to the Lore format.

    @ivar state: Name of the element opened by the last font escape
        (C{'em'}, C{'strong'} or C{'u'}), or C{'regular'} when none is open.
    @type state: C{str}

    @ivar name: The first non-empty argument given to a C{Nm} macro, or
        C{None} if there has not been one yet.

    @ivar tp: State variable for handling text inside a C{TP} token. It can
        take values from 0 to 3:
            - 0: when outside of a C{TP} token.
            - 1: once a C{TP} token has been encountered. If the previous value
              was 0, a definition list is started. Then, at the first line of
              text, a definition term is started.
            - 2: when the first line after the C{TP} token has been handled.
              The definition term is closed, and a definition is started with
              the next line of text.
            - 3: when the first line as definition data has been handled.
    @type tp: C{int}

    @ivar dl: Set to 1 when a C{TP} token opens a definition list, and back
        to 0 once L{closeTags} has closed it.
    @type dl: C{int}

    @ivar para: 1 while a paragraph is considered open, 0 otherwise.
    @type para: C{int}
    """
    state = 'regular'
    name = None
    tp = 0
    dl = 0
    para = 0

    # Font escapes understood by text(), mapped to the element they open.
    # fU doesn't really exist, but it helps us to manage underlined text.
    _fontTags = {'fI': 'em', 'fB': 'strong', 'fU': 'u'}

    def convert(self, inf, outf):
        """
        Convert the man page read from C{inf} and write the result to C{outf}.

        Lines ending with a backslash are joined with the following line
        before being handled.

        @param inf: the man page source; only its C{readlines} method is used.
        @param outf: the destination; only its C{write} method is used.
        """
        self.write = outf.write
        longline = ''
        for line in inf.readlines():
            stripped = line.rstrip()
            if stripped.endswith('\\'):
                longline += stripped[:-1] + ' '
                continue
            if longline:
                line = longline + line
                longline = ''
            self.lineReceived(line)
        if longline:
            # The input ended on a continuation line: handle what has been
            # collected so far instead of silently dropping it.
            self.lineReceived(longline)
        self.closeTags()
        self.write('</body>\n</html>\n')


    def lineReceived(self, line):
        """
        Handle one logical line of the man page.

        A line starting with a dot is a macro call: the (at most) two
        characters following the dot select the C{macro_XX} method, which
        receives the rest of the line. Unknown macros are ignored. Any other
        line is handled as text.

        @param line: the line to handle; it may be empty.
        @type line: C{str}
        """
        if line.startswith('.'):
            f = getattr(self, 'macro_' + line[1:3].rstrip().upper(), None)
            if f:
                f(line[3:].strip())
        else:
            self.text(line)


    def continueReceived(self, cont):
        """
        Handle the arguments of a macro, which may themselves start with a
        macro name.

        If C{cont} starts with an upper case letter, its first two characters
        select a C{macro_XX} method which receives the remainder; nothing is
        written when there is no such method. Otherwise C{cont} is handled as
        text.

        @param cont: the remainder of a macro line; ignored when empty.
        @type cont: C{str}
        """
        if not cont:
            return
        if cont[0].isupper():
            f = getattr(self, 'macro_' + cont[:2].rstrip().upper(), None)
            if f:
                f(cont[2:].strip())
        else:
            self.text(cont)


    def _closeFont(self):
        """
        Close the element opened by the last font escape, if there is one.
        """
        if self.state != 'regular':
            self.write('</%s>' % self.state)
            # Remember that the element is closed, so that it cannot be
            # closed a second time.
            self.state = 'regular'


    def closeTags(self):
        """
        Close every element left open: the current font element, the
        definition data, the definition list and the paragraph.
        """
        self._closeFont()
        if self.tp == 3:
            self.write('</dd>\n\n')
            self.tp = 0
        if self.dl:
            self.write('</dl>\n\n')
            self.dl = 0
        if self.para:
            self.write('</p>\n\n')
            self.para = 0


    def paraCheck(self):
        """
        Open a paragraph, unless one is already open or a C{TP} token is
        being handled.
        """
        if not self.tp and not self.para:
            self.write('<p>')
            self.para = 1


    def macro_TH(self, line):
        """
        Write the document head and main heading from the title and the
        section found in the first two arguments of C{line}.
        """
        self.write('<html><head>\n')
        # Split on runs of whitespace so that no argument is ever empty; the
        # padding guarantees there are two values to unpack.
        parts = [stripQuotes(x) for x in line.split(None, 2)] + ['', '']
        title, manSection = parts[:2]
        self.write('<title>%s.%s</title>' % (title, manSection))
        self.write('</head>\n<body>\n\n')
        self.write('<h1>%s.%s</h1>\n\n' % (title, manSection))

    macro_DT = macro_TH


    def macro_SH(self, line):
        """
        Close the pending elements and write C{line} as a section heading.
        """
        self.closeTags()
        self.write('<h2>')
        # Pretend a paragraph is open so that text() does not start one
        # inside of the heading.
        self.para = 1
        if line:
            # An empty title needs no unquoting.
            line = stripQuotes(line)
        self.text(line)
        self.para = 0
        self.closeTags()
        self.write('</h2>\n\n')


    def macro_B(self, line):
        """
        Write C{line} with its first word in bold. A call without any
        argument is ignored.
        """
        words = line.split()
        if not words:
            return
        words[0] = '\\fB' + words[0] + '\\fR '
        self.text(' '.join(words))


    def macro_NM(self, line):
        """
        Write the name of the program, remembering it from the first call
        which supplies one.
        """
        if not self.name:
            self.name = line
        self.text(self.name + ' ')


    def macro_NS(self, line):
        """
        Handle the pieces of C{line} separated by C{' Ns '}: pieces at even
        positions are written as text, the others are handled as macro
        arguments.
        """
        for position, part in enumerate(line.split(' Ns ')):
            if position % 2 == 0:
                self.text(part)
            else:
                self.continueReceived(part)


    def macro_OO(self, line):
        """
        Write an opening square bracket, then handle C{line}.
        """
        self.text('[')
        self.continueReceived(line)


    def macro_OC(self, line):
        """
        Write a closing square bracket, then handle C{line}.
        """
        self.text(']')
        self.continueReceived(line)


    def macro_OP(self, line):
        """
        Handle C{line} enclosed in square brackets.
        """
        self.text('[')
        self.continueReceived(line)
        self.text(']')


    def macro_FL(self, line):
        """
        Write the first word of C{line} as a command line flag, in bold and
        preceded by a dash, then handle the remaining words.
        """
        parts = line.split()
        if not parts:
            # mdoc prints a lone dash when Fl is given no argument.
            self.text('\\fB-\\fR')
            return
        self.text('\\fB-%s\\fR' % parts[0])
        self.continueReceived(' '.join(parts[1:]))


    def macro_AR(self, line):
        """
        Write the first word of C{line} as a command line argument, in
        italic, then handle the remaining words.
        """
        parts = line.split()
        if not parts:
            # mdoc uses "file ..." when Ar is given no argument.
            self.text('\\fI file ...\\fR')
            return
        self.text('\\fI %s\\fR' % parts[0])
        self.continueReceived(' '.join(parts[1:]))


    def macro_PP(self, line):
        """
        Start a new paragraph by closing the pending elements.
        """
        self.closeTags()


    def macro_IC(self, line):
        """
        Write an interactive command: its name, followed by its underlined
        arguments.

        The word following an C{Ar} keyword is underlined. An C{Op} keyword
        skips one word and underlines the next one between square brackets.
        Keywords are matched case-insensitively, other words are dropped, and
        a keyword lacking its operand is ignored.
        """
        cmd = line.split(' ', 1)[0]
        words = iter(line[len(cmd):].split(' '))
        text = cmd
        for word in words:
            keyword = word.lower()
            if keyword == "ar":
                operand = next(words, None)
                if operand is not None:
                    text += " \\fU%s\\fR" % (operand,)
            elif keyword == "op":
                # The word right after Op is skipped; only the one following
                # it is written.
                next(words, None)
                operand = next(words, None)
                if operand is not None:
                    text += " [\\fU%s\\fR]" % (operand,)

        self.text(text)


    def macro_TP(self, line):
        """
        Handle C{TP} token: start a definition list if it's first token, or
        close previous definition data.
        """
        if self.tp == 3:
            self.write('</dd>\n\n')
            self.tp = 1
        else:
            self.tp = 1
            self.write('<dl>')
            self.dl = 1


    def macro_BL(self, line):
        """
        Start a definition list; the next line of text becomes a term.
        """
        self.write('<dl>')
        self.tp = 1


    def macro_EL(self, line):
        """
        Close the pending definition data, if any, then the definition list.
        """
        if self.tp == 3:
            self.write('</dd>')
        self.write('</dl>\n\n')
        self.tp = 0


    def macro_IT(self, line):
        """
        Close the pending definition data, if any, then handle C{line} as
        the next definition term.
        """
        if self.tp == 3:
            self.write('</dd>')
            self.tp = 1
        self.continueReceived(line)


    def text(self, line):
        """
        Handle a line of text without detected token.

        The C{\\fI}, C{\\fB} and C{\\fU} escapes open an element, C{\\fR}
        closes the one which is open, and C{\\(co} is written as the
        copyright sign entity.
        """
        if self.tp == 1:
            self.write('<dt>')
        if self.tp == 2:
            self.write('<dd>')
        self.paraCheck()

        bits = line.split('\\')
        self.write(escape(bits[0]))
        for bit in bits[1:]:
            tag = self._fontTags.get(bit[:2])
            if tag is not None:
                # Fonts do not nest: close the previous element before
                # opening the new one, to keep the output well-formed.
                self._closeFont()
                self.write('<%s>%s' % (tag, escape(bit[2:])))
                self.state = tag
            elif bit[:2] == 'fR':
                # Nothing is closed if no font element is open.
                self._closeFont()
                self.write(escape(bit[2:]))
            elif bit[:3] == '(co':
                self.write('&copy;' + escape(bit[3:]))
            else:
                self.write(escape(bit))

        if self.tp == 1:
            self.write('</dt>')
            self.tp = 2
        elif self.tp == 2:
            self.tp = 3
| 274 | |
| 275 | |
| 276 | |
class ProcessingFunctionFactory:
    """
    Factory of functions converting man page files with L{ManConverter}.
    """

    def generate_lore(self, d, filenameGenerator=None):
        """
        Build a function converting a man page file to a Lore document.

        @param d: the options; the C{'ext'} key gives the extension of the
            generated file and defaults to C{'.html'}.
        @param filenameGenerator: unused.

        @return: a callable taking two arguments. The first one is the path
            of the man page to convert and the second one is ignored. The
            result is written to the same path, with its extension replaced.
        """
        ext = d.get('ext', '.html')

        def processFile(file, _):
            outputName = os.path.splitext(file)[0] + ext
            # Close both files as soon as the conversion is over, even if it
            # fails, rather than relying on garbage collection.
            with open(file) as inf:
                with open(outputName, 'w') as outf:
                    return ManConverter().convert(inf, outf)

        return processFile
| 283 | |
| 284 | |
| 285 | |
# Shared module-level instance of the factory.
factory = ProcessingFunctionFactory()


if __name__ == '__main__':
    # Run as a script: convert the man page named by the first command line
    # argument and write the result to the standard output.
    import sys
    with open(sys.argv[1]) as inf:
        ManConverter().convert(inf, sys.stdout)
| OLD | NEW |