|
OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 | |
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
dmichael (off chromium)
2012/01/09 20:18:27
2012
jond
2012/01/10 17:43:00
Done.
| |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 | |
7 '''This utility cleans up the html files as emitted by doxygen so | |
8 that they are suitable for publication on a Google documentation site. | |
9 ''' | |
dmichael (off chromium)
2012/01/09 20:18:27
Didn't we already have a script for this? Why is it sho
jond
2012/01/10 17:43:00
Right now I need to get our docs for Pepper 16 out
dmichael (off chromium)
2012/01/12 20:19:27
we just discussed via IM... this is an exact copy
| |
10 | |
11 import optparse | |
12 import os | |
13 import re | |
14 import shutil | |
15 import string | |
16 import sys | |
17 try: | |
18 from BeautifulSoup import BeautifulSoup, Tag | |
19 except (ImportError, NotImplementedError): | |
20 print ("This tool requires the BeautifulSoup package " | |
21 "(see http://www.crummy.com/software/BeautifulSoup/).\n" | |
22 "Make sure that the file BeautifulSoup.py is either in this directory " | |
23 "or is available in your PYTHON_PATH") | |
24 raise | |
25 | |
26 | |
class HTMLFixer(object):
    '''Cleans up the html strings as produced by Doxygen.

    The html is parsed once in the constructor. The Fix*/Remove* methods then
    edit the parse tree in place, and str() on the fixer returns the html of
    the (possibly modified) tree.
    '''

    def __init__(self, html):
        '''Parses |html|, a string of html, into self.soup.'''
        self.soup = BeautifulSoup(html)

    def FixTableHeadings(self):
        '''Fixes the doxygen table headings.

        This includes:
          - Using bare <h2> title row instead of row embedded in <tr><td> in
            table
          - Splitting up tables into multiple separate tables if a table
            heading appears in the middle of a table.

        The anchor's "name" attribute is only used to recognize a heading
        row; it is not copied into an "id" attribute.

        For example, this html:
         <table>
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>
          ...
         </table>

        would be converted to this:
         <h2>Data Fields List</h2>
         <table>
          ...
         </table>
        '''

        table_headers = []
        for tag in self.soup.findAll('tr'):
            # Use get() for the attribute lookup: an <a> without a "name"
            # attribute is simply not a heading row, whereas indexing with
            # ['name'] would raise KeyError and abort the whole file.
            if (tag.td and tag.td.h2 and tag.td.h2.a
                    and tag.td.h2.a.get('name')):
                # Replace the row's contents with the node that follows the
                # anchor (the heading text in the example above) and turn the
                # <tr> itself into the <h2>.
                tag.string = tag.td.h2.a.next
                tag.name = 'h2'
                table_headers.append(tag)

        # reverse the list so that earlier tags don't delete later tags
        table_headers.reverse()
        # Split up tables that have multiple table header (th) rows
        for tag in table_headers:
            heading_text = tag.string.strip()
            print("Header tag: %s is %s" % (tag.name, heading_text))
            # Is this a heading in the middle of a table?
            if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
                print("Splitting Table named %s" % heading_text)
                table = tag.parent
                table_parent = table.parent
                table_index = table_parent.contents.index(table)
                new_table = Tag(self.soup, name='table', attrs=table.attrs)
                table_parent.insert(table_index + 1, new_table)
                tag_index = table.contents.index(tag)
                # Iterate over a slice (a copy): inserting a row into
                # new_table changes table.contents while we loop.
                for index, row in enumerate(table.contents[tag_index:]):
                    new_table.insert(index, row)
            # Now move the <h2> tag to be in front of the <table> tag
            assert tag.parent.name == 'table'
            table = tag.parent
            table_parent = table.parent
            table_index = table_parent.contents.index(table)
            table_parent.insert(table_index, tag)

    def RemoveTopHeadings(self):
        '''Removes <div> sections with a header, tabs, or navpath class
        attribute.'''
        header_tags = self.soup.findAll(
            name='div',
            attrs={'class': re.compile('^(header|tabs[0-9]*|navpath)$')})
        for tag in header_tags:
            tag.extract()

    def FixAll(self):
        '''Applies every cleanup: table headings first, then top headings.'''
        self.FixTableHeadings()
        self.RemoveTopHeadings()

    def __str__(self):
        '''Returns str() of the current parse tree.'''
        return str(self.soup)
101 | |
102 | |
def main():
    '''Main entry for the doxy_cleanup utility

    doxy_cleanup takes a list of html files and modifies them in place.

    Command line:
      files...     html files to clean up.
      -m, --move   after cleaning a file, move it into an "original_html"
                   directory that is a sibling of the file's own directory.

    Returns:
      1 if no files were given on the command line (usage is printed),
      0 once every file has been processed.

    Raises:
      Any exception hit while processing a file is re-raised after the
      offending filename has been printed.
    '''

    parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

    parser.add_option('-m', '--move', dest='move', action='store_true',
                      default=False, help='move html files to "original_html"')

    options, files = parser.parse_args()

    if not files:
        parser.print_usage()
        return 1

    for filename in files:
        try:
            with open(filename, 'r') as html_file:
                html = html_file.read()

            print("Processing %s" % filename)
            fixer = HTMLFixer(html)
            fixer.FixAll()
            # Render the result before reopening the file: open(..., 'w')
            # truncates immediately, so a failure while rendering must not be
            # able to leave the input file empty.
            cleaned_html = str(fixer)
            with open(filename, 'w') as html_file:
                html_file.write(cleaned_html)
            if options.move:
                new_directory = os.path.join(
                    os.path.dirname(os.path.dirname(filename)),
                    'original_html')
                if not os.path.exists(new_directory):
                    os.mkdir(new_directory)
                shutil.move(filename, new_directory)
        except Exception:
            # Report which file failed, then let the error propagate.
            print("Error while processing %s" % filename)
            raise

    return 0
140 | |
# Only run the cleanup when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
OLD | NEW |