OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 '''This utility cleans up the html files as emitted by doxygen so | 7 '''This utility cleans up the html files as emitted by doxygen so |
8 that they are suitable for publication on a Google documentation site. | 8 that they are suitable for publication on a Google documentation site. |
9 ''' | 9 ''' |
10 | 10 |
| 11 import glob |
11 import optparse | 12 import optparse |
12 import os | 13 import os |
13 import re | 14 import re |
14 import shutil | 15 import shutil |
15 import string | |
16 import sys | 16 import sys |
17 try: | 17 try: |
18 from BeautifulSoup import BeautifulSoup, Tag | 18 from BeautifulSoup import BeautifulSoup, Tag |
19 except (ImportError, NotImplementedError): | 19 except (ImportError, NotImplementedError): |
20 print ("This tool requires the BeautifulSoup package " | 20 print ("This tool requires the BeautifulSoup package " |
21 "(see http://www.crummy.com/software/BeautifulSoup/).\n" | 21 "(see http://www.crummy.com/software/BeautifulSoup/).\n" |
22 "Make sure that the file BeautifulSoup.py is either in this directory " | 22 "Make sure that the file BeautifulSoup.py is either in this directory " |
23 "or is available in your PYTHON_PATH") | 23 "or is available in your PYTHON_PATH") |
24 raise | 24 raise |
25 | 25 |
26 | 26 |
| 27 def Trace(msg): |
| 28 if Trace.verbose: |
| 29 sys.stderr.write(str(msg) + '\n') |
| 30 |
| 31 Trace.verbose = False |
| 32 |
| 33 |
| 34 FILES_TO_REMOVE = [ |
| 35 '*.css', |
| 36 '*.map', |
| 37 '*.md5', |
| 38 'annotated.html', |
| 39 'bc_s.png', |
| 40 'classes.html', |
| 41 'closed.png', |
| 42 'doxygen.png', |
| 43 'files.html', |
| 44 'functions*.html', |
| 45 'globals_0x*.html', |
| 46 'globals_enum.html', |
| 47 'globals_eval.html', |
| 48 'globals_func.html', |
| 49 'globals.html', |
| 50 'globals_type.html', |
| 51 'globals_vars.html', |
| 52 'graph_legend.html', |
| 53 'graph_legend.png', |
| 54 'hierarchy.html', |
| 55 'index_8dox.html', |
| 56 'index.html', |
| 57 'modules.html', |
| 58 'namespacemembers_func.html', |
| 59 'namespacemembers.html', |
| 60 'namespaces.html', |
| 61 'nav_f.png', |
| 62 'nav_h.png', |
| 63 'open.png', |
| 64 'tab_a.png', |
| 65 'tab_b.png', |
| 66 'tab_h.png', |
| 67 'tab_s.png', |
| 68 ] |
| 69 |
| 70 |
27 class HTMLFixer(object): | 71 class HTMLFixer(object): |
28 '''This class cleans up the html strings as produced by Doxygen | 72 '''This class cleans up the html strings as produced by Doxygen |
29 ''' | 73 ''' |
30 | 74 |
31 def __init__(self, html): | 75 def __init__(self, html): |
32 self.soup = BeautifulSoup(html) | 76 self.soup = BeautifulSoup(html) |
33 | 77 |
34 def FixTableHeadings(self): | 78 def FixTableHeadings(self): |
35 '''Fixes the doxygen table headings. | 79 '''Fixes the doxygen table headings. |
36 | 80 |
(...skipping 22 matching lines...) Expand all Loading... |
59 if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']: | 103 if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']: |
60 #tag['id'] = tag.td.h2.a['name'] | 104 #tag['id'] = tag.td.h2.a['name'] |
61 tag.string = tag.td.h2.a.next | 105 tag.string = tag.td.h2.a.next |
62 tag.name = 'h2' | 106 tag.name = 'h2' |
63 table_headers.append(tag) | 107 table_headers.append(tag) |
64 | 108 |
65 # reverse the list so that earlier tags don't delete later tags | 109 # reverse the list so that earlier tags don't delete later tags |
66 table_headers.reverse() | 110 table_headers.reverse() |
67 # Split up tables that have multiple table header (th) rows | 111 # Split up tables that have multiple table header (th) rows |
68 for tag in table_headers: | 112 for tag in table_headers: |
69 print "Header tag: %s is %s" % (tag.name, tag.string.strip()) | 113 Trace("Header tag: %s is %s" % (tag.name, tag.string.strip())) |
70 # Is this a heading in the middle of a table? | 114 # Is this a heading in the middle of a table? |
71 if tag.findPreviousSibling('tr') and tag.parent.name == 'table': | 115 if tag.findPreviousSibling('tr') and tag.parent.name == 'table': |
72 print "Splitting Table named %s" % tag.string.strip() | 116 Trace("Splitting Table named %s" % tag.string.strip()) |
73 table = tag.parent | 117 table = tag.parent |
74 table_parent = table.parent | 118 table_parent = table.parent |
75 table_index = table_parent.contents.index(table) | 119 table_index = table_parent.contents.index(table) |
76 new_table = Tag(self.soup, name='table', attrs=table.attrs) | 120 new_table = Tag(self.soup, name='table', attrs=table.attrs) |
77 table_parent.insert(table_index + 1, new_table) | 121 table_parent.insert(table_index + 1, new_table) |
78 tag_index = table.contents.index(tag) | 122 tag_index = table.contents.index(tag) |
79 for index, row in enumerate(table.contents[tag_index:]): | 123 for index, row in enumerate(table.contents[tag_index:]): |
80 new_table.insert(index, row) | 124 new_table.insert(index, row) |
81 # Now move the <h2> tag to be in front of the <table> tag | 125 # Now move the <h2> tag to be in front of the <table> tag |
82 assert tag.parent.name == 'table' | 126 assert tag.parent.name == 'table' |
83 table = tag.parent | 127 table = tag.parent |
84 table_parent = table.parent | 128 table_parent = table.parent |
85 table_index = table_parent.contents.index(table) | 129 table_index = table_parent.contents.index(table) |
86 table_parent.insert(table_index, tag) | 130 table_parent.insert(table_index, tag) |
87 | 131 |
88 def RemoveTopHeadings(self): | 132 def RemoveTopHeadings(self): |
89 '''Removes <div> sections with a header, tabs, or navpath class attribute''' | 133 '''Removes <div> sections with a header, tabs, or navpath class attribute''' |
90 header_tags = self.soup.findAll( | 134 header_tags = self.soup.findAll( |
91 name='div', | 135 name='div', |
92 attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')}) | 136 attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')}) |
93 [tag.extract() for tag in header_tags] | 137 [tag.extract() for tag in header_tags] |
94 | 138 |
| 139 def RemoveVersionNumbers(self, html): |
| 140 '''Horrible hack to strip _#_# from struct names.''' |
| 141 return re.sub(r'(_\d_\d)(?=[": <])', '', html) |
| 142 |
95 def FixAll(self): | 143 def FixAll(self): |
96 self.FixTableHeadings() | 144 self.FixTableHeadings() |
97 self.RemoveTopHeadings() | 145 self.RemoveTopHeadings() |
98 | 146 html = str(self.soup) |
99 def __str__(self): | 147 html = self.RemoveVersionNumbers(html) |
100 return str(self.soup) | 148 return html |
101 | 149 |
102 | 150 |
103 def main(): | 151 def main(argv): |
104 '''Main entry for the doxy_cleanup utility | 152 """Main entry for the doxy_cleanup utility |
105 | 153 |
106 doxy_cleanup takes a list of html files and modifies them in place.''' | 154 doxy_cleanup cleans up the html files generated by doxygen. |
| 155 """ |
107 | 156 |
108 parser = optparse.OptionParser(usage='Usage: %prog [options] files...') | 157 parser = optparse.OptionParser(usage='Usage: %prog [options] directory') |
| 158 parser.add_option('-v', '--verbose', help='verbose output.', |
| 159 action='store_true') |
| 160 options, files = parser.parse_args(argv) |
109 | 161 |
110 parser.add_option('-m', '--move', dest='move', action='store_true', | 162 if len(files) != 1: |
111 default=False, help='move html files to "original_html"') | 163 parser.error('Expected one directory') |
112 | 164 |
113 options, files = parser.parse_args() | 165 if options.verbose: |
| 166 Trace.verbose = True |
114 | 167 |
115 if not files: | 168 root_dir = files[0] |
116 parser.print_usage() | 169 html_dir = os.path.join(root_dir, 'html') |
117 return 1 | |
118 | 170 |
119 for filename in files: | 171 # Doxygen puts all files in an 'html' directory. |
120 try: | 172 # First, move all files from that directory to root_dir. |
121 with open(filename, 'r') as file: | 173 for filename in glob.glob(os.path.join(html_dir, '*')): |
122 html = file.read() | 174 Trace('Moving %s -> %s' % (filename, root_dir)) |
| 175 shutil.move(filename, root_dir) |
123 | 176 |
124 print "Processing %s" % filename | 177 # Now remove the 'html' directory. |
125 fixer = HTMLFixer(html) | 178 Trace('Removing %s' % html_dir) |
126 fixer.FixAll() | 179 os.rmdir(html_dir) |
127 with open(filename, 'w') as file: | 180 |
128 file.write(str(fixer)) | 181 # Then remove unneeded files. |
129 if options.move: | 182 for wildcard in FILES_TO_REMOVE: |
130 new_directory = os.path.join( | 183 Trace('Removing "%s":' % wildcard) |
131 os.path.dirname(os.path.dirname(filename)), 'original_html') | 184 path = os.path.join(root_dir, wildcard) |
132 if not os.path.exists(new_directory): | 185 for filename in glob.glob(path): |
133 os.mkdir(new_directory) | 186 Trace(' Removing "%s"' % filename) |
134 shutil.move(filename, new_directory) | 187 os.remove(filename) |
135 except: | 188 |
136 print "Error while processing %s" % filename | 189 # Now, fix the HTML files we've kept. |
137 raise | 190 Trace('Fixing HTML files...') |
| 191 for root, _, files in os.walk(root_dir): |
| 192 for filename in files: |
| 193 if not os.path.splitext(filename)[1] == '.html': |
| 194 Trace('Skipping %s' % filename) |
| 195 continue |
| 196 |
| 197 filename = os.path.join(root, filename) |
| 198 Trace('Processing "%s"...' % filename) |
| 199 try: |
| 200 with open(filename) as f: |
| 201 html = f.read() |
| 202 |
| 203 fixer = HTMLFixer(html) |
| 204 output = fixer.FixAll() |
| 205 with open(filename, 'w') as f: |
| 206 f.write(output) |
| 207 except: |
| 208 sys.stderr.write("Error while processing %s\n" % filename) |
| 209 raise |
138 | 210 |
139 return 0 | 211 return 0 |
140 | 212 |
141 if __name__ == '__main__': | 213 if __name__ == '__main__': |
142 sys.exit(main()) | 214 try: |
| 215 rtn = main(sys.argv[1:]) |
| 216 except KeyboardInterrupt: |
| 217 sys.stderr.write('%s: interrupted\n' % os.path.basename(__file__)) |
| 218 rtn = 1 |
| 219 sys.exit(rtn) |
OLD | NEW |