OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 '''This utility cleans up the html files as emitted by doxygen so | 7 '''This utility cleans up the html files as emitted by doxygen so |
8 that they are suitable for publication on a Google documentation site. | 8 that they are suitable for publication on a Google documentation site. |
9 ''' | 9 ''' |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... |
27 class HTMLFixer(object): | 27 class HTMLFixer(object): |
28 '''This class cleans up the html strings as produced by Doxygen | 28 '''This class cleans up the html strings as produced by Doxygen |
29 ''' | 29 ''' |
30 | 30 |
def __init__(self, html):
    '''Builds a BeautifulSoup tree from html and stores it in self.soup.

    Args:
      html: the html text to clean up.
    '''
    self.soup = BeautifulSoup(html)
33 | 33 |
def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Turning each heading row (a <tr> whose <td> holds an
        <h2><a name="..."></a>...</h2>) into a bare <h2> element placed
        directly in front of its <table>.
      - Splitting a table into multiple separate tables if a heading
        appears in the middle of it, so that every heading ends up in
        front of the rows that followed it.

    The anchor's "name" is only used to recognize heading rows; it is
    not copied onto the resulting <h2>.

    For example, this html:
      <table>
        <tr><td colspan="2"><h2><a name="pub-attribs"></a>
        Data Fields List</h2></td></tr>
        ...
      </table>

    would be converted to this:
      <h2>Data Fields List</h2>
      <table>
        ...
      </table>

    Raises:
      AssertionError: if a heading row is not a direct child of a <table>.
    '''

    def index_of(nodes, node):
        # Locate node by identity. list.index() compares with ==, and
        # BeautifulSoup 3 tags compare equal by value (name, attributes,
        # contents), so it could return a look-alike sibling's position.
        for position, candidate in enumerate(nodes):
            if candidate is node:
                return position
        raise ValueError('node not found among the given children')

    # Pass 1: rewrite every heading row in place as an <h2> element.
    headings = []
    for row in self.soup.findAll('tr'):
        if not (row.td and row.td.h2 and row.td.h2.a):
            continue
        anchor = row.td.h2.a
        # .get() rather than ['name']: an anchor without a name attribute
        # simply means this row is not a heading.
        if not anchor.get('name'):
            continue
        # The heading text is the node that follows the anchor.
        row.string = anchor.next
        row.name = 'h2'
        headings.append(row)

    # Pass 2: move each heading in front of its table. Work from the last
    # heading to the first so that splitting a table never relocates a
    # heading that has not been handled yet.
    for heading in reversed(headings):
        table = heading.parent
        if table.name != 'table':
            # Raised explicitly (not via `assert`) so the check still runs
            # under python -O.
            raise AssertionError(
                'heading %r is not a direct child of a <table>'
                % (heading.string,))

        # A heading with rows before it sits in the middle of a table:
        # move it and everything after it into a new sibling table.
        if heading.findPreviousSibling('tr'):
            container = table.parent
            new_table = Tag(self.soup, name='table', attrs=table.attrs)
            container.insert(index_of(container.contents, table) + 1,
                             new_table)
            split_at = index_of(table.contents, heading)
            # Iterate over a copy (the slice): insert() is relied on to
            # re-parent each node, which shrinks table.contents as we go.
            for offset, node in enumerate(table.contents[split_at:]):
                new_table.insert(offset, node)
            table = new_table

        # Finally move the <h2> out of the table, directly in front of it.
        container = table.parent
        container.insert(index_of(container.contents, table), heading)
73 | 87 |
def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute.

    A <div> is selected when its class attribute matches the pattern
    "header", "navpath", or "tabs" optionally followed by digits
    (for example "tabs2").
    '''
    top_heading_class = re.compile(r'^(header|tabs[0-9]*|navpath)$')
    # Plain loop: extract() is called only for its side effect, so there
    # is no point in building a throwaway list of its return values.
    for div in self.soup.findAll(name='div',
                                 attrs={'class': top_heading_class}):
        div.extract()
80 | 94 |
81 def FixAll(self): | 95 def FixAll(self): |
82 self.FixTableHeadings() | 96 self.FixTableHeadings() |
(...skipping 17 matching lines...) Expand all Loading... |
100 | 114 |
101 if not files: | 115 if not files: |
102 parser.print_usage() | 116 parser.print_usage() |
103 return 1 | 117 return 1 |
104 | 118 |
105 for filename in files: | 119 for filename in files: |
106 try: | 120 try: |
107 with open(filename, 'r') as file: | 121 with open(filename, 'r') as file: |
108 html = file.read() | 122 html = file.read() |
109 | 123 |
| 124 print "Processing %s" % filename |
110 fixer = HTMLFixer(html) | 125 fixer = HTMLFixer(html) |
111 fixer.FixAll() | 126 fixer.FixAll() |
112 with open(filename, 'w') as file: | 127 with open(filename, 'w') as file: |
113 file.write(str(fixer)) | 128 file.write(str(fixer)) |
114 if options.move: | 129 if options.move: |
115 new_directory = os.path.join( | 130 new_directory = os.path.join( |
116 os.path.dirname(os.path.dirname(filename)), 'original_html') | 131 os.path.dirname(os.path.dirname(filename)), 'original_html') |
117 if not os.path.exists(new_directory): | 132 if not os.path.exists(new_directory): |
118 os.mkdir(new_directory) | 133 os.mkdir(new_directory) |
119 shutil.move(filename, new_directory) | 134 shutil.move(filename, new_directory) |
120 except: | 135 except: |
121 print "Error while processing %s" % filename | 136 print "Error while processing %s" % filename |
122 raise | 137 raise |
123 | 138 |
124 return 0 | 139 return 0 |
125 | 140 |
# When run as a script, use the value returned by main() as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
OLD | NEW |