OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 | |
3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 | |
7 '''This utility converts the html files as emitted by doxygen into ezt files | |
8 that are suitable for inclusion into Google code site. | |
9 | |
10 EZT stands for "EaZy Templating (for Python)". For more information, see | |
11 http://code.google.com/p/ezt/ | |
12 ''' | |
13 | |
14 import optparse | |
15 import os | |
16 import re | |
17 import shutil | |
18 import string | |
19 import sys | |
20 try: | |
21 from BeautifulSoup import BeautifulSoup, Tag | |
22 except (ImportError, NotImplementedError): | |
23 print ("This tool requires the BeautifulSoup package " | |
24 "(see http://www.crummy.com/software/BeautifulSoup/).\n" | |
25 "Make sure that the file BeautifulSoup.py is either in this directory " | |
26 "or is available in your PYTHON_PATH") | |
27 raise | |
28 | |
29 | |
class EZTFixer(object):
  '''Rewrites Doxygen-generated HTML so that it can be used as an EZT file.

  The HTML is parsed once by the constructor. The fix-up methods then edit
  the parse tree (self.soup) in place, and str() serializes the result.
  '''

  def __init__(self, html):
    '''Parses |html|, a string holding one Doxygen-generated HTML page.'''
    self.soup = BeautifulSoup(html)

  @staticmethod
  def _IdentityIndex(nodes, target):
    '''Returns the position of the object |target| itself within |nodes|.

    list.index() matches with ==, and BeautifulSoup tags compare by value, so
    it can return the position of an earlier sibling that merely looks like
    |target|. This lookup matches by identity instead.

    Raises:
      ValueError: |target| is not an element of |nodes|.
    '''
    for index, node in enumerate(nodes):
      if node is target:
        return index
    raise ValueError('node is not in list')

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings to EZT's liking.

    This includes using <th> instead of <h2> for the heading, and putting
    the "name" attribute into the "id" attribute of the <tr> tag.

    For example, this html:
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>

    would be converted to this:
      <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

    Also, this function splits up tables into multiple separate tables if
    a table heading appears in the middle of a table.
    '''
    table_headers = []
    for row in self.soup.findAll('tr'):
      cell = row.td
      heading = cell.h2 if cell is not None else None
      anchor = heading.a if heading is not None else None
      # Use get() rather than ['name']: an anchor that lacks a name attribute
      # (for example one that only has an href) must be skipped, not crash
      # the conversion with a KeyError.
      anchor_name = anchor.get('name') if anchor is not None else None
      if not anchor_name:
        continue
      row['id'] = anchor_name
      # The heading text is taken from the node that follows the anchor's
      # start tag.
      cell.string = anchor.next
      # Rename the cell last; the lookups above locate it by its 'td' name.
      cell.name = 'th'
      table_headers.append(row)

    # Walk the headings from last to first so that splitting a table at an
    # earlier heading never has to deal with rows that a later split has
    # already moved away.
    for row in reversed(table_headers):
      # Only a heading that sits directly in a <table> and has at least one
      # row before it marks a split point.
      if not row.findPreviousSibling('tr') or row.parent.name != 'table':
        continue
      table = row.parent
      container = table.parent
      table_index = self._IdentityIndex(container.contents, table)
      new_table = Tag(self.soup, name='table', attrs=table.attrs)
      container.insert(table_index + 1, new_table)
      # Hand the heading row and everything after it over to the new table.
      split_at = self._IdentityIndex(table.contents, row)
      new_table.contents = table.contents[split_at:]
      del table.contents[split_at:]

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute.

    The tabs class may carry a numeric suffix (tabs, tabs2, ...).
    '''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class': re.compile('^(header|tabs[0-9]*|navpath)$')})
    for tag in header_tags:
      tag.extract()

  def FixAll(self):
    '''Applies every fix-up: table headings first, then top-heading removal.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    '''Returns the current parse tree serialized with str().'''
    return str(self.soup)
91 | |
92 | |
def _EztName(filename):
  '''Returns the path of the ezt file to generate for |filename|.

  A trailing ".html" is replaced by ".ezt". Any other name gets ".ezt"
  appended, so the result always differs from |filename| and the input file
  is never overwritten by its own output.
  '''
  suffix = '.html'
  if filename.endswith(suffix):
    return filename[:-len(suffix)] + '.ezt'
  return filename + '.ezt'


def main(argv=None):
  '''Main entry for the html2ezt utility

  html2ezt takes a list of html files and creates a set of ezt files with
  the same basename and in the same directory as the original html files.
  Each new ezt file holds the converted html, suitable for presentation
  on Google Codesite using the EZT tool.

  With -m/--move, each html file is moved, after conversion, into a directory
  named "original_html" that is created next to the directory containing the
  html file.

  Args:
    argv: command-line arguments without the program name. When None,
        optparse falls back to sys.argv[1:].

  Returns:
    1 if no files were given (usage is printed), otherwise 0. Any error
    raised while processing a file is reported and then re-raised.
  '''
  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')
  options, files = parser.parse_args(argv)

  if not files:
    parser.print_usage()
    return 1

  for filename in files:
    try:
      with open(filename, 'r') as html_file:
        html = html_file.read()

      fixer = EZTFixer(html)
      fixer.FixAll()
      with open(_EztName(filename), 'w') as ezt_file:
        ezt_file.write(str(fixer))

      if options.move:
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except Exception:
      # Say which file failed, then let the original error propagate.
      print('Error while processing %s' % filename)
      raise

  return 0


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |